Skip to content

Commit 3115d92

Browse files
Merge pull request #45 from UBC-MDS/feature/scatter_plot
Added docstrings and removed vertical gridlines in plot script
2 parents 88f0f4b + 180672c commit 3115d92

File tree

1 file changed

+60
-6
lines changed

1 file changed

+60
-6
lines changed

src/components/scatter_plot.py

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,42 @@
22
from dash import Dash, dcc, html, Input, Output
33
import pandas as pd
44

5-
PROCESSED_DATA_PATH = "data/processed/tsunami-events.csv"
5+
PROCESSED_DATA_PATH = 'data/processed/tsunami-events.csv'
6+
7+
tsunami_df = pd.read_csv('data/processed/tsunami-events.csv')
8+
9+
years = tsunami_df['year'].dropna().unique()
10+
countries = tsunami_df['country'].dropna().unique()
11+
country_list = sorted(list(countries))
12+
613

714
def create_scatter_plot(year_start=1900, year_end=2022, countries=[]):
8-
chart = alt.Chart(get_data(year_start, year_end, countries)).mark_point(opacity=0.65, size=20).encode(
15+
"""
16+
Create a scatter plot of earthquake magnitude
17+
versus total deaths recorded (on a log-scale) per tsunami event
18+
between year_start and year_end for the countries specified.
19+
20+
Parameters
21+
----------
22+
year_start : int
23+
the lower bound of the range of years selected by user
24+
year_end : int
25+
the upper bound of the range of years selected by user
26+
countries : list
27+
the selection of countries selected by user
28+
29+
Returns
30+
-------
31+
scatter plot object
32+
scatter plot chart of earthquake magnitude versus total deaths
33+
recorded (on a log-scale)
34+
"""
35+
chart = alt.Chart(get_data(year_start, year_end, countries)
36+
).mark_point(opacity=0.65, size=20).encode(
937
x=alt.X('earthquake_magnitude',
1038
title='Earthquake Magnitude (Richter Scale)',
11-
scale=alt.Scale(domain=(5.5, 10))),
39+
scale=alt.Scale(domain=(5.5, 10)),
40+
axis=alt.Axis(grid=False)),
1241
y=alt.Y('total_deaths',
1342
title='Total Deaths (log-scale), per Event',
1443
scale=alt.Scale(type='log')),
@@ -25,7 +54,30 @@ def create_scatter_plot(year_start=1900, year_end=2022, countries=[]):
2554

2655

2756
def get_data(year_start=1802, year_end=2022, countries=[]):
57+
"""
58+
Return the processed dataframe of the original data, including
59+
a new column computing the Mercalli Intensity scale per tsunami event,
60+
subsetting for observations between year_start and year_end, for the
61+
countries specified. In addition, should more than 10 countries be
62+
specified by the user, the function only returns observations for
63+
the 10 countries whose individual earthquake observations recorded
64+
the largest magnitudes within the specified year range and countries.
65+
66+
Parameters
67+
----------
68+
year_start : int
69+
the lower bound (exclusive) of the range of years selected by the user
70+
year_end : int
71+
the upper bound (exclusive) of the range of years selected by the user
72+
countries : list
73+
the selection of countries selected by user
2874
75+
Returns
76+
-------
77+
df : pandas.DataFrame
78+
a processed dataframe with additional columns and filtered
79+
data, comprising no more than 10 countries
80+
"""
2981
tsunami_events = pd.read_csv(PROCESSED_DATA_PATH)
3082

3183
if not (year_start and year_end and year_start <= year_end):
@@ -52,9 +104,11 @@ def get_data(year_start=1802, year_end=2022, countries=[]):
52104

53105
return tsunami_events.loc[(tsunami_events['year'] > year_start) &
54106
(tsunami_events['year'] < year_end) &
55-
(tsunami_events['country'].isin(countries)) &
56-
(tsunami_events['country'].isin(countries_top10))]
107+
(tsunami_events['country'].isin(
108+
countries)) &
109+
(tsunami_events['country'].isin(
110+
countries_top10))]
57111
else:
58112
return tsunami_events.loc[(tsunami_events['year'] > year_start) &
59113
(tsunami_events['year'] < year_end) &
60-
(tsunami_events['country'].isin(countries))]
114+
(tsunami_events['country'].isin(countries))]

0 commit comments

Comments
 (0)