22from dash import Dash , dcc , html , Input , Output
33import pandas as pd
44
5- PROCESSED_DATA_PATH = "data/processed/tsunami-events.csv"
5+ PROCESSED_DATA_PATH = 'data/processed/tsunami-events.csv'
6+
7+ tsunami_df = pd .read_csv ('data/processed/tsunami-events.csv' )
8+
9+ years = tsunami_df ['year' ].dropna ().unique ()
10+ countries = tsunami_df ['country' ].dropna ().unique ()
11+ country_list = sorted (list (countries ))
12+
613
714def create_scatter_plot (year_start = 1900 , year_end = 2022 , countries = []):
8- chart = alt .Chart (get_data (year_start , year_end , countries )).mark_point (opacity = 0.65 , size = 20 ).encode (
15+ """
16+ Create a scatter plot of earthquake magnitude versus total
17+ deaths recorded (on a log scale) per tsunami event, for events
18+ between year_start and year_end in the countries specified.
19+
20+ Parameters
21+ ----------
22+ year_start : int
23+ the lower bound of the range of years selected by user
24+ year_end : int
25+ the upper bound of the range of years selected by user
26+ countries : list
27+ the selection of countries selected by user
28+
29+ Returns
30+ -------
31+ scatter plot object
32+ scatter plot chart of earthquake magnitude versus total deaths
33+ recorded (on a log scale)
34+ """
35+ chart = alt .Chart (get_data (year_start , year_end , countries )
36+ ).mark_point (opacity = 0.65 , size = 20 ).encode (
937 x = alt .X ('earthquake_magnitude' ,
1038 title = 'Earthquake Magnitude (Richter Scale)' ,
11- scale = alt .Scale (domain = (5.5 , 10 ))),
39+ scale = alt .Scale (domain = (5.5 , 10 )),
40+ axis = alt .Axis (grid = False )),
1241 y = alt .Y ('total_deaths' ,
1342 title = 'Total Deaths (log-scale), per Event' ,
1443 scale = alt .Scale (type = 'log' )),
@@ -25,7 +54,30 @@ def create_scatter_plot(year_start=1900, year_end=2022, countries=[]):
2554
2655
2756def get_data (year_start = 1802 , year_end = 2022 , countries = []):
57+ """
58+ Return the processed dataframe of the original data, including a new
59+ column computing the Mercalli Intensity scale per tsunami event,
60+ restricted to observations strictly between year_start and year_end
61+ for the countries specified. In addition, if more than 10 countries
62+ are specified by the user, only observations for the 10 countries
63+ whose individual earthquake observations recorded the largest
64+ magnitudes within the year range and countries specified are returned.
65+
66+ Parameters
67+ ----------
68+ year_start : int
69+ the lower bound of the range of years selected by user
70+ year_end : int
71+ the upper bound of the range of years selected by user
72+ countries : list
73+ the selection of countries selected by user
2874
75+ Returns
76+ -------
77+ df:
78+ a processed dataframe with additional columns and filtered
79+ data, comprising no more than 10 countries
80+ """
2981 tsunami_events = pd .read_csv (PROCESSED_DATA_PATH )
3082
3183 if not (year_start and year_end and year_start <= year_end ):
@@ -52,9 +104,11 @@ def get_data(year_start=1802, year_end=2022, countries=[]):
52104
53105 return tsunami_events .loc [(tsunami_events ['year' ] > year_start ) &
54106 (tsunami_events ['year' ] < year_end ) &
55- (tsunami_events ['country' ].isin (countries )) &
56- (tsunami_events ['country' ].isin (countries_top10 ))]
107+ (tsunami_events ['country' ].isin (
108+ countries )) &
109+ (tsunami_events ['country' ].isin (
110+ countries_top10 ))]
57111 else :
58112 return tsunami_events .loc [(tsunami_events ['year' ] > year_start ) &
59113 (tsunami_events ['year' ] < year_end ) &
60- (tsunami_events ['country' ].isin (countries ))]
114+ (tsunami_events ['country' ].isin (countries ))]
0 commit comments