1212 logger .removeHandler (handler )
1313logging .basicConfig (format = '%(asctime)s %(message)s' , level = logging .INFO )
1414
15+
1516class PostgressDB ():
1617 def __init__ (self , username , password , port , host , database ):
1718 self .user_name = username
@@ -45,7 +46,7 @@ def process_json_file(jsonf):
4546 """
4647 logger .info ("Starting conversion JSON format to table format." )
4748 logger .info ("Detecting {} valid JSON structures in the objects" .format (str (len (jsonf ))))
48- #JsonF is a list but the cols and metrics will always be the same across multiple jsons for 1 file
49+ # jsonf is a list, but the cols and metrics are always the same across the JSON objects of a single file
4950 cols = []
5051 try :
5152 cols = [r for r in jsonf [0 ]['reports' ][0 ]['columnHeader' ]['dimensions' ]]
@@ -57,15 +58,14 @@ def process_json_file(jsonf):
5758 except :
5859 logger .warning ("No metrics specified." )
5960
60-
6161 pd_result = None
6262
6363 for list_index in range (len (jsonf )):
6464 data_rows = [r for r in jsonf [list_index ]['reports' ][0 ]['data' ]['rows' ]]
6565 dim_result_dict = {}
6666
6767 for row in data_rows :
68- #if there are dimensions, extract the dimension data and add values per key
68+ # if there are dimensions, extract the dimension data and add values per key
6969 for i in range (len (cols )):
7070 if cols [i ] in dim_result_dict .keys ():
7171 data_list = dim_result_dict [cols [i ]]
@@ -82,25 +82,26 @@ def process_json_file(jsonf):
8282 dim_result_dict .update ({metrics [i ]: data_list })
8383 else :
8484 dim_result_dict [metrics [i ]] = [row ['metrics' ][0 ]['values' ][i ]]
85- #Create dataframe for the first JSON object otherwise append to existing
85+ # Create the dataframe from the first JSON object; otherwise append to the existing one
8686 if list_index == 0 :
8787 pd_result = pd .DataFrame .from_dict (dim_result_dict )
8888 else :
8989 pd_result = pd_result .append (pd .DataFrame .from_dict (dim_result_dict ))
9090 logger .info ("Finished conversion JSON format to table format." )
9191 return pd_result
9292
93+
9394def lambda_handler (event , context ):
9495 logger .info ("Starting appflow conversion" )
9596 bucket_name = event ['Records' ][0 ]['s3' ]['bucket' ]['name' ]
9697 object_key = urllib .parse .unquote_plus (event ['Records' ][0 ]['s3' ]['object' ]['key' ])
9798 s3_client = boto3 .client ('s3' )
9899
99- logger .info ("Processing bucket {}, filename {}" .format (bucket_name ,object_key ))
100+ logger .info ("Processing bucket {}, filename {}" .format (bucket_name , object_key ))
100101
101102 raw_object = s3_client .get_object (Bucket = bucket_name , Key = object_key )
102103 raw_data = json .loads ('[' + raw_object ['Body' ].read ().decode ('utf-8' ).replace ('}\n {' , '},{' ) + ']' )
103- #Raw data is always a list of JSON objects
104+ # Raw data is always a list of JSON objects
104105 pd_result = process_json_file (raw_data )
105106
106107 db = PostgressDB (username = os .getenv ("DB_USERNAME" ),
0 commit comments