
Commit 2a0e756

Modified python examples to be compatible with Python 3. Removed bad/unneeded imports reported in aws-samples#62.
1 parent 8f72271 commit 2a0e756
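
The Python 3 change that recurs through these diffs is mechanical: Python 2's print statement is a SyntaxError under Python 3, so every bare `print "..."` becomes a `print(...)` call. A standalone sketch of the before/after (the count value is a stand-in, not from the examples):

    count = 3  # stand-in for a DynamicFrame count
    # Python 2 (old examples), a SyntaxError under Python 3:
    #     print "Count: ", count
    # Python 3 (this commit):
    print("Count: ", count)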

File tree: 6 files changed, +18 −26 lines

examples/data_cleaning_and_lambda.md

Lines changed: 2 additions & 3 deletions
@@ -42,13 +42,12 @@ Begin by pasting some boilerplate into the DevEndpoint notebook to import the
 AWS Glue libraries we'll need and set up a single `GlueContext`.
 
     import sys
-    from awsglue.transforms import *
     from awsglue.utils import getResolvedOptions
     from pyspark.context import SparkContext
     from awsglue.context import GlueContext
+    from awsglue.dynamicframe import DynamicFrame
     from awsglue.job import Job
-    from pyspark.sql import SparkSession
-
+
     glueContext = GlueContext(SparkContext.getOrCreate())
 
 
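The removed `SparkSession` import was dead weight here: `GlueContext` already carries the session it creates. A minimal sketch of the boilerplate as it stands after this commit (the trailing `spark` variable is illustrative, only needed if you later run Spark SQL):

    import sys
    from awsglue.utils import getResolvedOptions
    from pyspark.context import SparkContext
    from awsglue.context import GlueContext
    from awsglue.dynamicframe import DynamicFrame
    from awsglue.job import Job

    glueContext = GlueContext(SparkContext.getOrCreate())
    # If a SparkSession is ever needed, GlueContext already exposes one,
    # so importing pyspark.sql.SparkSession directly is unnecessary:
    spark = glueContext.spark_session
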
examples/data_cleaning_and_lambda.py

Lines changed: 1 addition & 3 deletions
@@ -1,14 +1,12 @@
-# Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
 import sys
-from awsglue.transforms import *
 from awsglue.utils import getResolvedOptions
 from pyspark.context import SparkContext
 from awsglue.context import GlueContext
 from awsglue.dynamicframe import DynamicFrame
 from awsglue.job import Job
-from pyspark.sql import SparkSession
 from pyspark.sql.functions import udf
 from pyspark.sql.types import StringType
 
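Note that `udf` and `StringType` survive the cleanup, presumably because this script defines a string-returning UDF later on; that is exactly the pattern those two imports support. A minimal sketch, with an illustrative function and column names not taken from the script:

    from pyspark.sql.functions import udf
    from pyspark.sql.types import StringType

    # Wrap a plain Python function as a Spark UDF that returns a string.
    chop_first = udf(lambda s: s[1:] if s else s, StringType())
    # df = df.withColumn("trimmed", chop_first(df["raw"]))  # illustrative names
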
examples/join_and_relationalize.md

Lines changed: 5 additions & 5 deletions
@@ -53,7 +53,7 @@ Begin by pasting some boilerplate into the DevEndpoint notebook to import the
 AWS Glue libraries we'll need and set up a single `GlueContext`.
 
     import sys
-    from awsglue.transforms import *
+    from awsglue.transforms import Join
     from awsglue.utils import getResolvedOptions
     from pyspark.context import SparkContext
     from awsglue.context import GlueContext
@@ -68,7 +68,7 @@ Next, you can easily examine the schemas that the crawler recorded in the Data Catalog
 to see the schema of the `persons_json` table, enter the following in your notebook:
 
     persons = glueContext.create_dynamic_frame.from_catalog(database="legislators", table_name="persons_json")
-    print "Count: ", persons.count()
+    print("Count: ", persons.count())
     persons.printSchema()
 
 Here's the output from the print calls:
@@ -110,7 +110,7 @@ Each person in the table is a member of some congressional body.
 To look at the schema of the `memberships_json` table, enter the following:
 
     memberships = glueContext.create_dynamic_frame.from_catalog(database="legislators", table_name="memberships_json")
-    print "Count: ", memberships.count()
+    print("Count: ", memberships.count())
     memberships.printSchema()
 
 The output is:
@@ -130,7 +130,7 @@ Organizations are parties and the two chambers of congress, the Senate and House
 To look at the schema of the `organizations_json` table, enter:
 
     orgs = glueContext.create_dynamic_frame.from_catalog(database="legislators", table_name="organizations_json")
-    print "Count: ", orgs.count()
+    print("Count: ", orgs.count())
     orgs.printSchema()
 
 The output is:
@@ -219,7 +219,7 @@ We can do all these operations in one (extended) line of code:
     l_history = Join.apply(orgs,
                Join.apply(persons, memberships, 'id', 'person_id'),
                'org_id', 'organization_id').drop_fields(['person_id', 'org_id'])
-    print "Count: ", l_history.count()
+    print("Count: ", l_history.count())
     l_history.printSchema()
 
 The output is:
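
The switch from the wildcard to `from awsglue.transforms import Join` works because `Join` is the only Glue transform this walkthrough uses, and the explicit import documents that dependency. A sketch restating the joined-history step above, with a comment on how the key arguments line up:

    from awsglue.transforms import Join

    # Join.apply(frame1, frame2, keys1, keys2) matches rows on the named
    # key columns: first persons.id == memberships.person_id, then
    # orgs.org_id == organization_id on the intermediate result.
    l_history = Join.apply(orgs,
                           Join.apply(persons, memberships, 'id', 'person_id'),
                           'org_id', 'organization_id').drop_fields(['person_id', 'org_id'])
    print("Count: ", l_history.count())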

examples/join_and_relationalize.py

Lines changed: 7 additions & 7 deletions
@@ -1,8 +1,8 @@
-# Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
 import sys
-from awsglue.transforms import *
+from awsglue.transforms import Join
 from awsglue.utils import getResolvedOptions
 from pyspark.context import SparkContext
 from awsglue.context import GlueContext
@@ -36,26 +36,26 @@
 # ---- Write out the history ----
 
 # Write out the dynamic frame into parquet in "legislator_history" directory
-print "Writing to /legislator_history ..."
+print("Writing to /legislator_history ...")
 glueContext.write_dynamic_frame.from_options(frame = l_history, connection_type = "s3", connection_options = {"path": output_history_dir}, format = "parquet")
 
 # Write out a single file to directory "legislator_single"
 s_history = l_history.toDF().repartition(1)
-print "Writing to /legislator_single ..."
+print("Writing to /legislator_single ...")
 s_history.write.parquet(output_lg_single_dir)
 
 # Convert to data frame, write to directory "legislator_part", partitioned by (separate) Senate and House.
-print "Writing to /legislator_part, partitioned by Senate and House ..."
+print("Writing to /legislator_part, partitioned by Senate and House ...")
 l_history.toDF().write.parquet(output_lg_partitioned_dir, partitionBy=['org_name'])
 
 # ---- Write out to relational databases ----
 
 # Convert the data to flat tables
-print "Converting to flat tables ..."
+print("Converting to flat tables ...")
 dfc = l_history.relationalize("hist_root", redshift_temp_dir)
 
 # Cycle through and write to Redshift.
 for df_name in dfc.keys():
     m_df = dfc.select(df_name)
-    print "Writing to Redshift table: ", df_name, " ..."
+    print("Writing to Redshift table: ", df_name, " ...")
     glueContext.write_dynamic_frame.from_jdbc_conf(frame = m_df, catalog_connection = "redshift3", connection_options = {"dbtable": df_name, "database": "testdb"}, redshift_tmp_dir = redshift_temp_dir)
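
`relationalize` is why the final loop iterates by name: it returns a `DynamicFrameCollection` mapping generated table names (the root plus one table per nested array) to flat DynamicFrames. A minimal sketch of inspecting the collection before the Redshift writes (the exact table names depend on the nested schema):

    # Flatten l_history; dfc behaves like a dict of DynamicFrames.
    dfc = l_history.relationalize("hist_root", redshift_temp_dir)
    for df_name in dfc.keys():
        frame = dfc.select(df_name)
        print("Table:", df_name, "count:", frame.count())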

examples/resolve_choice.md

Lines changed: 1 addition & 2 deletions
@@ -42,12 +42,11 @@ AWS Glue libraries we'll need and set up a single `GlueContext`. We also initialize
 the spark session variable for executing Spark SQL queries later in this script.
 
     import sys
-    from awsglue.transforms import *
     from awsglue.utils import getResolvedOptions
     from pyspark.context import SparkContext
     from awsglue.context import GlueContext
+    from awsglue.dynamicframe import DynamicFrame
     from awsglue.job import Job
-    from pyspark.sql import SparkSession
 
     glueContext = GlueContext(SparkContext.getOrCreate())
    spark = glueContext.spark_session
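
The added `DynamicFrame` import pairs with the Spark SQL setup the prose mentions: query results come back as plain DataFrames and have to be converted back before Glue's writers can consume them. A minimal sketch of that round trip, with a hypothetical view name not taken from this diff:

    from awsglue.dynamicframe import DynamicFrame

    # Run SQL through the session GlueContext provides, then convert the
    # resulting DataFrame back into a DynamicFrame for Glue writers.
    result_df = spark.sql("SELECT * FROM some_view")  # hypothetical view
    result_dyf = DynamicFrame.fromDF(result_df, glueContext, "result_dyf")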

examples/resolve_choice.py

Lines changed: 2 additions & 6 deletions
@@ -1,22 +1,18 @@
-# Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
 import sys
-from awsglue.transforms import *
 from awsglue.utils import getResolvedOptions
 from pyspark.context import SparkContext
 from awsglue.context import GlueContext
 from awsglue.dynamicframe import DynamicFrame
 from awsglue.job import Job
-from pyspark.sql import SparkSession
-from pyspark.sql.functions import udf
-from pyspark.sql.types import StringType
 
 glueContext = GlueContext(SparkContext.getOrCreate())
 spark = glueContext.spark_session
 
 # catalog: database and table name
-db_name = "medicare"
+db_name = "payments"
 tbl_name = "medicare"
 
 # s3 output directories
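
The `db_name` fix is more than cosmetic: these variables presumably feed the Data Catalog lookup this example performs, so pointing `db_name` at the crawler-created `payments` database is what makes the lookup resolve. A hedged sketch of that call (the variable on the left and the wiring are assumed, not shown in this diff):

    # Data Catalog lookup: database "payments", table "medicare".
    medicare_dyf = glueContext.create_dynamic_frame.from_catalog(
        database=db_name,
        table_name=tbl_name)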
