
Commit 2a0e756

Modified python examples to be compatible with Python 3. Removed bad/unneeded imports reported in aws-samples#62.
1 parent 8f72271 commit 2a0e756
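
The Python 3 change that recurs through these diffs is mechanical: Python 2's print statement is a SyntaxError under Python 3, so every bare `print "..."` becomes a `print(...)` call. A standalone sketch of the before/after (the count value is a stand-in, not from the examples):

    count = 3  # stand-in for a DynamicFrame count
    # Python 2 (old examples), a SyntaxError under Python 3:
    #     print "Count: ", count
    # Python 3 (this commit):
    print("Count: ", count)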

File tree: 6 files changed, +18 −26 lines

examples/data_cleaning_and_lambda.md

Lines changed: 2 additions & 3 deletions
@@ -42,13 +42,12 @@ Begin by pasting some boilerplate into the DevEndpoint notebook to import the
 AWS Glue libraries we'll need and set up a single `GlueContext`.
 
     import sys
-    from awsglue.transforms import *
     from awsglue.utils import getResolvedOptions
     from pyspark.context import SparkContext
     from awsglue.context import GlueContext
+    from awsglue.dynamicframe import DynamicFrame
     from awsglue.job import Job
-    from pyspark.sql import SparkSession
-
+
     glueContext = GlueContext(SparkContext.getOrCreate())
 
 
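The removed `SparkSession` import was dead weight here: `GlueContext` already carries the session it creates. A minimal sketch of the boilerplate as it stands after this commit (the trailing `spark` variable is illustrative, only needed if you later run Spark SQL):

    import sys
    from awsglue.utils import getResolvedOptions
    from pyspark.context import SparkContext
    from awsglue.context import GlueContext
    from awsglue.dynamicframe import DynamicFrame
    from awsglue.job import Job

    glueContext = GlueContext(SparkContext.getOrCreate())
    # If a SparkSession is ever needed, GlueContext already exposes one,
    # so importing pyspark.sql.SparkSession directly is unnecessary:
    spark = glueContext.spark_session
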
examples/data_cleaning_and_lambda.py

Lines changed: 1 addition & 3 deletions
@@ -1,14 +1,12 @@
-# Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
 import sys
-from awsglue.transforms import *
 from awsglue.utils import getResolvedOptions
 from pyspark.context import SparkContext
 from awsglue.context import GlueContext
 from awsglue.dynamicframe import DynamicFrame
 from awsglue.job import Job
-from pyspark.sql import SparkSession
 from pyspark.sql.functions import udf
 from pyspark.sql.types import StringType
 
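Note that `udf` and `StringType` survive the cleanup, presumably because this script defines a string-returning UDF later on; that is exactly the pattern those two imports support. A minimal sketch, with an illustrative function and column names not taken from the script:

    from pyspark.sql.functions import udf
    from pyspark.sql.types import StringType

    # Wrap a plain Python function as a Spark UDF that returns a string.
    chop_first = udf(lambda s: s[1:] if s else s, StringType())
    # df = df.withColumn("trimmed", chop_first(df["raw"]))  # illustrative names
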
examples/join_and_relationalize.md

Lines changed: 5 additions & 5 deletions
@@ -53,7 +53,7 @@ Begin by pasting some boilerplate into the DevEndpoint notebook to import the
 AWS Glue libraries we'll need and set up a single `GlueContext`.
 
     import sys
-    from awsglue.transforms import *
+    from awsglue.transforms import Join
     from awsglue.utils import getResolvedOptions
     from pyspark.context import SparkContext
     from awsglue.context import GlueContext
@@ -68,7 +68,7 @@ Next, you can easily examine the schemas that the crawler recorded in the Data Catalog
 to see the schema of the `persons_json` table, enter the following in your notebook:
 
     persons = glueContext.create_dynamic_frame.from_catalog(database="legislators", table_name="persons_json")
-    print "Count: ", persons.count()
+    print("Count: ", persons.count())
     persons.printSchema()
 
 Here's the output from the print calls:
@@ -110,7 +110,7 @@ Each person in the table is a member of some congressional body.
 To look at the schema of the `memberships_json` table, enter the following:
 
     memberships = glueContext.create_dynamic_frame.from_catalog(database="legislators", table_name="memberships_json")
-    print "Count: ", memberships.count()
+    print("Count: ", memberships.count())
     memberships.printSchema()
 
 The output is:
@@ -130,7 +130,7 @@ Organizations are parties and the two chambers of congress, the Senate and House
 To look at the schema of the `organizations_json` table, enter:
 
     orgs = glueContext.create_dynamic_frame.from_catalog(database="legislators", table_name="organizations_json")
-    print "Count: ", orgs.count()
+    print("Count: ", orgs.count())
     orgs.printSchema()
 
 The output is:
@@ -219,7 +219,7 @@ We can do all these operations in one (extended) line of code:
     l_history = Join.apply(orgs,
                Join.apply(persons, memberships, 'id', 'person_id'),
                'org_id', 'organization_id').drop_fields(['person_id', 'org_id'])
-    print "Count: ", l_history.count()
+    print("Count: ", l_history.count())
     l_history.printSchema()
 
 The output is:
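
The switch from the wildcard to `from awsglue.transforms import Join` works because `Join` is the only Glue transform this walkthrough uses, and the explicit import documents that dependency. A sketch restating the joined-history step above, with a comment on how the key arguments line up:

    from awsglue.transforms import Join

    # Join.apply(frame1, frame2, keys1, keys2) matches rows on the named
    # key columns: first persons.id == memberships.person_id, then
    # orgs.org_id == organization_id on the intermediate result.
    l_history = Join.apply(orgs,
                           Join.apply(persons, memberships, 'id', 'person_id'),
                           'org_id', 'organization_id').drop_fields(['person_id', 'org_id'])
    print("Count: ", l_history.count())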

examples/join_and_relationalize.py

Lines changed: 7 additions & 7 deletions
@@ -1,8 +1,8 @@
-# Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
 import sys
-from awsglue.transforms import *
+from awsglue.transforms import Join
 from awsglue.utils import getResolvedOptions
 from pyspark.context import SparkContext
 from awsglue.context import GlueContext
@@ -36,26 +36,26 @@
 # ---- Write out the history ----
 
 # Write out the dynamic frame into parquet in "legislator_history" directory
-print "Writing to /legislator_history ..."
+print("Writing to /legislator_history ...")
 glueContext.write_dynamic_frame.from_options(frame = l_history, connection_type = "s3", connection_options = {"path": output_history_dir}, format = "parquet")
 
 # Write out a single file to directory "legislator_single"
 s_history = l_history.toDF().repartition(1)
-print "Writing to /legislator_single ..."
+print("Writing to /legislator_single ...")
 s_history.write.parquet(output_lg_single_dir)
 
 # Convert to data frame, write to directory "legislator_part", partitioned by (separate) Senate and House.
-print "Writing to /legislator_part, partitioned by Senate and House ..."
+print("Writing to /legislator_part, partitioned by Senate and House ...")
 l_history.toDF().write.parquet(output_lg_partitioned_dir, partitionBy=['org_name'])
 
 # ---- Write out to relational databases ----
 
 # Convert the data to flat tables
-print "Converting to flat tables ..."
+print("Converting to flat tables ...")
 dfc = l_history.relationalize("hist_root", redshift_temp_dir)
 
 # Cycle through and write to Redshift.
 for df_name in dfc.keys():
     m_df = dfc.select(df_name)
-    print "Writing to Redshift table: ", df_name, " ..."
+    print("Writing to Redshift table: ", df_name, " ...")
     glueContext.write_dynamic_frame.from_jdbc_conf(frame = m_df, catalog_connection = "redshift3", connection_options = {"dbtable": df_name, "database": "testdb"}, redshift_tmp_dir = redshift_temp_dir)
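
`relationalize` is why the final loop iterates by name: it returns a `DynamicFrameCollection` mapping generated table names (the root plus one table per nested array) to flat DynamicFrames. A minimal sketch of inspecting the collection before the Redshift writes (the exact table names depend on the nested schema):

    # Flatten l_history; dfc behaves like a dict of DynamicFrames.
    dfc = l_history.relationalize("hist_root", redshift_temp_dir)
    for df_name in dfc.keys():
        frame = dfc.select(df_name)
        print("Table:", df_name, "count:", frame.count())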

examples/resolve_choice.md

Lines changed: 1 addition & 2 deletions
@@ -42,12 +42,11 @@ AWS Glue libraries we'll need and set up a single `GlueContext`. We also initialize
 the spark session variable for executing Spark SQL queries later in this script.
 
     import sys
-    from awsglue.transforms import *
     from awsglue.utils import getResolvedOptions
     from pyspark.context import SparkContext
     from awsglue.context import GlueContext
+    from awsglue.dynamicframe import DynamicFrame
     from awsglue.job import Job
-    from pyspark.sql import SparkSession
 
     glueContext = GlueContext(SparkContext.getOrCreate())
    spark = glueContext.spark_session
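
The added `DynamicFrame` import pairs with the Spark SQL setup the prose mentions: query results come back as plain DataFrames and have to be converted back before Glue's writers can consume them. A minimal sketch of that round trip, with a hypothetical view name not taken from this diff:

    from awsglue.dynamicframe import DynamicFrame

    # Run SQL through the session GlueContext provides, then convert the
    # resulting DataFrame back into a DynamicFrame for Glue writers.
    result_df = spark.sql("SELECT * FROM some_view")  # hypothetical view
    result_dyf = DynamicFrame.fromDF(result_df, glueContext, "result_dyf")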

examples/resolve_choice.py

Lines changed: 2 additions & 6 deletions
@@ -1,22 +1,18 @@
-# Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
 import sys
-from awsglue.transforms import *
 from awsglue.utils import getResolvedOptions
 from pyspark.context import SparkContext
 from awsglue.context import GlueContext
 from awsglue.dynamicframe import DynamicFrame
 from awsglue.job import Job
-from pyspark.sql import SparkSession
-from pyspark.sql.functions import udf
-from pyspark.sql.types import StringType
 
 glueContext = GlueContext(SparkContext.getOrCreate())
 spark = glueContext.spark_session
 
 # catalog: database and table name
-db_name = "medicare"
+db_name = "payments"
 tbl_name = "medicare"
 
 # s3 output directories
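
The `db_name` fix is more than cosmetic: these variables presumably feed the Data Catalog lookup this example performs, so pointing `db_name` at the crawler-created `payments` database is what makes the lookup resolve. A hedged sketch of that call (the variable on the left and the wiring are assumed, not shown in this diff):

    # Data Catalog lookup: database "payments", table "medicare".
    medicare_dyf = glueContext.create_dynamic_frame.from_catalog(
        database=db_name,
        table_name=tbl_name)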
