Skip to content

Commit 8502d97

Browse files
authored
Merge pull request #5 from open-datastudio/spark_301
Support spark 3.0.1
2 parents e52f51b + 51f15a4 commit 8502d97

File tree

4 files changed

+45
-8
lines changed

4 files changed

+45
-8
lines changed

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ Let's get started!
2727
pip install ods
2828
```
2929

30+
Python `3.6`, `3.7`, and `3.8` are supported.
31+
3032
### Initialize
3133

3234
1. Log in to staroid.com and get an [access token](https://staroid.com/settings/accesstokens), then set the `STAROID_ACCESS_TOKEN` environment variable. See [here](https://github.com/staroids/staroid-python#configuration) for more details.
@@ -81,6 +83,16 @@ spark = ods.spark(spark_conf = {
8183
}).session()
8284
```
8385

86+
Configure the Spark version:
87+
88+
```python
89+
import ods
90+
spark = ods.spark(spark_version = "3.0.1").session()
91+
```
92+
93+
Currently, Spark `3.0.1` and `3.0.0` are supported.
94+
95+
8496
Check [tests/test_spark.py](https://github.com/open-datastudio/ods/blob/master/tests/test_spark.py) for a complete working example.
8597

8698
## Dask
@@ -104,7 +116,6 @@ import ods
104116
ods.ray(cluster_name="")
105117
```
106118

107-
108119
## Get involved
109120

110121
Open Data Studio is an open source project. Please give us feedback and feel free to get involved!

ods/ods.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pathlib import Path
77

88
class Ods:
9-
def __init__(self, staroid=None, ske=None, cache_dir=None, chisel_path=None):
9+
def __init__(self, staroid=None, ske=None, cache_dir=None):
1010
self.__tunnel_processes = {}
1111
self.__ske = None
1212

@@ -28,8 +28,6 @@ def __init__(self, staroid=None, ske=None, cache_dir=None, chisel_path=None):
2828
if ske != None:
2929
self.__ske = ske
3030

31-
self.__chisel_path = chisel_path
32-
3331
def create_or_get_cache_dir(self, module = ""):
3432
"create (if not exists) or return cache dir path for module"
3533
cache_dir = "{}/{}".format(self.__cache_dir, module)
@@ -127,8 +125,27 @@ def init(ske=None, reinit=True):
127125
__singleton["instance"] = Ods(ske=ske)
128126
return __singleton["instance"]
129127

130-
def spark(name, spark_conf=None, chisel_path=None, worker_num=1, worker_type="standard-4", worker_isolation="dedicated", delta=False, aws=True):
128+
def spark(
129+
name,
130+
spark_conf=None,
131+
spark_version="3.0.1",
132+
spark_home=None,
133+
worker_num=1,
134+
worker_type="standard-4",
135+
worker_isolation="dedicated",
136+
delta=False,
137+
aws=True):
131138
init(reinit=False)
132139

133-
cluster = SparkCluster(__singleton["instance"], name, spark_conf=spark_conf, worker_num=worker_num, worker_type=worker_type, worker_isolation=worker_isolation, delta=delta, aws=aws)
140+
cluster = SparkCluster(
141+
__singleton["instance"],
142+
name,
143+
spark_conf=spark_conf,
144+
spark_version=spark_version,
145+
spark_home=spark_home,
146+
worker_num=worker_num,
147+
worker_type=worker_type,
148+
worker_isolation=worker_isolation,
149+
delta=delta,
150+
aws=aws)
134151
return cluster

ods/spark_cluster/spark_cluster.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@
88
from kubernetes import client
99

1010
SPARK_ARTIFACTS={
11+
"3.0.1": {
12+
"image": "opendatastudio/spark-py:v3.0.1-staroid-20200908-01",
13+
"dist": "https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz",
14+
"commit_url": "GITHUB/open-datastudio/spark-serverless:master",
15+
"jars": {
16+
"hadoop-aws": "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.2.0/hadoop-aws-3.2.0.jar",
17+
"aws-java-sdk": "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.563/aws-java-sdk-bundle-1.11.563.jar"
18+
}
19+
},
1120
"3.0.0": {
1221
"image": "opendatastudio/spark-py:v3.0.0-staroid-20200830-01",
1322
"dist": "https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop3.2.tgz",
@@ -31,7 +40,7 @@ def __init__(
3140
opends,
3241
cluster_name,
3342
spark_conf=None,
34-
spark_version="3.0.0",
43+
spark_version="3.0.1",
3544
spark_home=None,
3645
worker_num=1,
3746
worker_type="standard-4",

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="ods",
8-
version="0.0.4",
8+
version="0.0.5",
99
license='MIT',
1010
author="Open Data Studio",
1111
author_email="moon@staroid.com",

0 commit comments

Comments
 (0)