Skip to content

Commit 3d4296b

Browse files
authored
Add gpuCI support (dask-contrib#240)
* Add dask-cudf tests with GPU marker * Change --gpu option to --rungpu * Add build scripts for gpuCI * xfail GPU table creation from memory * Activate correct environment * Force reinstall openjdk to resolve Maven issues * Try to install Java side with debug switches * Pass options to correct command * Set JAVA_HOME to gpuCI workspace * Specify MAVEN_OPTS * Move pytest marker to pytest.ini * Add GPU tests for Context * Bump RAPIDS_VER to 21.12 * xfail GPU tests that aren't currently working * Rerun tests * Use gpu param for table creation in test_input_types
1 parent 3f9b7e3 commit 3d4296b

File tree

6 files changed

+257
-61
lines changed

6 files changed

+257
-61
lines changed

conftest.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
11
import pytest
22

33
pytest_plugins = ["distributed.utils_test", "tests.integration.fixtures"]
4+
5+
6+
def pytest_addoption(parser):
    """Register the ``--rungpu`` command-line flag.

    The flag is a boolean switch (``store_true``); ``pytest_runtest_setup``
    in this file reads it to decide whether GPU-marked tests may run.
    """
    parser.addoption(
        "--rungpu",
        action="store_true",
        help="run tests meant for GPU",
    )
8+
9+
10+
def pytest_runtest_setup(item):
    """Skip a test carrying the ``gpu`` keyword unless ``--rungpu`` was given.

    Tests without the ``gpu`` keyword are never affected; the option is only
    consulted for items that do carry it.
    """
    if "gpu" not in item.keywords:
        return
    if item.config.getoption("--rungpu"):
        return
    pytest.skip("need --rungpu option to run")
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
PYTHON_VER:
2+
- "3.8"
3+
4+
CUDA_VER:
5+
- "11.2"
6+
7+
LINUX_VER:
8+
- ubuntu18.04
9+
10+
RAPIDS_VER:
11+
- "21.12"
12+
13+
excludes:
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/bin/bash
##################################################
# dask-sql GPU build and test script for CI      #
##################################################
# Abort on the first failing command so a broken setup step fails the job
# instead of running the tests in a half-built environment.
set -e
NUMARGS=$#
ARGS=$*

# Arg parsing function
# Succeeds when the script received at least one argument and "$1" appears
# among them as a whole, space-delimited word.
function hasArg {
    (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
}

# Set path and build parallel level
export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}

# Set home to the job's workspace
export HOME="$WORKSPACE"

# specify maven options
# Keep the local Maven repository inside the workspace.
export MAVEN_OPTS="-Dmaven.repo.local=${WORKSPACE}/.m2/repository"

# Switch to project root; also root of repo checkout
cd "$WORKSPACE"

# Determine CUDA release version
# Strips the last dot-separated component of CUDA_VERSION.
export CUDA_REL=${CUDA_VERSION%.*}

################################################################################
# SETUP - Check environment
################################################################################

gpuci_logger "Check environment variables"
env

gpuci_logger "Check GPU usage"
nvidia-smi

gpuci_logger "Activate conda env"
. /opt/conda/etc/profile.d/conda.sh
conda activate dask_sql

gpuci_logger "Install dask"
python -m pip install git+https://github.com/dask/dask

gpuci_logger "Install distributed"
python -m pip install git+https://github.com/dask/distributed

gpuci_logger "Install dask-sql"
pip install -e ".[dev]"
python setup.py java

gpuci_logger "Check Python version"
python --version

gpuci_logger "Check conda environment"
conda info
conda config --show-sources
conda list --show-channel-urls

gpuci_logger "Python py.test for dask-sql"
# Only tests marked "gpu" are selected (-m gpu); --rungpu is passed as well
# because the gpu marker is documented as skipped by default without it.
# "$WORKSPACE" is quoted, as everywhere else in this script, so a workspace
# path containing spaces is not word-split.
py.test "$WORKSPACE" -n 4 -v -m gpu --rungpu \
    --junitxml="$WORKSPACE/junit-dask-sql.xml" \
    --cov-config="$WORKSPACE/.coveragerc" \
    --cov=dask_sql \
    --cov-report=xml:"$WORKSPACE/dask-sql-coverage.xml" \
    --cov-report term

pytest.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ addopts =
55
--cov-report=term-missing
66
testpaths =
77
tests
8+
markers =
9+
gpu: marks tests that require GPUs (skipped by default, run with '--rungpu')

tests/integration/test_create.py

Lines changed: 69 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99

1010
@skip_if_external_scheduler
11-
def test_create_from_csv(c, df, temporary_data_file):
11+
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
12+
def test_create_from_csv(c, df, temporary_data_file, gpu):
1213
df.to_csv(temporary_data_file, index=False)
1314

1415
c.sql(
@@ -17,7 +18,8 @@ def test_create_from_csv(c, df, temporary_data_file):
1718
new_table
1819
WITH (
1920
location = '{temporary_data_file}',
20-
format = 'csv'
21+
format = 'csv',
22+
gpu = {gpu}
2123
)
2224
"""
2325
)
@@ -28,10 +30,28 @@ def test_create_from_csv(c, df, temporary_data_file):
2830
"""
2931
).compute()
3032

33+
if gpu:
34+
result_df = result_df.to_pandas()
35+
3136
assert_frame_equal(result_df, df)
3237

3338

34-
def test_cluster_memory(client, c, df):
39+
@pytest.mark.parametrize(
40+
"gpu",
41+
[
42+
False,
43+
pytest.param(
44+
True,
45+
marks=[
46+
pytest.mark.gpu,
47+
pytest.mark.xfail(
48+
reason="dataframes on memory currently aren't being converted to dask-cudf"
49+
),
50+
],
51+
),
52+
],
53+
)
54+
def test_cluster_memory(client, c, df, gpu):
3555
client.publish_dataset(df=dd.from_pandas(df, npartitions=1))
3656

3757
c.sql(
@@ -40,7 +60,8 @@ def test_cluster_memory(client, c, df):
4060
new_table
4161
WITH (
4262
location = 'df',
43-
format = 'memory'
63+
format = 'memory',
64+
gpu = {gpu}
4465
)
4566
"""
4667
)
@@ -51,11 +72,15 @@ def test_cluster_memory(client, c, df):
5172
"""
5273
).compute()
5374

75+
if gpu:
76+
return_df = return_df.to_pandas()
77+
5478
assert_frame_equal(df, return_df)
5579

5680

5781
@skip_if_external_scheduler
58-
def test_create_from_csv_persist(c, df, temporary_data_file):
82+
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
83+
def test_create_from_csv_persist(c, df, temporary_data_file, gpu):
5984
df.to_csv(temporary_data_file, index=False)
6085

6186
c.sql(
@@ -65,7 +90,8 @@ def test_create_from_csv_persist(c, df, temporary_data_file):
6590
WITH (
6691
location = '{temporary_data_file}',
6792
format = 'csv',
68-
persist = True
93+
persist = True,
94+
gpu = {gpu}
6995
)
7096
"""
7197
)
@@ -76,6 +102,9 @@ def test_create_from_csv_persist(c, df, temporary_data_file):
76102
"""
77103
).compute()
78104

105+
if gpu:
106+
return_df = return_df.to_pandas()
107+
79108
assert_frame_equal(df, return_df)
80109

81110

@@ -143,15 +172,29 @@ def test_create_from_query(c, df):
143172

144173

145174
@skip_if_external_scheduler
146-
def test_view_table_persist(c, temporary_data_file, df):
175+
@pytest.mark.parametrize(
176+
"gpu",
177+
[
178+
False,
179+
pytest.param(
180+
True,
181+
marks=(
182+
pytest.mark.gpu,
183+
pytest.mark.xfail(reason="to_pandas() changes int precision"),
184+
),
185+
),
186+
],
187+
)
188+
def test_view_table_persist(c, temporary_data_file, df, gpu):
147189
df.to_csv(temporary_data_file, index=False)
148190
c.sql(
149191
f"""
150192
CREATE TABLE
151193
new_table
152194
WITH (
153195
location = '{temporary_data_file}',
154-
format = 'csv'
196+
format = 'csv',
197+
gpu = {gpu}
155198
)
156199
"""
157200
)
@@ -177,21 +220,27 @@ def test_view_table_persist(c, temporary_data_file, df):
177220
"""
178221
)
179222

180-
assert_frame_equal(
181-
c.sql("SELECT c FROM count_view").compute(), pd.DataFrame({"c": [700]})
182-
)
183-
assert_frame_equal(
184-
c.sql("SELECT c FROM count_table").compute(), pd.DataFrame({"c": [700]})
185-
)
223+
from_view = c.sql("SELECT c FROM count_view").compute()
224+
from_table = c.sql("SELECT c FROM count_table").compute()
225+
226+
if gpu:
227+
from_view = from_view.to_pandas()
228+
from_table = from_table.to_pandas()
229+
230+
assert_frame_equal(from_view, pd.DataFrame({"c": [700]}))
231+
assert_frame_equal(from_table, pd.DataFrame({"c": [700]}))
186232

187233
df.iloc[:10].to_csv(temporary_data_file, index=False)
188234

189-
assert_frame_equal(
190-
c.sql("SELECT c FROM count_view").compute(), pd.DataFrame({"c": [10]})
191-
)
192-
assert_frame_equal(
193-
c.sql("SELECT c FROM count_table").compute(), pd.DataFrame({"c": [700]})
194-
)
235+
from_view = c.sql("SELECT c FROM count_view").compute()
236+
from_table = c.sql("SELECT c FROM count_table").compute()
237+
238+
if gpu:
239+
from_view = from_view.to_pandas()
240+
from_table = from_table.to_pandas()
241+
242+
assert_frame_equal(from_view, pd.DataFrame({"c": [10]}))
243+
assert_frame_equal(from_table, pd.DataFrame({"c": [700]}))
195244

196245

197246
def test_replace_and_error(c, temporary_data_file, df):

0 commit comments

Comments
 (0)