#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Usage: ./run.sh
# Builds necessary JARs, generates data and queries, and runs fuzz tests for Comet Spark.
# Environment variables:
#   SPARK_HOME          - path to Spark installation (required)
#   SPARK_MASTER        - Spark master URL (default: local[*])
#   SCALA_MAJOR_VERSION - Scala major version to use (default: 2.12)
#   SPARK_MAJOR_VERSION - Spark major version to use (default: 3.5)
#   NUM_FILES           - number of data files to generate (default: 2)
#   NUM_ROWS            - number of rows per file (default: 200)
#   NUM_QUERIES         - number of queries to generate (default: 500)

set -euxo pipefail

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Validate the Spark installation before doing any (potentially slow) build
# work; otherwise `set -u` would only trip at the first spark-submit call.
: "${SPARK_HOME:?SPARK_HOME must be set to the path of a Spark installation}"
SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
[[ -x "${SPARK_SUBMIT}" ]] || die "spark-submit not found or not executable: ${SPARK_SUBMIT}"

# Directory containing this script, and the repository root one level above it.
DIR="$(cd "$(dirname "$0")" && pwd)"
PARENT_DIR="${DIR}/.."
MVN_CMD="${PARENT_DIR}/mvnw"

# User-tunable settings, each falling back to its documented default.
SPARK_MASTER="${SPARK_MASTER:-local[*]}"
SCALA_MAJOR_VERSION="${SCALA_MAJOR_VERSION:-2.12}"
SPARK_MAJOR_VERSION="${SPARK_MAJOR_VERSION:-3.5}"
NUM_FILES="${NUM_FILES:-2}"
NUM_ROWS="${NUM_ROWS:-200}"
NUM_QUERIES="${NUM_QUERIES:-500}"

# Maven profile selector passed both to `make` (via PROFILES) and to mvnw.
PROFILES="-Pscala-${SCALA_MAJOR_VERSION},spark-${SPARK_MAJOR_VERSION}"

# Ask Maven for the project version so the jar file names can be derived.
PROJECT_VERSION=$("${MVN_CMD}" -f "${DIR}/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)
[[ -n "${PROJECT_VERSION}" ]] || die "could not determine project version from ${DIR}/pom.xml"

COMET_SPARK_JAR="${PARENT_DIR}/spark/target/comet-spark${SPARK_MAJOR_VERSION}_${SCALA_MAJOR_VERSION}-${PROJECT_VERSION}.jar"
COMET_FUZZ_JAR="${DIR}/target/comet-fuzz-spark${SPARK_MAJOR_VERSION}_${SCALA_MAJOR_VERSION}-${PROJECT_VERSION}-jar-with-dependencies.jar"

# Package the fuzz-testing module located next to this script.
build_fuzz_jar() {
  echo "Building Fuzz testing jar..."
  "${MVN_CMD}" -f "${DIR}/pom.xml" package -DskipTests "${PROFILES}"
}

if [[ ! -f "${COMET_SPARK_JAR}" ]]; then
  echo "Building Comet Spark jar..."
  # Run the top-level build in a subshell so the working directory of this
  # script is never changed, even if the build fails part-way.
  (cd "${PARENT_DIR}" && PROFILES="${PROFILES}" make)
  # The top-level build may or may not have produced the fuzz jar as well;
  # build it explicitly if it is still missing.
  if [[ ! -f "${COMET_FUZZ_JAR}" ]]; then
    build_fuzz_jar
  fi
else
  build_fuzz_jar
fi

# Both jars are required below; stop with a clear message if a build step
# reported success but did not produce the expected artifact.
[[ -f "${COMET_SPARK_JAR}" ]] || die "Comet Spark jar not found after build: ${COMET_SPARK_JAR}"
[[ -f "${COMET_FUZZ_JAR}" ]] || die "Comet fuzz jar not found after build: ${COMET_FUZZ_JAR}"

echo "Generating data..."
"${SPARK_SUBMIT}" \
  --master "${SPARK_MASTER}" \
  --class org.apache.comet.fuzz.Main \
  "${COMET_FUZZ_JAR}" \
  data --num-files="${NUM_FILES}" --num-rows="${NUM_ROWS}" \
  --exclude-negative-zero \
  --generate-arrays --generate-structs --generate-maps

echo "Generating queries..."
"${SPARK_SUBMIT}" \
  --master "${SPARK_MASTER}" \
  --class org.apache.comet.fuzz.Main \
  "${COMET_FUZZ_JAR}" \
  queries --num-files="${NUM_FILES}" --num-queries="${NUM_QUERIES}"

# Run the fuzz tool's "run" command with the Comet plugin, the Comet shuffle
# manager and off-heap memory enabled; the Comet jar is put on both the driver
# and executor class paths.
# NOTE(review): queries.sql is presumably the file written by the query
# generation step above, relative to the current directory - confirm.
echo "Running fuzz tests..."
"${SPARK_SUBMIT}" \
  --master "${SPARK_MASTER}" \
  --conf spark.memory.offHeap.enabled=true \
  --conf spark.memory.offHeap.size=16G \
  --conf spark.plugins=org.apache.spark.CometPlugin \
  --conf spark.comet.enabled=true \
  --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \
  --conf spark.comet.exec.shuffle.enabled=true \
  --jars "${COMET_SPARK_JAR}" \
  --conf spark.driver.extraClassPath="${COMET_SPARK_JAR}" \
  --conf spark.executor.extraClassPath="${COMET_SPARK_JAR}" \
  --class org.apache.comet.fuzz.Main \
  "${COMET_FUZZ_JAR}" \
  run --num-files="${NUM_FILES}" --filename="queries.sql"