Cache Spark download in CI to speed up builds #200
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow for the PyDeequ V2 test suite.
#
# Runs the V2 unit tests, then starts a local Spark Connect server with the
# Deequ plugin JAR loaded and runs the V2 integration tests against it.
# The Spark distribution and the Deequ JAR are cached between runs so they
# are only downloaded on a cache miss.
name: PyDeequ V2 Tests

on:
  push:
    branches:
      - "**"
  pull_request:
    branches:
      - "master"

jobs:
  # V2 tests with Spark Connect (Python 3.12)
  v2-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - uses: actions/setup-python@v5
        name: Install Python 3.12
        with:
          python-version: "3.12"

      - uses: actions/setup-java@v4
        name: Setup Java 17
        with:
          distribution: "corretto"
          java-version: "17"

      # Both cached paths live in the workspace root; the two download steps
      # below are skipped when this step reports a cache hit.
      - name: Cache Spark and Deequ JAR
        id: cache-spark
        uses: actions/cache@v4
        with:
          path: |
            spark-3.5.0-bin-hadoop3
            deequ_2.12-2.1.0b-spark-3.5.jar
          key: spark-3.5.0-deequ-2.1.0b

      - name: Download Spark 3.5
        if: steps.cache-spark.outputs.cache-hit != 'true'
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page as the tarball; --retry covers transient failures.
        run: |
          curl --fail --retry 3 -L -o spark-3.5.0-bin-hadoop3.tgz \
            https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
          tar -xzf spark-3.5.0-bin-hadoop3.tgz
          rm spark-3.5.0-bin-hadoop3.tgz

      - name: Download Deequ JAR
        if: steps.cache-spark.outputs.cache-hit != 'true'
        # Nothing in this step validates the JAR, so --fail is what stops an
        # HTTP error body from being written out as the .jar file.
        run: |
          curl --fail --retry 3 -L -o deequ_2.12-2.1.0b-spark-3.5.jar \
            https://github.com/awslabs/python-deequ/releases/download/v2.0.0b1/deequ_2.12-2.1.0b-spark-3.5.jar

      # Exported through GITHUB_ENV so every later step sees SPARK_HOME.
      - name: Set SPARK_HOME
        run: echo "SPARK_HOME=$PWD/spark-3.5.0-bin-hadoop3" >> "$GITHUB_ENV"

      - name: Install Python dependencies
        run: |
          uv pip install -e ".[dev]" --system
          uv pip install "pyspark[connect]==3.5.0" --system

      - name: Run V2 unit tests
        run: |
          pytest tests/v2/test_unit.py -v

      - name: Start Spark Connect Server
        # Poll the Spark Connect port (15002, the port SPARK_REMOTE points at
        # in the integration-test step) instead of sleeping a fixed time:
        # the step continues as soon as the server accepts connections and
        # fails explicitly if it never does.
        run: |
          "$SPARK_HOME/sbin/start-connect-server.sh" \
            --packages org.apache.spark:spark-connect_2.12:3.5.0 \
            --jars ${{ github.workspace }}/deequ_2.12-2.1.0b-spark-3.5.jar \
            --conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin
          ready=false
          # Up to 60 attempts, 2 seconds apart (about 2 minutes in total).
          for _ in $(seq 1 60); do
            if (exec 3<>/dev/tcp/localhost/15002) 2>/dev/null; then
              ready=true
              break
            fi
            sleep 2
          done
          if [ "$ready" != "true" ]; then
            echo "Spark Connect server did not open port 15002 in time" >&2
            # Best-effort dump of the server logs to aid debugging.
            # NOTE(review): assumes the default log directory under SPARK_HOME.
            cat "$SPARK_HOME"/logs/* 2>/dev/null || true
            exit 1
          fi
          ps aux | grep SparkConnectServer | grep -v grep

      - name: Run V2 integration tests
        env:
          SPARK_REMOTE: "sc://localhost:15002"
        run: |
          pytest tests/v2/ -v --ignore=tests/v2/test_unit.py

      # Runs even when earlier steps fail; "|| true" keeps a failed or
      # unnecessary shutdown from changing the job result.
      - name: Stop Spark Connect Server
        if: always()
        run: |
          "$SPARK_HOME/sbin/stop-connect-server.sh" || true