diff --git a/.github/actions/test-python/action.yml b/.github/actions/test-python/action.yml
index 745b2ff8..b5a7dfc5 100644
--- a/.github/actions/test-python/action.yml
+++ b/.github/actions/test-python/action.yml
@@ -15,6 +15,9 @@ inputs:
   spark-compat-version:
     description: Spark compatibility version, e.g. 3.4
     required: true
+  hadoop-version:
+    description: Hadoop version, e.g. 2.7 or 2
+    required: true
   scala-compat-version:
     description: Scala compatibility version, e.g. 2.12
     required: true
@@ -40,6 +43,26 @@ runs:
       name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }}
       path: .
 
+  - name: Cache Spark Binaries
+    uses: actions/cache@v4
+    if: inputs.scala-compat-version == '2.12' && ! contains(inputs.spark-version, '-SNAPSHOT')
+    with:
+      path: ~/spark
+      key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}
+
+  - name: Setup Spark Binaries
+    if: inputs.scala-compat-version == '2.12' && ! contains(inputs.spark-version, '-SNAPSHOT')
+    env:
+      SPARK_PACKAGE: spark-${{ inputs.spark-version }}/spark-${{ inputs.spark-version }}-bin-hadoop${{ inputs.hadoop-version }}${{ inputs.scala-compat-version == '2.13' && '-scala2.13' || '' }}.tgz
+    run: |
+      if [[ ! -e ~/spark ]]
+      then
+        wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC "${{ runner.temp }}"
+        mv -v "${{ runner.temp }}/$(basename "${SPARK_PACKAGE}" .tgz)" ~/spark
+      fi
+      echo "SPARK_BIN_HOME=$(cd ~/spark; pwd)" >> $GITHUB_ENV
+    shell: bash
+
   - name: Cache Maven packages
     if: github.event_name != 'merge_group'
     uses: actions/cache@v4
@@ -120,10 +143,20 @@ runs:
       if [[ "$state" == "fail" ]]; then exit 1; fi
     shell: bash
 
+  - name: Start Spark Connect
+    id: spark-connect
+    if: inputs.scala-compat-version == '2.12' && ! contains(inputs.spark-version, '-SNAPSHOT')
+    run: |
+      $SPARK_BIN_HOME/sbin/start-connect-server.sh --packages org.apache.spark:spark-connect_${{ inputs.scala-compat-version }}:${{ inputs.spark-version }}
+    shell: bash
+
   - name: Python Integration Tests (Spark Connect)
+    if: steps.spark-connect.outcome == 'success'
     env:
       PYTHONPATH: python:python/test
+      TEST_SPARK_CONNECT_SERVER: sc://localhost:15002
     run: |
+      pip install "pyspark[connect]"
       find python/test -name 'test*.py' > tests
       while read test
       do
@@ -135,6 +168,16 @@ runs:
       if [[ "$state" == "fail" ]]; then exit 1; fi
     shell: bash
 
+  - name: Stop Spark Connect
+    if: always() && steps.spark-connect.outcome == 'success'
+    run: |
+      $SPARK_BIN_HOME/sbin/stop-connect-server.sh
+      echo "::group::Spark Connect server log"
+      ls -lah $SPARK_BIN_HOME/logs || true
+      cat $SPARK_BIN_HOME/logs/spark-*-org.apache.spark.sql.connect.service.SparkConnectServer-*.out || true
+      echo "::endgroup::"
+    shell: bash
+
   - name: Python Release Test
     run: |
       $SPARK_HOME/bin/spark-submit --packages uk.co.gresearch.spark:spark-extension_${{ inputs.scala-compat-version }}:$SPARK_EXTENSION_VERSION test-release.py
diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml
index 444e80f5..c807cb69 100644
--- a/.github/workflows/test-python.yml
+++ b/.github/workflows/test-python.yml
@@ -19,28 +19,34 @@ jobs:
         include:
           - spark-compat-version: '3.0'
             spark-version: '3.0.3'
+            hadoop-version: '2.7'
             scala-compat-version: '2.12'
             scala-version: '2.12.10'
             python-version: '3.8'
           - spark-compat-version: '3.1'
             spark-version: '3.1.3'
+            hadoop-version: '2.7'
             scala-compat-version: '2.12'
             scala-version: '2.12.10'
             python-version: '3.8'
           - spark-compat-version: '3.2'
             spark-version: '3.2.4'
+            hadoop-version: '2.7'
             scala-compat-version: '2.12'
             scala-version: '2.12.15'
           - spark-compat-version: '3.3'
             spark-version: '3.3.4'
+            hadoop-version: '3'
             scala-compat-version: '2.12'
             scala-version: '2.12.15'
           - spark-compat-version: '3.4'
             spark-version: '3.4.2'
+            hadoop-version: '3'
             scala-compat-version: '2.12'
             scala-version: '2.12.17'
           - spark-compat-version: '3.5'
             spark-version: '3.5.1'
+            hadoop-version: '3'
             scala-compat-version: '2.12'
             scala-version: '2.12.18'