Skip to content

Commit

Permalink
Start and stop Spark Connect server, use for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
EnricoMi committed Aug 1, 2024
1 parent 152bbff commit c7b10ad
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 12 deletions.
60 changes: 48 additions & 12 deletions .github/actions/test-python/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ inputs:
spark-compat-version:
description: Spark compatibility version, e.g. 3.4
required: true
hadoop-version:
description: Hadoop version, e.g. 2.7 or 2
required: true
scala-compat-version:
description: Scala compatibility version, e.g. 2.12
required: true
Expand All @@ -40,6 +43,26 @@ runs:
name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }}
path: .

# Caches the unpacked Spark distribution in ~/spark between workflow runs.
# Skipped for SNAPSHOT Spark versions and for Scala versions other than 2.12.
- name: Cache Spark Binaries
  uses: actions/cache@v4
  if: inputs.scala-compat-version == '2.12' && ! contains(inputs.spark-version, '-SNAPSHOT')
  with:
    path: ~/spark
    # The Spark package placed in ~/spark is built for a specific Hadoop version,
    # so the Hadoop version is part of the key to avoid restoring a mismatching cache.
    key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-hadoop${{ inputs.hadoop-version }}-${{ inputs.scala-compat-version }}

# Downloads and unpacks the Spark distribution into ~/spark unless that path
# already exists, then exports its absolute location as SPARK_BIN_HOME for
# subsequent steps.
- name: Setup Spark Binaries
  if: inputs.scala-compat-version == '2.12' && ! contains(inputs.spark-version, '-SNAPSHOT')
  env:
    SPARK_PACKAGE: spark-${{ inputs.spark-version }}/spark-${{ inputs.spark-version }}-bin-hadoop${{ inputs.hadoop-version }}${{ inputs.scala-compat-version == '2.13' && '-scala2.13' || '' }}.tgz
  run: |
    if [[ ! -e ~/spark ]]
    then
      wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC "${{ runner.temp }}"
      # the unpacked directory carries the archive's file name without the .tgz suffix
      mv -v "${{ runner.temp }}/$(basename "${SPARK_PACKAGE}" .tgz)" ~/spark
    fi
    echo "SPARK_BIN_HOME=$(cd ~/spark; pwd)" >> "$GITHUB_ENV"
  shell: bash

- name: Cache Maven packages
if: github.event_name != 'merge_group'
uses: actions/cache@v4
Expand Down Expand Up @@ -105,29 +128,42 @@ runs:
run: mvn --batch-mode --update-snapshots install -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true -Dgpg.skip
shell: bash

- name: Python Integration Tests
# Starts a Spark Connect server via $SPARK_BIN_HOME/sbin/start-connect-server.sh.
# Runs only for Spark 3.4, 3.5 and 4.x, Scala 2.12, and non-SNAPSHOT Spark versions.
# Later steps check this step's outcome through the id `spark-connect`.
- name: Start Spark Connect
  id: spark-connect
  # startsWith(searchString, searchValue): the string being inspected comes first
  if: (inputs.spark-compat-version == '3.4' || inputs.spark-compat-version == '3.5' || startsWith(inputs.spark-compat-version, '4.')) && inputs.scala-compat-version == '2.12' && ! contains(inputs.spark-version, '-SNAPSHOT')
  run: |
    $SPARK_BIN_HOME/sbin/start-connect-server.sh --packages org.apache.spark:spark-connect_${{ inputs.scala-compat-version }}:${{ inputs.spark-version }}
  shell: bash

- name: Python Unit Tests (Spark Connect)
if: steps.spark-connect.outcome == 'success'
env:
PYTHONPATH: python:python/test
TEST_SPARK_CONNECT_SERVER: sc://localhost:15002
run: |
find python/test -name 'test*.py' > tests
while read test
do
if ! $SPARK_HOME/bin/spark-submit --master "local[2]" --packages uk.co.gresearch.spark:spark-extension_${{ inputs.scala-compat-version }}:$SPARK_EXTENSION_VERSION "$test" test-results-submit
then
state="fail"
fi
done < tests
if [[ "$state" == "fail" ]]; then exit 1; fi
pip install pyspark[connect]
python -m pytest python/test --junit-xml test-results-connect/pytest-$(date +%s.%N)-$RANDOM.xml
shell: bash

- name: Stop Spark Connect
if: always() && steps.spark-connect.outcome == 'success'
run: |
$SPARK_BIN_HOME/sbin/stop-connect-server.sh
echo "::group::Spark Connect server log"
# though started in $SPARK_BIN_HOME/sbin, logs go to $SPARK_HOME/logs
ls -lah $SPARK_HOME/logs || true
cat $SPARK_HOME/logs/spark-*-org.apache.spark.sql.connect.service.SparkConnectServer-*.out || true
echo "::endgroup::"
shell: bash

- name: Python Integration Tests (Spark Connect)
- name: Python Integration Tests
env:
PYTHONPATH: python:python/test
run: |
find python/test -name 'test*.py' > tests
while read test
do
if ! $SPARK_HOME/bin/spark-submit --master "local[2]" --packages uk.co.gresearch.spark:spark-extension_${{ inputs.scala-compat-version }}:$SPARK_EXTENSION_VERSION "$test" test-results-connect
if ! $SPARK_HOME/bin/spark-submit --master "local[2]" --packages uk.co.gresearch.spark:spark-extension_${{ inputs.scala-compat-version }}:$SPARK_EXTENSION_VERSION "$test" test-results-submit
then
state="fail"
fi
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,34 @@ jobs:
include:
- spark-compat-version: '3.0'
spark-version: '3.0.3'
hadoop-version: '2.7'
scala-compat-version: '2.12'
scala-version: '2.12.10'
python-version: '3.8'
- spark-compat-version: '3.1'
spark-version: '3.1.3'
hadoop-version: '2.7'
scala-compat-version: '2.12'
scala-version: '2.12.10'
python-version: '3.8'
- spark-compat-version: '3.2'
spark-version: '3.2.4'
hadoop-version: '2.7'
scala-compat-version: '2.12'
scala-version: '2.12.15'
- spark-compat-version: '3.3'
spark-version: '3.3.4'
hadoop-version: '3'
scala-compat-version: '2.12'
scala-version: '2.12.15'
- spark-compat-version: '3.4'
spark-version: '3.4.2'
hadoop-version: '3'
scala-compat-version: '2.12'
scala-version: '2.12.17'
- spark-compat-version: '3.5'
spark-version: '3.5.1'
hadoop-version: '3'
scala-compat-version: '2.12'
scala-version: '2.12.18'

Expand Down

0 comments on commit c7b10ad

Please sign in to comment.