
Commit b1a84f8

CI cache improvements (#276)
Moves cache population into the `Prime Caches` workflow. Run it on `master` to prepare the caches; any branch will then reuse them. Caching the Spark binaries brings most of the performance benefit.
1 parent 4e80696 commit b1a84f8
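
With this change, cache saving moves out of the build and test actions: the `Prime Caches` workflow writes caches on `master`, and every other job only restores them. As a rough sketch of the consuming side (assuming a composite-action step like the ones already in this repository; the cache paths and key prefixes are the ones used in the diffs below):

```yaml
# Sketch of a restore-only consumer step, assuming it runs inside a
# composite action that defines inputs.spark-version / inputs.scala-version.
- name: Restore Maven packages cache
  uses: actions/cache/restore@v4
  with:
    path: ~/.m2/repository
    # the key written by Prime Caches ends in the run id, so consumers
    # match it via the restore-keys prefix below rather than exactly
    key: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
    restore-keys: |
      ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-
```

Because the saved key ends in `${{ github.run_id }}`, a consumer never hits it exactly; the `restore-keys` prefix match is what finds the most recently primed cache.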

10 files changed (+172 −108 lines)

.github/actions/build-whl/action.yml

Lines changed: 0 additions & 17 deletions

@@ -53,16 +53,6 @@ runs:
      java-version: ${{ inputs.java-compat-version }}
      distribution: 'zulu'

-  - name: Restore Pip packages cache
-    if: github.event_name != 'schedule'
-    uses: actions/cache/restore@v4
-    with:
-      path: ~/.cache/pip
-      key: ${{ runner.os }}-pip-whl-${{ inputs.python-version }}-${{ hashFiles(format('python/requirements-{0}_{1}.txt', inputs.spark-compat-version, inputs.scala-compat-version)) }}
-      restore-keys: |
-        ${{ runner.os }}-pip-whl-${{ inputs.python-version }}-${{ hashFiles(format('python/requirements-{0}_{1}.txt', inputs.spark-compat-version, inputs.scala-compat-version)) }}
-        ${{ runner.os }}-pip-whl-${{ inputs.python-version }}-
-
  - name: Setup Python
    uses: actions/setup-python@v5
    with:

@@ -86,13 +76,6 @@ runs:
      python test-release.py
    shell: bash

-  - name: Save Pip packages cache
-    if: github.event_name != 'schedule' && github.ref == 'refs/heads/master'
-    uses: actions/cache/save@v4
-    with:
-      path: ~/.cache/pip
-      key: ${{ runner.os }}-pip-whl-${{ inputs.python-version }}-${{ hashFiles(format('python/requirements-{0}_{1}.txt', inputs.spark-compat-version, inputs.scala-compat-version)) }}-${{ github.run_id }}
-
  - name: Upload whl
    uses: actions/upload-artifact@v4
    with:

.github/actions/build/action.yml

Lines changed: 0 additions & 36 deletions

@@ -56,42 +56,6 @@ runs:
      mvn --batch-mode install -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true -Dgpg.skip
    shell: bash

-  - name: Save Maven packages cache
-    if: github.event_name != 'schedule' && github.ref == 'refs/heads/master'
-    uses: actions/cache/save@v4
-    with:
-      path: ~/.m2/repository
-      key: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}-${{ github.run_id }}
-
-  # merely populate the cache for test-jvm and test-python actions
-  - name: Restore Spark Binaries cache
-    if: github.event_name != 'schedule' && github.ref == 'refs/heads/master' && ! contains(inputs.spark-version, '-SNAPSHOT')
-    uses: actions/cache/restore@v4
-    with:
-      path: ~/spark
-      key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}
-      restore-keys: |
-        ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}
-
-  - name: Setup Spark Binaries
-    if: github.event_name != 'schedule' && github.ref == 'refs/heads/master' && ! contains(inputs.spark-version, '-SNAPSHOT')
-    env:
-      SPARK_PACKAGE: spark-${{ inputs.spark-version }}/spark-${{ inputs.spark-version }}-bin-hadoop${{ inputs.hadoop-version }}${{ inputs.scala-compat-version == '2.13' && '-scala2.13' || '' }}.tgz
-    run: |
-      if [[ ! -e ~/spark ]]
-      then
-        wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC "${{ runner.temp }}"
-        archive=$(basename "${SPARK_PACKAGE}") bash -c "mv -v "${{ runner.temp }}/\${archive/%.tgz/}" ~/spark"
-      fi
-    shell: bash
-
-  - name: Save Spark Binaries cache
-    if: github.event_name != 'schedule' && github.ref == 'refs/heads/master' && ! contains(inputs.spark-version, '-SNAPSHOT')
-    uses: actions/cache/save@v4
-    with:
-      path: ~/spark
-      key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}-${{ github.run_id }}
-
  - name: Upload Binaries
    uses: actions/upload-artifact@v4
    with:

.github/actions/check-compat/action.yml

Lines changed: 3 additions & 10 deletions

@@ -39,10 +39,10 @@ runs:
    uses: actions/cache/restore@v4
    with:
      path: ~/.m2/repository
-      key: ${{ runner.os }}-mvn-check-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
+      key: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
      restore-keys: |
-        ${{ runner.os }}-mvn-check-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
-        ${{ runner.os }}-mvn-check-${{ inputs.spark-version }}-${{ inputs.scala-version }}-
+        ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
+        ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-

  - name: Setup JDK 1.8
    uses: actions/setup-java@v4

@@ -77,13 +77,6 @@ runs:
      japi-compliance-checker ~/.m2/repository/uk/co/gresearch/spark/spark-extension_${{ inputs.scala-compat-version }}/${{ inputs.package-version }}-${{ inputs.spark-compat-version }}/spark-extension_${{ inputs.scala-compat-version }}-${{ inputs.package-version }}-${{ inputs.spark-compat-version }}.jar target/spark-extension*.jar
    shell: bash

-  - name: Save Maven packages cache
-    if: github.event_name != 'schedule' && github.ref == 'refs/heads/master'
-    uses: actions/cache/save@v4
-    with:
-      path: ~/.m2/repository
-      key: ${{ runner.os }}-mvn-check-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}-${{ github.run_id }}
-
  - name: Upload Report
    uses: actions/upload-artifact@v4
    if: always() && steps.exists.outcome == 'success'

.github/actions/prime-caches/action.yml

Lines changed: 73 additions & 0 deletions

@@ -0,0 +1,73 @@
+name: 'Prime caches'
+author: 'EnricoMi'
+description: 'A GitHub Action that primes caches'
+
+inputs:
+  spark-version:
+    description: Spark version, e.g. 3.4.0 or 3.4.0-SNAPSHOT
+    required: true
+  scala-version:
+    description: Scala version, e.g. 2.12.15
+    required: true
+  spark-compat-version:
+    description: Spark compatibility version, e.g. 3.4
+    required: true
+  scala-compat-version:
+    description: Scala compatibility version, e.g. 2.12
+    required: true
+  java-compat-version:
+    description: Java compatibility version, e.g. 8
+    required: true
+  hadoop-version:
+    description: Hadoop version, e.g. 2.7 or 2
+    required: true
+
+runs:
+  using: 'composite'
+  steps:
+  - name: Set versions in pom.xml
+    run: |
+      ./set-version.sh ${{ inputs.spark-version }} ${{ inputs.scala-version }}
+      git diff
+    shell: bash
+
+  - name: Setup JDK ${{ inputs.java-compat-version }}
+    uses: actions/setup-java@v4
+    with:
+      java-version: ${{ inputs.java-compat-version }}
+      distribution: 'zulu'
+
+  - name: Build
+    env:
+      JDK_JAVA_OPTIONS: --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-exports java.base/sun.util.calendar=ALL-UNNAMED
+    run: |
+      mvn --batch-mode --update-snapshots -Dspotless.check.skip clean compile test-compile
+      mvn --batch-mode package -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true
+      mvn --batch-mode install -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true -Dgpg.skip
+    shell: bash
+
+  - name: Save Maven packages cache
+    uses: actions/cache/save@v4
+    with:
+      path: ~/.m2/repository
+      key: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}-${{ github.run_id }}
+
+  - name: Setup Spark Binaries
+    if: ( ! contains(inputs.spark-version, '-SNAPSHOT') )
+    env:
+      SPARK_PACKAGE: spark-${{ inputs.spark-version }}/spark-${{ inputs.spark-version }}-bin-hadoop${{ inputs.hadoop-version }}${{ inputs.scala-compat-version == '2.13' && '-scala2.13' || '' }}.tgz
+    run: |
+      wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC "${{ runner.temp }}"
+      archive=$(basename "${SPARK_PACKAGE}") bash -c "mv -v "${{ runner.temp }}/\${archive/%.tgz/}" ~/spark"
+    shell: bash
+
+  - name: Save Spark Binaries cache
+    if: ( ! contains(inputs.spark-version, '-SNAPSHOT') )
+    uses: actions/cache/save@v4
+    with:
+      path: ~/spark
+      key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}-${{ github.run_id }}
+
+branding:
+  icon: 'check-circle'
+  color: 'green'
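
The Spark binaries saved here are intended for the test-jvm and test-python actions (the comment removed from `build/action.yml` above says as much). A minimal sketch of what such a restore step could look like, assuming the same inputs as this action; the concrete step is not part of this diff:

```yaml
# Illustrative restore of the primed Spark binaries; not part of this commit.
- name: Restore Spark Binaries cache
  uses: actions/cache/restore@v4
  with:
    path: ~/spark
    # prefix match via restore-keys picks up the run-id-suffixed key saved above
    key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}
    restore-keys: |
      ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}
```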

.github/actions/test-python/action.yml

Lines changed: 0 additions & 19 deletions

@@ -81,18 +81,6 @@ runs:
      java-version: '8'
      distribution: 'zulu'

-  - name: Restore Pip packages cache
-    if: github.event_name != 'schedule'
-    uses: actions/cache/restore@v4
-    with:
-      path: ~/.cache/pip
-      key: ${{ runner.os }}-pip-test-${{ inputs.python-version }}-${{ hashFiles(format('python/requirements-{0}_{1}.txt', inputs.spark-compat-version, inputs.scala-compat-version)) }}
-      restore-keys: |
-        ${{ runner.os }}-pip-test-${{ inputs.python-version }}-${{ hashFiles(format('python/requirements-{0}_{1}.txt', inputs.spark-compat-version, inputs.scala-compat-version)) }}
-        ${{ runner.os }}-pip-whl-${{ inputs.python-version }}-${{ hashFiles(format('python/requirements-{0}_{1}.txt', inputs.spark-compat-version, inputs.scala-compat-version)) }}
-        ${{ runner.os }}-pip-test-${{ inputs.python-version }}-
-        ${{ runner.os }}-pip-whl-${{ inputs.python-version }}-
-
  - name: Setup Python
    uses: actions/setup-python@v5
    with:

@@ -206,13 +194,6 @@ runs:
      $SPARK_HOME/bin/spark-shell --packages uk.co.gresearch.spark:spark-extension_${{ inputs.scala-compat-version }}:$SPARK_EXTENSION_VERSION < test-release.scala
    shell: bash

-  - name: Save Pip packages cache
-    if: github.event_name != 'schedule' && github.ref == 'refs/heads/master'
-    uses: actions/cache/save@v4
-    with:
-      path: ~/.cache/pip
-      key: ${{ runner.os }}-pip-test-${{ inputs.python-version }}-${{ hashFiles(format('python/requirements-{0}_{1}.txt', inputs.spark-compat-version, inputs.scala-compat-version)) }}-${{ github.run_id }}}
-
  - name: Upload Test Results
    if: always()
    uses: actions/upload-artifact@v4

.github/workflows/check.yml

Lines changed: 0 additions & 7 deletions

@@ -14,13 +14,6 @@ jobs:
        with:
          fetch-depth: 0

-      - name: Cache Maven packages
-        if: github.event_name != 'merge_group'
-        uses: actions/cache@v4
-        with:
-          path: ~/.m2/repository
-          key: ${{ runner.os }}-mvn-lint-${{ hashFiles('pom.xml') }}
-
      - name: Setup JDK ${{ inputs.java-compat-version }}
        uses: actions/setup-java@v4
        with:

.github/workflows/prime-caches.yml

Lines changed: 90 additions & 0 deletions

@@ -0,0 +1,90 @@
+name: Prime caches
+
+on:
+  workflow_call:
+
+jobs:
+  test:
+    name: Spark ${{ matrix.spark-compat-version }}.${{ matrix.spark-patch-version }} Scala ${{ matrix.scala-version }}
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      # keep in-sync with .github/workflows/test-jvm.yml
+      matrix:
+        scala-compat-version: ['2.12', '2.13']
+        spark-compat-version: ['3.2', '3.3', '3.4', '3.5']
+        spark-patch-version: ['0', '1', '2', '3', '4']
+
+        include:
+          - spark-compat-version: '3.0'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.10'
+            spark-patch-version: '3'
+            hadoop-version: '2.7'
+          - spark-compat-version: '3.1'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.10'
+            spark-patch-version: '3'
+            hadoop-version: '2.7'
+          - spark-compat-version: '3.2'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.15'
+            hadoop-version: '2.7'
+          - spark-compat-version: '3.3'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.15'
+            hadoop-version: '2'
+          - spark-compat-version: '3.3'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.15'
+            spark-patch-version: '3'
+            hadoop-version: '3'
+          - spark-compat-version: '3.3'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.15'
+            spark-patch-version: '4'
+            hadoop-version: '3'
+          - spark-compat-version: '3.4'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.17'
+            hadoop-version: '3'
+          - spark-compat-version: '3.5'
+            scala-compat-version: '2.12'
+            scala-version: '2.12.18'
+            hadoop-version: '3'
+
+          - spark-compat-version: '3.2'
+            scala-compat-version: '2.13'
+            scala-version: '2.13.5'
+            hadoop-version: '3.2'
+          - spark-compat-version: '3.3'
+            scala-compat-version: '2.13'
+            scala-version: '2.13.8'
+            hadoop-version: '3'
+          - spark-compat-version: '3.4'
+            scala-compat-version: '2.13'
+            scala-version: '2.13.8'
+            hadoop-version: '3'
+          - spark-compat-version: '3.5'
+            scala-compat-version: '2.13'
+            scala-version: '2.13.8'
+            hadoop-version: '3'
+
+        exclude:
+          - spark-compat-version: '3.5'
+            spark-patch-version: '4'
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        uses: ./.github/actions/prime-caches
+        with:
+          spark-version: ${{ matrix.spark-compat-version }}.${{ matrix.spark-patch-version }}
+          scala-version: ${{ matrix.scala-version }}
+          spark-compat-version: ${{ matrix.spark-compat-version }}
+          scala-compat-version: ${{ matrix.scala-compat-version }}
+          hadoop-version: ${{ matrix.hadoop-version }}
+          java-compat-version: '8'
.github/workflows/publish-release.yml

Lines changed: 3 additions & 10 deletions

@@ -125,21 +125,14 @@ jobs:
        with:
          python-version: ${{ env.PYTHON_VERSION }}

-      - name: Cache Maven packages
+      - name: Restore Maven packages cache
        id: cache-maven
-        uses: actions/cache@v4
+        uses: actions/cache/restore@v4
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-mvn-build-${{ matrix.params.spark-version }}-${{ matrix.params.scala-version }}-${{ hashFiles('pom.xml') }}
          restore-keys: ${{ runner.os }}-mvn-build-${{ matrix.params.spark-version }}-${{ matrix.params.scala-version }}-

-      - name: Cache Pip packages
-        id: cache-pip
-        uses: actions/cache@v4
-        with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-whl-${{ env.PYTHON_VERSION }}-${{ matrix.params.spark-version }}
-
      - name: Publish maven artifacts
        id: publish-maven
        run: |

@@ -268,4 +261,4 @@ jobs:
          echo "Pushing release commit to origin"
          git push origin "master"
          # NOTE: This push will not trigger a CI as we are using GITHUB_TOKEN to push
-          # More info on: https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow
+          # More info on: https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow

.github/workflows/publish-snapshot.yml

Lines changed: 2 additions & 9 deletions

@@ -57,21 +57,14 @@ jobs:
        with:
          python-version: ${{ env.PYTHON_VERSION }}

-      - name: Cache Maven packages
+      - name: Restore Maven packages cache
        id: cache-maven
-        uses: actions/cache@v4
+        uses: actions/cache/restore@v4
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-mvn-build-${{ matrix.params.spark-version }}-${{ matrix.params.scala-version }}-${{ hashFiles('pom.xml') }}
          restore-keys: ${{ runner.os }}-mvn-build-${{ matrix.params.spark-version }}-${{ matrix.params.scala-version }}-

-      - name: Cache Pip packages
-        id: cache-pip
-        uses: actions/cache@v4
-        with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-whl-${{ env.PYTHON_VERSION }}-${{ matrix.params.spark-version }}
-
      - name: Check if this is a SNAPSHOT version
        id: check-snapshot
        run: |

.github/workflows/test-jvm.yml

Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@ jobs:

    strategy:
      fail-fast: false
+      # keep in-sync with .github/workflows/prime-caches.yml
      matrix:
        scala-compat-version: ['2.12', '2.13']
        spark-compat-version: ['3.2', '3.3', '3.4', '3.5']
