Skip to content

Commit

Permalink
Use more shards, use MurmurHash3 for more even distribution amongst shards and groups
Browse files Browse the repository at this point in the history
  • Loading branch information
scottsand-db committed Sep 24, 2024
1 parent 437db30 commit 854842b
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/spark_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ jobs:
# These Scala versions must match those in the build.sbt
scala: [2.12.18, 2.13.13]
# Important: This list of shards must be [0..NUM_SHARDS - 1]
shard: [0, 1, 2]
shard: [0, 1, 2, 3]
env:
SCALA_VERSION: ${{ matrix.scala }}
# Important: This must be the same as the length of shards in matrix
NUM_SHARDS: 3
NUM_SHARDS: 4
steps:
- uses: actions/checkout@v3
- uses: technote-space/get-diff-action@v4
Expand Down
8 changes: 6 additions & 2 deletions project/TestParallelization.scala
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import scala.util.Random
import scala.util.hashing.MurmurHash3

import sbt.Keys._
import sbt._

Expand Down Expand Up @@ -146,13 +149,14 @@ object TestParallelization {
}

val testIsAssignedToShard =
math.abs(testDefinition.name.hashCode % numShards.get) == shardId.get
math.abs(MurmurHash3.stringHash(testDefinition.name) % numShards.get) == shardId.get

if(!testIsAssignedToShard) {
return new SimpleHashStrategy(groups, shardId)
}
}

val groupIdx = math.abs(testDefinition.name.hashCode % groupCount)
val groupIdx = math.abs(MurmurHash3.stringHash(testDefinition.name) % groupCount)
val currentGroup = groups(groupIdx)
val updatedGroup = currentGroup.withTests(
currentGroup.tests :+ testDefinition
Expand Down

0 comments on commit 854842b

Please sign in to comment.