Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test asm slice reads/writes in race tests #286

Merged
merged 3 commits into from
Aug 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
build:
strategy:
matrix:
go-version: [1.20.x, 1.21.x, 1.22.x]
go-version: [1.21.x, 1.22.x, 1.23.x]
os: [ubuntu-latest, macos-latest, windows-latest]
env:
CGO_ENABLED: 0
Expand Down
40 changes: 36 additions & 4 deletions _gen/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,14 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma
if pshufb {
w.WriteString(`

func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
n := stop-start
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) {
n = stop - start
if raceEnabled {
defer func() {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}()
}

`)

Expand All @@ -197,8 +203,14 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
}

func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
n := (stop-start)
func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) {
n = stop - start
if raceEnabled {
defer func() {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}()
}

`)

Expand All @@ -223,6 +235,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (64 - 1))

if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}

`)

w.WriteString(`switch len(in) {
Expand All @@ -242,6 +259,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (64 - 1))

if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}

`)

w.WriteString(`switch len(in) {
Expand All @@ -264,6 +286,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int
func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (32 - 1))

if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}

`)

w.WriteString(`switch len(in) {
Expand All @@ -283,6 +310,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int
func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (32 - 1))

if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}

`)

w.WriteString(`switch len(in) {
Expand Down
117 changes: 102 additions & 15 deletions galois_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,32 @@ func galMulSlice(c byte, in, out []byte, o *options) {
}
if o.useAVX2 {
if len(in) >= bigSwitchover {
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
if len(in) > 32 {
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
} else if o.useSSSE3 {
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
Expand All @@ -85,20 +97,32 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {

if o.useAVX2 {
if len(in) >= bigSwitchover {
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
if len(in) >= 32 {
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
} else if o.useSSSE3 {
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
Expand All @@ -117,20 +141,32 @@ func sliceXor(in, out []byte, o *options) {
if o.useSSE2 {
if len(in) >= bigSwitchover {
if o.useAVX2 {
avx2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
avx2XorSlice_64(in, out)
in = in[done:]
out = out[done:]
} else {
sSE2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
sSE2XorSlice_64(in, out)
in = in[done:]
out = out[done:]
}
}
if len(in) >= 16 {
sSE2XorSlice(in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
sSE2XorSlice(in, out)
in = in[done:]
out = out[done:]
}
Expand Down Expand Up @@ -462,9 +498,17 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) {
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
fftDIT2_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
fftDIT2_ssse3(x, y, tmp)
} else {
// Reference version:
Expand All @@ -480,11 +524,15 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) {
}

if o.useAVX2 {
done := (len(y) >> 6) << 6
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
if len(x)&63 == 0 {
return
}
done := (len(y) >> 6) << 6
y = y[done:]
x = x[done:]
}
Expand All @@ -499,11 +547,15 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
}

if o.useAVX2 {
done := (len(y) >> 6) << 6
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
if len(x)&63 == 0 {
return
}
done := (len(y) >> 6) << 6
y = y[done:]
x = x[done:]
}
Expand All @@ -514,14 +566,22 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
func mulAdd8(x, y []byte, log_m ffe8, o *options) {
if o.useAVX2 {
t := &multiply256LUT8[log_m]
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
done := (len(y) >> 6) << 6
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
y = y[done:]
x = x[done:]
} else if o.useSSSE3 {
t := &multiply256LUT8[log_m]
galMulSSSE3Xor(t[:16], t[16:32], y, x)
done := (len(y) >> 4) << 4
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
galMulSSSE3Xor(t[:16], t[16:32], y, x)
y = y[done:]
x = x[done:]
}
Expand All @@ -535,9 +595,19 @@ func ifftDIT2(x, y []byte, log_m ffe, o *options) {
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}

ifftDIT2_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}

ifftDIT2_ssse3(x, y, tmp)
} else {
// Reference version:
Expand All @@ -552,9 +622,17 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
mulgf16_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
mulgf16_ssse3(x, y, tmp)
} else {
refMul(x, y, log_m)
Expand All @@ -564,14 +642,23 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
func mulgf8(out, in []byte, log_m ffe8, o *options) {
if o.useAVX2 {
t := &multiply256LUT8[log_m]
galMulAVX2_64(t[:16], t[16:32], in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}

galMulAVX2_64(t[:16], t[16:32], in, out)
in = in[done:]
out = out[done:]
} else if o.useSSSE3 {
t := &multiply256LUT8[log_m]
galMulSSSE3(t[:16], t[16:32], in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulSSSE3(t[:16], t[16:32], in, out)
in = in[done:]
out = out[done:]
}
Expand Down
13 changes: 10 additions & 3 deletions galois_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ func galMulSlice(c byte, in, out []byte, o *options) {
return
}
var done int
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)

remain := len(in) - done
if remain > 0 {
Expand All @@ -50,9 +54,12 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
sliceXor(in, out, o)
return
}
var done int
done := (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5

remain := len(in) - done
if remain > 0 {
Expand Down
Loading
Loading