-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
192 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#if 0 | ||
|
||
void | ||
hist_rvv_no_conflict(uint16_t hist[100], float *x, float *y, size_t n) | ||
{ | ||
for (size_t vl; n > 0; n -= vl, x += vl, y += vl) { | ||
vl = __riscv_vsetvl_e32m8(n); | ||
vfloat32m8_t vx = __riscv_vle32_v_f32m8(x, vl); | ||
vfloat32m8_t vy = __riscv_vle32_v_f32m8(y, vl); | ||
vfloat32m8_t vsq = __riscv_vfmacc(__riscv_vfmul(vx, vx, vl), vy, vy, vl); | ||
vfloat32m8_t v = __riscv_vfsqrt(vsq, vl); | ||
vuint16m4_t vidx = __riscv_vminu(__riscv_vfncvt_rtz_xu(v, vl), 100, vl); | ||
vidx = __riscv_vadd(vidx, vidx, vl); | ||
vuint16m4_t vcnt =__riscv_vluxei16(hist, vidx, vl); | ||
vcnt = __riscv_vadd(vcnt, 1, vl); | ||
__riscv_vsuxei16(hist, vidx, vcnt, vl); | ||
} | ||
} | ||
|
||
void | ||
hist_rvv_slidedown(uint16_t hist[100], float *x, float *y, size_t n) | ||
{ | ||
for (size_t vl; n > 0; n -= vl, x += vl, y += vl) { | ||
vl = __riscv_vsetvl_e32m8(n); | ||
vfloat32m8_t vx = __riscv_vle32_v_f32m8(x, vl); | ||
vfloat32m8_t vy = __riscv_vle32_v_f32m8(y, vl); | ||
vfloat32m8_t vsq = __riscv_vfmacc(__riscv_vfmul(vx, vx, vl), vy, vy, vl); | ||
vfloat32m8_t v = __riscv_vfsqrt(vsq, vl); | ||
vuint16m4_t vidx = __riscv_vminu(__riscv_vfncvt_rtz_xu(v, vl), 100, vl); | ||
|
||
for (size_t i = 0; i < vl; ++i) { | ||
size_t idx = __riscv_vmv_x(__riscv_vslidedown(vidx, i, 1)); | ||
++hist[idx]; | ||
} | ||
} | ||
} | ||
#endif | ||
|
||
#ifdef MX | ||
|
||
.global MX(LUT4_rvv_vloxei8_) | ||
MX(LUT4_rvv_vloxei8_): | ||
1: | ||
vsetvli a3, a2, e8, MX(), ta, ma | ||
vle8.v v8, (a1) | ||
vand.vi v8, v8, 15 | ||
vloxei8.v v8, (a0), v8 | ||
vse8.v v8, (a1) | ||
sub a2, a2, a3 | ||
add a1, a1, a3 | ||
bnez a2, 1b | ||
ret | ||
|
||
/* assumes no conflicts, which causes the wrong result */ | ||
.global MX(hist_rvv_no_conflict_) | ||
MX(hist_rvv_no_conflict_): | ||
li a4, 100 | ||
1: | ||
vsetvli a5, a3, e32, MX(), ta, ma | ||
vle32.v v8, (a1) | ||
vle32.v v16, (a2) | ||
vfmul.vv v8, v8, v8 | ||
vfmacc.vv v8, v16, v16 | ||
vfsqrt.v v8, v8 | ||
vsetvli zero, zero, e16, MXf2(), ta, ma | ||
vfncvt.rtz.xu.f.w v16, v8 | ||
vminu.vx v8, v16, a4 | ||
vadd.vv v8, v8, v8 | ||
vluxei16.v v12, (a0), v8 | ||
vadd.vi v12, v12, 1 | ||
vsuxei16.v v12, (a0), v8 | ||
sub a3, a3, a5 | ||
slli a5, a5, 2 | ||
add a1, a1, a5 | ||
add a2, a2, a5 | ||
bnez a3, 1b | ||
ret | ||
|
||
.global MX(hist_rvv_slidedown_) | ||
MX(hist_rvv_slidedown_): | ||
li a6, 100 | ||
j 2f | ||
1: | ||
sub a3, a3, a7 | ||
slli a5, a7, 2 | ||
add a1, a1, a5 | ||
add a2, a2, a5 | ||
beqz a3, 4f | ||
2: | ||
vsetvli a7, a3, e32, MX(), ta, ma | ||
beqz a7, 1b | ||
vle32.v v8, (a1) | ||
vle32.v v16, (a2) | ||
li a4, 0 | ||
vfmul.vv v8, v8, v8 | ||
vfmacc.vv v8, v16, v16 | ||
vfsqrt.v v8, v8 | ||
vsetvli zero, zero, e16, MXf2(), ta, ma | ||
vfncvt.rtz.xu.f.w v16, v8 | ||
vminu.vx v8, v16, a6 | ||
vadd.vv v8, v8, v8 | ||
vsetivli zero, 1, e16, MXf2(), ta, ma | ||
3: | ||
vslidedown.vx v12, v8, a4 | ||
vmv.x.s a5, v12 | ||
add t0, a0, a5 | ||
lh a5, 0(t0) | ||
addi a5, a5, 1 | ||
addi a4, a4, 1 | ||
sh a5, 0(t0) | ||
bne a7, a4, 3b | ||
j 1b | ||
4: | ||
ret | ||
|
||
#endif | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#include "bench.h" | ||
|
||
#if __STDC_HOSTED__ | ||
#include <math.h> | ||
#endif | ||
|
||
void | ||
hist_scalar(uint16_t hist[100], float *x, float *y, size_t n) | ||
{ | ||
for (size_t i = 0; i < n; ++i) { | ||
float dist = x[i]*x[i] + y[i]*y[i]; | ||
#if __STDC_HOSTED__ | ||
dist = sqrtf(dist); | ||
#else | ||
__asm volatile("fsqrt.s %0, %0\n" : "+f"(dist)); | ||
#endif | ||
size_t idx = dist; | ||
idx = idx > 100 ? 100 : dist; | ||
++hist[idx]; | ||
|
||
} | ||
} | ||
|
||
#define IMPLS(f) \ | ||
f(scalar) \ | ||
MX(f, rvv_no_conflict) \ | ||
MX(f, rvv_slidedown) \ | ||
|
||
typedef void Func(uint16_t hist[100], float *x, float *y, size_t n); | ||
|
||
#define DECLARE(f) extern Func hist_##f; | ||
IMPLS(DECLARE) | ||
|
||
#define EXTRACT(f) { #f, &hist_##f }, | ||
Impl impls[] = { IMPLS(EXTRACT) }; | ||
|
||
static uint16_t hist[100]; | ||
float *inx, *iny; | ||
|
||
void init(void) { | ||
inx = (float*)mem; | ||
iny = (float*)(mem + MAX_MEM/2); | ||
} | ||
|
||
ux checksum(size_t n) { | ||
ux sum = 0; | ||
for (size_t i = 0; i < 100; ++i) | ||
sum = hist[i]; | ||
return sum <= n; // sanity check for no_conflict | ||
} | ||
|
||
BENCH_BEG(base) { | ||
n /= sizeof(float); | ||
memset(hist, 0, sizeof hist); | ||
float max = 70.71; // approx. sqrtf(100*100/2); | ||
for (size_t i = 0; i < n; ++i) { | ||
inx[i] = bench_urandf() * 2 * max - max; | ||
iny[i] = bench_urandf() * 2 * max - max; | ||
} | ||
TIME f(hist, inx, iny, n); | ||
} BENCH_END | ||
|
||
Bench benches[] = { | ||
BENCH( impls, MAX_MEM/2, "hist", bench_base) | ||
}; BENCH_MAIN(benches) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters