Prepare for C tiling
Aba committed Oct 27, 2023
1 parent 5d46e76 commit 820ee27
Showing 5 changed files with 45 additions and 23 deletions.
c/model.h (28 changes: 15 additions & 13 deletions)
@@ -1,25 +1,27 @@
#define N_BUNDLES 7
Bundle_t bundles [N_BUNDLES] = {
-{.n=8, .l=3, .kw=11, .coe=2, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=272, .w_bpt_p0=272, .x_bpt=1256, .x_bpt_p0=1256, .is_bias=1, .conv2dense=0, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414349857415757824, .x_header_p0=414349857415757824, .w_header=414596233919725568, .w_header_p0=414349857415757824 },
-{.n=8, .l=3, .kw=1, .coe=24, .coe_tl=0, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=392, .w_bpt_p0=392, .x_bpt=19976, .x_bpt_p0=19976, .is_bias=0, .conv2dense=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=7, .ca_pl_scale=0, .x_header=8700973171777470464, .x_header_p0=8700973171777470464, .w_header=8701219591231111168, .w_header_p0=8700973171777470464 },
-{.n=8, .l=3, .kw=7, .coe=3, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=344, .w_bpt_p0=344, .x_bpt=2504, .x_bpt_p0=2504, .is_bias=1, .conv2dense=0, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846695421643325440, .x_header_p0=846695421643325440, .w_header=846941823917096960, .w_header_p0=846695421643325440 },
-{.n=8, .l=3, .kw=5, .coe=4, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=488, .w_bpt_p0=488, .x_bpt=5000, .x_bpt_p0=5000, .is_bias=0, .conv2dense=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=1927559332212244480, .x_header_p0=1927559332212244480, .w_header=1927805786025623552, .w_header_p0=1927559332212244480 },
-{.n=8, .l=3, .kw=3, .coe=8, .coe_tl=8, .r_ll=2, .h=18, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=440, .w_bpt_p0=296, .x_bpt=7496, .x_bpt_p0=5000, .is_bias=1, .conv2dense=0, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008423242781163520, .x_header_p0=1855501738174316544, .w_header=3008669679414673408, .w_header_p0=1855501738174316544 },
-{.n=8, .l=3, .kw=1, .coe=24, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=488, .w_bpt_p0=104, .x_bpt=24968, .x_bpt_p0=5000, .is_bias=0, .conv2dense=1, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=11006816180991164416, .x_header_p0=1783444144136388608, .w_header=11007062634804543488, .w_header_p0=1783444144136388608 },
-{.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=7200, .co=10, .w_kw2=1, .t=1, .p=360, .cm=20, .cm_p0=20, .w_bpt=488, .w_bpt_p0=488, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .conv2dense=0, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
+{.n=8, .l=3, .kw=11, .coe=2, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=140, .w_bpt_p0=140, .x_bpt=2504, .x_bpt_p0=2504, .o_bytes=39936, .is_bias=1, .conv2dense=0, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=8, .ca_pl_scale=0, .x_header=414349857415757824, .x_header_p0=414349857415757824, .w_header=414596233919725568, .w_header_p0=414349857415757824 },
+{.n=8, .l=3, .kw=1, .coe=24, .coe_tl=0, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=39944, .x_bpt_p0=39944, .o_bytes=39936, .is_bias=0, .conv2dense=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=3, .ca_pl_scale=0, .x_header=8700973171777470464, .x_header_p0=8700973171777470464, .w_header=8701219591231111168, .w_header_p0=8700973171777470464 },
+{.n=8, .l=3, .kw=7, .coe=3, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=5000, .x_bpt_p0=5000, .o_bytes=39936, .is_bias=1, .conv2dense=0, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=8, .ca_pl_scale=0, .x_header=846695421643325440, .x_header_p0=846695421643325440, .w_header=846941823917096960, .w_header_p0=846695421643325440 },
+{.n=8, .l=3, .kw=5, .coe=4, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=9992, .x_bpt_p0=9992, .o_bytes=39936, .is_bias=0, .conv2dense=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=6, .ca_pl_scale=3, .x_header=1927559332212244480, .x_header_p0=1927559332212244480, .w_header=1927805786025623552, .w_header_p0=1927559332212244480 },
+{.n=8, .l=3, .kw=3, .coe=8, .coe_tl=8, .r_ll=2, .h=18, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=14984, .x_bpt_p0=9992, .o_bytes=59904, .is_bias=1, .conv2dense=0, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=8, .ca_pl_scale=0, .x_header=3008423242781163520, .x_header_p0=1855501738174316544, .w_header=3008669679414673408, .w_header_p0=1855501738174316544 },
+{.n=8, .l=3, .kw=1, .coe=24, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=49928, .x_bpt_p0=9992, .o_bytes=93600, .is_bias=0, .conv2dense=1, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=6, .ca_pl_scale=3, .x_header=11006816180991164416, .x_header_p0=1783444144136388608, .w_header=11007062634804543488, .w_header_p0=1783444144136388608 },
+{.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=7200, .co=10, .w_kw2=1, .t=1, .p=360, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=268, .x_bpt_p0=268, .o_bytes=80, .is_bias=1, .conv2dense=0, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=11, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
};

-#define X_BITS_L2 2
-#define W_BITS_L2 3
+#define X_BITS_L2 3
+#define W_BITS_L2 2
#define X_PAD 5
#define KH_MAX 11
#define PE_ROWS 8
#define PE_COLS 24

-#define WB_BYTES 212388
-#define W_BYTES 212224
-#define X_BYTES 3768
-#define X_BYTES_ALL 163416
+#define WB_BYTES 108132
+#define W_BYTES 107968
+#define X_BYTES 7512
+#define O_WORDS 80
+#define O_BYTES_MAX 93600
+#define X_BYTES_ALL 323784
#define Y_BYTES 442376
#define B_TYPE signed short
#define B_WORDS 82
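
A cross-check on the new numbers (illustrative, not part of this commit): X_BITS_L2 and W_BITS_L2 are the base-2 logarithms of the new activation and weight widths (2^3 = 8, 2^2 = 4), matching the X_BITS/K_BITS swap in the Tcl, SystemVerilog and Python changes below. The new per-bundle .o_bytes field counts the output words one bundle hands to the next, O_WORDS is the last bundle's value and O_BYTES_MAX the largest. The C sketch below checks these relations against the table above; it assumes IN_BITS = 64 (one 8-byte header per transfer), which the x_bpt values imply.

#include <assert.h>

int main(void) {
    // Bundle 0 -> bundle 1 (new table): bundle 1 is read in a single pass (.p=1),
    // so its .x_bpt_p0 is bundle 0's .o_bytes plus one 8-byte header at X_BITS=8.
    assert((8*39936 + 64)/8 == 39944);       // .o_bytes=39936 -> .x_bpt_p0=39944

    assert((1 << 3) == 8 && (1 << 2) == 4);  // X_BITS_L2=3 -> 8-bit x, W_BITS_L2=2 -> 4-bit w

    // O_BYTES_MAX is the largest .o_bytes in the table; O_WORDS is the last bundle's value.
    int o_bytes[7] = {39936, 39936, 39936, 39936, 59904, 93600, 80};
    int o_max = 0;
    for (int i = 0; i < 7; i++) if (o_bytes[i] > o_max) o_max = o_bytes[i];
    assert(o_max == 93600 && o_bytes[6] == 80);
    return 0;
}
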
c/runtime.h (12 changes: 10 additions & 2 deletions)
@@ -8,7 +8,7 @@

typedef struct {
const int n, l, kw, coe, coe_tl, r_ll, h, w, ci, co, w_kw2, t, p, cm, cm_p0;
-const int w_bpt, w_bpt_p0, x_bpt, x_bpt_p0; // bytes per transfer
+const int w_bpt, w_bpt_p0, x_bpt, x_bpt_p0, o_bytes; // bytes per transfer
const char is_bias, conv2dense;
const int b_offset, b_val_shift, b_bias_shift;
const signed char ca_nzero, ca_shift, ca_pl_scale;
@@ -22,7 +22,8 @@ typedef struct {
char w [W_BYTES ];
B_TYPE b [B_WORDS ]; // keep next to w. weights are loaded to w_ptr
char x [X_BYTES_ALL ];
-int y [Y_BYTES/4 ];
+char nx [O_BYTES_MAX ];
+int y [O_WORDS ];
int p_sum [Y_BYTES/4 ];
} Memory_st;
Memory_st mem;
@@ -134,6 +135,13 @@ extern EXT_C void load_y (unsigned char *p_done, unsigned char *pt_done_proc, c
++ip; if (ip >= p_bundle->p) { ip = 0; //after_each(ib) = after_all(ip):

printf("done bundle!! iw:%d in:%d il:%d it:%d ip:%d ib:%d\n", iw, in, il, it, ip, ib);

+char f_path_tiled [1000];
+sprintf(f_path_tiled, "%s/%0d_y_tiled_sim.txt", DATA_DIR, ib);
+FILE *fp_tiled = fopen(f_path_tiled, "w");
+for (int i=0; i<p_bundle->o_bytes; i++)
+fprintf(fp_tiled,"%d\n", ib == N_BUNDLES-1 ? mem.y[i] : mem.nx[i]);
+fclose(fp_tiled);

++ib; if (ib >= N_BUNDLES) { ib = 0; // after_all(ib):
*p_done = 1;
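
The block added above writes, for every bundle, one decimal value per line to DATA_DIR/<ib>_y_tiled_sim.txt, taking ints from mem.y for the last bundle and bytes from mem.nx otherwise. A checker along the following lines (hypothetical, and the reference file is an assumption, since the commit only produces the *_sim.txt side) could diff such a dump against the Python model's tiled output:

#include <stdio.h>

// Compare one <ib>_y_tiled_sim.txt dump against a reference file in the same
// one-value-per-line format. Returns the number of mismatches, or -1 on I/O error.
static int compare_tiled(const char *sim_path, const char *ref_path) {
    FILE *fs = fopen(sim_path, "r"), *fr = fopen(ref_path, "r");
    if (!fs || !fr) { if (fs) fclose(fs); if (fr) fclose(fr); return -1; }
    int a, b, idx = 0, errors = 0;
    while (fscanf(fs, "%d", &a) == 1 && fscanf(fr, "%d", &b) == 1) {
        if (a != b) { printf("mismatch at %d: sim=%d ref=%d\n", idx, a, b); errors++; }
        idx++;
    }
    fclose(fs); fclose(fr);
    return errors;
}
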
fpga/scripts/vivado_config.tcl (4 changes: 2 additions & 2 deletions)
@@ -3,8 +3,8 @@
set RAM_WEIGHTS_DEPTH 20
set ROWS 8
set COLS 24
-set X_BITS 4
-set K_BITS 8
+set X_BITS 8
+set K_BITS 4
set Y_BITS 24
set DELAY_W_RAM 2
set RAM_EDGES_DEPTH 288
rtl/include/params_input.svh (4 changes: 2 additions & 2 deletions)
@@ -3,8 +3,8 @@

`define ROWS 8 // PE rows, constrained by resources
`define COLS 24 // PE cols, constrained by resources
-`define X_BITS 4 // Bits per word in input
-`define K_BITS 8 // Bits per word in input
+`define X_BITS 8 // Bits per word in input
+`define K_BITS 4 // Bits per word in input
`define Y_BITS 24 // Bits per word in output of conv

`define KH_MAX 11 // max of kernel height, across layers
test/py/param_test.py (20 changes: 16 additions & 4 deletions)
@@ -166,8 +166,8 @@ class Config:


@pytest.mark.parametrize("COMPILE", list(product_dict(
-X_BITS = [4 ],
-K_BITS = [8 ],
+X_BITS = [8 ],
+K_BITS = [4 ],
B_BITS = [16 ],
Y_BITS = [24 ],
INT_BITS = [32 ], # size of integer in target CPU
@@ -250,7 +250,7 @@ def test_dnn_engine(COMPILE):
'''
Write Runtime Headers
'''
-x_bytes_all, x_bytes, w_bytes, b_words, x_bytes_max, y_bytes_max = 0, 0, 0, 0, 0, 0
+x_bytes_all, x_bytes, w_bytes, b_words, x_bytes_max, y_bytes_max, o_bytes_max = 0, 0, 0, 0, 0, 0, 0
with open ('../c/model.h', 'w') as ch:

ch.write(f"#define N_BUNDLES {len(bundles)}\n")
@@ -261,13 +261,23 @@
w_bpt_p0 = (c.K_BITS*b.we[0][0].size + c.IN_BITS )//8
x_bpt = (c.X_BITS*b.xe[-1].size + c.IN_BITS )//8
x_bpt_p0 = (c.X_BITS*b.xe[0].size + c.IN_BITS )//8

+if ib == len(bundles)-1:
+o_bytes_b = b.o_int.size # int or float
+o_words = o_bytes_b
+else:
+b_next = bundles[ib+1]
+o_bpt = b_next.xe[-1].size #(c.X_BITS*b_next.xe[-1].size + c.IN_BITS )//8
+o_bpt_p0 = b_next.xe[0].size #(c.X_BITS*b_next.xe[0].size + c.IN_BITS )//8
+o_bytes_b = o_bpt_p0 + (b_next.r.CP-1)*o_bpt

w_bytes_b = (w_bpt_p0 + (b.r.CP-1)*w_bpt)*b.r.IT
x_bytes_b = (x_bpt_p0 + (b.r.CP-1)*x_bpt)
y_bytes_b = (32*b.ye_exp.size + c.IN_BITS)//8

x_bytes_max = max(x_bytes_max, x_bytes_b)
y_bytes_max = max(y_bytes_max, y_bytes_b)
+o_bytes_max = max(o_bytes_max, o_bytes_b)
w_bytes += w_bytes_b
x_bytes_all += x_bytes_b

@@ -280,7 +290,7 @@

ca_nzero, ca_shift, ca_pl_scale = b.core['act']['non_zero'], b.core['act']['shift_bits'], b.core['act']['plog_slope']

ch.write(f" {{.n={b.r.XN}, .l={b.r.L}, .kw={b.r.KW}, .coe={y_coe}, .coe_tl={y_coe_tl}, .r_ll={y_r_ll}, .h={b.r.XH}, .w={b.r.XW}, .ci={b.r.CI}, .co={b.r.CO}, .w_kw2={b.r.XW-b.r.KW//2}, .t={b.r.IT}, .p={b.r.CP}, .cm={b.r.CM}, .cm_p0={b.r.CM_0}, .w_bpt={w_bpt}, .w_bpt_p0={w_bpt_p0}, .x_bpt={x_bpt}, .x_bpt_p0={x_bpt_p0}, .is_bias={1*(b.b is not None)}, .conv2dense={1*b.flatten}, .b_offset={b_words}, .b_val_shift={b.bias_val_shift}, .b_bias_shift={b.bias_b_shift}, .ca_nzero={ca_nzero}, .ca_shift={ca_shift}, .ca_pl_scale={ca_pl_scale}, .x_header={b.r.x_header_be_p[-1][0]}, .x_header_p0={b.r.x_header_be_p[0][0]}, .w_header={b.r.w_header_be_p[-1][0]}, .w_header_p0={b.r.x_header_be_p[0][0]} }}")
ch.write(f" {{.n={b.r.XN}, .l={b.r.L}, .kw={b.r.KW}, .coe={y_coe}, .coe_tl={y_coe_tl}, .r_ll={y_r_ll}, .h={b.r.XH}, .w={b.r.XW}, .ci={b.r.CI}, .co={b.r.CO}, .w_kw2={b.r.XW-b.r.KW//2}, .t={b.r.IT}, .p={b.r.CP}, .cm={b.r.CM}, .cm_p0={b.r.CM_0}, .w_bpt={w_bpt}, .w_bpt_p0={w_bpt_p0}, .x_bpt={x_bpt}, .x_bpt_p0={x_bpt_p0}, .o_bytes={o_bytes_b}, .is_bias={1*(b.b is not None)}, .conv2dense={1*b.flatten}, .b_offset={b_words}, .b_val_shift={b.bias_val_shift}, .b_bias_shift={b.bias_b_shift}, .ca_nzero={ca_nzero}, .ca_shift={ca_shift}, .ca_pl_scale={ca_pl_scale}, .x_header={b.r.x_header_be_p[-1][0]}, .x_header_p0={b.r.x_header_be_p[0][0]}, .w_header={b.r.w_header_be_p[-1][0]}, .w_header_p0={b.r.x_header_be_p[0][0]} }}")

b_words += b.be.size if b.b else 0
if b.idx != len(bundles)-1:
@@ -297,6 +307,8 @@
ch.write(f"#define WB_BYTES {w_bytes + (b_words*c.B_BITS)//8}\n")
ch.write(f"#define W_BYTES {w_bytes}\n")
ch.write(f"#define X_BYTES {x_bytes}\n")
ch.write(f"#define O_WORDS {o_words}\n")
ch.write(f"#define O_BYTES_MAX {o_bytes_max}\n")
ch.write(f"#define X_BYTES_ALL {x_bytes_all}\n")
ch.write(f"#define Y_BYTES {y_bytes_max}\n")
ch.write(f"#define B_TYPE {type_d['c'][c.B_BITS]}\n")
