Skip to content

Commit

Permalink
Fix fp issue, lightning fast now
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Jul 19, 2024
1 parent b665667 commit eedd937
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 43 deletions.
14 changes: 6 additions & 8 deletions deepsocflow/c/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,6 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {
int32_t ph_end, ph_beg_const, ixh_beg, xh_sweep;
int32_t pw_end, pw_beg_const, ixw_beg, xw_sweep;

#ifdef SIM
char f_path_raw [1000], f_path_sum [1000]; // make sure full f_path_raw is shorter than 1000
sprintf(f_path_raw, "%s/%0d_%0d_%0d_y_raw_sim.txt", DATA_DIR, ib, ip, it);
sprintf(f_path_sum, "%s/%0d_y_sum_sim.txt", DATA_DIR, ib);
FILE *fp_raw = fopen(f_path_raw, "a");
FILE *fp_sum = fopen(f_path_sum, "a");
#endif

static int8_t ocm_bank = 1;
int32_t w_last, sram_addr;

Expand Down Expand Up @@ -314,6 +306,12 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {
// In sim: return here so SV can pass time and call again; that call will jump to DMA_WAIT again
if (!get_config(4*(A_DONE_WRITE + ocm_bank)))
return;

char f_path_raw [1000], f_path_sum [1000]; // make sure full f_path_raw is shorter than 1000
sprintf(f_path_raw, "%s/%0d_%0d_%0d_y_raw_sim.txt", DATA_DIR, ib, ip, it);
sprintf(f_path_sum, "%s/%0d_y_sum_sim.txt", DATA_DIR, ib);
FILE *fp_raw = fopen(f_path_raw, "a");
FILE *fp_sum = fopen(f_path_sum, "a");
#else
//start_wait_output((UINTPTR)*p_base_addr_next, *p_bpt_next);
// in FPGA, wait for write done
Expand Down
1 change: 1 addition & 0 deletions deepsocflow/rtl/ext/xilinx_spwf.v
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Single-Port Block RAM Write-First Mode (recommended template)
// File: rams_sp_wf.v
`timescale 1ns/1ps
module rams_sp_wf (clk, we, en, addr, di, dout);
parameter WIDTH = 16;
parameter DEPTH = 1024;
Expand Down
3 changes: 2 additions & 1 deletion run/param_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

# Simulator: xsim on Windows, verilator otherwise
#(SIM, SIM_PATH) = ('xsim', "/opt/Xilinx/Vivado/2022.2/bin/")
(SIM, SIM_PATH) = ('verilator', "")
# (SIM, SIM_PATH) = ('verilator', "")
(SIM, SIM_PATH) = ('xsim', "F:/Xilinx/Vivado/2022.2/bin/") if os.name=='nt' else ('verilator', '')
def product_dict(**kwargs):
    """Yield one dict per combination of the given keyword values.

    Each keyword argument is expected to be an iterable of candidate values.
    The Cartesian product of those iterables is enumerated with
    ``itertools.product``; every combination is yielded as a dict mapping
    each keyword name to the value picked for it in that combination.
    """
    for instance in itertools.product(*(kwargs.values())):
        # Pair each keyword name with the value chosen for it.
        yield dict(zip(kwargs.keys(), instance))
Expand Down
2 changes: 1 addition & 1 deletion run/work/config_fw.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Bundle_t bundles [N_BUNDLES] = {
{.n=1 , .l=1 , .kw=5 , .coe=4 , .coe_tl=4 , .r_ll=5 , .h=5 , .w=6 , .ci=8 , .co=8 , .w_kw2=4 , .t=2 , .p=2 , .cm=4 , .cm_p0=4 , .xp_words=84 , .ib_out=4 , .w_bpt=256 , .w_bpt_p0=256 , .x_bpt=184 , .x_bpt_p0=184 , .o_words=672 , .o_bytes=368 , .x_pad=6 , .in_buffer_idx=0 , .out_buffer_idx=1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=0 , .is_bias=1 , .is_flatten=0 , .is_softmax=0 , .b_offset=32 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=1 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=0 , .softmax_max_f=0 , .csh=1 , .ch=5 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=5 , .psh_shift=0 , .csw=1 , .cw=6 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=6 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=5 , .ow=6 , .oc=8 , .x_header= 81946u, .x_header_p0= 81946u, .w_header= 652835110938u, .w_header_p0= 81946u , .debug_nhwc_words=240 },
{.n=1 , .l=1 , .kw=3 , .coe=8 , .coe_tl=8 , .r_ll=5 , .h=5 , .w=6 , .ci=8 , .co=24 , .w_kw2=5 , .t=3 , .p=2 , .cm=6 , .cm_p0=2 , .xp_words=84 , .ib_out=5 , .w_bpt=232 , .w_bpt_p0=88 , .x_bpt=268 , .x_bpt_p0=100 , .o_words=1152 , .o_bytes=608 , .x_pad=6 , .in_buffer_idx=1 , .out_buffer_idx=0 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=0 , .is_softmax=0 , .b_offset=40 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=0 , .ca_shift=12 , .ca_pl_scale=0 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=0 , .softmax_max_f=0 , .csh=1 , .ch=5 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=5 , .psh_shift=0 , .csw=1 , .cw=6 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=6 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=5 , .ow=6 , .oc=24 , .x_header= 81961u, .x_header_p0= 81929u, .w_header= 584115634217u, .w_header_p0= 81929u , .debug_nhwc_words=720 },
{.n=1 , .l=1 , .kw=1 , .coe=24 , .coe_tl=0 , .r_ll=5 , .h=5 , .w=6 , .ci=24 , .co=10 , .w_kw2=6 , .t=1 , .p=2 , .cm=20 , .cm_p0=4 , .xp_words=48 , .ib_out=6 , .w_bpt=256 , .w_bpt_p0=64 , .x_bpt=496 , .x_bpt_p0=112 , .o_words=2400 , .o_bytes=1440 , .x_pad=0 , .in_buffer_idx=0 , .out_buffer_idx=1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=1 , .is_softmax=0 , .b_offset=64 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=0 , .softmax_max_f=0 , .csh=1 , .ch=5 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=5 , .psh_shift=0 , .csw=1 , .cw=6 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=6 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=1 , .ow=1 , .oc=300 , .x_header= 82072u, .x_header_p0= 81944u, .w_header= 652835111064u, .w_header_p0= 81944u , .debug_nhwc_words=300 },
{.n=1 , .l=1 , .kw=1 , .coe=24 , .coe_tl=0 , .r_ll=1 , .h=1 , .w=1 , .ci=300 , .co=10 , .w_kw2=1 , .t=1 , .p=15 , .cm=20 , .cm_p0=20 , .xp_words=8 , .ib_out=-1 , .w_bpt=256 , .w_bpt_p0=256 , .x_bpt=96 , .x_bpt_p0=96 , .o_words=10 , .o_bytes=40 , .x_pad=0 , .in_buffer_idx=1 , .out_buffer_idx=-1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=0 , .is_softmax=1 , .b_offset=88 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=3 , .softmax_max_f=0.875 , .csh=1 , .ch=1 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=1 , .psh_shift=0 , .csw=1 , .cw=1 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=1 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=1 , .ow=1 , .oc=10 , .x_header= 152u, .x_header_p0= 152u, .w_header= 652835029144u, .w_header_p0= 152u , .debug_nhwc_words=10 }
{.n=1 , .l=1 , .kw=1 , .coe=24 , .coe_tl=0 , .r_ll=1 , .h=1 , .w=1 , .ci=300 , .co=10 , .w_kw2=1 , .t=1 , .p=15 , .cm=20 , .cm_p0=20 , .xp_words=8 , .ib_out=-1 , .w_bpt=256 , .w_bpt_p0=256 , .x_bpt=96 , .x_bpt_p0=96 , .o_words=10 , .o_bytes=40 , .x_pad=0 , .in_buffer_idx=1 , .out_buffer_idx=-1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=0 , .is_softmax=1 , .b_offset=88 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=3 , .softmax_max_f=0.75 , .csh=1 , .ch=1 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=1 , .psh_shift=0 , .csw=1 , .cw=1 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=1 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=1 , .ow=1 , .oc=10 , .x_header= 152u, .x_header_p0= 152u, .w_header= 652835029144u, .w_header_p0= 152u , .debug_nhwc_words=10 }
};

#define X_BITS_L2 2
Expand Down
60 changes: 30 additions & 30 deletions run/work/sources.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/test/sv/axis_tb.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/test/sv/axi_sys_tb.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/test/sv/rtl_sim_top.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/test/sv/ext/demofull.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/test/sv/ext/axi_addr.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/test/sv/ext/skidbuffer.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/rtl_oc_top.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/dnn_engine.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axi_dma_rd.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axis_register.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axi_dma_wr.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axilite_ram.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axis_pipeline_register.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axilite_rd.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axilite_wr.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/xilinx_spwf.v
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/axis_out_shift.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/n_delay.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ram.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/proc_engine.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/cyclic_bram.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/counter.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/axis_weight_rotator.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/axis_pixels.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/dma_controller.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/xilinx_sdp.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axis_adapter.sv
/home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl/ext/alex_axis_adapter_any.sv
/home/dominus/axi-tb-sys/ndsf-final/run/work/config_hw.svh
/home/dominus/axi-tb-sys/ndsf-final/run/work/config_tb.svh
D:\cgra4ml\deepsocflow\test\sv\axis_tb.sv
D:\cgra4ml\deepsocflow\test\sv\axi_sys_tb.sv
D:\cgra4ml\deepsocflow\test\sv\rtl_sim_top.sv
D:\cgra4ml\deepsocflow\test\sv\ext\axi_addr.v
D:\cgra4ml\deepsocflow\test\sv\ext\demofull.v
D:\cgra4ml\deepsocflow\test\sv\ext\skidbuffer.v
D:\cgra4ml\deepsocflow\rtl\dnn_engine.v
D:\cgra4ml\deepsocflow\rtl\rtl_oc_top.v
D:\cgra4ml\deepsocflow\rtl\ext\alex_axilite_ram.v
D:\cgra4ml\deepsocflow\rtl\ext\alex_axilite_rd.v
D:\cgra4ml\deepsocflow\rtl\ext\alex_axilite_wr.v
D:\cgra4ml\deepsocflow\rtl\ext\alex_axis_pipeline_register.v
D:\cgra4ml\deepsocflow\rtl\ext\alex_axis_register.v
D:\cgra4ml\deepsocflow\rtl\ext\alex_axi_dma_rd.v
D:\cgra4ml\deepsocflow\rtl\ext\alex_axi_dma_wr.v
D:\cgra4ml\deepsocflow\rtl\ext\xilinx_spwf.v
D:\cgra4ml\deepsocflow\rtl\axis_out_shift.sv
D:\cgra4ml\deepsocflow\rtl\axis_pixels.sv
D:\cgra4ml\deepsocflow\rtl\axis_weight_rotator.sv
D:\cgra4ml\deepsocflow\rtl\counter.sv
D:\cgra4ml\deepsocflow\rtl\cyclic_bram.sv
D:\cgra4ml\deepsocflow\rtl\dma_controller.sv
D:\cgra4ml\deepsocflow\rtl\n_delay.sv
D:\cgra4ml\deepsocflow\rtl\proc_engine.sv
D:\cgra4ml\deepsocflow\rtl\ram.sv
D:\cgra4ml\deepsocflow\rtl\ext\alex_axis_adapter.sv
D:\cgra4ml\deepsocflow\rtl\ext\alex_axis_adapter_any.sv
D:\cgra4ml\deepsocflow\rtl\ext\xilinx_sdp.sv
D:\cgra4ml\run\work\config_hw.svh
D:\cgra4ml\run\work\config_tb.svh
6 changes: 3 additions & 3 deletions run/work/vivado_flow.tcl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

set PROJECT_NAME dsf_zcu104
set RTL_DIR /home/dominus/axi-tb-sys/ndsf-final/deepsocflow/rtl
set RTL_DIR D:/cgra4ml/deepsocflow/rtl
set CONFIG_DIR .

source config_hw.tcl
source /home/dominus/axi-tb-sys/ndsf-final/deepsocflow/tcl/fpga/zcu104.tcl
source /home/dominus/axi-tb-sys/ndsf-final/deepsocflow/tcl/fpga/vivado.tcl
source D:/cgra4ml/deepsocflow/tcl/fpga/zcu104.tcl
source D:/cgra4ml/deepsocflow/tcl/fpga/vivado.tcl

0 comments on commit eedd937

Please sign in to comment.