Skip to content

Commit

Permalink
cleanup, added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
awengz committed Jul 18, 2024
1 parent 2579c40 commit e05c7ac
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 49 deletions.
35 changes: 8 additions & 27 deletions deepsocflow/rtl/axis_weight_rotator.sv
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ module axis_weight_rotator #(
logic [COLS-1:0] bram_m_valid, bram_reg_m_valid;
logic [COLS-1:0] sb_valid, sb_ready;
logic [COLS-1:0][WORD_WIDTH-1:0] sb_data;
logic [COLS-1:0][BITS_SB_CNTR-1:0] fill_skid_buffer_cntr; // count cycles for skid buffer to get filled
logic [COLS-1:0][BITS_SB_CNTR-1:0] fill_skid_buffer_cntr;
logic [COLS-1:0] en_count_config, l_config, l_kw, l_cin, l_cols, l_blocks, l_xn, f_kw, f_cin, f_cols, lc_config, lc_kw, lc_cin, lc_cols, lc_blocks, lc_xn;
logic [COLS-1:0] last_config;
typedef struct packed {
Expand Down Expand Up @@ -151,6 +151,11 @@ module axis_weight_rotator #(
endcase
end


// FILL_SKID_BUFFER_CNTR
// This counter counts cycles for skid buffer to get filled.
// The read state machine stays in IDLE state with RAM rden=1 for 2*DELAY_W_RAM cycles so that
// the skid buffer is completely filled with data when it enters the read state.
always_ff @(posedge aclk `OR_NEGEDGE(aresetn))
for(int col=0; col<COLS; col = col+1) begin
if (!aresetn || state_read[col]==R_SWITCH_S) fill_skid_buffer_cntr[col]<= 0;
Expand Down Expand Up @@ -249,7 +254,7 @@ module axis_weight_rotator #(
end

case (state_read[j])
R_PASS_CONFIG_S, R_READ_S : bram_m_ready [i][j] = m_axis_tready[j]; // TODO check in sim if working correctly
R_PASS_CONFIG_S, R_READ_S : bram_m_ready [i][j] = m_axis_tready[j];
R_SWITCH_S : done_read_next [i][j] = 1;
endcase
end
Expand Down Expand Up @@ -460,39 +465,15 @@ module axis_sync #(
input logic pixels_m_valid,
input tuser_st [COLS-1:0] weights_m_user,
input logic [COLS-1:0] pixels_m_valid_pipe,
//input logic [1:0] weights_rd_state,
output logic [COLS-1:0] m_axis_tvalid, weights_m_ready,
output logic pixels_m_ready
);

//logic pixels_m_valid_pipe_0; // verilator compile

//assign pixels_m_valid_pipe[0] = (m_axis_tready[0]) ? pixels_m_valid: 1'b0;
//assign pixels_m_valid_pipe[0] = (m_axis_tready[0] && (weights_rd_state[1:0]==2'b10)) ? pixels_m_valid: 1'b0;
//assign pixels_m_valid_pipe[0] = pixels_m_ready ? pixels_m_valid: 1'b0;

generate //TODO: pixels_m_valid should be pipelined?
generate
for (genvar i=0; i<COLS; i++) begin
//always_ff@(posedge aclk) begin
// if (i>0) begin
//if (m_axis_tready[i]) pixels_m_valid_pipe[i] <= pixels_m_valid_pipe[i-1]; // is if() condition necessary?
//pixels_m_valid_pipe[i] <= (|m_axis_tready[i:0]) ? pixels_m_valid_pipe[i-1] : 1'b0;
// pixels_m_valid_pipe[i] <= pixels_m_valid_pipe[i-1];
//weights_m_ready[i] <= weights_m_ready[i-1];
//m_axis_tvalid[i] <= m_axis_tvalid[i-1];
// end
//else if (m_axis_tready[i]) pixels_m_valid_pipe[i] <= pixels_m_valid_pipe_0;
//else if (m_axis_tready[i]) pixels_m_valid_pipe[i] <= pixels_m_valid_pipe_0;
//else begin
// if (m_axis_tready[1]) pixels_m_valid_pipe[i] <= pixels_m_valid_pipe_0;
//end

//end
assign m_axis_tvalid[i] = weights_m_valid[i];// && (pixels_m_valid_pipe[i] || weights_m_user[i].is_config);
assign weights_m_ready[i] = m_axis_tready[i] && (pixels_m_valid_pipe[i] || weights_m_user[i].is_config);
end
endgenerate
//assign m_axis_tvalid[0] = weights_m_valid[0] && (pixels_m_valid_pipe[0] || weights_m_user[0].is_config);
//assign weights_m_ready[0] = m_axis_tready[0] && (pixels_m_valid_pipe[0] || weights_m_user[0].is_config);
assign pixels_m_ready = m_axis_tready[0] && weights_m_valid[0] && !weights_m_user[0].is_config;
endmodule
28 changes: 11 additions & 17 deletions deepsocflow/rtl/proc_engine.sv
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ module proc_engine #(
output logic [W_BPT-1:0] m_bytes_per_transfer
);

logic [COLS-1:1] pixels_m_valid_pipe_reg; // fix verilator compile
logic [COLS-1:1] pixels_m_valid_pipe_reg; // fix for verilator compile - it does not allow a variable to be both continuously and procedurally assigned.
logic [COLS-1:0] en;
logic force_en, force_en_reset;
logic [COLS-1:0] acc_m_valid_next, acc_m_valid;
Expand Down Expand Up @@ -91,16 +91,10 @@ module proc_engine #(
assign s_valid_cols_sel[c_1] = acc_m_user[c_1].is_w_last ? lut_valid_last[acc_m_user[c_1].kw2][c_1] : lut_valid[acc_m_user[c_1].kw2][c_1];
assign s_last_cols_sel[c_1] = acc_m_user[c_1].is_w_last ? lut_last_pkt [acc_m_user[c_1].kw2][c_1] : lut_last [acc_m_user[c_1].kw2][c_1];
end
//assign valid_mask = !acc_m_user[0].is_w_first_kw2 && !acc_m_user[0].is_config;
//assign s_valid_cols_sel = acc_m_user[COLS-1].is_w_last ? lut_valid_last[acc_m_user[COLS-1].kw2] : lut_valid[acc_m_user[COLS-1].kw2];
//assign s_last_cols_sel = acc_m_user[COLS-1].is_w_last ? lut_last_pkt [acc_m_user[COLS-1].kw2] : lut_last [acc_m_user[COLS-1].kw2];

//logic [$clog2(COLS+1)-1:0] counter;
enum {IDLE, SHIFT} state;


assign s_ready = clken_mul;

// assign pixels_m_valid_pipe[0] = (s_ready[0]) ? pixels_m_valid: 1'b0;
// pixels_m_valid_pipe[i] indicates whether column i has a valid pixel or not.
assign pixels_m_valid_pipe[0] = pixels_m_valid;

generate
Expand All @@ -113,6 +107,7 @@ generate
end
end
//assign weights_m_ready[i] = s_ready[i] && (pixels_m_valid_pipe[i] || s_user[i].is_config);
// s_valid is the valid signal from weights_rotator. It is ANDed with (pixels_m_valid_pipe[i] || s_user[i].is_config) to get the combined valid signal to send to the MAC.
assign s_axis_tvalid[i] = s_valid[i] && (pixels_m_valid_pipe[i] || s_user[i].is_config);
if (i>0) assign pixels_m_valid_pipe[i] = pixels_m_valid_pipe_reg[i];
end
Expand Down Expand Up @@ -191,7 +186,9 @@ endgenerate

n_delay #(.N(DELAY_MUL-1), .W(M_BITS)) MUL_PIPE (.c(clk), .rng(resetn), .rnl(1'b1), .e(clken_mul[c]), .i(mul_comb), .o (mul_m_data[c][r]));

// changed shift_data to FF instead of wire, so that it has previous cycle data. This is because the column i will get sel_shift one cycle after column i-1.
// changed shift_data to FF instead of wire, so that it has previous cycle data.
// shift_data[i] is loaded with acc_m_data[i-1] when acc_m_valid[i-1] is high.
// This is because the column i will get sel_shift one cycle after column i-1.
if(c == 0) begin
always_ff @ (posedge clk `OR_NEGEDGE(resetn)) begin
if(!resetn) shift_data [c][r] <= '0;
Expand Down Expand Up @@ -259,7 +256,7 @@ endgenerate
end
end

// en_outshift enables the output shifter. The first condition is to shift data out,
// en_outshift enables the output shifter register. The first condition is to shift data out,
// and the second condition is for the accumulator to write to the output shifter.
assign en_outshift[co] = (m_ready & outshift_flag[co]) | ~sel_outshift[co];

Expand Down Expand Up @@ -301,16 +298,12 @@ endgenerate
shift_last_pkt[co] <= (sel_outshift[co]) ? shift_last_pkt[co] : {acc_m_last[co]} & lut_last_pkt[acc_m_user[co].kw2][co];
shift_valid[co] <= (sel_outshift[co]) ? shift_valid[co] : s_valid_cols_sel[co] & valid_mask[co];
shift_last[co] <= (sel_outshift[co]) ? shift_last[co] :s_last_cols_sel[co];
shift_out_ready[co] <= (sel_outshift[co]) ? 1'b1 : 1'b0;
shift_out_ready[co] <= (sel_outshift[co]) ? 1'b1 : 1'b0; // shift_out_ready[0] becomes 1 when data is shifted out, becomes 0 if it is loaded with accumulator data.
end
end
end
end

//always_ff @(posedge clk `OR_NEGEDGE(resetn))
//if (!resetn) counter <= 0;
//else if (!shift_out_ready[COLS-1] && m_ready) counter <= counter == COLS ? 0 : counter + 1;

assign m_data = shift_data_out [COLS-1];
assign m_valid = shift_valid[COLS-1] & outshift_flag[COLS-1];
assign m_last = shift_last [COLS-1];
Expand All @@ -322,6 +315,7 @@ endgenerate
//assign en[0] = ~acc_m_valid[0] | shift_out_ready[0];
for(c=0; c<COLS; c++) begin
if(c<COLS-1) begin
// Column c is frozen when its accumulator has valid data and shift_out_ready is not set for both the current column (c) and the next column (c+1).
assign mac_freeze[c] = (acc_m_valid[c] & ~(shift_out_ready[c] & shift_out_ready[c+1]));
//assign en[c] = (~acc_m_valid[c] | shift_out_ready[c] & shift_out_ready[c+1]);
always_ff @(posedge clk `OR_NEGEDGE(resetn))
Expand All @@ -333,7 +327,7 @@ endgenerate
else if (~en[c] & acc_m_valid[c] & shift_out_ready[c] & shift_out_ready[c+1]) acc_m_valid[c] <= acc_m_valid_next[c]; // if handshake happens when en is 0, acc_m_valid should become low
end
end
else begin
else begin // Final Column
assign mac_freeze[c] = (acc_m_valid[c] & ~shift_out_ready[c]);
//assign en[c] = (~acc_m_valid[c] | shift_out_ready[c]);
always_ff @(posedge clk `OR_NEGEDGE(resetn))
Expand Down
8 changes: 4 additions & 4 deletions run/param_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import sys
sys.path.append("../../")
from deepsocflow import Bundle, Hardware, QModel, QInput
import tensorflow as tf
tf.keras.utils.set_random_seed(0)
# import tensorflow as tf
# tf.keras.utils.set_random_seed(0)
# Simulator: xsim on windows, verilator otherwise
(SIM, SIM_PATH) = ('xsim', "E:/Vivado/2023.2/bin/") if os.name=='nt' else ('verilator', '')

Expand All @@ -28,8 +28,8 @@ def product_dict(**kwargs):
ram_edges_depth = [ 288 ],
axi_width = [ 128 ],
target_cpu_int_bits = [ 32 ],
valid_prob = [ 1 ],
ready_prob = [ 1 ],
valid_prob = [ 0.1 ],
ready_prob = [ 0.01 ],
data_dir = ['vectors'],
)))
def test_dnn_engine(PARAMS):
Expand Down
2 changes: 1 addition & 1 deletion run/work/config_fw.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Bundle_t bundles [N_BUNDLES] = {
{.n=1 , .l=1 , .kw=5 , .coe=4 , .coe_tl=4 , .r_ll=5 , .h=5 , .w=6 , .ci=8 , .co=8 , .w_kw2=4 , .t=2 , .p=2 , .cm=4 , .cm_p0=4 , .xp_words=84 , .ib_out=4 , .w_bpt=256 , .w_bpt_p0=256 , .x_bpt=184 , .x_bpt_p0=184 , .o_words=672 , .o_bytes=368 , .x_pad=6 , .in_buffer_idx=0 , .out_buffer_idx=1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=0 , .is_bias=1 , .is_flatten=0 , .is_softmax=0 , .b_offset=32 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=1 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=0 , .softmax_max_f=0 , .csh=1 , .ch=5 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=5 , .psh_shift=0 , .csw=1 , .cw=6 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=6 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=5 , .ow=6 , .oc=8 , .x_header= 81946u, .x_header_p0= 81946u, .w_header= 652835110938u, .w_header_p0= 81946u , .debug_nhwc_words=240 },
{.n=1 , .l=1 , .kw=3 , .coe=8 , .coe_tl=8 , .r_ll=5 , .h=5 , .w=6 , .ci=8 , .co=24 , .w_kw2=5 , .t=3 , .p=2 , .cm=6 , .cm_p0=2 , .xp_words=84 , .ib_out=5 , .w_bpt=232 , .w_bpt_p0=88 , .x_bpt=268 , .x_bpt_p0=100 , .o_words=1152 , .o_bytes=608 , .x_pad=6 , .in_buffer_idx=1 , .out_buffer_idx=0 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=0 , .is_softmax=0 , .b_offset=40 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=0 , .ca_shift=12 , .ca_pl_scale=0 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=0 , .softmax_max_f=0 , .csh=1 , .ch=5 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=5 , .psh_shift=0 , .csw=1 , .cw=6 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=6 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=5 , .ow=6 , .oc=24 , .x_header= 81961u, .x_header_p0= 81929u, .w_header= 584115634217u, .w_header_p0= 81929u , .debug_nhwc_words=720 },
{.n=1 , .l=1 , .kw=1 , .coe=24 , .coe_tl=0 , .r_ll=5 , .h=5 , .w=6 , .ci=24 , .co=10 , .w_kw2=6 , .t=1 , .p=2 , .cm=20 , .cm_p0=4 , .xp_words=48 , .ib_out=6 , .w_bpt=256 , .w_bpt_p0=64 , .x_bpt=496 , .x_bpt_p0=112 , .o_words=2400 , .o_bytes=1440 , .x_pad=0 , .in_buffer_idx=0 , .out_buffer_idx=1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=1 , .is_softmax=0 , .b_offset=64 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=0 , .softmax_max_f=0 , .csh=1 , .ch=5 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=5 , .psh_shift=0 , .csw=1 , .cw=6 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=6 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=1 , .ow=1 , .oc=300 , .x_header= 82072u, .x_header_p0= 81944u, .w_header= 652835111064u, .w_header_p0= 81944u , .debug_nhwc_words=300 },
{.n=1 , .l=1 , .kw=1 , .coe=24 , .coe_tl=0 , .r_ll=1 , .h=1 , .w=1 , .ci=300 , .co=10 , .w_kw2=1 , .t=1 , .p=15 , .cm=20 , .cm_p0=20 , .xp_words=8 , .ib_out=-1 , .w_bpt=256 , .w_bpt_p0=256 , .x_bpt=96 , .x_bpt_p0=96 , .o_words=10 , .o_bytes=40 , .x_pad=0 , .in_buffer_idx=1 , .out_buffer_idx=-1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=0 , .is_softmax=1 , .b_offset=88 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=3 , .softmax_max_f=0.875 , .csh=1 , .ch=1 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=1 , .psh_shift=0 , .csw=1 , .cw=1 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=1 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=1 , .ow=1 , .oc=10 , .x_header= 152u, .x_header_p0= 152u, .w_header= 652835029144u, .w_header_p0= 152u , .debug_nhwc_words=10 }
{.n=1 , .l=1 , .kw=1 , .coe=24 , .coe_tl=0 , .r_ll=1 , .h=1 , .w=1 , .ci=300 , .co=10 , .w_kw2=1 , .t=1 , .p=15 , .cm=20 , .cm_p0=20 , .xp_words=8 , .ib_out=-1 , .w_bpt=256 , .w_bpt_p0=256 , .x_bpt=96 , .x_bpt_p0=96 , .o_words=10 , .o_bytes=40 , .x_pad=0 , .in_buffer_idx=1 , .out_buffer_idx=-1 , .add_out_buffer_idx=-1, .add_in_buffer_idx=-1, .is_bias=1 , .is_flatten=0 , .is_softmax=1 , .b_offset=88 , .b_val_shift=9 , .b_bias_shift=0 , .ca_nzero=1 , .ca_shift=15 , .ca_pl_scale=3 , .aa_nzero=0 , .aa_shift=0 , .aa_pl_scale=0 , .pa_nzero=0 , .pa_shift=0 , .pa_pl_scale=0 , .softmax_frac=3 , .softmax_max_f=0.625 , .csh=1 , .ch=1 , .csh_shift=0 , .pkh=1 , .psh=1 , .ph=1 , .psh_shift=0 , .csw=1 , .cw=1 , .csw_shift=0 , .pkw=1 , .psw=1 , .pw=1 , .psw_shift=0 , .pool=POOL_NONE , .on=1 , .oh=1 , .ow=1 , .oc=10 , .x_header= 152u, .x_header_p0= 152u, .w_header= 652835029144u, .w_header_p0= 152u , .debug_nhwc_words=10 }
};

#define X_BITS_L2 2
Expand Down

0 comments on commit e05c7ac

Please sign in to comment.