Skip to content

Commit

Permalink
Rename load_y, demo_full
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Jul 20, 2024
1 parent aa505e6 commit 5ba0f97
Show file tree
Hide file tree
Showing 13 changed files with 86 additions and 308 deletions.
7 changes: 2 additions & 5 deletions deepsocflow/c/deepsocflow_xilinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
#define debug_xil_printf xil_printf
#endif

static volatile uint8_t done_all = 0;

// Helper functions that might vary for different hardware platforms

static inline void write_flush_u8(u8* addr, u8 val) {
Expand Down Expand Up @@ -97,10 +95,9 @@ static inline void model_setup(){

XTime time_start, time_end;

static inline void model_run(){
static inline void model_run_timing(){
XTime_GetTime(&time_start);
set_config(4*A_START, 1);
load_y (&done_all);
model_run();
XTime_GetTime(&time_end);
}

Expand Down
46 changes: 16 additions & 30 deletions deepsocflow/c/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <stdint.h>
#include <stdio.h>
#include <math.h>
//#include <svdpi.h>

typedef const struct {
const int32_t n, l, kw, coe, coe_tl, r_ll, h, w, ci, co, w_kw2, t, p, cm, cm_p0, xp_words, ib_out;
Expand Down Expand Up @@ -229,7 +228,7 @@ static inline void tile_write( int32_t out_val, int8_t *p_out_buffer, int32_t ib

}

extern EXT_C void load_y (volatile uint8_t *p_done) {
extern EXT_C uint8_t model_run() {

static Bundle_t *pb = &bundles[0];
static int32_t it_bias=0;
Expand All @@ -241,7 +240,7 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {
int32_t ph_end, ph_beg_const, ixh_beg, xh_sweep;
int32_t pw_end, pw_beg_const, ixw_beg, xw_sweep;

static int8_t ocm_bank = 1;
static int8_t ocm_bank = 1; // The bank is flipped at the beginning of each loop iteration, so processing starts from bank 0
int32_t w_last, sram_addr;


Expand All @@ -263,7 +262,8 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {

#endif

// debug_printf("starting load_y");
debug_printf("Starting model_rn()");
set_config(4*A_START, 1);

for (ib = 0; ib < N_BUNDLES; ib++) {

Expand Down Expand Up @@ -293,27 +293,21 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {
for (il = 0; il < pb->l; il++) {
for (iw_kw2 = 0; iw_kw2 < pb->w_kw2; iw_kw2++) {

// starting from bank 0
ocm_bank = !ocm_bank;
w_last = iw_kw2 == pb->w_kw2-1 ? pb->kw/2+1 : 1;
//*p_base_addr_next = (uint64_t)&ocm[ocm_bank];
//*p_bpt_next = PE_ROWS * pb->coe * w_last * sizeof(Y_TYPE);
debug_printf("Inside the firmware domain, now wait for ocm %x\n\n", ocm_bank);
// Verify the ocm reg values

#ifdef SIM
DMA_WAIT:
// In simulation, return here so that SV can advance time; the next call jumps back to DMA_WAIT
if (!get_config(4*(A_DONE_WRITE + ocm_bank)))
return;
return 1;

char f_path_raw [1000], f_path_sum [1000]; // make sure full f_path_raw is shorter than 1000
sprintf(f_path_raw, "%s/%0d_%0d_%0d_y_raw_sim.txt", DATA_DIR, ib, ip, it);
sprintf(f_path_sum, "%s/%0d_y_sum_sim.txt", DATA_DIR, ib);
FILE *fp_raw = fopen(f_path_raw, "a");
FILE *fp_sum = fopen(f_path_sum, "a");
#else
//start_wait_output((UINTPTR)*p_base_addr_next, *p_bpt_next);
// in FPGA, wait for write done
while (!get_config(4*(A_DONE_WRITE + ocm_bank))){
};
Expand All @@ -324,10 +318,8 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {

#ifdef NDEBUG
// Flush the data just written by the PS to the DDR
//sleep(0.5);
Xil_DCacheFlushRange((INTPTR)&ocm[ocm_bank], PE_ROWS*PE_COLS*sizeof(Y_TYPE)) ;
#endif
debug_printf("Done write by the PL! Start reading and processing ocm %d\n", ocm_bank);
w_last = iw_kw2 == pb->w_kw2-1 ? pb->kw/2+1 : 1;
sram_addr=0;

Expand Down Expand Up @@ -513,20 +505,19 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {
fclose(fp_raw);
#endif
set_config(4*(A_DONE_READ + ocm_bank), 1);
debug_printf("done reading and processing ocm %d \n", ocm_bank);
debug_printf("firmware iw_kw2 0x%x done \n", iw_kw2);
// debug_printf("-------- iw_kw2 0x%x done \n", iw_kw2);
} // iw_kw2
iw_kw2 = 0;
debug_printf("firmware il %x done\n", il);
// debug_printf("-------- il %x done\n", il);
} // il
il = 0;
debug_printf("firmware in %x done\n", in);
// debug_printf("-------- in %x done\n", in);
} // in
in = 0;
debug_printf("firmware it %x done\n", it);
debug_printf("-------- it %x done\n", it);
} // it
it = 0;
debug_printf("firmware ip %x done\n", ip);
debug_printf("-------- ip %x done\n", ip);
} // ip

ip = 0;
Expand Down Expand Up @@ -562,18 +553,17 @@ extern EXT_C void load_y (volatile uint8_t *p_done) {
set_config(4*A_BUNDLE_DONE, 1);
} // ib
ib = 0;
debug_printf("done all bundles!!\n");
*p_done = 1;


debug_printf("done all bundles!!\n");
#ifdef SIM
is_first_call = 1;
#endif
return 0;
}


// Rest fo the helper functions used in simulation.
// Rest of the helper functions used in simulation.
#ifdef SIM

extern EXT_C void fill_memory (){
FILE *fp;
char f_path [1000];
Expand Down Expand Up @@ -611,7 +601,7 @@ extern EXT_C void model_setup(){
// Check if the mem region is legal
fill_memory();
// Set up all the config registers
//printf("Setting up config registers\n");
// printf("Setting up config registers\n");
set_config(4*A_START, 0); // Start
set_config(4*(A_DONE_READ+0), 1); // Done read ocm bank 0
set_config(4*(A_DONE_READ+1), 1); // Done read ocm bank 1
Expand Down Expand Up @@ -644,8 +634,4 @@ extern EXT_C void model_setup(){
//printf("Done setting up config registers and bram\n");
}

extern EXT_C void model_run(){
printf("Start...\n");
set_config(4*A_START, 1); // Start
}
#endif
#endif
4 changes: 1 addition & 3 deletions deepsocflow/c/xilinx_example.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@ int main()
xil_printf("Welcome to DeepSoCFlow!\n Store wbx at: %p; y:%p; buffers {0:%p,1:%p}; debug_nhwc:%p; debug_tiled:%p \n", &mem.w, &mem.y, &mem.out_buffers[0], &mem.out_buffers[1], &mem.debug_nhwc, &mem.debug_tiled);

model_setup();
model_run(); // run model and measure time

model_run_timing(); // run model and measure time
check_results();

hardware_cleanup();

return 0;
}
4 changes: 4 additions & 0 deletions deepsocflow/py/hardware.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import glob
from deepsocflow.py.utils import *
import deepsocflow
import time


class Hardware:
Expand Down Expand Up @@ -219,6 +220,7 @@ def simulate(self, SIM='verilator', SIM_PATH=''):


print("\n\nSIMULATING...\n\n")
start = time.time()

if SIM == 'xsim':
with open('build/xsim_cfg.tcl', 'w') as f:
Expand All @@ -228,6 +230,8 @@ def simulate(self, SIM='verilator', SIM_PATH=''):
subprocess.run(["vvp", "build/a.out"])
if SIM == 'verilator':
subprocess.run([f"./V{self.TB_MODULE}"], cwd="build")

print(f"\n\nSIMULATION TIME: {time.time()-start:.2f} seconds\n\n")


def export_vivado_tcl(self, board='zcu104', rtl_dir_abspath=None, scripts_dir_abspath=None, board_tcl_abspath=None):
Expand Down
4 changes: 2 additions & 2 deletions deepsocflow/rtl/ext/alex_axi_dma_rd.v
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ reg m_axi_rready_reg = 1'b0, m_axi_rready_next;

reg [AXI_DATA_WIDTH-1:0] save_axi_rdata_reg = {AXI_DATA_WIDTH{1'b0}};

wire [AXI_DATA_WIDTH-1:0] shift_axi_rdata = {m_axi_rdata, save_axi_rdata_reg} >> ((AXI_STRB_WIDTH-offset_reg)*AXI_WORD_SIZE);
wire [AXI_DATA_WIDTH-1:0] shift_axi_rdata = AXI_DATA_WIDTH'({m_axi_rdata, save_axi_rdata_reg} >> ((AXI_STRB_WIDTH-32'(offset_reg))*AXI_WORD_SIZE));

// internal datapath
reg [AXIS_DATA_WIDTH-1:0] m_axis_read_data_tdata_int;
Expand All @@ -298,7 +298,7 @@ assign m_axis_read_desc_status_valid = m_axis_read_desc_status_valid_reg;
assign m_axi_arid = {AXI_ID_WIDTH{1'b0}};
assign m_axi_araddr = m_axi_araddr_reg;
assign m_axi_arlen = m_axi_arlen_reg;
assign m_axi_arsize = AXI_BURST_SIZE;
assign m_axi_arsize = 3'(AXI_BURST_SIZE);
assign m_axi_arburst = 2'b01;
assign m_axi_arlock = 1'b0;
assign m_axi_arcache = 4'b0011;
Expand Down
116 changes: 48 additions & 68 deletions deepsocflow/test/sv/axi_sys_tb.sv
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,36 @@
`include "config_tb.svh"

module axi_sys_tb;
localparam ADDR_WIDTH = 40,
DATA_WR_WIDTH = 32,
STRB_WIDTH = 4,
DATA_RD_WIDTH = 32,
localparam ADDR_WIDTH = 40,
DATA_WR_WIDTH = 32,
STRB_WIDTH = 4,
DATA_RD_WIDTH = 32,
C_S_AXI_DATA_WIDTH = 128,
C_S_AXI_ADDR_WIDTH = 32,
LSB = $clog2(C_S_AXI_DATA_WIDTH)-3;


// SIGNALS
logic rstn = 0;
logic [ADDR_WIDTH-1:0] s_axil_awaddr;
logic [2:0] s_axil_awprot;
logic s_axil_awvalid;
logic s_axil_awready;
logic [ADDR_WIDTH-1:0] s_axil_awaddr;
logic [2:0] s_axil_awprot;
logic s_axil_awvalid;
logic s_axil_awready;
logic [DATA_WR_WIDTH-1:0] s_axil_wdata;
logic [STRB_WIDTH-1:0] s_axil_wstrb;
logic s_axil_wvalid;
logic s_axil_wready;
logic [1:0] s_axil_bresp;
logic s_axil_bvalid;
logic s_axil_bready;
logic [ADDR_WIDTH-1:0] s_axil_araddr;
logic [2:0] s_axil_arprot;
logic s_axil_arvalid;
logic s_axil_arready;
logic [STRB_WIDTH-1:0] s_axil_wstrb;
logic s_axil_wvalid;
logic s_axil_wready;
logic [1:0] s_axil_bresp;
logic s_axil_bvalid;
logic s_axil_bready;
logic [ADDR_WIDTH-1:0] s_axil_araddr;
logic [2:0] s_axil_arprot;
logic s_axil_arvalid;
logic s_axil_arready;
logic [DATA_RD_WIDTH-1:0] s_axil_rdata;
logic [1:0] s_axil_rresp;
logic s_axil_rvalid;
logic s_axil_rready;
logic [1:0] s_axil_rresp;
logic s_axil_rvalid;
logic s_axil_rready;
logic o_rd_pixel;
logic [C_S_AXI_ADDR_WIDTH-LSB-1:0] o_raddr_pixel;
logic [C_S_AXI_DATA_WIDTH-1:0] i_rdata_pixel;
Expand All @@ -45,79 +45,59 @@ module axi_sys_tb;
logic [C_S_AXI_DATA_WIDTH-1:0] o_wdata_output;
logic [C_S_AXI_DATA_WIDTH/8-1:0] o_wstrb_output;

bit y_done;

rtl_sim_top dut(.*);

logic clk = 0;
initial forever #(`CLK_PERIOD/2) clk = ~clk;


export "DPI-C" function get_config;
export "DPI-C" function set_config;
import "DPI-C" context function byte get_byte_32 (int unsigned addr);
import "DPI-C" context function void set_byte_32 (int unsigned addr, byte data);
import "DPI-C" context function void model_setup();
import "DPI-C" context function void model_run();
import "DPI-C" context function void load_y(inout bit p_done);
import "DPI-C" context function bit model_run();


function automatic get_config(input int offset);
if (offset < 16*4)
return dut.OC_TOP.CONTROLLER.cfg[offset/4];
else
return dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset/4-16];
if (offset < 16*4) return dut.OC_TOP.CONTROLLER.cfg[offset/4];
else return dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset/4-16];
endfunction


function automatic set_config(input int offset, input int data);
if (offset < 16*4)begin
//$display("Setting config[%x] = %x", offset/4, data);
dut.OC_TOP.CONTROLLER.cfg[offset/4] <= data;
end
else begin
//$display("Setting bram[%x] = %x", offset/4, data);
dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset/4-16] <= data;
end
if (offset < 16*4) dut.OC_TOP.CONTROLLER.cfg[offset/4] <= data;
else dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset/4-16] <= data;
endfunction

always_ff @(posedge clk ) begin : Axi_rw
if (o_rd_pixel) begin
for (int i = 0; i < C_S_AXI_DATA_WIDTH/8; i++) begin

always_ff @(posedge clk) begin : Axi_rw
if (o_rd_pixel)
for (int i = 0; i < C_S_AXI_DATA_WIDTH/8; i++)
i_rdata_pixel[i*8 +: 8] <= get_byte_32((o_raddr_pixel << LSB) + i);
end
end
if (o_rd_weights) begin
for (int i = 0; i < C_S_AXI_DATA_WIDTH/8; i++) begin
i_rdata_weights[i*8 +: 8] <= get_byte_32((o_raddr_weights << LSB) + i);
end
end
if (o_we_output) begin
for (int i = 0; i < C_S_AXI_DATA_WIDTH/8; i++) begin
if (o_wstrb_output[i]) begin
set_byte_32((o_waddr_output << LSB)+i, o_wdata_output[i*8 +: 8]);
end
end
end

if (o_rd_weights)
for (int i = 0; i < C_S_AXI_DATA_WIDTH/8; i++)
i_rdata_weights[i*8 +: 8] <= get_byte_32((o_raddr_weights << LSB) + i);

if (o_we_output)
for (int i = 0; i < C_S_AXI_DATA_WIDTH/8; i++)
if (o_wstrb_output[i])
set_byte_32((o_waddr_output << LSB) + i, o_wdata_output[i*8 +: 8]);
end


initial begin
$display("Start...");
//$dumpfile("axi_tb_sys.vcd");
//$dumpvars();

rstn = 0;
repeat(2) @(posedge clk);
#10ps;
repeat(2) @(posedge clk) #10ps;
rstn = 1;

model_setup();

repeat(2) @(posedge clk);
#10ps;
model_run();
while (1) begin
@(posedge clk);
#10ps;
load_y(y_done);
if (y_done) break;
end
repeat(2) @(posedge clk) #10ps;

while (model_run()) @(posedge clk) #10ps;
$finish;
end

Expand Down
Loading

0 comments on commit 5ba0f97

Please sign in to comment.