Skip to content

Commit

Permalink
LocalBus clock domain crossing host. To be converted to RTEFI client
Browse files Browse the repository at this point in the history
  • Loading branch information
Keith Penney committed Aug 30, 2024
1 parent 900bf91 commit a0eec8f
Show file tree
Hide file tree
Showing 5 changed files with 575 additions and 3 deletions.
9 changes: 6 additions & 3 deletions axi/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ include ../dir_list.mk
include $(BUILD_DIR)/top_rules.mk

.PHONY: all
all: axi_host_check axi_host_pipelined_check axi_lb_check axi_lb_cdc_check axi_channel_xdomain_check axi_cdc_check
all: axi_host_check axi_host_pipelined_check axi_lb_check axi_lb_cdc_check axi_channel_xdomain_check axi_cdc_check lb_test_host_check

axi_host_tb: axi_host_tb.v axi_host.v axi_dummy.v axi_delay.v
$(VERILOG_TB)
Expand All @@ -22,9 +22,12 @@ axi_lb_tb: axi_lb_tb.v axi_lb.v axi_host.v axi_delay.v $(LOCALBUS_DIR)/lb_dummy.
axi_lb_cdc_tb: axi_lb_cdc_tb.v axi_lb_cdc.v axi_host.v axi_delay.v $(DSP_DIR)/fifo_2c.v $(DSP_DIR)/dpram.v $(LOCALBUS_DIR)/lb_dummy.v $(LOCALBUS_DIR)/lb_delay.v
$(VERILOG_TB)

lb_test_host_tb: lb_test_host_tb.v lb_test_host.v lb_cdc.v $(DSP_DIR)/fifo_2c.v $(DSP_DIR)/dpram.v $(LOCALBUS_DIR)/lb_dummy.v
$(VERILOG_TB)

axi_channel_xdomain_tb: axi_channel_xdomain_tb.v axi_channel_xdomain.v channel_consumer.v channel_producer.v $(DSP_DIR)/fifo_2c.v $(DSP_DIR)/dpram.v

CLEAN+=axi_lb_cdc_tb axi_lb_tb axi_host_tb axi_host_pipelined_tb axi_cdc_tb axi_channel_xdomain_tb
CLEAN+=axi_lb_cdc.vcd axi_lb.vcd axi_host.vcd axi_host_pipelined.vcd axi_cdc.vcd axi_channel_xdomain.vcd
CLEAN+=axi_lb_cdc_tb axi_lb_tb axi_host_tb axi_host_pipelined_tb axi_cdc_tb axi_channel_xdomain_tb lb_test_host_tb
CLEAN+=axi_lb_cdc.vcd axi_lb.vcd axi_host.vcd axi_host_pipelined.vcd axi_cdc.vcd axi_channel_xdomain.vcd lb_test_host.vcd

include $(BUILD_DIR)/bottom_rules.mk
127 changes: 127 additions & 0 deletions axi/fifo_timing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#! python3

# Calculate FIFO depths and read enable delay based on clock frequencies
# and transaction rates

import math

# See mem_gate.md's note about MTU-limited single-beat transactions
RTEFI_MTU_LIMITED_MAX_XACTS = 183


def FIFO_OUT_depth(fh, fp, HOST_XACT_CYCLES=8, RLAT=3, MAX_XACTS=RTEFI_MTU_LIMITED_MAX_XACTS):
    """Estimate the depth (in entries) needed by the host-to-peripheral FIFO.

    Args:
        fh: Host clock frequency.
        fp: Peripheral clock frequency, in the same units as fh
            (doFifoTiming passes both in Hz).
        HOST_XACT_CYCLES: Host clock cycles between successive transactions.
        RLAT: Peripheral clock cycles consumed per transaction.
        MAX_XACTS: Maximum number of transactions in one burst.

    Returns:
        The estimated depth as a float; it is not rounded or converted to
        an address width here.

    Raises:
        ZeroDivisionError: if fh, fp, HOST_XACT_CYCLES or RLAT is zero.
    """
    # Time allowed for the clock-domain crossing: one period of each clock
    cdc0 = (1/fh + 1/fp)
    host_rate = fh/HOST_XACT_CYCLES     # transactions per unit time issued by the host
    periph_rate = fp/RLAT               # transactions per unit time consumed by the peripheral
    if host_rate > periph_rate:
        # The host outruns the peripheral: the backlog grows over the whole
        # burst, on top of what is issued while the first entry crosses.
        return MAX_XACTS*(1-HOST_XACT_CYCLES*periph_rate/fh) + cdc0*fh/HOST_XACT_CYCLES
    # The peripheral keeps up: only the entries issued during the crossing
    # time need to be buffered.
    return cdc0*fh/HOST_XACT_CYCLES


def FIFO_IN_depth(fh, fp, HOST_XACT_CYCLES=8, RLAT=3, MAX_XACTS=RTEFI_MTU_LIMITED_MAX_XACTS, ENABLE_DELAY=100):
    """Estimate the depth (in entries) needed by the peripheral-to-host FIFO.

    Args:
        fh: Host clock frequency.
        fp: Peripheral clock frequency, in the same units as fh.
        HOST_XACT_CYCLES: Host clock cycles between successive transactions.
        RLAT: Peripheral clock cycles consumed per transaction.
        MAX_XACTS: Maximum number of transactions in one burst.
        ENABLE_DELAY: Host clock cycles that elapse before the host side
            starts draining the FIFO.

    Returns:
        The estimated depth; MAX_XACTS itself when the whole burst lands in
        the FIFO before draining starts, otherwise a float.
    """
    crossing_time = (1/fh + 1/fp)
    # Time at which the first peripheral result exists: crossing + one RLAT
    first_result_time = crossing_time + RLAT/fp
    host_rate = fh/HOST_XACT_CYCLES
    drain_start_time = ENABLE_DELAY/fh

    if drain_start_time > MAX_XACTS*RLAT/fp:
        # The peripheral side finishes filling before the host side drains
        return MAX_XACTS

    # From here on the host side starts draining while the peripheral side
    # is still filling.  When True, only small clock ratios are supported.
    drains_right_away = (first_result_time + crossing_time) > drain_start_time

    if fp/RLAT < host_rate:
        # Peripheral slower than host (the case this design is tailored for)
        if drains_right_away:
            return crossing_time*fp/RLAT
        return (ENABLE_DELAY/fh - crossing_time)*fp/RLAT - 1

    # Peripheral can keep up with the host
    if drains_right_away:
        return fh*crossing_time/HOST_XACT_CYCLES
    return ENABLE_DELAY/HOST_XACT_CYCLES - (crossing_time + RLAT/fp)*fh/HOST_XACT_CYCLES


def min_Enable_delay(fh, fp, HOST_XACT_CYCLES=8, RLAT=3, MAX_XACTS=RTEFI_MTU_LIMITED_MAX_XACTS):
    """Compute the read-enable delay, expressed in host clock cycles.

    Args:
        fh: Host clock frequency.
        fp: Peripheral clock frequency, in the same units as fh.
        HOST_XACT_CYCLES: Host clock cycles between successive transactions.
        RLAT: Peripheral clock cycles consumed per transaction.
        MAX_XACTS: Maximum number of transactions in one burst.

    Returns:
        0 when the peripheral transaction rate is at least the host rate;
        otherwise the (float) number of host cycles by which the peripheral
        needs a head start.  A negative result is reported on stdout and
        still returned.
    """
    crossing_time = (1/fh + 1/fp)
    host_rate = fh/HOST_XACT_CYCLES
    if not (fp/RLAT < host_rate):
        # The peripheral keeps pace with the host; no delay is needed
        return 0

    # Peripheral slower than host (the case this design is tailored for):
    # host cycles the peripheral needs for the burst, less the host cycles
    # the host itself spends on the burst.
    periph_cycles = fh*(crossing_time + (MAX_XACTS + 1)*(RLAT/fp))
    host_cycles = HOST_XACT_CYCLES*MAX_XACTS
    delay = periph_cycles - host_cycles
    if delay < 0:
        print(f"enable_delay = {delay} < 0!")
        print(f"fh*(cdc0 + (MAX_XACTS + 1)*(RLAT/fp)) = {periph_cycles};"
              + f" HOST_XACT_CYCLES*MAX_XACTS = {host_cycles}")
    return delay


def clog2(n):
    """Return ceil(log2(n + 1)) as an int.

    For a non-negative integer n this is the number of bits needed to
    represent n (note the +1: clog2(4) is 3, not 2).  Fractional n is
    accepted as well.
    """
    exponent = math.log2(n + 1)
    return math.ceil(exponent)


def doFifoTiming(args):
    """Compute the CDC FIFO parameters and emit them as Verilog localparams.

    Args:
        args: Parsed command-line namespace providing:
            fh / ph: host clock frequency in MHz, or period in ns
                (fh takes precedence when it is not None);
            fp / pp: peripheral clock frequency in MHz, or period in ns;
            rlat: read latency in cycles;
            xact_cycles: host clock cycles between transactions;
            max_xacts: maximum transactions per burst;
            vh: output filename, or None to write to stdout.

    Returns:
        None.  The result is written to args.vh, or printed to stdout.
    """
    # Normalize both clocks to Hz
    if args.fh is not None:
        fh = 1.0e6*float(args.fh)
    else:
        fh = 1.0e9/float(args.ph)
    if args.fp is not None:
        fp = 1.0e6*float(args.fp)
    else:
        fp = 1.0e9/float(args.pp)
    rlat = int(args.rlat)
    host_xact_cycles = int(args.xact_cycles)
    max_xacts = int(args.max_xacts)

    # NOTE(review): the calculations use rlat+1 while the emitted header
    # reports the raw rlat - presumably one extra cycle per transaction is
    # budgeted; confirm against lb_cdc.v.
    timing = dict(HOST_XACT_CYCLES=host_xact_cycles, RLAT=rlat+1, MAX_XACTS=max_xacts)
    out_depth = FIFO_OUT_depth(fh, fp, **timing)
    en_delay = min_Enable_delay(fh, fp, **timing)
    in_depth = FIFO_IN_depth(fh, fp, ENABLE_DELAY=en_delay, **timing)

    FIFO_OUT_AW = clog2(out_depth)
    FIFO_IN_AW = clog2(in_depth)
    ENABLE_DELAY = math.ceil(en_delay)

    lines = (
        f"// freq(h_clk) = {fh*1.0e-6} MHz",
        f"// freq(p_clk) = {fp*1.0e-6} MHz",
        f"// RLAT = {rlat}",
        f"// XACT_CYCLES = {host_xact_cycles}",
        f"// MAX_XACTS = {max_xacts}",
        f"localparam FIFO_OUT_AW = {FIFO_OUT_AW};",
        f"localparam FIFO_IN_AW = {FIFO_IN_AW};",
        f"localparam ENABLE_DELAY = {ENABLE_DELAY};",
    )
    if args.vh is None:
        for line in lines:
            print(line)
    else:
        # The context manager guarantees the header file is flushed and
        # closed, even if a write fails part-way through.
        with open(args.vh, 'w') as fd:
            for line in lines:
                print(line, file=fd)
    return


if __name__ == "__main__":
    # Command-line entry point: parse the clock/transaction parameters and
    # emit the FIFO localparams via doFifoTiming().
    import argparse
    # Pass the title as `description`; the first positional parameter of
    # ArgumentParser is `prog`, which would put this sentence in the usage line.
    parser = argparse.ArgumentParser(description="LocalBus CDC FIFO Parameters Calculator")
    # Each clock is given either as a frequency or as a period, never both
    groupHost = parser.add_mutually_exclusive_group(required=True)
    groupHost.add_argument("--fh", default=None, type=float, help="Host clock frequency (in MHz)")
    groupHost.add_argument("--ph", default=None, type=float, help="Host clock period (in ns)")
    groupPeri = parser.add_mutually_exclusive_group(required=True)
    groupPeri.add_argument("--fp", default=None, type=float, help="Peripheral clock frequency (in MHz)")
    groupPeri.add_argument("--pp", default=None, type=float, help="Peripheral clock period (in ns)")
    # type= converters make malformed numbers a usage error instead of a traceback
    parser.add_argument("-r", "--rlat", default=3, type=int,
                        help="Read cycle latency: how many cycles to assert 'raddr' before latching 'rdata'")
    parser.add_argument("-c", "--xact_cycles", default=8, type=int,
                        help="Number of host clock cycles between transactions (1/xact_rate).")
    parser.add_argument("-x", "--max_xacts", default=RTEFI_MTU_LIMITED_MAX_XACTS, type=int,
                        help="Maximum number of transactions per burst (packet).")
    parser.add_argument("--vh", default=None, help="Filename for auto-generated Verilog Header (.vh) file.")
    args = parser.parse_args()
    doFifoTiming(args)
123 changes: 123 additions & 0 deletions axi/lb_cdc.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/* localbus clock-domain crossing via dual-clock FIFO */

// lb_cdc: carries LocalBus transactions from the lba_clk domain to the
// lbb_clk domain through fifo_a2b, and carries the read data back through
// fifo_b2a.
//
// Interface A receives addr/wdata/strobes (this module is the peripheral
// there); interface B drives addr/wdata/strobes (this module is the host
// there).
//
// The 'full' outputs of both FIFOs are left unconnected, so no backpressure
// is applied here: FIFO_OUT_AW / FIFO_IN_AW must be sized by the integrator.
// RDELAY must be at least 2 because of the tlatch_sr[RDELAY-2:0] slice.
module lb_cdc #(
  parameter AW = 24,
  parameter DW = 32,
  // address width of the A-to-B (request) FIFO
  parameter FIFO_OUT_AW = 4,
  // address width of the B-to-A (response) FIFO
  parameter FIFO_IN_AW = 4,
  // clock cycles between asserting addr and latching data
  parameter RDELAY = 3
) (
  // Interface A: peripheral
  input lba_clk,
  input [AW-1:0] lba_addr,
  input [DW-1:0] lba_wdata,
  output [DW-1:0] lba_rdata,
  input lba_wen,
  input lba_ren,
  input lba_wstb,
  input lba_rstb,
  // lba_clk controls
  input rdata_enable,       // allows response entries to be popped from fifo_b2a
  output rdata_rnw,         // the 'ren' flag that was stored with the current lba_rdata
  // Interface B: host
  input lbb_clk,
  output [AW-1:0] lbb_addr,
  output [DW-1:0] lbb_wdata,
  input [DW-1:0] lbb_rdata,
  output lbb_wen,
  output lbb_ren,
  output lbb_wstb,
  output lbb_rstb
);

// One request entry = 4 control flags + address + write data
localparam FIFO_A2B_DW = AW + DW + 4;

// ============================== lba_clk domain ==============================
// Queue a request on every cycle that carries a write or read strobe
wire a2b_we = lba_wstb | lba_rstb;
wire [FIFO_A2B_DW-1:0] a2b_din, a2b_dout;
assign a2b_din = {lba_rstb, lba_wstb, lba_ren, lba_wen, lba_addr, lba_wdata};

// Response read enable: single-cycle pulses, never on consecutive cycles,
// only while fifo_b2a holds data and rdata_enable is high.
reg b2a_re=1'b0;
wire b2a_empty;
always @(posedge lba_clk) begin
  b2a_re <= 1'b0;
  if (!b2a_empty) begin
    if (~b2a_re & rdata_enable) begin
      b2a_re <= 1'b1;
    end
  end
end

// ============================== lbb_clk domain ==============================
wire a2b_empty;
reg a2b_re=1'b0;

// Fields of the request currently at the output of fifo_a2b.
// Declared ahead of every use so that no implicit nets are created.
wire lbb_rstb_w, lbb_wstb_w, lbb_ren_w, lbb_wen_w;
wire [AW-1:0] lbb_addr_w;
wire [DW-1:0] lbb_wdata_w;
assign {lbb_rstb_w, lbb_wstb_w, lbb_ren_w, lbb_wen_w, lbb_addr_w, lbb_wdata_w} = a2b_dout;

// tlatch_sr delays each issued strobe: bit k is set k+1 cycles after it.
reg [RDELAY-1:0] tlatch_sr=0;
wire ren = |tlatch_sr;                  // a transaction is in flight
wire tlatch = tlatch_sr[RDELAY-1];      // capture lbb_rdata, RDELAY cycles after the strobe

always @(posedge lbb_clk) begin
  // Pop the next request only when none is in flight; a2b_re is a
  // single-cycle pulse and is never asserted on consecutive cycles.
  a2b_re <= 1'b0;
  if (!a2b_empty && !ren) begin
    if (~a2b_re) a2b_re <= 1'b1;
  end
  tlatch_sr <= {tlatch_sr[RDELAY-2:0], lbb_rstb | lbb_wstb};
end

// Request path: lba_clk -> lbb_clk
fifo_2c #(
  .aw(FIFO_OUT_AW),
  .dw(FIFO_A2B_DW)
) fifo_a2b (
  .wr_clk(lba_clk), // input
  .we(a2b_we), // input
  .din(a2b_din), // input [dw-1:0]
  .wr_count(), // output [aw:0]
  .full(), // output
  .rd_clk(lbb_clk), // input
  .re(a2b_re), // input
  .dout(a2b_dout), // output [dw-1:0]
  .rd_count(), // output [aw:0]
  .empty(a2b_empty) // output
);

// Response path: lbb_clk -> lba_clk.  An entry is stored for every issued
// strobe (write or read); the extra MSB records whether 'ren' was set.
wire [DW:0] b2a_din, b2a_dout;
assign b2a_din = {lbb_ren_w, lbb_rdata};
assign {rdata_rnw, lba_rdata} = b2a_dout;
fifo_2c #(
  .aw(FIFO_IN_AW),
  .dw(DW+1)
) fifo_b2a (
  .wr_clk(lbb_clk), // input
  .we(tlatch), // input
  .din(b2a_din), // input [dw-1:0]
  .wr_count(), // output [aw:0]
  .full(), // output
  .rd_clk(lba_clk), // input
  .re(b2a_re), // input
  .dout(b2a_dout), // output [dw-1:0]
  .rd_count(), // output [aw:0]
  .empty(b2a_empty) // output
);

// Interface B control outputs: wen and both strobes are gated by the pop
// pulse; ren is gated by the in-flight window instead.
// NOTE(review): this relies on a2b_dout holding the popped entry while
// 'ren' is high - confirm against fifo_2c's read behavior.
assign lbb_wen = lbb_wen_w & a2b_re;
assign lbb_ren = lbb_ren_w & ren;
assign lbb_wstb = lbb_wstb_w & a2b_re;
assign lbb_rstb = lbb_rstb_w & a2b_re;

// Address and write data are registered copies of the FIFO output
reg [AW-1:0] lbb_addr_r=0;
reg [DW-1:0] lbb_wdata_r=0;
assign lbb_addr = lbb_addr_r;
assign lbb_wdata = lbb_wdata_r;

always @(posedge lbb_clk) begin
  lbb_addr_r <= lbb_addr_w;
  lbb_wdata_r <= lbb_wdata_w;
end

endmodule
Loading

0 comments on commit a0eec8f

Please sign in to comment.