Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9cd8157
axi_xbar: Correct signal names in waveform file
colluca Jun 27, 2022
bab24f7
docs: Correct typos
colluca Jun 27, 2022
d416ee9
axi_mcast_xbar: Create copies of original IPs as starting point
colluca Jun 29, 2022
28071b7
axi_mcast_xbar: Add basic multicast logic
colluca Sep 7, 2024
6972683
axi_mcast_xbar: Correct deadlock condition
colluca Nov 25, 2022
5813d9f
axi_mcast_xbar: Cut valid->ready->commit combinational path
colluca Dec 7, 2022
9d6df68
axi_mcast_demux: Retrieve AW select index form from the mask
colluca Nov 23, 2022
7bce784
axi_mcast_xbar: Move AW address decoders after spill registers in demux
colluca Dec 12, 2022
29f1302
axi_mcast_demux: Merge B responses appropriately
colluca Dec 13, 2022
3992e5b
axi_mcast_xbar: Allow both regular and mask-based address rules
colluca Apr 26, 2023
1a401d0
axi_mcast_xbar: Allow multiple outstanding multicast transactions
colluca Jun 20, 2023
a64189a
axi_mcast_xbar: Filter multicast requests to unconnected slaves
colluca Sep 19, 2024
a7c1423
axi_mcast_xbar: Add to CI
colluca Sep 9, 2024
1e34421
Improved route decoding for collective operations (#1)
Lura518 Sep 16, 2025
bd1abff
Rebase onto latest master
colluca Sep 19, 2025
3fb5376
Continue rebase, make tests pass again
colluca Sep 22, 2025
eb5e61e
Implement unicast XBAR IPs from multicast IPs
colluca Sep 22, 2025
674d26e
Restrict UniqueIds to 0 when multicast is enabled
colluca Sep 23, 2025
3de08ff
Rename `axi_mcast_demux` to `axi_mcast_demux_mapped`
colluca Sep 23, 2025
5f90fb2
Minor cleanup
colluca Sep 23, 2025
05af798
Add assertions on multicast rule conversion
colluca Sep 24, 2025
fbc7c91
Address final TODOs
colluca Sep 24, 2025
cda42b9
axi_mcast_mux: Remove dependency of valid on ready
colluca Sep 25, 2025
6544830
axi_mcast_demux_mapped: Improve readability of assertions
colluca Sep 26, 2025
8e04779
axi_mcast_demux_mapped: Allow multicast XBAR with NoMulticastRules == 0
colluca Sep 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .ci/Memora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -328,5 +328,23 @@ artifacts:
- src/axi_xbar_unmuxed.sv
- src/axi_xbar.sv
- test/tb_axi_xbar.sv
- test/tb_axi_xbar_pkg.sv
outputs:
- build/axi_xbar-%.tested

axi_mcast_xbar-%:
inputs:
- Bender.yml
- include
- scripts/run_vsim.sh
- src/axi_pkg.sv
- src/axi_intf.sv
- src/axi_test.sv
- src/axi_mcast_demux.sv
- src/axi_err_slv.sv
- src/axi_mcast_mux.sv
- src/axi_mcast_xbar.sv
- test/tb_axi_mcast_xbar.sv
- test/tb_axi_xbar_pkg.sv
outputs:
- build/axi_mcast_xbar-%.tested
2 changes: 1 addition & 1 deletion .github/workflows/gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
jobs:
gitlab-ci:
runs-on: ubuntu-latest
timeout-minutes: 310
timeout-minutes: 360
steps:
- name: Check Gitlab CI
uses: pulp-platform/pulp-actions/gitlab-ci@v2
Expand Down
7 changes: 7 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,10 @@ axi_xbar:
<<: *run_vsim
variables:
TEST_MODULE: axi_xbar
timeout: 6h 00m

axi_mcast_xbar:
<<: *run_vsim
variables:
TEST_MODULE: axi_mcast_xbar
timeout: 6h 00m
10 changes: 9 additions & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package:
- "Thomas Benz <tbenz@iis.ee.ethz.ch>" # current maintainer
- "Michael Rogenmoser <michaero@iis.ee.ethz.ch>" # current maintainer
- "Matheus Cavalcante <matheusd@iis.ee.ethz.ch>"
- "Luca Colagrande <colluca@iis.ee.ethz.ch>"
- "Tim Fischer <fischeti@iis.ee.ethz.ch>"
- "Noah Huetter <huettern@ethz.ch>"
- "Cyril Koenig <cykoenig@iis.ee.ethz.ch>"
Expand All @@ -19,7 +20,7 @@ package:
- "Florian Zaruba <zarubaf@iis.ee.ethz.ch>"

dependencies:
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.37.0 }
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", rev: "multicast-xbar" }
common_verification: { git: "https://github.com/pulp-platform/common_verification.git", version: 0.2.5 }
tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.2 }

Expand All @@ -33,6 +34,7 @@ sources:
# Level 0
- src/axi_pkg.sv
# Level 1
- src/axi_demux_id_counters.sv
- src/axi_intf.sv
# Level 2
- src/axi_atop_filter.sv
Expand Down Expand Up @@ -63,6 +65,8 @@ sources:
- src/axi_lite_regs.sv
- src/axi_lite_to_apb.sv
- src/axi_lite_to_axi.sv
- src/axi_mcast_demux_simple.sv
- src/axi_mcast_mux.sv
- src/axi_modify_address.sv
- src/axi_mux.sv
- src/axi_rw_join.sv
Expand All @@ -80,6 +84,7 @@ sources:
- src/axi_from_mem.sv
- src/axi_id_serialize.sv
- src/axi_lfsr.sv
- src/axi_mcast_demux_mapped.sv
- src/axi_multicut.sv
- src/axi_to_axi_lite.sv
- src/axi_to_mem.sv
Expand All @@ -88,11 +93,13 @@ sources:
- src/axi_interleaved_xbar.sv
- src/axi_iw_converter.sv
- src/axi_lite_xbar.sv
- src/axi_mcast_xbar_unmuxed.sv
- src/axi_xbar_unmuxed.sv
- src/axi_to_mem_banked.sv
- src/axi_to_mem_interleaved.sv
- src/axi_to_mem_split.sv
# Level 5
- src/axi_mcast_xbar.sv
- src/axi_xbar.sv
# Level 6
- src/axi_xp.sv
Expand Down Expand Up @@ -130,6 +137,7 @@ sources:
- test/tb_axi_lite_to_apb.sv
- test/tb_axi_lite_to_axi.sv
- test/tb_axi_lite_xbar.sv
- test/tb_axi_mcast_xbar.sv
- test/tb_axi_modify_address.sv
- test/tb_axi_serializer.sv
- test/tb_axi_sim_mem.sv
Expand Down
2 changes: 1 addition & 1 deletion doc/axi_demux.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Setting the `UniqueIds` parameter to `1'b1` reduces the area complexity of the d

`2 * 2^AxiLookBits` counters track the number of [in-flight](../doc#in-flight) transactions. That is, for each ID in the (potentially) reduced set of IDs of `AxiLookBits` bits, there is one counter for write transactions and one for read transactions. Each counter can count up to (and including) `MaxTrans`, and there is a register that holds the index of the master port to which a counter is assigned.

When the demultiplexer gets an AW or an AR, it indexes the counters with the AXI ID. If the indexed counter has a value greater than zero and its master port index register is not equal to the index to which the AW or AR is to be sent, a transaction with the same direction and ID is already in flight to another master port. The demultiplexer then stalls the AW or AR. In all other cases, the demultiplexer forwards the AW or AR, increments the value of the indexed counter, and sets the master port index of the counter. A counter is decremented upon a handshake a B respectively last R beat at a slave port.
When the demultiplexer gets an AW or an AR, it indexes the counters with the AXI ID. If the indexed counter has a value greater than zero and its master port index register is not equal to the index to which the AW or AR is to be sent, a transaction with the same direction and ID is already in flight to another master port. The demultiplexer then stalls the AW or AR. In all other cases, the demultiplexer forwards the AW or AR, increments the value of the indexed counter, and sets the master port index of the counter. A write counter is decremented upon a handshake on the B channel of the slave port, and a read counter is decremented upon the handshake of the last beat on the R channel of the slave port.

W beats are routed to the master port defined by the value of `slv_aw_select_i` for the corresponding AW. As the order of the W bursts is given by the order of the AWs, the select signals are stored in a FIFO queue. This FIFO is pushed upon a handshake on the AW slave channel and popped upon a handshake of the last W beat of a burst on a W master channel.

Expand Down
2 changes: 1 addition & 1 deletion doc/axi_mux.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The opposite function to the AXI demultiplexer is performed by the AXI Multiplex

![Block-diagram of the AXI 4 Multiplexer Module.](axi_mux.png "Block-diagram of the AXI 4 Multiplexer Module.")

The Multiplexer module is has a simpler structure than the demultiplexer introduced in the previous section. The requests on the AW and AR channels get merged with the same round robin arbitration used for merging the responses in the demultiplexer. One key difference however is the mechanism how the multiplexer determines from which slave port a request came. It uses for this the higher bits of the `axi_id` field of a request. The number of bits can be calculated with:
The Multiplexer module has a simpler structure than the demultiplexer introduced in the previous section. The requests on the AW and AR channels get merged with the same round robin arbitration used for merging the responses in the demultiplexer. One key difference, however, is how the multiplexer determines from which slave port a request came: it uses the higher bits of the `axi_id` field of a request for this purpose. The number of bits can be calculated with:

```systemverilog
$clog2(NoSlavePorts)
Expand Down
4 changes: 2 additions & 2 deletions doc/axi_xbar.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

## Design Overview

`axi_xbar` is a fully-connected crossbar, which means that each master module that is connected to a *slave port* for of the crossbar has direct wires to all slave modules that are connected to the *master ports* of the crossbar.
`axi_xbar` is a fully-connected crossbar, which means that each master module that is connected to a *slave port* of the crossbar has direct wires to all slave modules that are connected to the *master ports* of the crossbar.
A block-diagram of the crossbar is shown below:

![Block-diagram showing the design of the full AXI4 Crossbar.](axi_xbar.png "Block-diagram showing the design of the full AXI4 Crossbar.")
Expand Down Expand Up @@ -49,7 +49,7 @@ The crossbar is configured through the `Cfg` parameter with a `axi_pkg::xbar_cfg
| `LatencyMode` | `enum logic [9:0]` | Latency on the individual channels, defined in detail in section *Pipelining and Latency* below. |
| `AxiIdWidthSlvPorts` | `int unsigned` | The AXI ID width of the slave ports. |
| `AxiIdUsedSlvPorts` | `int unsigned` | The number of slave port ID bits (starting at the least significant) the crossbar uses to determine the uniqueness of an AXI ID (see section *Ordering and Stalls* below). This value has to be less than or equal to `AxiIdWidthSlvPorts`. |
| `UniqueIds` | `bit` | If you can guarantee that the ID of each transaction is always unique among all in-flight transactions in the same direction, setting this parameter to `1'b1` simplifies the crossbar. See the [`axi_demux` documentation](axi_demux#ordering-and-stalls) for details. |
| `UniqueIds` | `bit` | If you can guarantee that the ID of each transaction is always unique among all in-flight transactions in the same direction, setting this parameter to `1'b1` simplifies the crossbar. See the [`axi_demux` documentation](axi_demux.md#ordering-and-stalls) for details. |
| `AxiAddrWidth` | `int unsigned` | The AXI address width. |
| `AxiDataWidth` | `int unsigned` | The AXI data width. |
| `NoAddrRules` | `int unsigned` | The number of address map rules. |
Expand Down
43 changes: 27 additions & 16 deletions scripts/run_vsim.sh
Original file line number Diff line number Diff line change
Expand Up @@ -175,21 +175,6 @@ exec_test() {
done
done
;;
axi_xbar)
for NumMst in 1 6; do
for NumSlv in 1 8; do
for Atop in 0 1; do
for Exclusive in 0 1; do
for UniqueIds in 0 1; do
call_vsim tb_axi_xbar -gTbNumMasters=$NumMst -gTbNumSlaves=$NumSlv \
-gTbEnAtop=$Atop -gTbEnExcl=$Exclusive \
-gTbUniqueIds=$UniqueIds
done
done
done
done
done
;;
axi_to_mem_banked)
for MEM_LAT in 1 2; do
for BANK_FACTOR in 1 2; do
Expand Down Expand Up @@ -219,7 +204,7 @@ exec_test() {
MST_ID=5
for DATA_WIDTH in 64 256; do
for PIPE in 0 1; do
call_vsim tb_axi_xbar -t 1ns -voptargs="+acc" \
call_vsim tb_axi_xbar -t 1ns \
-gTbNumMasters=$NUM_MST \
-gTbNumSlaves=$NUM_SLV \
-gTbAxiIdWidthMasters=$MST_ID \
Expand All @@ -241,6 +226,32 @@ exec_test() {
done
done
;;
axi_mcast_xbar)
for GEN_ATOP in 0 1; do
for NUM_MST in 1 6; do
for NUM_SLV in 2 9; do
for MST_ID_USE in 3 5; do
MST_ID=5
for DATA_WIDTH in 64 256; do
for PIPE in 0; do
for UNIQUE_IDS in 0; do
call_vsim tb_axi_mcast_xbar -t 1ns \
-gTbNumMasters=$NUM_MST \
-gTbNumMcastSlaves=$NUM_SLV \
-gTbAxiIdWidthMasters=$MST_ID \
-gTbAxiIdUsed=$MST_ID_USE \
-gTbAxiDataWidth=$DATA_WIDTH \
-gTbPipeline=$PIPE \
-gTbEnAtop=$GEN_ATOP \
-gTbUniqueIds=$UNIQUE_IDS
done
done
done
done
done
done
done
;;
*)
call_vsim tb_$1 -t 1ns -coverage -voptargs="+acc +cover=bcesfx"
;;
Expand Down
146 changes: 146 additions & 0 deletions src/axi_demux_id_counters.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// Copyright (c) 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Authors:
// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
// - Michael Rogenmoser <michaero@iis.ee.ethz.ch>
// - Thomas Benz <tbenz@iis.ee.ethz.ch>
// - Andreas Kurth <akurth@iis.ee.ethz.ch>

`include "common_cells/registers.svh"

// Per-ID in-flight transaction tracker.
//
// Instantiates `2**AxiIdBits` counters, one for every value of the considered ID bits. Next to
// each counter a register holds the master port select value that was supplied with the most
// recent push for that ID. A lookup port reads back the stored select and whether the counter of
// the looked-up ID is non-zero.
//
// Parameters:
// - `AxiIdBits`:         Width of the ID used to index the counters.
// - `CounterWidth`:      Bit width of each in-flight counter.
// - `mst_port_select_t`: Type of the stored master port select value.
module axi_demux_id_counters #(
// the lower bits of the AXI ID that should be considered, results in 2**AxiIdBits counters
parameter int unsigned AxiIdBits = 2,
// bit width of every in-flight counter
parameter int unsigned CounterWidth = 4,
// type of the master port select value stored per ID
parameter type mst_port_select_t = logic
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// lookup: combinational read of the entry indexed by `lookup_axi_id_i`
input logic [AxiIdBits-1:0] lookup_axi_id_i,
// stored master port select of the looked-up ID
output mst_port_select_t lookup_mst_select_o,
// high when the counter of the looked-up ID is non-zero
output logic lookup_mst_select_occupied_o,
// push: increments the counter of `push_axi_id_i` and stores `push_mst_select_i` for that ID
// `full_o` is high when at least one counter is full (see `cnt_full` below)
output logic full_o,
input logic [AxiIdBits-1:0] push_axi_id_i,
input mst_port_select_t push_mst_select_i,
input logic push_i,
// inject ATOPs in AR channel: increments the counter of `inject_axi_id_i`, the stored select is
// not updated by an inject
input logic [AxiIdBits-1:0] inject_axi_id_i,
input logic inject_i,
// pop: decrements the counter of `pop_axi_id_i`
input logic [AxiIdBits-1:0] pop_axi_id_i,
input logic pop_i,
// outstanding transactions: high when at least one counter is non-zero
output logic any_outstanding_trx_o
);
// one counter (and one select register) per possible value of the ID
localparam int unsigned NoCounters = 2**AxiIdBits;
typedef logic [CounterWidth-1:0] cnt_t;

// registers, each gets loaded when push_en[i]
mst_port_select_t [NoCounters-1:0] mst_select_q;

// counter signals, one bit per counter:
// - push_en / inject_en / pop_en: one-hot decoded event for the respective ID (all zero when the
//   corresponding strobe is low)
// - occupied: counter value is non-zero
// - cnt_full: counter is considered full
logic [NoCounters-1:0] push_en, inject_en, pop_en, occupied, cnt_full;

//-----------------------------------
// Lookup
//-----------------------------------
assign lookup_mst_select_o = mst_select_q[lookup_axi_id_i];
assign lookup_mst_select_occupied_o = occupied[lookup_axi_id_i];
//-----------------------------------
// Push and Pop
//-----------------------------------
// decode each ID into a one-hot enable vector, gated by its strobe
assign push_en = (push_i) ? (1 << push_axi_id_i) : '0;
assign inject_en = (inject_i) ? (1 << inject_axi_id_i) : '0;
assign pop_en = (pop_i) ? (1 << pop_axi_id_i) : '0;
// full as soon as any single counter is full, independent of the ID being pushed
assign full_o = |cnt_full;
//-----------------------------------
// Status
//-----------------------------------
assign any_outstanding_trx_o = |occupied;

// counters
for (genvar i = 0; i < NoCounters; i++) begin : gen_counters
logic cnt_en, cnt_down, overflow;
cnt_t cnt_delta, in_flight;
// Combine the events hitting counter `i` in this cycle into a single counter update. The net
// change equals (push + inject - pop); combinations with a net change of zero leave the
// counter disabled.
always_comb begin
unique case ({push_en[i], inject_en[i], pop_en[i]})
3'b001 : begin // pop_i = -1
cnt_en = 1'b1;
cnt_down = 1'b1;
cnt_delta = cnt_t'(1);
end
3'b010 : begin // inject_i = +1
cnt_en = 1'b1;
cnt_down = 1'b0;
cnt_delta = cnt_t'(1);
end
// 3'b011, inject_i & pop_i = 0 --> use default
3'b100 : begin // push_i = +1
cnt_en = 1'b1;
cnt_down = 1'b0;
cnt_delta = cnt_t'(1);
end
// 3'b101, push_i & pop_i = 0 --> use default
3'b110 : begin // push_i & inject_i = +2
cnt_en = 1'b1;
cnt_down = 1'b0;
cnt_delta = cnt_t'(2);
end
3'b111 : begin // push_i & inject_i & pop_i = +1
cnt_en = 1'b1;
cnt_down = 1'b0;
cnt_delta = cnt_t'(1);
end
default : begin // do nothing to the counters
cnt_en = 1'b0;
cnt_down = 1'b0;
cnt_delta = cnt_t'(0);
end
endcase
end

// in-flight counter of ID `i`: never cleared or loaded, only stepped up or down by `cnt_delta`
delta_counter #(
.WIDTH ( CounterWidth ),
.STICKY_OVERFLOW ( 1'b0 )
) i_in_flight_cnt (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.clear_i ( 1'b0 ),
.en_i ( cnt_en ),
.load_i ( 1'b0 ),
.down_i ( cnt_down ),
.delta_i ( cnt_delta ),
.d_i ( '0 ),
.q_o ( in_flight ),
.overflow_o ( overflow )
);
// any non-zero count means there is at least one transaction in flight for this ID
assign occupied[i] = |in_flight;
// full when the counter reports an overflow or its value is all ones
assign cnt_full[i] = overflow | (&in_flight);

// holds the selection signal for this id
`FFLARN(mst_select_q[i], push_mst_select_i, push_en[i], '0, clk_i, rst_ni)

// pragma translate_off
`ifndef VERILATOR
`ifndef XSIM
// Detect counter underflow: in the cycle after a pop on counter `i`, `overflow` must be low.
// NOTE(review): this presumes `delta_counter` also raises `overflow_o` when the count wraps
// below zero -- confirm against the common_cells implementation.
cnt_underflow: assert property(
@(posedge clk_i) disable iff (~rst_ni) (pop_en[i] |=> !overflow)) else
$fatal(1, "axi_demux_id_counters > Counter: %0d underflowed.\
The reason is probably a faulty AXI response.", i);
`endif
`endif
// pragma translate_on
end
endmodule

Loading