Skip to content

Commit b6bd646

Browse files
committed
cache hit timing optimization
1 parent 8f29ad5 commit b6bd646

File tree

9 files changed

+265
-358
lines changed

9 files changed

+265
-358
lines changed

hw/rtl/cache/VX_bank_flush.sv

+4 -11
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ module VX_bank_flush #(
3333
output wire flush_init,
3434
output wire flush_valid,
3535
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
36-
output wire [NUM_WAYS-1:0] flush_way,
36+
output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way,
3737
input wire flush_ready,
3838
input wire mshr_empty,
3939
input wire bank_empty
@@ -113,17 +113,10 @@ module VX_bank_flush #(
113113
assign flush_valid = (state == STATE_FLUSH);
114114
assign flush_line = counter[`CS_LINE_SEL_BITS-1:0];
115115

116-
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
117-
VX_decoder #(
118-
.N (`CS_WAY_SEL_BITS),
119-
.D (NUM_WAYS)
120-
) ctr_decoder (
121-
.sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
122-
.data_in (1'b1),
123-
.data_out (flush_way)
124-
);
116+
if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way
117+
assign flush_way = counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS];
125118
end else begin : g_flush_way_all
126-
assign flush_way = {NUM_WAYS{1'b1}};
119+
assign flush_way = '0;
127120
end
128121

129122
endmodule

hw/rtl/cache/VX_cache_bank.sv

+63 -114
Large diffs are not rendered by default.

hw/rtl/cache/VX_cache_data.sv

+56 -67
Original file line number | Diff line number | Diff line change
@@ -41,46 +41,31 @@ module VX_cache_data #(
4141
input wire read,
4242
input wire write,
4343
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
44-
input wire [NUM_WAYS-1:0] evict_way,
44+
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
4545
input wire [NUM_WAYS-1:0] tag_matches,
4646
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
4747
input wire [`CS_WORD_WIDTH-1:0] write_word,
4848
input wire [WORD_SIZE-1:0] write_byteen,
4949
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
5050
// outputs
51-
output wire [`CS_WORD_WIDTH-1:0] read_data,
52-
output wire line_dirty,
53-
output wire [`CS_LINE_WIDTH-1:0] evict_data,
51+
output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx,
52+
output wire [`CS_LINE_WIDTH-1:0] read_data,
53+
output wire evict_dirty,
5454
output wire [LINE_SIZE-1:0] evict_byteen
5555
);
5656
`UNUSED_PARAM (WORD_SIZE)
5757
`UNUSED_VAR (stall)
5858

59-
localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1;
60-
61-
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
62-
6359
if (WRITEBACK != 0) begin : g_writeback
6460
localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0);
65-
wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r;
66-
67-
VX_onehot_encoder #(
68-
.N (NUM_WAYS)
69-
) fill_way_enc (
70-
.data_in (evict_way),
71-
.data_out (evict_way_idx),
72-
`UNUSED_PIN (valid_out)
73-
);
74-
75-
`BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1);
7661

7762
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata;
7863
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata;
7964
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren;
8065

8166
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
8267
wire evict = fill || flush;
83-
wire evict_way_en = (NUM_WAYS == 1) || evict_way[i];
68+
wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i);
8469
wire dirty_data = write; // only asserted on writes
8570
wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]);
8671
if (DIRTY_BYTES != 0) begin : g_dirty_bytes
@@ -121,54 +106,47 @@ module VX_cache_data #(
121106
);
122107

123108
if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen
124-
assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r];
109+
assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx];
125110
end else begin : g_line_dirty
126-
assign line_dirty = byteen_rdata[evict_way_idx_r];
111+
assign evict_dirty = byteen_rdata[way_idx];
127112
assign evict_byteen = '1;
128113
end
129114

130-
assign evict_data = line_rdata[evict_way_idx_r];
131-
132115
end else begin : g_no_writeback
133116
`UNUSED_VAR (init)
134117
`UNUSED_VAR (flush)
135-
assign line_dirty = 0;
136-
assign evict_data = '0;
118+
assign evict_dirty = 0;
137119
assign evict_byteen = '0;
138120
end
139121

140-
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store
141-
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
142-
wire [BYTEENW-1:0] line_wren;
122+
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
143123

144-
wire fill_way_en = (NUM_WAYS == 1) || evict_way[i];
124+
if (WRITE_ENABLE) begin : g_data_store
125+
// create a single write-enable block ram to reduce area overhead
126+
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
127+
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren;
128+
wire line_write;
129+
wire line_read;
145130

146-
if (WRITE_ENABLE != 0) begin : g_wdata
131+
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata
132+
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
147133
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
148134
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
149135
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
150136
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
151137
end
152-
assign line_wdata = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
153-
assign line_wren = {LINE_SIZE{fill && fill_way_en}}
154-
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
155-
156-
end else begin : g_ro_wdata
157-
`UNUSED_VAR (write)
158-
`UNUSED_VAR (write_byteen)
159-
`UNUSED_VAR (write_word)
160-
`UNUSED_VAR (word_idx)
161-
assign line_wdata = fill_data;
162-
assign line_wren = fill_way_en;
138+
assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
139+
assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}}
140+
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
163141
end
164142

165-
wire line_write = fill || (write && WRITE_ENABLE);
166-
wire line_read = read || ((fill || flush) && WRITEBACK);
143+
assign line_write = fill || (write && WRITE_ENABLE);
144+
assign line_read = read || ((fill || flush) && WRITEBACK);
167145

168146
VX_sp_ram #(
169-
.DATAW (`CS_LINE_WIDTH),
147+
.DATAW (NUM_WAYS * `CS_LINE_WIDTH),
170148
.SIZE (`CS_LINES_PER_BANK),
171-
.WRENW (BYTEENW),
149+
.WRENW (NUM_WAYS * LINE_SIZE),
172150
.OUT_REG (1)
173151
) data_store (
174152
.clk (clk),
@@ -178,35 +156,46 @@ module VX_cache_data #(
178156
.wren (line_wren),
179157
.addr (line_idx),
180158
.wdata (line_wdata),
181-
.rdata (line_rdata[i])
159+
.rdata (line_rdata)
182160
);
161+
end else begin : g_data_store
162+
`UNUSED_VAR (write)
163+
`UNUSED_VAR (write_byteen)
164+
`UNUSED_VAR (write_word)
165+
`UNUSED_VAR (word_idx)
166+
167+
// we don't merge the ways into a single block ram due to WREN overhead
168+
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways
169+
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
170+
VX_sp_ram #(
171+
.DATAW (`CS_LINE_WIDTH),
172+
.SIZE (`CS_LINES_PER_BANK),
173+
.OUT_REG (1)
174+
) data_store (
175+
.clk (clk),
176+
.reset (reset),
177+
.read (read),
178+
.write (fill && fill_way_en),
179+
.wren (1'b1),
180+
.addr (line_idx),
181+
.wdata (fill_data),
182+
.rdata (line_rdata[i])
183+
);
184+
end
183185
end
184186

185-
wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx;
187+
wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx;
188+
186189
VX_onehot_encoder #(
187190
.N (NUM_WAYS)
188-
) hit_idx_enc (
191+
) way_idx_enc (
189192
.data_in (tag_matches),
190-
.data_out (hit_way_idx),
193+
.data_out (hit_idx),
191194
`UNUSED_PIN (valid_out)
192195
);
193196

194-
if (`CS_WORDS_PER_LINE > 1) begin : g_read_data
195-
// order the data layout to perform ways multiplexing last.
196-
// this allows converting way index to binary in parallel with BRAM read and word indexing.
197-
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
198-
VX_transpose #(
199-
.DATAW (`CS_WORD_WIDTH),
200-
.N (NUM_WAYS),
201-
.M (`CS_WORDS_PER_LINE)
202-
) transpose (
203-
.data_in (line_rdata),
204-
.data_out (transposed_rdata)
205-
);
206-
assign read_data = transposed_rdata[word_idx][hit_way_idx];
207-
end else begin : g_read_data_1w
208-
`UNUSED_VAR (word_idx)
209-
assign read_data = line_rdata[hit_way_idx];
210-
end
197+
`BUFFER_EX(way_idx, (read ? hit_idx : evict_way), ~stall, 1);
198+
199+
assign read_data = line_rdata[way_idx];
211200

212201
endmodule

hw/rtl/cache/VX_cache_define.vh

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
`define CS_LINE_WIDTH (8 * LINE_SIZE)
2323
`define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS)
2424
`define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS)
25+
`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS)
2526

2627
`define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS))
2728
`define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)

0 commit comments

Comments
 (0)