diff --git a/Documentation/01_Module_Docs/10_L1_Data_Cache.md b/Documentation/01_Module_Docs/10_L1_Data_Cache.md index 18f1ad07..b4f9f5fe 100644 --- a/Documentation/01_Module_Docs/10_L1_Data_Cache.md +++ b/Documentation/01_Module_Docs/10_L1_Data_Cache.md @@ -1,41 +1,72 @@ -# THIS MODULE IS NOT YET OUTLINED # - -# L1 Data Cache # -(Any Notes would go here) - -## Contents -* [Inputs](#inputs) -* [Outputs](#outputs) -* [Functionality](#functionality) - * [Registers](#registers) - * [Clk](#on-posedge-clk) - * [Active low reset](#asynchronous-active-low-reset) - -## Inputs -|Name|Bits wide| -|:---|:---:| -|```name```|#-bit| -|```name```|#-bit| - -## Outputs -|Name|Bits wide| -|:---|:---:| -|```name```|#-bit| -|```name```|#-bit| +# L1_Data_Cache Module + +## Overview +`L1_Data_Cache` is a Verilog module designed to simulate an L1 data cache with a write-back policy and a least recently used (LRU) replacement algorithm. This module is a part of a larger memory hierarchy system and interacts with lower memory levels (like L2 cache). + +## Module Interface + +### Inputs +- **clk**: Clock signal. +- **reset**: Reset signal. +- **write_enable**: Enable signal for write operations. +- **read_enable**: Enable signal for read operations. +- **request_address**: Address for the current read/write request. +- **write_data**: Data to be written into the cache. +- **l2_response_data**: Data received from the L2 cache. +- **l2_ready**: Signal indicating readiness of the L2 cache. + +### Outputs +- **response_data**: Data returned in response to a read operation. +- **c_state**: Current state of the cache (for debugging/monitoring). +- **l2_request**: Signal to request data from L2 cache. +- **l2_write_enable**: Enable signal for writing data to L2 cache. +- **l2_address**: Address for L2 cache operations. +- **l2_write_data**: Data to be written to the L2 cache. + +## Key Parameters and Constants +- **CACHE_SIZE**: Total size of the L1 cache (4 KB). 
+- **BLOCK_SIZE**: Size of each cache block (4 bytes). +- **ASSOCIATIVITY**: Set associativity of the cache (2-way). +- **DATA_WIDTH**: Width of the data (32 bits). +- **NUM_SETS**: Number of sets in the cache. +- **ADDR_WIDTH**: Width of the memory address. + +## Internal Design + +### SRAM Module +- The cache utilizes an `sram_module` for storing and retrieving cache data. + +### Cache Structure +- The cache is organized into sets and ways, with arrays for tags, valid bits, and dirty bits. +- LRU counters are maintained for each set to implement the LRU replacement policy. + +### State Machine +- The cache operates as a Mealy finite state machine with states: `IDLE`, `CHECK_TAG`, `WRITEBACK`, and `FILL`. ## Functionality -### Registers - - #-bit ```name``` register - - #-bit ```name``` register -### On posedge clk - - ```somebranch = someval``` - - Use a table when necessary if statements are used: - - ```name``` - |Name|Bits wide| - |---|---| - |```name == 0```|```reg = val```| - |```name == 1```|```reg = val```| - - -### Asynchronous active low reset - - Register values reset to 0 + +### Cache Operations +- **Read/Write Access**: On receiving a read or write request, the cache checks for a hit or miss. +- **Write-Back**: On a read or write miss whose victim line is dirty, the victim's data is written back to the lower memory level before the fill. +- **Cache Fill**: On a miss, data is loaded from the lower memory level. + +### LRU Algorithm +- The cache uses an LRU algorithm to determine which cache line to replace on a miss. + +### Reset Logic +- A reset functionality is provided to clear the cache and reset its state. + +## Implementation Details + +### Tasks and Functions +- Several tasks and functions are used to modularize operations like handling cache hits, cache misses, LRU calculations, and SRAM interactions. + +### Handling Cache States +- The module defines specific tasks for handling different cache states and transitioning between them based on the current operation. 
+ +## Notes & Scope for Improvements +- This module is designed with considerations for modularity and performance optimization. It can be integrated into larger systems requiring cache functionality with LRU management. +- Possible improvements: ECC, byte-accessible data, a non-blocking cache, and different cache controllers. + + +--- diff --git a/Documentation/02_Complex_Module_Functions/03_L1_Data_Cache.md b/Documentation/02_Complex_Module_Functions/03_L1_Data_Cache.md index d4b35f04..b53dc450 100644 --- a/Documentation/02_Complex_Module_Functions/03_L1_Data_Cache.md +++ b/Documentation/02_Complex_Module_Functions/03_L1_Data_Cache.md @@ -1 +1,83 @@ -(Note: This document is currently incomplete) +# L1_Data_Cache Module Detailed Documentation + +## Overview +The `L1_Data_Cache` module in Verilog simulates an L1 data cache with write-back policy and LRU (Least Recently Used) replacement strategy. It's designed to interface with an L2 cache in a memory hierarchy. + +## Module Structure + +### Inputs and Outputs + +#### Inputs +| Input | Description | Bit Width | +|-------------------|---------------------------------------|-----------| +| `clk` | Clock signal | 1 | +| `reset` | Reset signal | 1 | +| `write_enable` | Enable signal for write operations | 1 | +| `read_enable` | Enable signal for read operations | 1 | +| `request_address` | Address for read/write request | 32 | +| `write_data` | Data to write into cache | 32 | +| `l2_response_data`| Data received from L2 cache | 32 | +| `l2_ready` | Signal indicating L2 cache readiness | 1 | + +#### Outputs +| Output | Description | Bit Width | +|-------------------|---------------------------------------|-----------| +| `response_data` | Data output for read operations | 32 | +| `c_state` | Current state of cache (debugging) | 2 | +| `l2_request` | Request signal for L2 cache | 1 | +| `l2_write_enable` | Write enable for L2 cache | 1 | +| `l2_address` | Address for L2 cache operations | 32 | +| `l2_write_data` | Data to write to L2 cache | 32 | + +### 
Parameters and Constants + +| Parameter | Description | Value / Formula | +|-------------------|-------------------------------------|--------------------------------| +| `CACHE_SIZE` | Total size of L1 cache | 4 * 1024 (4 KB) | +| `BLOCK_SIZE` | Size of each cache block | 4 bytes | +| `ASSOCIATIVITY` | Set associativity of cache | 2-way | +| `DATA_WIDTH` | Width of data | 32 bits | +| `BLOCK_WIDTH` | Width of each cache block | `BLOCK_SIZE * 8` | +| `NUM_SETS` | Number of sets in cache | `CACHE_SIZE / (BLOCK_SIZE * ASSOCIATIVITY)` | +| `ADDR_WIDTH` | Width of memory address | 32 bits | +| `OFFSET_WIDTH` | Width of block offset | Calculated from `BLOCK_SIZE` | +| `INDEX_WIDTH` | Width of cache set index | Calculated from `NUM_SETS` | +| `TAG_WIDTH` | Width of cache tag | `ADDR_WIDTH - OFFSET_WIDTH - INDEX_WIDTH` | + +### Cache Structure + +| Component | Description | Type / Bit Width | +|-------------------|-----------------------------------|---------------------------------| +| `cache_tags` | Stores tags for each cache line | Array of TAG_WIDTH | +| `valid` | Indicates if a cache line is valid| Array of 1-bit flags | +| `dirty` | Indicates if a cache line is dirty| Array of 1-bit flags | +| `lru_counter` | LRU counters for each cache line | Array of counters | + +## Functional Overview + +### State Machine +The cache operates through a state machine with distinct states for handling various operations: +- **IDLE**: Awaiting read or write requests. +- **CHECK_TAG**: Checking if the requested address is in the cache (cache hit) or not (cache miss). +- **WRITEBACK**: Writing back data to the L2 cache for dirty cache lines on a miss. +- **FILL**: Fetching data from the L2 cache to fill the cache line on a miss. + +### Cache Operations +- **Read/Write**: Handles read and write operations, checking for cache hits or misses. +- **Write-Back**: Writes back dirty data to L2 cache on misses. +- **Cache Fill**: Loads data from L2 cache on misses. 
+
+### LRU Algorithm
+- Implements an LRU algorithm for cache line replacement.
+
+### Modular Design
+- The module leverages tasks and functions to compartmentalize operations such as hit/miss handling, LRU logic, and SRAM interactions.
+
+### State Handling
+- Dedicated tasks manage state transitions and actions, ensuring clear and maintainable state logic.
+
+## Integration and Performance
+- The design is optimized for integration into systems requiring L1 cache functionality, with a focus on performance and modularity.
+
+
+---
diff --git a/dv/sVerilogTests/L1_Data_Cache.sv b/dv/sVerilogTests/L1_Data_Cache.sv
new file mode 100644
index 00000000..f037d572
--- /dev/null
+++ b/dv/sVerilogTests/L1_Data_Cache.sv
@@ -0,0 +1,324 @@
+`timescale 1ns / 1ps
+
+// Testbench for L1_Data_Cache. A behavioural L2 model at the end of the module answers the
+// fill and write-back requests of the DUT; the test tasks drive requests with fixed delays
+// and compare response_data with the words preloaded into the L2 model.
+//
+// NOTE(review): every stimulus change lands at t = 5 (mod 10) ns, i.e. on a rising clock
+// edge, and the L2 model uses blocking assignments in a clocked block, so the fixed
+// #21/#51/#81 windows depend on simulator event ordering.
+module tb_L1_Data_Cache;
+    parameter CLOCK_PERIOD = 10;
+    reg clk;
+    reg reset;
+    reg write_enable;
+    reg read_enable;
+    reg [31:0] request_address;
+    reg [31:0] write_data;
+    wire [31:0] response_data;
+    wire [1:0] c_state;
+
+    // L2 Cache
+    // NOTE(review): l2_request and l2_write_enable are connected to DUT outputs but are
+    // declared as variables and also assigned in reset_signals; a variable driven through a
+    // port must not be assigned procedurally as well. Turning them into wires is only safe
+    // once the DUT resets these outputs itself - confirm against the RTL.
+    reg [31:0] l2_response_data;
+    reg l2_ready;
+    reg l2_request;
+    reg l2_write_enable;
+    wire [31:0] l2_address;
+    wire [31:0] l2_write_data;
+
+    // Hardcoded parameters for the L2 cache model
+    localparam L2_CACHE_SIZE = 16 * 1024;   // Cache size: 16 KB
+    localparam L2_BLOCK_SIZE = 4;           // Block size: 4 bytes (32 bits)
+    localparam L2_ASSOCIATIVITY = 4;        // 4-way
+    localparam L2_DATA_WIDTH = 32;
+
+    localparam L2_BLOCK_WIDTH = L2_BLOCK_SIZE * 8;
+    localparam L2_NUM_SETS = L2_CACHE_SIZE/(L2_BLOCK_SIZE * L2_ASSOCIATIVITY); // 1024 sets
+    localparam L2_ADDR_WIDTH = 32;
+
+    // Calculating the number of bits for offset, index, and tag
+    localparam L2_OFFSET_WIDTH = $clog2(L2_BLOCK_SIZE);   // 2 bits
+    localparam L2_INDEX_WIDTH = $clog2(L2_NUM_SETS);      // 10 bits
+    localparam L2_TAG_WIDTH = L2_ADDR_WIDTH - L2_OFFSET_WIDTH - L2_INDEX_WIDTH; // 20 bits
+
+    // L2 Cache Address Decomposition
+    reg [L2_TAG_WIDTH-1:0] current_tag;
+    reg [L2_INDEX_WIDTH-1:0] current_index;
+    reg [L2_OFFSET_WIDTH-1:0] current_offset;
+
+    L1_Data_Cache dut (
+        .clk(clk),
+        .reset(reset),
+        .write_enable(write_enable),
+        .read_enable(read_enable),
+        .request_address(request_address),
+        .write_data(write_data),
+        .response_data(response_data),
+        .c_state(c_state),
+
+        .l2_response_data(l2_response_data),
+        .l2_ready(l2_ready),
+        .l2_request(l2_request),
+        .l2_write_enable(l2_write_enable),
+        .l2_address(l2_address),
+        .l2_write_data(l2_write_data)
+    );
+
+    // L2 Cache Data Storage
+    reg [L2_BLOCK_WIDTH-1:0] l2_cache_data [0: L2_NUM_SETS-1][0: L2_ASSOCIATIVITY-1];
+    reg [L2_TAG_WIDTH - 1:0] l2_cache_tags [0:L2_NUM_SETS-1][0:L2_ASSOCIATIVITY-1];
+
+    // Preload three L2 sets. The four tags of each set map onto a single L1 set, so reading
+    // them forces evictions in the 2-way L1. Trailing comments give the matching address.
+    initial begin
+
+        l2_cache_data [512][0] <= 32'hAAAAAAAA;
+        l2_cache_data [512][1] <= 32'hBBBBBBBB;
+        l2_cache_data [512][2] <= 32'hCCCCCCCC;
+        l2_cache_data [512][3] <= 32'hDDDDDDDD;
+
+        l2_cache_data [524][0] <= 32'hEEEEEEEE;
+        l2_cache_data [524][1] <= 32'hFFFFFFFF;
+        l2_cache_data [524][2] <= 32'hABCDEFAB;
+        l2_cache_data [524][3] <= 32'hFEDCBAFE;
+
+        l2_cache_data [1023][0] <= 32'h11111111;
+        l2_cache_data [1023][1] <= 32'h22222222;
+        l2_cache_data [1023][2] <= 32'h33333333;
+        l2_cache_data [1023][3] <= 32'h44444444;
+
+
+        l2_cache_tags [512][0] <= 20'h00000; //32'h00000802
+        l2_cache_tags [512][1] <= 20'hABCDE; //32'hABCDE802
+        l2_cache_tags [512][2] <= 20'hAAAAA; //32'hAAAAA802
+        l2_cache_tags [512][3] <= 20'hFFFFF; //32'hFFFFF802
+
+        l2_cache_tags [524][0] <= 20'h00000; // 32'h00000832
+        l2_cache_tags [524][1] <= 20'hABCDE; // 32'hABCDE832
+        l2_cache_tags [524][2] <= 20'hAAAAA; // 32'hAAAAA832
+        l2_cache_tags [524][3] <= 20'hFFFFF; // 32'hFFFFF832
+
+        l2_cache_tags [1023][0] <= 20'h00000; // 32'h00000FFF
+        l2_cache_tags [1023][1] <= 20'hABCDE; // 32'hABCDEFFF
+        l2_cache_tags [1023][2] <= 20'hAAAAA; // 32'hAAAAAFFF
+        l2_cache_tags [1023][3] <= 20'hFFFFF; // 32'hFFFFFFFF
+    end
+
+    initial begin
+        clk =0;
+        forever #(CLOCK_PERIOD/2) clk = ~clk;
+    end
+
+    initial begin
+        reset_signals();
+        edge_cases_testing();
+        sequential_consecutive_testing();
+        $finish;
+    end
+
+    // Boundary sets: four lines compete for L1 set 0, then four for L1 set 511.
+    task edge_cases_testing();
+        begin
+            // ================= L1 SET 0 test, L2 set 512 =================
+            test_cache_read_miss(32'h00000802, 32'hAAAAAAAA);
+            test_cache_read_miss(32'hABCDE802, 32'hBBBBBBBB);
+            test_cache_read_miss(32'hAAAAA802, 32'hCCCCCCCC);
+            test_cache_read_miss(32'hFFFFF802, 32'hDDDDDDDD);
+
+            test_cache_read_hit (32'hAAAAA802, 32'hCCCCCCCC);
+            test_cache_read_hit (32'hFFFFF802, 32'hDDDDDDDD);
+
+            test_cache_read_miss(32'h00000802, 32'hAAAAAAAA);
+            test_cache_read_miss(32'hABCDE802, 32'hBBBBBBBB);
+
+            test_cache_read_hit (32'h00000802, 32'hAAAAAAAA);
+            test_cache_read_hit (32'hABCDE802, 32'hBBBBBBBB);
+
+            // ================= L1 SET 511 test, L2 set 1023 =================
+            test_cache_read_miss(32'h00000FFF, 32'h11111111);
+            test_cache_read_miss(32'hABCDEFFF, 32'h22222222);
+            test_cache_read_miss(32'hAAAAAFFF, 32'h33333333);
+            test_cache_read_miss(32'hFFFFFFFF, 32'h44444444);
+
+            test_cache_read_hit (32'hAAAAAFFF, 32'h33333333);
+            test_cache_read_hit (32'hFFFFFFFF, 32'h44444444);
+
+            test_cache_read_miss(32'h00000FFF, 32'h11111111);
+            test_cache_read_miss(32'hABCDEFFF, 32'h22222222);
+
+            test_cache_read_hit (32'h00000FFF, 32'h11111111);
+            test_cache_read_hit (32'hABCDEFFF, 32'h22222222);
+        end
+    endtask
+
+    // Read/write sequence on L2 set 524; the comments track the expected LRU counter of each
+    // L1 way and the line it holds.
+    task sequential_consecutive_testing;
+        begin
+            // ================= SEQUENTIAL TESTING =================
+            read_enable =1;
+            request_address = 32'h00000832;
+            #51 read_enable = 0; #9; // fill and read
+            // lru counter 0: 1 && lru counter 1: 0 - 32'h00000832
+
+            #10 read_enable = 1;
+            request_address = 32'hABCDE832;
+            #51 read_enable = 0; #9;
+            // lru counter 0: 0 - 32'hABCDE832 && lru counter 1: 1 - 32'h00000832
+
+            test_cache_read_hit (32'h00000832, 32'hEEEEEEEE);
+            // lru counter 0: 1 - 32'hABCDE832 && lru counter 1: 0 - 32'h00000832
+
+            test_cache_read_hit (32'hABCDE832, 32'hFFFFFFFF);
+            // lru counter 0: 0 - 32'hABCDE832 && lru counter 1: 1 - 32'h00000832
+
+            test_cache_read_miss(32'hAAAAA832, 32'hABCDEFAB);
+            // lru counter 0: 1 - 32'hABCDE832 && lru counter 1: 0 - 32'hAAAAA832
+
+            test_cache_read_hit (32'hABCDE832, 32'hFFFFFFFF);
+            // lru counter 0: 0 - 32'hABCDE832 && lru counter 1: 1 - 32'hAAAAA832
+
+            test_cache_read_miss(32'h00000832, 32'hEEEEEEEE);
+            // lru counter 0: 1 - 32'hABCDE832 && lru counter 1: 0 - 32'h00000832
+
+            test_cache_write_hit(32'hABCDE832, 32'hBEEF_DEAD);
+            // dirty 1 for 32'hABCDE832 with beef_dead
+            // lru counter 0: 0 - 32'hABCDE832 && lru counter 1: 1 - 32'h00000832
+
+            test_cache_read_miss(32'hAAAAA832, 32'hABCDEFAB);
+            // lru counter 0: 1 - 32'hABCDE832 && lru counter 1: 0 - 32'hAAAAA832
+
+            test_cache_read_writeback_miss(32'hFFFFF832, 32'hFEDCBAFE);
+            // lru counter 0: 0 - 32'hFFFFF832 && lru counter 1: 1 - 32'hAAAAA832
+
+            test_cache_read_hit(32'hAAAAA832, 32'hABCDEFAB);
+            test_cache_read_hit(32'hFFFFF832, 32'hFEDCBAFE);
+            test_cache_read_miss(32'h00000832, 32'hEEEEEEEE);
+            // NOTE(review): 32'hABCDE832 was overwritten with 32'hBEEF_DEAD above and evicted
+            // as a dirty line. 32'hFFFFFFFF is only expected here because the L2 model below
+            // discards write-backs; revisit this value once the model stores them.
+            test_cache_read_miss(32'hABCDE832, 32'hFFFFFFFF);
+        end
+    endtask
+
+    // Drives every testbench-owned signal to its idle value and releases reset 15 ns later.
+    task reset_signals;
+        begin
+            reset = 1;
+            write_enable = 0;
+            read_enable = 0;
+            l2_ready = 0;
+            l2_request = 0;
+            l2_write_enable = 0;
+            write_data = 0;
+            l2_response_data = 0;
+
+            #15 reset = 0;
+        end
+    endtask
+
+    // Read expected to hit: the request is held for 21 ns, then response_data is checked.
+    task test_cache_read_hit(input [L2_ADDR_WIDTH-1:0] addr, input [31:0] expected_data);
+        begin
+            #10 read_enable = 1;
+            request_address = addr;
+
+            #21 read_enable = 0;
+            assert(response_data == expected_data) else $display("Read Hit Test Failed for address %h", addr);
+            #9;
+        end
+    endtask
+
+    // Read expected to evict a dirty line first (write-back, then fill): 81 ns window.
+    task test_cache_read_writeback_miss(input[L2_ADDR_WIDTH-1:0] addr, input [31:0] expected_data);
+        begin
+            #10 read_enable = 1;
+            request_address = addr;
+
+            #81 read_enable = 0;
+            assert(response_data == expected_data) else $display("Read Writeback Miss Test Failed for address %h", addr);
+            #9;
+        end
+    endtask
+
+    // Write expected to hit: the request is held for 31 ns; nothing is checked.
+    task test_cache_write_hit(input [L2_ADDR_WIDTH-1:0] addr, input [31:0] data);
+        begin
+            #10 write_enable = 1;
+            request_address = addr;
+            write_data = data;
+            #31 write_enable = 0; #9;
+        end
+    endtask
+
+    // Read expected to miss on a clean victim (fill only): 51 ns window.
+    task test_cache_read_miss(input [L2_ADDR_WIDTH-1:0] addr, input [31:0] expected_data);
+        begin
+            #10 read_enable = 1;
+            request_address = addr;
+            #51;
+            read_enable = 0;
+            assert(response_data == expected_data) else $display("Read Miss Test Failed for address %h", addr);
+            #9;
+        end
+    endtask
+
+    // Write expected to miss: 41 ns window, nothing is checked. Not called by the tests above.
+    task test_cache_write_miss(input [L2_ADDR_WIDTH-1:0] addr, input [31:0] data);
+        begin
+            write_enable = 1;
+            request_address = addr;
+            write_data = data;
+            #41;
+            write_enable = 0;
+        end
+    endtask
+
+    // L2 Cache Simulation Logic
+    // On a rising edge with l2_request high and l2_ready low the tag is looked up. A matching
+    // read returns the stored word; a matching write is acknowledged but NOT stored (the
+    // store below is commented out); without a match 32'hdeadbeef is returned. l2_ready is
+    // raised in the same time step and dropped 10 ns later.
+    always @(posedge clk) begin
+        integer i;
+        reg hit;
+        reg [31:0] selected_data;
+
+        if (l2_request && !l2_ready) begin
+
+            current_offset = l2_address[L2_OFFSET_WIDTH-1 -: L2_OFFSET_WIDTH];
+            current_index = l2_address[(L2_ADDR_WIDTH - L2_TAG_WIDTH - 1)-: L2_INDEX_WIDTH];
+            current_tag = l2_address[L2_ADDR_WIDTH-1 -: L2_TAG_WIDTH];
+
+            hit = 0;
+            for (i = 0; i < L2_ASSOCIATIVITY; i = i + 1) begin
+                if (l2_cache_tags[current_index][i] == current_tag) begin
+
+                    hit = 1;
+                    selected_data = l2_cache_data[current_index][i];
+                    break;
+                end
+            end
+
+            if (hit) begin
+                if (l2_write_enable) begin
+//                  l2_cache_data[current_index][i] = l2_write_data;
+                end else begin
+                    l2_response_data = selected_data;
+                end
+                l2_ready = 1;
+                #10 l2_ready = 0;
+            end else begin
+                l2_response_data = 32'hdeadbeef;
+                l2_ready = 1;
+                #10 l2_ready = 0;
+            end
+        end
+    end
+endmodule
+
+
diff --git a/dv/sVerilogTests/edgeTest#1.png b/dv/sVerilogTests/edgeTest#1.png
new file mode 100644
index 00000000..c06ea924
Binary files /dev/null and b/dv/sVerilogTests/edgeTest#1.png differ
diff --git a/dv/sVerilogTests/edgeTest#2.png b/dv/sVerilogTests/edgeTest#2.png
new file mode 100644
index 00000000..a3e9a5d2
Binary files /dev/null and b/dv/sVerilogTests/edgeTest#2.png differ
diff --git a/dv/sVerilogTests/seqTest.png b/dv/sVerilogTests/seqTest.png
new file mode 100644
index 00000000..089e2289
Binary files /dev/null and b/dv/sVerilogTests/seqTest.png differ
diff --git a/rtl/L1_Data_Cache.sv
b/rtl/L1_Data_Cache.sv
new file mode 100644
index 00000000..d6246b68
--- /dev/null
+++ b/rtl/L1_Data_Cache.sv
@@ -0,0 +1,417 @@
+// L1 data cache (4 KB, 2-way set associative, 4-byte blocks) and its data SRAM.
+//
+// Policies:
+//   * Write hit  - write-back: data is written to the cache and only later to the lower
+//                  memory level, when the cache line is replaced.
+//   * Write miss - write-allocate: the cache line is loaded into the cache first, then the
+//                  write is performed.
+//   * Look-through: the lower memory level is consulted only after a cache miss.
+//   * LRU replacement: the least recently used line of the set is selected as the victim.
+//
+// Notes:
+//   * Small caches have a very low temporal double-bit error rate; no ECC is implemented.
+//   * TODO: "2 words per block" and a non-blocking cache are still to be implemented; a block
+//     currently holds a single 32-bit word.
+//   * To keep synthesis easy the data array is written as a separate SRAM-style module.
+`timescale 1ns / 1ps
+
+// Synchronous data store of the cache: one BLOCK_WIDTH-bit word per (set, way).
+//
+// Ports:
+//   clk          - accesses are performed on the rising edge.
+//   write_enable - store write_data in the addressed entry (has priority over read_enable).
+//   read_enable  - load the addressed entry into read_data.
+//   set_index    - set number.
+//   way_select   - way within the set.
+//   write_data   - word to store.
+//   read_data    - last word read; unchanged on cycles without a read.
+module sram_module(
+    input wire clk,
+    input wire write_enable, read_enable,
+    input wire [8:0] set_index,
+    input wire way_select,
+    input wire [31:0] write_data,
+    output reg [31:0] read_data
+);
+    // Constants for cache configuration (duplicated in L1_Data_Cache; keep both in sync)
+    localparam BLOCK_SIZE = 4;          // bytes
+    localparam CACHE_SIZE = 4 * 1024;   // 4 KB
+    localparam ASSOCIATIVITY = 2;
+
+    localparam BLOCK_WIDTH = BLOCK_SIZE * 8;                        // 32 bits
+    localparam NUM_SETS = CACHE_SIZE/(BLOCK_SIZE * ASSOCIATIVITY);  // 512 sets
+    localparam ADDR_WIDTH = 32;
+
+    reg [BLOCK_WIDTH -1:0] memory_array [0:NUM_SETS * ASSOCIATIVITY - 1];
+    // The ways of one set occupy adjacent entries of the flat array.
+    wire [ADDR_WIDTH -1:0] actual_address = set_index * ASSOCIATIVITY + way_select;
+//  wire gated_clk = clk & (write_enable | read_enable); // power-saving consideration,
+//  but write or read is continuously high for this module
+
+    always @(posedge clk) begin
+        if (write_enable) begin
+            if (actual_address < (NUM_SETS * ASSOCIATIVITY))
+                memory_array[actual_address] <= write_data;
+        end else if (read_enable) begin
+            // NOTE(review): blocking assignment in a clocked block. L1_Data_Cache samples
+            // sram_read_data on the very clock edge on which this read executes, so the value
+            // it captures depends on simulator process ordering. Changing this to `<=` removes
+            // the race but delays the data by one cycle, which needs an extra wait state in
+            // the cache FSM and matching testbench timing - deliberately left unchanged here.
+            if (actual_address < (NUM_SETS * ASSOCIATIVITY))
+                read_data = memory_array[actual_address];
+        end
+        // Out-of-range addresses are ignored.
+    end
+endmodule : sram_module
+
+// L1 data cache controller: 2-way set associative, write-back, write-allocate, LRU.
+//
+// Processor side:
+//   write_enable / read_enable - request strobes; they are read live while the tag is
+//                                checked, so they must stay asserted until the access is done.
+//   request_address            - byte address, captured when a request leaves IDLE.
+//   write_data                 - word to store; read live when the write hit is performed.
+//   response_data              - word returned by the last completed read.
+//   c_state                    - number of the state processed on the previous clock edge
+//                                (0 IDLE, 1 CHECK_TAG, 2 WRITEBACK, 3 FILL), for debugging.
+//
+// Lower-memory (L2) side:
+//   l2_request       - high while a transfer is outstanding.
+//   l2_write_enable  - high for a write-back, low for a fill.
+//   l2_address       - address of the transfer.
+//   l2_write_data    - evicted word (write-back only).
+//   l2_response_data - fill data, sampled while l2_ready is high.
+//   l2_ready         - completion flag from the lower level.
+//
+// reset is asynchronous and active high: it invalidates every line and returns the L2
+// handshake outputs to their idle values.
+module L1_Data_Cache(
+    input wire clk,
+    input wire reset,
+    input wire write_enable, read_enable,
+    input wire [31:0] request_address,
+    input wire [31:0] write_data,
+    output reg [31:0] response_data,
+    output reg [1:0] c_state,
+
+    // To LOWER MEMORY
+    output reg l2_request,
+    output reg l2_write_enable,
+    output reg [31:0] l2_address,
+    output reg [31:0] l2_write_data,
+    input wire [31:0] l2_response_data,
+    input wire l2_ready
+);
+    // Hardcoded parameters for the data cache
+    localparam CACHE_SIZE = 4 * 1024;   // Cache size: 4 KB
+    localparam BLOCK_SIZE = 4;          // Block size: 4 bytes (32 bits)
+    localparam ASSOCIATIVITY = 2;       // 2-way
+    localparam DATA_WIDTH = 32;
+
+    localparam BLOCK_WIDTH = BLOCK_SIZE * 8;                        // 32 bits
+    localparam NUM_SETS = CACHE_SIZE/(BLOCK_SIZE * ASSOCIATIVITY);  // 512 sets
+    localparam ADDR_WIDTH = 32;
+
+    // Calculating the number of bits for offset, index, and tag
+    localparam OFFSET_WIDTH = $clog2(BLOCK_SIZE);                   // 2 bits
+    localparam INDEX_WIDTH = $clog2(NUM_SETS);                      // 9 bits
+    localparam TAG_WIDTH = ADDR_WIDTH - OFFSET_WIDTH - INDEX_WIDTH; // 21 bits
+
+    // Internal Variables
+
+//  reg [BLOCK_WIDTH- 1:0] cache_data [0:NUM_SETS-1][0:ASSOCIATIVITY-1];
+    // need to add offset access
+
+    reg [TAG_WIDTH - 1:0] cache_tags [0:NUM_SETS-1][0:ASSOCIATIVITY-1];
+    reg valid [0:NUM_SETS-1][0:ASSOCIATIVITY-1];
+    reg dirty [0:NUM_SETS-1][0:ASSOCIATIVITY-1];
+    // One age bit per way, packed per set: lru_counter[set][way] is 0 for the most recently
+    // used way. NOTE(review): a single bit per way can only order 2 ways.
+    reg [ASSOCIATIVITY-1:0] lru_counter [0:NUM_SETS-1];
+
+    reg hit;                                // scratch: tag match found in CHECK_TAG
+    reg [ASSOCIATIVITY-1:0] way, lru_way;   // hit way / victim way of the current access
+
+    reg sram_read_req =0;                   // set while an issued SRAM read awaits capture
+
+    typedef enum integer {IDLE, CHECK_TAG, WRITEBACK, FILL} cache_state_t;
+    cache_state_t state = IDLE;
+
+    // Request address captured in IDLE, split into its fields.
+    typedef struct packed{
+        logic [31:0] address;
+        logic [TAG_WIDTH-1:0] tag;
+        logic [INDEX_WIDTH-1:0] index;
+        logic [OFFSET_WIDTH-1:0] offset;
+    }current_address_t;
+    current_address_t current_addr;
+
+    // Read data of the data SRAM and the registered command presented to it.
+    reg [31:0] sram_read_data;
+    typedef struct packed{
+        logic [31:0] write_data;
+        logic write_enable;
+        logic read_enable;
+        logic way;
+        logic [8:0] index;
+    }sram_data_t;
+    sram_data_t put_sram_data;
+
+    sram_module cache_data_sram (
+        .clk(clk),
+        .write_enable (put_sram_data.write_enable),
+        .read_enable (put_sram_data.read_enable),
+        .set_index (put_sram_data.index),
+        .way_select (put_sram_data.way),
+        .write_data (put_sram_data.write_data),
+        .read_data (sram_read_data)
+    );
+
+    // LRU Function
+    // Returns the way of set_index with the largest LRU counter; the lowest-numbered way
+    // wins ties. It has no side effects - the caller stores the result in lru_way.
+    function integer get_lru_way(input integer set_index);
+        integer i;
+        integer oldest_way;
+        reg [ASSOCIATIVITY-1:0] max_count;
+        begin
+            max_count = 0;
+            // max_count = -1 would not work: -1 is 32'hFFFFFFFF and truncates to all ones
+            oldest_way = 0;
+            for (i = 0; i < ASSOCIATIVITY; i = i + 1) begin
+                if (lru_counter[set_index][i] > max_count) begin
+                    max_count = lru_counter[set_index][i];
+                    oldest_way = i;
+                end
+            end
+            get_lru_way = oldest_way;
+        end
+    endfunction
+
+    // === === === Helper Tasks === === ===
+    // CHECK_TAG hit path. A write issues the SRAM write, marks the line dirty and returns to
+    // IDLE. A read takes two passes: the first issues the SRAM read, the second captures
+    // sram_read_data into response_data. The LRU state is refreshed on every pass.
+    // NOTE(review): with neither enable asserted on a hit the FSM stays in CHECK_TAG.
+    task handle_cache_hit;
+        begin
+            if (write_enable) begin
+                set_sram_write_request(current_addr.index, way, write_data);
+                dirty[current_addr.index][way] <= 1;
+                state <= IDLE;
+            end else if (read_enable) begin
+                if (!sram_read_req) begin
+                    set_sram_read_request(current_addr.index, way);
+                    sram_read_req <= 1;
+                end else begin
+                    response_data <= sram_read_data;
+                    sram_read_req <= 0;
+                    state <= IDLE;
+                end
+            end
+            update_lru_counters(current_addr.index, way);
+        end
+    endtask
+
+    // CHECK_TAG miss path: a dirty victim is written back first, otherwise fill directly.
+    task handle_cache_miss;
+        begin
+            if (dirty[current_addr.index][lru_way]) begin
+                state <= WRITEBACK;
+            end else begin
+                state <= FILL;
+            end
+        end
+    endtask
+
+    // Registers a read command for the data SRAM.
+    task set_sram_read_request;
+        input integer index, way;
+        begin
+            put_sram_data.read_enable <= 1;
+            put_sram_data.write_enable <= 0;
+            put_sram_data.index <= index;
+            put_sram_data.way <= way;
+        end
+    endtask
+
+    // Registers a write command for the data SRAM.
+    task set_sram_write_request;
+        input integer index, way, data;
+        begin
+            put_sram_data.write_enable <= 1;
+            put_sram_data.read_enable <= 0;
+            put_sram_data.index <= index;
+            put_sram_data.way <= way;
+            put_sram_data.write_data <= data;
+        end
+    endtask
+
+    // Drives a request towards the lower memory level (write_enable = 1 for a write-back).
+    task set_l2_request;
+        input [31:0] address, data;
+        input write_enable;
+        begin
+            l2_address <= address;
+            l2_write_enable <= write_enable;
+            l2_request <= 1;
+            l2_write_data <= data;
+        end
+    endtask
+
+    // Marks accessed_way as most recently used (counter 0) and ages every other way of the
+    // set, saturating at ASSOCIATIVITY - 1.
+    task update_lru_counters(input integer set_index, input integer accessed_way);
+        integer i;
+        begin
+            for (i = 0; i < ASSOCIATIVITY; i = i + 1) begin
+                if (i == accessed_way) begin
+                    lru_counter[set_index][i] <= 0;
+                end else if (lru_counter[set_index][i] != (ASSOCIATIVITY - 1)) begin
+                    lru_counter[set_index][i] <= lru_counter[set_index][i] + 1;
+                end
+            end
+        end
+    endtask
+
+    //=== === === Cache Operation Tasks === === ===
+    // Reset: return the FSM to IDLE, release the L2 handshake and the SRAM command, and
+    // invalidate every line. The data SRAM itself is not cleared - it cannot be reset in a
+    // single clock, and cleared valid bits force a fill before any line is read.
+    task reset_cache;
+        integer i, j;
+        begin
+            state <= IDLE;
+            c_state <= 0;
+            sram_read_req <= 0;
+            response_data <= 0;
+            // Without these the handshake outputs stay X after reset and fill_logic, which
+            // tests !l2_request, can never issue its first request.
+            l2_request <= 0;
+            l2_write_enable <= 0;
+            l2_address <= 0;
+            l2_write_data <= 0;
+            // Leave the SRAM command idle instead of queueing one write per line, of which
+            // only the last would take effect and keep write_enable asserted.
+            put_sram_data <= '0;
+            for (i = 0; i < NUM_SETS; i = i+1) begin
+                for (j = 0; j < ASSOCIATIVITY; j = j+1) begin
+                    cache_tags[i][j] <= 0;
+                    valid[i][j] <= 0;
+                    dirty[i][j] <= 0;
+//                  lru_counter[i][j] <= j; // biased
+                    lru_counter[i][j] <= 1; // all ways equally old; way 0 is chosen first
+                end
+            end
+        end
+    endtask
+
+    // IDLE: on a request, capture the address and its tag/index/offset fields.
+    task idle_state_logic;
+        begin
+            if (write_enable || read_enable) begin
+                current_addr.address <= request_address;
+                current_addr.tag <= request_address[ADDR_WIDTH-1 -: TAG_WIDTH];
+                current_addr.index <= request_address[(ADDR_WIDTH - TAG_WIDTH - 1)-: INDEX_WIDTH];
+                current_addr.offset <= request_address[OFFSET_WIDTH-1 -: OFFSET_WIDTH];
+                state <= CHECK_TAG;
+            end
+        end
+    endtask
+
+    // CHECK_TAG: pick the victim way, compare the tags of the set and dispatch to the hit or
+    // miss path. hit, way and lru_way are written with blocking assignments; lru_way is
+    // reused by WRITEBACK and FILL of the same access.
+    task check_tag_logic;
+        integer i;
+        begin
+            hit = 0;
+            // have to parallel check for better performance and parallely start the sram
+            // instantiate the sram in prev state maybe
+            lru_way = get_lru_way(current_addr.index);
+            for (i = 0; i < ASSOCIATIVITY; i = i + 1) begin
+                if (valid[current_addr.index][i] && cache_tags[current_addr.index][i] == current_addr.tag) begin
+                    hit = 1;
+                    way = i;
+                    break;
+                end
+            end
+            if (hit) begin
+                handle_cache_hit();
+            end else begin
+                handle_cache_miss();
+            end
+        end
+    endtask
+
+    // WRITEBACK: read the victim word from the SRAM, then hold a write request towards L2
+    // until l2_ready is seen, and continue with the fill.
+    task writeback_logic;
+        reg [31:0] writeback_address;
+        begin
+            // Victim address rebuilt from its stored tag and the current set index.
+            writeback_address = {cache_tags[current_addr.index][lru_way], current_addr.index, {OFFSET_WIDTH{1'b0}}};
+            if (!sram_read_req) begin
+                set_sram_read_request(current_addr.index, lru_way);
+                sram_read_req <= 1;
+            end else begin
+                set_l2_request(writeback_address, sram_read_data, 1);
+                if (l2_ready) begin
+                    l2_write_enable <= 0;
+                    l2_request <= 0;
+                    sram_read_req <= 0;
+                    state <= FILL;
+                end
+            end
+        end
+    endtask
+
+    // FILL: request the missing word from L2; when l2_ready is seen store it in the victim
+    // way, update tag/valid/dirty/LRU and return to CHECK_TAG to repeat the tag check.
+    task fill_logic;
+        begin
+            if (!l2_request) begin
+                set_l2_request(current_addr.address, 0, 0);
+            end else if (l2_ready) begin
+                set_sram_write_request(current_addr.index, lru_way, l2_response_data);
+                cache_tags[current_addr.index][lru_way] <= current_addr.tag;
+                valid[current_addr.index][lru_way] <= 1;
+                dirty[current_addr.index][lru_way] <= 0;
+                update_lru_counters(current_addr.index, lru_way);
+                l2_request <= 0;
+                state <= CHECK_TAG;
+            end
+        end
+    endtask
+
+    // Main Cache Operation
+    always @(posedge clk or posedge reset) begin
+        if (reset) begin
+            reset_cache();
+        end else begin
+            case (state)
+                IDLE: begin
+                    idle_state_logic();
+                    c_state <= 0;
+                end
+                CHECK_TAG: begin
+                    check_tag_logic();
+                    c_state <= 1;
+                end
+                WRITEBACK: begin
+                    writeback_logic();
+                    c_state <= 2;
+                end
+                FILL: begin
+                    fill_logic();
+                    c_state <= 3;
+                end
+                default: begin
+                    idle_state_logic();
+                    c_state <= 0;
+                end
+            endcase
+        end
+    end
+endmodule
\ No newline at end of file