Skip to content

Commit

Permalink
Merge pull request #70 from JonasDann/master
Browse files: browse the repository at this point in the history
Fixed perf_fpga overflow bug
  • Loading branch information
maximilianheer authored Aug 15, 2024
2 parents c718117 + fa3e645 commit bf9d01b
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 16 deletions.
8 changes: 4 additions & 4 deletions examples_hw/apps/perf_fpga/vfpga_top.svh
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ end
always_comb begin
// Requests
sq_rd.data = 0;
sq_rd.data.opcode = 5'd1;
sq_rd.data.strm = 2'b0;
sq_rd.data.opcode = LOCAL_READ;
sq_rd.data.strm = STRM_HOST;
sq_rd.data.mode = 0;
sq_rd.data.rdma = 0;
sq_rd.data.remote = 0;
Expand All @@ -173,8 +173,8 @@ always_comb begin
sq_rd.valid = (state_C == ST_READ) && ~done_req;

sq_wr.data = 0;
sq_wr.data.opcode = 5'd2;
sq_wr.data.strm = 2'b0;
sq_wr.data.opcode = LOCAL_WRITE;
sq_wr.data.strm = STRM_HOST;
sq_wr.data.mode = 0;
sq_wr.data.rdma = 0;
sq_wr.data.remote = 0;
Expand Down
14 changes: 11 additions & 3 deletions examples_sw/apps/perf_fpga/main.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <iostream>
#include <algorithm>
#include <string>
#include <malloc.h>
#include <time.h>
Expand Down Expand Up @@ -28,7 +29,7 @@ constexpr auto const defTargetVfid = 0;
constexpr auto const nReps = 1;
constexpr auto const defSize = 128; // 2^7
constexpr auto const maxSize = 16 * 1024;
constexpr auto const clkNs = 1000.0 / 300.0;
constexpr auto const clkNs = 1000.0 / 250.0;
constexpr auto const nBenchRuns = 100;

/**
Expand Down Expand Up @@ -115,12 +116,19 @@ int main(int argc, char *argv[])
while(curr_size <= maxSize) {
for(int j = 0; j < nBenchRuns; j++) {
time_bench_rd.emplace_back(benchmark_run(cthread, hMem, BenchOper::START_RD));
memset(hMem, 0xEA, maxSize);
time_bench_wr.emplace_back(benchmark_run(cthread, hMem, BenchOper::START_WR));
for (size_t i = 0; i < curr_size; i++) {
uint8_t value = (i / 64 + 1) >> std::min(((i % 64) * 8), (size_t) 63);
if (((int8_t *) hMem)[i] != value) {
std::cout << "hMem[" << i << "] value " << (uint32_t) ((uint8_t *) hMem)[i] << " should be " << (uint8_t) value << std::endl;
}
}
}
std::cout << std::fixed << std::setprecision(2);
std::cout << std::setw(8) << curr_size << " [bytes], RD: "
<< std::setw(8) << ((n_reps * 1024 * curr_size) / vctr_avg(time_bench_rd)) << " [MB/s], WR: "
<< std::setw(8) << ((n_reps * 1024 * curr_size) / vctr_avg(time_bench_wr)) << " [MB/s]" << std::endl;
<< std::setw(8) << (((double) n_reps * 1024 * curr_size) / vctr_avg(time_bench_rd)) << " [MB/s], WR: "
<< std::setw(8) << (((double) n_reps * 1024 * curr_size) / vctr_avg(time_bench_wr)) << " [MB/s]" << std::endl;

time_bench_rd.clear();
time_bench_wr.clear();
Expand Down
2 changes: 1 addition & 1 deletion examples_sw/apps/perf_local/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ int main(int argc, char *argv[])
};
bench.runtime(benchmark_thr);
std::cout << std::fixed << std::setprecision(2);
std::cout << "Size: " << std::setw(8) << curr_size << ", thr: " << std::setw(8) << (n_regions * 1000 * curr_size) / (bench.getAvg() / n_reps_thr) << " MB/s";
std::cout << "Size: " << std::setw(8) << curr_size << ", thr: " << std::setw(8) << ((double) n_regions * 1000 * curr_size) / (bench.getAvg() / n_reps_thr) << " MB/s";
#ifndef EN_LAT_TESTS
std::cout << std::endl;
#endif
Expand Down
9 changes: 6 additions & 3 deletions scripts/wr_hdl/template_gen/lynx_pkg_tmplt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ package lynxTypes;
parameter integer OPCODE_BITS = 5;
parameter integer STRM_BITS = 2;

parameter integer LOCAL_READ = 1;
parameter integer LOCAL_WRITE = 2;

parameter integer STRM_CARD = 0;
parameter integer STRM_HOST = 1;
parameter integer STRM_TCP = 2;
Expand Down Expand Up @@ -390,16 +393,16 @@ package lynxTypes;

typedef struct packed {
// Opcode
logic [OPCODE_BITS-1:0] opcode;
logic [STRM_BITS-1:0] strm;
logic [OPCODE_BITS-1:0] opcode; // One of the values of fpga::CoyoteOper
logic [STRM_BITS-1:0] strm; // One of STRM_CARD, STRM_HOST, STRM_TCP, or STRM_RDMA
logic mode;
logic rdma;
logic remote;

// ID
logic [DEST_BITS-1:0] vfid; // rsrvd
logic [PID_BITS-1:0] pid;
logic [DEST_BITS-1:0] dest;
logic [DEST_BITS-1:0] dest; // The index of the AXI stream that data arrives at/departs from

// FLAGS
logic last;
Expand Down
10 changes: 5 additions & 5 deletions sw/include/cDefs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,11 @@ enum class CoyoteOperNew {

enum class CoyoteOper {
NOOP = 0,
LOCAL_READ = 1,
LOCAL_WRITE = 2,
LOCAL_TRANSFER = 3,
LOCAL_OFFLOAD = 4,
LOCAL_SYNC = 5,
LOCAL_READ = 1, // Transfer data from CPU or FPGA memory to FPGA stream (depending on sgEntry.local.src_stream)
LOCAL_WRITE = 2, // Transfer data from FPGA stream to CPU or FPGA memory (depending on sgEntry.local.dst_stream)
LOCAL_TRANSFER = 3, // LOCAL_READ and LOCAL_WRITE in parallel
LOCAL_OFFLOAD = 4, // Transfer data from CPU memory to FPGA memory
LOCAL_SYNC = 5, // Transfer data from FPGA memory to CPU memory
REMOTE_RDMA_READ = 6,
REMOTE_RDMA_WRITE = 7,
REMOTE_RDMA_SEND = 8,
Expand Down

0 comments on commit bf9d01b

Please sign in to comment.