From 22e59c1faf22394cf983963f8c1abcf795b06304 Mon Sep 17 00:00:00 2001 From: hmaximili Date: Wed, 11 Sep 2024 00:17:13 +0200 Subject: [PATCH] Further documentation + cmake-adaptation for DDR in U250 --- README.md | 2 +- cmake/FindCoyoteHW.cmake | 1 + sw/include/cDefs.hpp | 10 ++--- sw/include/cFunc.hpp | 26 +++++++++++-- sw/include/cLib.hpp | 7 ++++ sw/include/cRnfg.hpp | 25 +++++++------ sw/include/cSched.hpp | 38 ++++++++++++++----- sw/include/cService.hpp | 24 ++++++------ sw/src/cRnfg.cpp | 79 +++++++++++++++++++++++++++++++--------- sw/src/cSched.cpp | 48 +++++++++++++++++------- sw/src/cService.cpp | 64 ++++++++++++++++++++++---------- 11 files changed, 231 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index 095213e0..896fdbd0 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Some of **Coyote's** features: Full `Vivado/Vitis` suite is needed to build the hardware side of things. Hardware server will be enough for deployment only scenarios. Coyote runs with `Vivado 2022.1`. Previous versions can be used at one's own peril. -Following AMD platforms are supported: `vcu118`, `Alveo u50`, `Alveo u55c`, `Alveo u200`, `Alveo u250` and `Alveo u280`. Coyote is currently being developed on the HACC cluster at ETH Zurich. For more information and possible external access check out the following link: https://systems.ethz.ch/research/data-processing-on-modern-hardware/hacc.html +We are currently only actively supporting the AMD `Alveo u55c` accelerator card. Our codebase offers some legacy-support for the following platforms: `vcu118`, `Alveo u50`, `Alveo u200`, `Alveo u250` and `Alveo u280`, but we are not actively working with these cards anymore. Coyote is currently being developed on the HACC cluster at ETH Zurich. For more information and possible external access check out the following link: https://systems.ethz.ch/research/data-processing-on-modern-hardware/hacc.html `CMake` is used for project creation. 
Additionally `Jinja2` template engine for Python is used for some of the code generation. The API is writen in `C++`, 17 should suffice (for now). diff --git a/cmake/FindCoyoteHW.cmake b/cmake/FindCoyoteHW.cmake index 564cb425..bafd9f2e 100644 --- a/cmake/FindCoyoteHW.cmake +++ b/cmake/FindCoyoteHW.cmake @@ -220,6 +220,7 @@ macro(validation_checks_hw) set(FPGA_PART xcu250-figd2104-2L-e CACHE STRING "FPGA device.") set(DDR_SIZE 34) set(HBM_SIZE 0) + set(N_DDR_CHAN 1) elseif(FDEV_NAME STREQUAL "u280") set(FPGA_PART xcu280-fsvh2892-2L-e CACHE STRING "FPGA device.") set(DDR_SIZE 34) diff --git a/sw/include/cDefs.hpp b/sw/include/cDefs.hpp index 480bad35..2db70fc2 100644 --- a/sw/include/cDefs.hpp +++ b/sw/include/cDefs.hpp @@ -188,11 +188,11 @@ enum class CoyoteOper { // What do these classes mean? - it's probably classes of memory allocation (regular, huge page, GPU etc.) enum class CoyoteAlloc { - REG = 0, - THP = 1, - HPF = 2, - PRM = 3, - GPU = 4 + REG = 0, // Regular + THP = 1, // Not quite clear what this is for, especially compared to HPF + HPF = 2, // Huge Page + PRM = 3, // Programmable Region Memory + GPU = 4 // GPU-memory (required for the FPGA-GPU-DMA) }; /* AVX regs */ diff --git a/sw/include/cFunc.hpp b/sw/include/cFunc.hpp index 55aa7bce..ac1c18a3 100644 --- a/sw/include/cFunc.hpp +++ b/sw/include/cFunc.hpp @@ -165,21 +165,26 @@ class cFunc : public bFunc { // Request to execute a function case defOpTask: { - // Expansion + // Tuple that can hold multiple arguments std::tuple msg; + // Lambda function to read data from the socket to the receive buffer (most likely arguments for execution) auto f_rd = [&](auto& x){ using U = decltype(x); int size_arg = sizeof(U); + // Try to read the incoming argument from the socket. If not possible, log an error. 
if(n = read(connfd, recv_buf, size_arg) == size_arg) { memcpy(&x, recv_buf, size_arg); } else { syslog(LOG_ERR, "Request invalid, connfd: %d", connfd); } }; + + // Not exactly sure about this, but would argue that the received message is stored in previously declared message std::apply([=](auto&&... args) {(f_rd(args), ...);}, msg); + // Schedule the task for execution in the thread that it belongs to, based on the arguments that were received for it clients[connfd]->scheduleTask(std::unique_ptr>(new auto(std::make_from_tuple*, Args...)>, Args...>>(std::tuple_cat( std::make_tuple(tid), std::make_tuple(oid), @@ -187,8 +192,12 @@ class cFunc : public bFunc { std::make_tuple(f), msg))))); + // While not completed, check for task completion in the associated thread while(!cmpltd) { + // Check the thread for completion of the scheduled task cmpltd = clients[connfd]->getTaskCompletedNext(cmpl_tid, cmpl_ev); + + // If task has been completed, send both the completion tid and completion ev back to the caller, which is cLib through the iTask if(cmpltd) { if(write(connfd, &cmpl_tid, sizeof(int32_t)) != sizeof(int32_t)) { syslog(LOG_ERR, "Completion tid could not be sent, connfd: %d", connfd); @@ -198,6 +207,7 @@ class cFunc : public bFunc { syslog(LOG_ERR, "Completion could not be sent, connfd: %d", connfd); } } else { + // If task has not yet been completed, wait for a certain amount of time before checking again std::this_thread::sleep_for(std::chrono::nanoseconds(sleepIntervalCompletion)); } } @@ -214,29 +224,39 @@ class cFunc : public bFunc { } syslog(LOG_NOTICE, "Connection %d closing ...", connfd); - // Send cleanup + + // Send cleanup - enqueue the connection that should be processed in the queue for cleanup mtx_q.lock(); cln_q.push(connfd); mtx_q.unlock(); } + // Function that cleans up the threads that have finished processing void cleanConns() { run_cln = true; int connfd; + // As long as the clean-up runs, get threads to be cleaned from the FIFO and continue 
cleaning them up while(run_cln) { + // Close the lock before accessing the cleaning-queue mtx_q.lock(); if(!cln_q.empty()) { + // Get socket from the cleaning-queue connfd = cln_q.front(); cln_q.pop(); + + // Close the request-thread from the reqs-structure reqs[connfd].second.join(); + // Delete the request-thread from the reqs-structure reqs.erase(connfd); + + // Erase the thread from the clients-structure clients.erase(connfd); } mtx_q.unlock(); - + // Wait for some time std::this_thread::sleep_for(std::chrono::nanoseconds(sleepIntervalRequests)); } } diff --git a/sw/include/cLib.hpp b/sw/include/cLib.hpp index 2d04e1a5..afdc66fa 100644 --- a/sw/include/cLib.hpp +++ b/sw/include/cLib.hpp @@ -50,6 +50,7 @@ class cLib { } // Set sun-family and sun-path in the server-socket address struct + // Which means: This is a local socket for Inter-Process Communication and not a network socket for network communication server.sun_family = AF_UNIX; strcpy(server.sun_path, sock_name); @@ -191,6 +192,12 @@ class cLib { close(sockfd); } + /** + * task, iTask, iCmpl are used for interaction with cFunc: They send a task to cFunc, which then + * places this task in the execution queue of the thread for scheduled execution, wait for the + * completion event and send back the completion ID and the completion event here to the iCmpl. + */ + // Task blocking: Variadic function that takes a priority and an arbitrary number of arguments for further processing // Function is basically the same as iTask, but with a blocking completion-handshake at the end Cmpl task(int32_t priority, Args... 
msg) { diff --git a/sw/include/cRnfg.hpp b/sw/include/cRnfg.hpp index b42af18d..e0620ebb 100644 --- a/sw/include/cRnfg.hpp +++ b/sw/include/cRnfg.hpp @@ -41,21 +41,22 @@ using namespace boost::interprocess; namespace fpga { /* Alias */ +// A bitstream consists of a pointer to memory and its length in combination using bStream = std::pair; // vaddr*, length /** * @brief Coyote reconfiguration loader * - * Partial bitstream loader + * Partial bitstream loader, required for loading partial bitstreams into the vFPGAs * */ class cRnfg { protected: /* Fpga device */ - int32_t fd = { 0 }; - pid_t pid; - uint32_t crid; - static std::atomic_uint32_t crid_gen; + int32_t fd = { 0 }; // File descriptor + pid_t pid; // Process ID + uint32_t crid; // Configuration ID (I guess?) + static std::atomic_uint32_t crid_gen; // Atomic counter; the constructor draws each instance's crid from it (crid = crid_gen++) /* Locks */ named_mutex mlock; // Internal memory lock @@ -64,25 +65,25 @@ class cRnfg { std::unordered_map mapped_pages; /* PR */ - uint8_t readByte(ifstream& fb); - bStream readBitstream(ifstream& fb); - void reconfigureBase(void* vaddr, uint32_t len, uint32_t vfid = -1); + uint8_t readByte(ifstream& fb); // Function to read a byte from an input stream + bStream readBitstream(ifstream& fb); // Function to read a bitstream from an input stream + void reconfigureBase(void* vaddr, uint32_t len, uint32_t vfid = -1); // Base reconfiguration routine: takes the bitstream pointer, its length and a vFPGA-ID; reconfigures that vFPGA, or the shell if vfid is -1 /* Memory alloc */ - void* getMem(csAlloc&& cs_alloc); - void freeMem(void* vaddr); + void* getMem(csAlloc&& cs_alloc); // Function to allocate memory via a csAlloc-object as defined in cDefs + void freeMem(void* vaddr); // Function to free memory via its start-address public: /** - * @brief Ctor, Dtor + * @brief Ctor, Dtor - Constructor and Destructor * */ cRnfg(uint32_t dev); ~cRnfg(); /** - * @brief Shell reconfiguration + * @brief Shell reconfiguration - function to call for 
reconfiguration of the shell */ void shellReconfigure(std::string name); diff --git a/sw/include/cSched.hpp b/sw/include/cSched.hpp index afaaa596..52af8738 100644 --- a/sw/include/cSched.hpp +++ b/sw/include/cSched.hpp @@ -36,6 +36,7 @@ #include #include +// Has the cRnfg for handling bitstreams - might be interesting for further checks #include "cRnfg.hpp" using namespace std; @@ -43,7 +44,9 @@ using namespace boost::interprocess; namespace fpga { -/* Struct */ +/* Struct + * Consists of ctid, oid and priority for scheduling +*/ struct cLoad { int32_t ctid; int32_t oid; @@ -53,15 +56,20 @@ struct cLoad { /* Schedule reordering */ class taskCmprSched { private: + + // State variables: Priority and bool for reordering bool priority; bool reorder; public: + + // Constructor: Set state variables taskCmprSched(const bool& priority, const bool& reorder) { this->priority = priority; this->reorder = reorder; } + // Takes pointers to two cLoads as scheduling requests and decides which one has the higher priority bool operator()(const std::unique_ptr& req1, const std::unique_ptr& req2) { // Comparison if(priority) { @@ -84,15 +92,18 @@ class taskCmprSched { * * This is the main vFPGA scheduler. It schedules submitted user tasks. * These tasks trickle down: cTask -> cThread -> cProcess -> cSched -> vFPGA + * NOTE: The chain above is outdated - there is no cProcess in Coyote v2 * */ class cSched : public cRnfg { protected: /* vFPGA */ + // vfid as vFPGA-identifier, fcnfg as the configuration of this vFPGA int32_t vfid = { -1 }; fCnfg fcnfg; /* Locks */ + // Lock for thread-safe operations named_mutex plock; // Internal vFPGA lock /* Scheduling */ @@ -100,10 +111,12 @@ class cSched : public cRnfg { const bool reorder; /* Thread */ + // Thread used for scheduling tasks bool run; thread scheduler_thread; /* Scheduler queue */ + // Queue that stores pointers to load-objects. 
The order of the queue is calculated using the comparator-operator specified in taskCmprSched condition_variable cv_queue; mutex mtx_queue; priority_queue, vector>, taskCmprSched> request_queue; @@ -111,38 +124,43 @@ class cSched : public cRnfg { /* Scheduling and completion */ condition_variable cv_rcnfg; mutex mtx_rcnfg; - int curr_ctid = { -1 }; + int curr_ctid = { -1 }; // current completion thread ID condition_variable cv_cmplt; mutex mtx_cmplt; - bool curr_run = { false }; + bool curr_run = { false }; // current run flag (a bool, not an ID) /* Partial bitstreams */ + // Map with all bitstreams std::unordered_map bstreams; /* PR */ + // Function for FPGA-reconfiguration based on the operator ID void reconfigure(int32_t oid); /* (Thread) Process requests */ + // Function for processing Requests void processRequests(); public: /** - * @brief Ctor, Dtor + * @brief Ctor, Dtor - constructor and destructor + * + * Seems like scheduler gets created per vfid and device * */ cSched(int32_t vfid, uint32_t dev, bool priority = true, bool reorder = true); ~cSched(); /** - * @brief Run + * @brief Run - run the scheduler * */ void runSched(); /** - * @brief Getters + * @brief Getters - return the vFPGA-ID * */ inline auto getVfid() const { return vfid; } @@ -152,10 +170,10 @@ class cSched : public cRnfg { * * @param oid : operator ID */ - auto isReconfigurable() const { return fcnfg.en_pr; } - void addBitstream(std::string name, int32_t oid); - void removeBitstream(int32_t oid); - bool checkBitstream(int32_t oid); + auto isReconfigurable() const { return fcnfg.en_pr; } // Checks if a certain vFPGA is reconfigurable + void addBitstream(std::string name, int32_t oid); // Add a new bitstream to the map + void removeBitstream(int32_t oid); // Remove a bitstream based on the operator ID + bool checkBitstream(int32_t oid); // Check whether a bitstream with this operator ID is present in the map 
/** * @brief Schedule operation diff --git a/sw/include/cService.hpp b/sw/include/cService.hpp index 175acde0..d9b289e0 100644 --- a/sw/include/cService.hpp +++ b/sw/include/cService.hpp @@ -31,6 +31,7 @@ #include #include +// Can use Scheduler, cFunc and cThread #include "cSched.hpp" #include "bFunc.hpp" #include "cThread.hpp" @@ -43,34 +44,35 @@ namespace fpga { * @brief Coyote service * * Coyote daemon, provides background scheduling service. + * Inherits from cSched (not sure why though) * */ class cService : public cSched { protected: - // Singleton + // Singleton: Important that there's only a single instance of cService per vFPGA that controls all threads registered for this vFPGA static cService *cservice; - // Function map + // Function map - can store client threads for the calling function-IDs std::unordered_map> functions; // Forks pid_t pid; - // ID + // ID - targets a single vFPGA & dev, thus these are global variables / identifiers int32_t vfid = { -1 }; uint32_t dev; string service_id; - // Type + // Type - remote connection bool remote = { false }; uint16_t port; - // Conn + // Conn - connection via a socket, uniquely identified via curr_id string socket_name; int sockfd; int curr_id = { 0 }; - // Notify + // Notify - pointer to user interrupt service routine void (*uisr)(int); /** @@ -80,14 +82,14 @@ class cService : public cSched { void myHandler(int signum); /** - * @brief Initialize + * @brief Initializer for daemon and socket (for connection) * */ void daemonInit(); void socketInit(); /** - * @brief Accept connections + * @brief Accept connections - methods for (QP?) 
exchange with local and remote */ void acceptConnectionLocal(); void acceptConnectionRemote(); @@ -100,9 +102,9 @@ class cService : public cSched { public: /** - * @brief Creates a service for a single vFPGA + * @brief Creates a service for a single vFPGA - execute the protected constructor internally to keep the singleton-property * - * @param vfid - vVFPGA id + * @param vfid - vFPGA id * @param dev - PCIe device * @param priority - priority ordering * @param reorder - reordeing of tasks @@ -127,7 +129,7 @@ class cService : public cSched { void addFunction(int32_t fid, std::unique_ptr f); /** - * @brief QP exchange util (blocking) + * @brief QP exchange util (blocking) - used on the server side, while the client side forces active exchange via the constructor in cLib * */ static void exchangeQpClient() {} diff --git a/sw/src/cRnfg.cpp b/sw/src/cRnfg.cpp index 77b96777..b496a695 100644 --- a/sw/src/cRnfg.cpp +++ b/sw/src/cRnfg.cpp @@ -28,13 +28,19 @@ namespace fpga */ cRnfg::cRnfg(uint32_t dev) : mlock(open_or_create, "pr_mtx") { DBG3("cRnfg: ctor called"); - // Open + + // Generate a string for the file-descriptor of the programmable region / vFPGA of this FPGA std::string region = "/dev/fpga_" + std::to_string(dev) + "_pr"; + + // Open the file-descriptor for the vFPGA fd = open(region.c_str(), O_RDWR | O_SYNC); if (fd == -1) throw std::runtime_error("cRcnfg could not be obtained"); + // Get the Process ID of the calling process pid = getpid(); + + // Use the atomic crid_gen to generate a new ID crid = crid_gen++; } @@ -45,7 +51,7 @@ namespace fpga cRnfg::~cRnfg() { DBG3("cRnfg: dtor called"); - // Mapped + // Mapped: Free all obtained memory pages for (auto &it : mapped_pages) { freeMem(it.first); @@ -53,6 +59,7 @@ namespace fpga named_mutex::remove("pr_mtx"); + // Close the file-descriptor close(fd); } @@ -61,45 +68,60 @@ namespace fpga // ======------------------------------------------------------------------------------- /** - * @brief Bitstream memory 
allocation + * @brief Bitstream memory allocation - get memory * - * @param cs_alloc - allocatin config + * @param cs_alloc - allocation config as defined in cDefs * @return void* - pointer to allocated mem */ void* cRnfg::getMem(csAlloc&& cs_alloc) { + // Pre-initialize memory that needs to be allocated void *mem = nullptr; void *memNonAligned = nullptr; + + // 64-Bit Array for temporary storage uint64_t tmp[maxUserCopyVals]; uint32_t size; + // For a valid requested memory size, proceed with memory allocation if (cs_alloc.size > 0) { + // Store requested size, process ID and cr ID in the temporary array tmp[0] = static_cast(cs_alloc.size); // n_pages tmp[1] = static_cast(pid); tmp[2] = static_cast(crid); + // Switch in this case used to check that the requested memory type is PRM (programmable region memory) switch (cs_alloc.alloc) { case CoyoteAlloc::PRM: { // m lock + // Acquire the lock for thread-safe memory allocation mlock.lock(); + // IO-Call to the driver for allocating memory for the programmable region if (ioctl(fd, IOCTL_ALLOC_PR_MEM, &tmp)) { throw std::runtime_error("ioctl_alloc_host_pr_mem mapping failed"); } + // Map into memory - not exactly sure how this works at this spot here memNonAligned = mmap(NULL, (cs_alloc.size + 1) * hugePageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmapPr); - if (memNonAligned == MAP_FAILED) + + // Check if memory-mapping worked + if (memNonAligned == MAP_FAILED) { throw std::runtime_error("get_pr_mem mmap failed"); } + // Release the lock after the critical memory-operation mlock.unlock(); + // Align the previously obtained memory for usage for vFPGA-reconfiguration mem = (void *)((((reinterpret_cast(memNonAligned) + hugePageSize - 1) >> hugePageShift)) << hugePageShift); - cs_alloc.mem = memNonAligned; + + // Store the non-aligned base in the csAlloc-struct: freeMem() hands this pointer to munmap(), which needs the address that mmap() returned 
+ cs_alloc.mem = memNonAligned; break; } @@ -107,20 +129,23 @@ namespace fpga throw std::runtime_error("unauthorized memory allocation"); } + // Place the obtained memory in the memory-mapping-structure that is part of the thread (thread in charge of calling Reconfiguration?) mapped_pages.emplace(mem, cs_alloc); DBG3("Mapped mem at: " << std::hex << reinterpret_cast(mem) << std::dec); } + // Return the pointer to the obtained memory return mem; } /** - * @brief Bitstream memory deallocation + * @brief Bitstream memory deallocation. Opposite to previous function: Free the obtained memory again. * * @param vaddr - mapped al */ void cRnfg::freeMem(void *vaddr) { + // Save vaddr, process ID and cr ID in the temporary array uint64_t tmp[maxUserCopyVals]; uint32_t size; @@ -128,21 +153,27 @@ namespace fpga tmp[1] = static_cast(pid); tmp[2] = static_cast(crid); + // Check if the current vaddr is actually part of the existing memory mapping if (mapped_pages.find(vaddr) != mapped_pages.end()) { + // Get the memory-mapping entry for the given vaddr auto mapped = mapped_pages[vaddr]; + // Check the alloc-struct to find more information on the allocation switch (mapped.alloc) { + // Only operate if the allocation is actually for PR memory case CoyoteAlloc::PRM : { mlock.lock(); + // Unmap the mapped memory if (munmap(mapped.mem, (mapped.size + 1) * hugePageSize) != 0) { throw std::runtime_error("free_pr_mem munmap failed"); } + // Send IO-call for freeing the PR-memory if (ioctl(fd, IOCTL_FREE_PR_MEM, &tmp)) { throw std::runtime_error("ioctl_free_host_pr_mem failed"); @@ -172,19 +203,26 @@ namespace fpga */ void cRnfg::reconfigureBase(void *vaddr, uint32_t len, uint32_t vfid) { + // Create a tmp-array that holds address of the bitstream, length of the bitstream, process ID and CR ID uint64_t tmp[maxUserCopyVals]; tmp[0] = reinterpret_cast(vaddr); tmp[1] = static_cast(len); tmp[2] = static_cast(pid); tmp[3] = static_cast(crid); + + // Check the vFPGA-ID (regular number is 
vFPGA, -1 indicates that the shell needs reconfiguration) if(vfid != -1) { + // Get the vFPGA-ID as last argument in the tmp-array tmp[4] = static_cast(vfid); + // Issue an ioctl call to the driver for reconfiguration of the PR if (ioctl(fd, IOCTL_RECONFIGURE_APP, &tmp)) // Blocking throw std::runtime_error("ioctl_reconfig_app failed"); DBG3("App reconfiguration completed"); } else { + + // Issue an ioctl call to the driver for reconfiguration of the base shell if (ioctl(fd, IOCTL_RECONFIGURE_SHELL, &tmp)) // Blocking throw std::runtime_error("ioctl_reconfig_shell failed"); @@ -192,7 +230,7 @@ namespace fpga } } - // Util + // Util: Read a byte from the input stream and return it uint8_t cRnfg::readByte(ifstream &fb) { char temp; @@ -201,19 +239,19 @@ namespace fpga } /** - * @brief Read in a bitstream + * @brief Read in a bitstream from the input stream */ bStream cRnfg::readBitstream(ifstream& fb) { // Size - uint32_t len = fb.tellg(); - fb.seekg(0); - uint32_t n_pages = (len + hugePageSize - 1) / hugePageSize; + uint32_t len = fb.tellg(); // Get the current read position in the input stream - the callers open the stream with ios::ate, so this is the length of the file + fb.seekg(0); // Set read position back to beginning of the input stream + uint32_t n_pages = (len + hugePageSize - 1) / hugePageSize; // Calculate the number of required memory pages to store the bitstream // Get mem - void *vaddr = getMem({CoyoteAlloc::PRM, n_pages}); - uint32_t *vaddr_32 = reinterpret_cast(vaddr); + void *vaddr = getMem({CoyoteAlloc::PRM, n_pages}); // Get memory to store the bitstream that is read from the input stream + uint32_t *vaddr_32 = reinterpret_cast(vaddr); - // Read in + // Read in: Read the input-stream bytewise and store it bytewise in the mapped memory for (uint32_t i = 0; i < len / 4; i++) { vaddr_32[i] = 0; @@ -224,6 +262,8 @@ namespace fpga } DBG3("Shell bitstream loaded"); + + // Return the bitstream object return std::make_pair(vaddr, len); } @@ -233,22 +273,25 @@ namespace fpga // 
======------------------------------------------------------------------------------- /** - * @brief Add a bitstream to the map + * @brief Add a bitstream to the map - read in a new bitstream, used for adding it to the FPGA * * @param name - path * @param oid - operator ID */ void cRnfg::shellReconfigure(std::string name) { - // Stream + // Create a new input stream for the bitstream which is defined via its name as argument ifstream f_bit(name, ios::ate | ios::binary); if (!f_bit) throw std::runtime_error("Shell bitstream could not be opened"); - + // Read the bitstream in (call of the previously defined function) bStream bstream = readBitstream(f_bit); + + // Close the input stream f_bit.close(); + // Reconfigure the FPGA with the new loaded bitstream reconfigureBase(std::get<0>(bstream), std::get<1>(bstream)); } diff --git a/sw/src/cSched.cpp b/sw/src/cSched.cpp index ffda1cf6..1733f248 100644 --- a/sw/src/cSched.cpp +++ b/sw/src/cSched.cpp @@ -24,6 +24,8 @@ namespace fpga /** * @brief Construct a new cSched, bitstream handler + * + * Constructor of the scheduler. 
Directly creates a new bitstream handler and a request-queue and sets vfid, priority and reorder * * @param vfid - vFPGA id */ @@ -37,15 +39,19 @@ namespace fpga // Cnfg uint64_t tmp[2]; + // System call to driver to enable configuration of programmable region if (ioctl(fd, IOCTL_PR_CNFG, &tmp)) throw std::runtime_error("ioctl_pr_cnfg() failed, vfid: " + to_string(vfid)); + // Set configuration programmability based on return value from the ioctl-call fcnfg.en_pr = tmp[0]; } /** * @brief Destructor cSched * + * Set run to false, end the scheduler thread, remove all bitstreams from the list + * */ cSched::~cSched() { @@ -53,19 +59,20 @@ namespace fpga run = false; DBG3("cSched: joining"); - scheduler_thread.join(); + scheduler_thread.join(); // Stop the scheduling thread - // Mapped + // Iterate over all bitstreams and remove them one after each other for (auto &it : bstreams) { removeBitstream(it.first); } + // Remove the mutex that has been created previously in the constructor named_mutex::remove(("vpga_mtx_user_" + std::to_string(vfid)).c_str()); } /** - * @brief Run the thread + * @brief Run the thread: Obtain an initial lock, create a scheduler_thread with the function to process requests and wait * */ void cSched::runSched() @@ -75,6 +82,7 @@ namespace fpga // Thread DBG3("cSched: initial lock"); + // Create the scheduler-thread to execute the processRequests-function scheduler_thread = thread(&cSched::processRequests, this); DBG3("cSched: thread started, vfid: " << vfid); @@ -85,11 +93,13 @@ namespace fpga // ======------------------------------------------------------------------------------- // (Thread) Process requests // ======------------------------------------------------------------------------------- + + // Function to run in the scheduler_thread to process the scheduled tasks void cSched::processRequests() { unique_lock lck_q(mtx_queue); unique_lock lck_r(mtx_rcnfg); - run = true; + run = true; // Set run to true bool recIssued = false; int32_t 
curr_oid = -1; cv_queue.notify_one(); @@ -97,28 +107,32 @@ namespace fpga lck_r.unlock(); ; + // Busy-loop: Keep processing while run is true or the request_queue still has elements that need to be processed while (run || !request_queue.empty()) { lck_q.lock(); + + // Check if there are still requests in the queue if (!request_queue.empty()) { - // Grab next reconfig request + // Grab next reconfig request from the top of the queue auto curr_req = std::move(const_cast &>(request_queue.top())); request_queue.pop(); lck_q.unlock(); - // Obtain vFPGA + // Obtain vFPGA-lock plock.lock(); - // Check whether reconfiguration is needed + // Check whether reconfiguration is possible if (isReconfigurable()) { + // Check if the current operator ID differs from the one pulled from the request queue. Only then is a reconfiguration actually required. if (curr_oid != curr_req->oid) { - reconfigure(curr_req->oid); + reconfigure(curr_req->oid); // If reconfiguration is possible and oid has changed, start a reconfiguration recIssued = true; - curr_oid = curr_req->oid; + curr_oid = curr_req->oid; // Update current operator ID } else { @@ -158,6 +172,7 @@ namespace fpga } } + // Place a new load in the request_queue void cSched::pLock(int32_t ctid, int32_t oid, uint32_t priority) { unique_lock lck_q(mtx_queue); @@ -184,7 +199,7 @@ namespace fpga // ======------------------------------------------------------------------------------- /** - * @brief Reconfiguration IO + * @brief Reconfiguration IO - calls the bitstream handler to trigger a reconfiguration of the FPGA * * @param oid - operator id */ @@ -205,22 +220,25 @@ namespace fpga */ void cSched::addBitstream(std::string name, int32_t oid) { + // Check that the new bitstream (identified by the operator ID) is not yet stored in the bitstream-map if (bstreams.find(oid) == bstreams.end()) { - // Stream + // Create an input-stream of the bitstream, from its original file ifstream f_bit(name, ios::ate | ios::binary); if (!f_bit) 
throw std::runtime_error("Shell bitstream could not be opened"); + // Read bitstream from the input-stream bStream bstream = readBitstream(f_bit); f_bit.close(); DBG3("Bitstream loaded, oid: " << oid); - + // Insert the new bitstream with the operator ID in the bitstream-map bstreams.insert({oid, bstream}); return; } + // Error if the bitstream with this operator ID is already present throw std::runtime_error("bitstream with same operation ID already present"); } @@ -231,11 +249,12 @@ namespace fpga */ void cSched::removeBitstream(int32_t oid) { + // Check if the operator ID of the bitstream to be removed can actually be found in the Bitstream-Map if (bstreams.find(oid) != bstreams.end()) { auto bstream = bstreams[oid]; - freeMem(bstream.first); - bstreams.erase(oid); + freeMem(bstream.first); // memory for the bitstream is freed + bstreams.erase(oid); // entry is erased from the bitstream-map } } @@ -246,6 +265,7 @@ namespace fpga */ bool cSched::checkBitstream(int32_t oid) { + // Check bitstream-map with the operator-ID if (bstreams.find(oid) != bstreams.end()) { return true; diff --git a/sw/src/cService.cpp b/sw/src/cService.cpp index 771d7f46..8d81f240 100644 --- a/sw/src/cService.cpp +++ b/sw/src/cService.cpp @@ -9,13 +9,13 @@ cService* cService::cservice = nullptr; // ======------------------------------------------------------------------------------- /** - * @brief Constructor + * @brief Constructor for the cService object, protected to keep singleton-status * * @param vfid */ cService::cService(string name, bool remote, int32_t vfid, uint32_t dev, void (*uisr)(int), uint16_t port, bool priority, bool reorder) : remote(remote), vfid(vfid), dev(dev), uisr(uisr), port(port), cSched(vfid, dev, priority, reorder) { - // ID + // ID - create both a service-ID and a socket-name for communication service_id = ("coyote-daemon-vfid-" + std::to_string(vfid) + "-" + name).c_str(); socket_name = ("/tmp/coyote-daemon-vfid-" + std::to_string(vfid) + "-" + 
name).c_str(); } @@ -24,18 +24,25 @@ cService::cService(string name, bool remote, int32_t vfid, uint32_t dev, void (* // Sig handler // ======------------------------------------------------------------------------------- +// Set the signum-handler based on the given signum-number (basically can only handle SIGTERM) void cService::sigHandler(int signum) { cservice->myHandler(signum); } +// Can handle signum-calls (only SIGTERM supported as of now) void cService::myHandler(int signum) { + // Handle termination signals if(signum == SIGTERM) { syslog(LOG_NOTICE, "SIGTERM received\n");//cService::getPid()); + + // Unlink the socket unlink(socket_name.c_str()); //kill(getpid(), SIGTERM); syslog(LOG_NOTICE, "Exiting"); closelog(); + + // Exit the daemon with success-code exit(EXIT_SUCCESS); } else { syslog(LOG_NOTICE, "Signal %d not handled", signum); @@ -51,7 +58,7 @@ void cService::myHandler(int signum) { * */ void cService::daemonInit() { - // Fork + // Fork: Create a new child-process, check success by the created PID DBG3("Forking..."); pid = fork(); if(pid < 0 ) @@ -59,26 +66,26 @@ void cService::daemonInit() { if(pid > 0 ) exit(EXIT_SUCCESS); - // Sid + // Sid - create a new session, of which the process is now the session-leader if(setsid() < 0) exit(EXIT_FAILURE); // Signal handler - signal(SIGTERM, cService::sigHandler); - signal(SIGCHLD, SIG_IGN); - signal(SIGHUP, SIG_IGN); + signal(SIGTERM, cService::sigHandler); // set up the custom handler for a SIGTERM signal + signal(SIGCHLD, SIG_IGN); // ignore the SIGCHLD command to prevent the creation of zombie processes + signal(SIGHUP, SIG_IGN); // ignore the SIGHUP command so that the process keeps running even if the terminal is killed - // Fork + // Fork again. 
The new process is not a session leader and has no controlling terminal pid = fork(); if(pid < 0 ) exit(EXIT_FAILURE); if(pid > 0 ) exit(EXIT_SUCCESS); - // Permissions + // Permissions - the daemon can create files with any required permission umask(0); - // Cd + // Cd: Change directory to the root directory ("/") to avoid locking any directory if((chdir("/")) < 0) { exit(EXIT_FAILURE); } @@ -102,31 +109,38 @@ void cService::socketInit() { sockfd = -1; + // In case remote is set to true, initialize a network socket for network communication if(remote) { struct sockaddr_in server; + // Create the socket and check if it's successful sockfd = ::socket(AF_INET, SOCK_STREAM, 0); if (sockfd == -1) throw std::runtime_error("Could not create a socket"); + // Select network and address for connection server.sin_family = AF_INET; server.sin_addr.s_addr = INADDR_ANY; server.sin_port = htons(port); + // Try to bind the socket to the selected address and port if (::bind(sockfd, (struct sockaddr*)&server, sizeof(server)) < 0) throw std::runtime_error("Could not bind a socket"); if (sockfd < 0 ) throw std::runtime_error("Could not listen to a port: " + to_string(port)); } else { + // Create a local socket for Inter-Process Communication struct sockaddr_un server; socklen_t len; + // Check for successful creation of the IPC-socket if((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { syslog(LOG_ERR, "Error creating a server socket"); exit(EXIT_FAILURE); } + // Set up the local (AF_UNIX) socket address from socket_name and remove any stale socket file at that path server.sun_family = AF_UNIX; strcpy(server.sun_path, socket_name.c_str()); unlink(server.sun_path); @@ -138,6 +152,7 @@ void cService::socketInit() { } } + // Try to listen on the socket (local or network) if(listen(sockfd, maxNumClients) == -1) { syslog(LOG_ERR, "Error listen()"); exit(EXIT_FAILURE); @@ -148,6 +163,8 @@ void cService::socketInit() { * @brief Accept connections * */ + +// Accept a local connection (I guess that's IPC - inter-process communication) void 
cService::acceptConnectionLocal() { sockaddr_un client; socklen_t len = sizeof(client); @@ -157,10 +174,11 @@ void cService::acceptConnectionLocal() { pid_t rpid; int fid; + // Try to accept an incoming connection if((connfd = accept(sockfd, (struct sockaddr *)&client, &len)) != -1) { syslog(LOG_NOTICE, "Connection accepted local, connfd: %d", connfd); - // Read rpid + // Read rpid (registered process ID) if((n = read(connfd, recv_buf, sizeof(pid_t))) == sizeof(pid_t)) { memcpy(&rpid, recv_buf, sizeof(pid_t)); syslog(LOG_NOTICE, "Registered pid: %d", rpid); @@ -170,7 +188,7 @@ void cService::acceptConnectionLocal() { exit(EXIT_FAILURE); } - // Read fid + // Read fid (function ID) if((n = read(connfd, recv_buf, sizeof(int))) == sizeof(int)) { memcpy(&fid, recv_buf, sizeof(int)); syslog(LOG_NOTICE, "Function id: %d", fid); @@ -180,12 +198,14 @@ void cService::acceptConnectionLocal() { exit(EXIT_FAILURE); } + // Create a new client thread for the function in the function-struct functions[fid]->registerClientThread(connfd, vfid, rpid, dev, this, uisr); } std::this_thread::sleep_for(std::chrono::nanoseconds(sleepIntervalDaemon)); } +// Accept a remote connection (that's probably for RDMA-usecase to exchange the QP) void cService::acceptConnectionRemote() { uint32_t recv_qpid; uint8_t ack; @@ -198,6 +218,7 @@ void cService::acceptConnectionRemote() { ibvQ r_qp; bThread *cthread; + // Try to accept the incoming connection if((connfd = ::accept(sockfd, NULL, 0)) != -1) { syslog(LOG_NOTICE, "Connection accepted remote, connfd: %d", connfd); @@ -221,18 +242,19 @@ void cService::acceptConnectionRemote() { exit(EXIT_FAILURE); } + // Get a cThread from the function registered in the func-struct cthread = functions[fid]->registerClientThread(connfd, vfid, getpid(), dev, this, uisr); - cthread->getQpair()->remote = r_qp; - cthread->getMem({CoyoteAlloc::HPF, r_qp.size, true}); + cthread->getQpair()->remote = r_qp; // store the received remote QP + 
cthread->getMem({CoyoteAlloc::HPF, r_qp.size, true}); // Allocate memory for receiving data for RDMA - // Send local queue pair + // Send local queue pair to the remote side if (::write(connfd, &cthread->getQpair()->local, sizeof(ibvQ)) != sizeof(ibvQ)) { ::close(connfd); syslog(LOG_ERR, "Could not write a local queue"); exit(EXIT_FAILURE); } - // Write context and connection + // Write context and connection to the config-space of Coyote cthread->writeQpContext(port); // ARP lookup cthread->doArpLookup(cthread->getQpair()->remote.ip_addr); @@ -245,14 +267,14 @@ void cService::acceptConnectionRemote() { } /** - * @brief Main run service + * @brief Main run service for the daemon * */ void cService::start() { // Init daemon daemonInit(); - // Run scheduler + // Run scheduler - creates a scheduler-thread which waits for incoming requests if(isReconfigurable()) runSched(); // Init socket @@ -261,11 +283,13 @@ void cService::start() { // Init threads syslog(LOG_NOTICE, "Thread initialization"); + // Iterate over entries in the func-struct and start all of these functions + // Going back to the definition of func-start(): Starting a clean-up-thread? for (auto it = functions.begin(); it != functions.end(); it++) { it->second->start(); } - // Main + // Main - exchange of QP or local connection, depending on remote-setting while(1) { if(!remote) acceptConnectionLocal(); @@ -277,6 +301,8 @@ void cService::start() { // ======------------------------------------------------------------------------------- // Functions // ======------------------------------------------------------------------------------- + +// Place an additional function in the func-struct if the function ID doesn't already exist void cService::addFunction(int32_t fid, std::unique_ptr f) { if(functions.find(fid) == functions.end()) { functions.emplace(fid,std::move(f));