Skip to content

Commit

Permalink
Modified UNVMe driver to use UIO device and no-IOMMU VFIO
Browse files Browse the repository at this point in the history
This is the main transition away from an IOMMU-based driver to something that
can work with a statically-mapped buffer defined with a UIO device, alongside
VFIO running in no-IOMMU mode for access to PCIe.

This change allows the driver to run on embedded devices without an IOMMU,
such as the Xilinx Zynq UltraScale+ MPSoC, with the flexibility to allocate a
chunk of memory to UNVMe through the device tree.

The implementation is currently quite restricted: it requires a UIO driver to
be loaded at /dev/uio0, whose first memory map must be a region of DMA-able
memory of size 1GiB at address 0x40000000.
  • Loading branch information
RussellJoyce committed Apr 11, 2018
1 parent b6b85ab commit a4f30b4
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 93 deletions.
4 changes: 0 additions & 4 deletions Makefile.def
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ CPPFLAGS+=-D_GNU_SOURCE
# To turn on debug message logging
#CPPFLAGS+=-DUNVME_DEBUG

# To use identity map for DMA address # for faster submission time
# (but may cause DMA overlapping for applications with high memory usage)
CPPFLAGS+=-DUNVME_IDENTITY_MAP_DMA

# To build the ioengine modules, specify fio directory where header files are
#FIODIR:=/opt/fio

11 changes: 6 additions & 5 deletions src/unvme_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <string.h>
#include <signal.h>
#include <sched.h>
#include <fcntl.h>

#include "rdtsc.h"
#include "unvme_core.h"
Expand Down Expand Up @@ -199,9 +200,6 @@ static u16 unvme_get_cid(unvme_desc_t* desc)
static u64 unvme_map_dma(const unvme_ns_t* ns, void* buf, u64 bufsz)
{
unvme_device_t* dev = ((unvme_session_t*)ns->ses)->dev;
#ifdef UNVME_IDENTITY_MAP_DMA
u64 addr = (u64)buf & dev->vfiodev.iovamask;
#else
vfio_dma_t* dma = NULL;
unvme_lockr(&dev->iomem.lock);
int i;
Expand All @@ -215,7 +213,6 @@ static u64 unvme_map_dma(const unvme_ns_t* ns, void* buf, u64 bufsz)
u64 addr = dma->addr + (u64)(buf - dma->buf);
if ((addr + bufsz) > (dma->addr + dma->size))
FATAL("buffer overrun");
#endif
//if ((addr & (ns->blocksize - 1)) != 0)
// FATAL("unaligned buffer address");
return addr;
Expand Down Expand Up @@ -494,7 +491,9 @@ unvme_ns_t* unvme_do_open(int pci, int nsid, int qcount, int qsize)
vfio_dma_t* dma = vfio_dma_alloc(&dev->vfiodev, 4096);
if (nvme_acmd_identify(&dev->nvmedev, 0, dma->addr, 0))
FATAL("nvme_acmd_identify controller failed");
nvme_identify_ctlr_t* idc = (nvme_identify_ctlr_t*)dma->buf;
nvme_identify_ctlr_t* idc = malloc(sizeof(nvme_identify_ctlr_t));
memcpy(idc, dma->buf, sizeof(nvme_identify_ctlr_t));

if (nsid > idc->nn) {
ERROR("invalid %06x nsid %d (max %d)", pci, nsid, idc->nn);
return NULL;
Expand Down Expand Up @@ -523,6 +522,7 @@ unvme_ns_t* unvme_do_open(int pci, int nsid, int qcount, int qsize)
int mp = 2 << (idc->mdts - 1);
if (ns->maxppio > mp) ns->maxppio = mp;
}
free(idc);
vfio_dma_free(dma);

// get max number of queues supported
Expand All @@ -539,6 +539,7 @@ unvme_ns_t* unvme_do_open(int pci, int nsid, int qcount, int qsize)
ns->qsize = qsize;

// setup IO queues
DEBUG_FN("Creating %d IO queues (of max %d), queue size %d", qcount, maxqcount, qsize);
dev->ioqs = zalloc(qcount * sizeof(unvme_queue_t));
for (i = 0; i < qcount; i++) unvme_ioq_create(dev, i);
}
Expand Down
128 changes: 44 additions & 84 deletions src/unvme_vfio.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@
#define FATAL(fmt, arg...) do { ERROR(fmt, ##arg); abort(); } while (0)

/// Starting device DMA address
#define VFIO_IOVA 0x800000000
#define UIO_BASE 0x40000000
/// Size of UIO buffer/device
#define UIO_SIZE 0x40000000

/// IRQ index names
const char* vfio_irq_names[] = { "INTX", "MSI", "MSIX", "ERR", "REQ" };
Expand Down Expand Up @@ -102,31 +104,18 @@ static vfio_mem_t* vfio_mem_alloc(vfio_device_t* dev, size_t size, void* pmb)
if (pmb) {
mem->dma.buf = pmb;
} else {
mem->dma.buf = mmap(0, size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, -1, 0);
if (mem->dma.buf == MAP_FAILED)
FATAL("mmap: %s", strerror(errno));
mem->mmap = 1;
if (dev->uiobufoff + size > UIO_SIZE) {
ERROR("Out of UIO memory space (next allocation would use %#lx of %#lx)", dev->uiobufoff + size, UIO_SIZE);
free(mem);
return NULL;
}
mem->dma.buf = dev->uiobuf + dev->uiobufoff;
dev->uiobufoff += size;
}

pthread_mutex_lock(&dev->lock);
struct vfio_iommu_type1_dma_map map = {
.argsz = sizeof(map),
.flags = (VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE),
.size = (__u64)size,
.vaddr = (__u64)mem->dma.buf,
#ifdef UNVME_IDENTITY_MAP_DMA
.iova = (__u64)mem->dma.buf & dev->iovamask,
#else
.iova = dev->iovanext,
#endif
};

if (ioctl(dev->contfd, VFIO_IOMMU_MAP_DMA, &map) < 0) {
FATAL("VFIO_IOMMU_MAP_DMA: %s", strerror(errno));
}
mem->dma.size = size;
mem->dma.addr = map.iova;
mem->dma.addr = dev->iovanext;
mem->dma.mem = mem;
mem->dev = dev;

Expand All @@ -141,8 +130,8 @@ static vfio_mem_t* vfio_mem_alloc(vfio_device_t* dev, size_t size, void* pmb)
dev->memlist->prev->next = mem;
dev->memlist->prev = mem;
}
dev->iovanext = map.iova + size;
DEBUG_FN("%x %#lx %#lx %#lx", dev->pci, map.iova, map.size, dev->iovanext);
dev->iovanext += size;
DEBUG_FN("%x %#lx %#lx %#lx %#lx", dev->pci, mem->dma.addr, size, dev->iovanext, dev->uiobufoff);
pthread_mutex_unlock(&dev->lock);

return mem;
Expand All @@ -157,35 +146,26 @@ int vfio_mem_free(vfio_mem_t* mem)
{
vfio_device_t* dev = mem->dev;

struct vfio_iommu_type1_dma_unmap unmap = {
.argsz = sizeof(unmap),
.size = (__u64)mem->dma.size,
.iova = mem->dma.addr,
};

// unmap and free dma memory
if (mem->dma.buf) {
if (ioctl(dev->contfd, VFIO_IOMMU_UNMAP_DMA, &unmap) < 0)
FATAL("VFIO_IOMMU_UNMAP_DMA: %s", strerror(errno));
}
if (mem->mmap) {
if (munmap(mem->dma.buf, mem->dma.size) < 0)
FATAL("munmap: %s", strerror(errno));
}

// remove node from memory list
pthread_mutex_lock(&dev->lock);
if (mem->next == dev->memlist) dev->iovanext -= mem->dma.size;
if (mem->next == mem) {
if (mem->next == dev->memlist) { // If removing last item in list
dev->iovanext -= mem->dma.size;
dev->uiobufoff -= mem->dma.size;
}
if (mem->next == mem) { // If removing only item in list
dev->memlist = NULL;
dev->iovanext = dev->iovabase;
} else {
dev->uiobufoff = 0;
} else { // If there are other items in list
mem->next->prev = mem->prev;
mem->prev->next = mem->next;
if (dev->memlist == mem) dev->memlist = mem->next;
dev->iovanext = dev->memlist->prev->dma.addr + dev->memlist->prev->dma.size;
if (dev->memlist == mem) { // If first item in list
dev->memlist = mem->next;
}
dev->iovanext = dev->memlist->prev->dma.addr + dev->memlist->prev->dma.size; // IOVA next is after last item in list
dev->uiobufoff = dev->iovanext - dev->iovabase; // UIO buffer offset is same as IOVA offset
}
DEBUG_FN("%x %#lx %#lx %#lx", dev->pci, unmap.iova, unmap.size, dev->iovanext);
DEBUG_FN("%x %#lx %#lx %#lx %#lx", dev->pci, mem->dma.addr, mem->dma.size, dev->iovanext, dev->uiobufoff);
pthread_mutex_unlock(&dev->lock);

free(mem);
Expand Down Expand Up @@ -312,18 +292,17 @@ vfio_device_t* vfio_create(vfio_device_t* dev, int pci)
if ((i = readlink(path, path, sizeof(path))) < 0)
FATAL("No iommu_group associated with device %s", pciname);
path[i] = 0;
sprintf(path, "/dev/vfio%s", strrchr(path, '/'));
sprintf(path, "/dev/vfio/noiommu-%s", &strrchr(path, '/')[1]);

struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };

// allocate and initialize device context
if (!dev) dev = zalloc(sizeof(*dev));
else dev->ext = 1;
dev->pci = pci;
dev->pagesize = sysconf(_SC_PAGESIZE);
dev->iovabase = VFIO_IOVA;
dev->iovabase = UIO_BASE;
dev->iovanext = dev->iovabase;
if (pthread_mutex_init(&dev->lock, 0)) return NULL;

Expand All @@ -334,7 +313,7 @@ vfio_device_t* vfio_create(vfio_device_t* dev, int pci)
if (ioctl(dev->contfd, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
FATAL("ioctl VFIO_GET_API_VERSION");

if (ioctl(dev->contfd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) == 0)
if (ioctl(dev->contfd, VFIO_CHECK_EXTENSION, VFIO_NOIOMMU_IOMMU) == 0)
FATAL("ioctl VFIO_CHECK_EXTENSION");

if ((dev->groupfd = open(path, O_RDWR)) < 0)
Expand All @@ -349,12 +328,9 @@ vfio_device_t* vfio_create(vfio_device_t* dev, int pci)
if (ioctl(dev->groupfd, VFIO_GROUP_SET_CONTAINER, &dev->contfd) < 0)
FATAL("ioctl VFIO_GROUP_SET_CONTAINER");

if (ioctl(dev->contfd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
if (ioctl(dev->contfd, VFIO_SET_IOMMU, VFIO_NOIOMMU_IOMMU) < 0)
FATAL("ioctl VFIO_SET_IOMMU");

if (ioctl(dev->contfd, VFIO_IOMMU_GET_INFO, &iommu_info) < 0)
FATAL("ioctl VFIO_IOMMU_GET_INFO");

dev->fd = ioctl(dev->groupfd, VFIO_GROUP_GET_DEVICE_FD, pciname);
if (dev->fd < 0)
FATAL("ioctl VFIO_GROUP_GET_DEVICE_FD");
Expand Down Expand Up @@ -415,37 +391,16 @@ vfio_device_t* vfio_create(vfio_device_t* dev, int pci)
FATAL("VFIO_DEVICE_GET_IRQ_INFO MSIX count %d != %d", irq.count, dev->msixsize);
}

#ifdef UNVME_IDENTITY_MAP_DMA
// Set up mask to support identity IOVA map option
struct vfio_iommu_type1_dma_map map = {
.argsz = sizeof(map),
.flags = (VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE),
.iova = dev->iovabase,
.size = dev->pagesize,
};
struct vfio_iommu_type1_dma_unmap unmap = {
.argsz = sizeof(unmap),
.size = dev->pagesize,
};
// Open and map UIO device as memory buffer
dev->uiofd = open("/dev/uio0", O_RDWR | O_SYNC);
if (dev->uiofd == -1)
FATAL("unable to open /dev/uio0, %d", errno);
dev->uiobuf = mmap(NULL, UIO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev->uiofd, 0);
if (dev->uiobuf == MAP_FAILED)
FATAL("unable to mmap /dev/uio0, %d", errno);

map.vaddr = (__u64)mmap(0, map.size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if ((void*)map.vaddr == MAP_FAILED)
FATAL("mmap: %s", strerror(errno));
while (map.iova) {
if (ioctl(dev->contfd, VFIO_IOMMU_MAP_DMA, &map) < 0) {
if (errno == EFAULT) break;
FATAL("VFIO_IOMMU_MAP_DMA: %s", strerror(errno));
}
unmap.iova = map.iova;
if (ioctl(dev->contfd, VFIO_IOMMU_UNMAP_DMA, &unmap) < 0)
FATAL("VFIO_IOMMU_MUNAP_DMA: %s", strerror(errno));
map.iova <<= 1;
}
dev->iovamask = map.iova - 1;
(void) munmap((void*)map.vaddr, map.size);
DEBUG_FN("iovamask=%#llx", dev->iovamask);
#endif
if (mlock(dev->uiobuf, UIO_SIZE) == -1)
FATAL("unable to mlock, %d", errno);

return (vfio_device_t*)dev;
}
Expand All @@ -459,6 +414,11 @@ void vfio_delete(vfio_device_t* dev)
if (!dev) return;
DEBUG_FN("%x", dev->pci);

// Close and unmap UIO buffer
munlock(dev->uiobuf, UIO_SIZE);
munmap(dev->uiobuf, UIO_SIZE);
close(dev->uiofd);

// free all memory associated with the device
while (dev->memlist) vfio_mem_free(dev->memlist);

Expand Down
3 changes: 3 additions & 0 deletions src/unvme_vfio.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ typedef struct _vfio_device {
__u64 iovamask; ///< max IO virtual address mask
pthread_mutex_t lock; ///< multithreaded lock
vfio_mem_t* memlist; ///< memory allocated list
int uiofd; ///< file descriptor of UIO device
void* uiobuf; ///< UIO buffer pointer
off_t uiobufoff; ///< UIO buffer offset
} vfio_device_t;

// Export functions
Expand Down

0 comments on commit a4f30b4

Please sign in to comment.