Skip to content

Commit d8a4de2

Browse files
author
Deyoung Hong
committed
Tune fio engine and completion path.
1 parent f656ac0 commit d8a4de2

File tree

3 files changed

+60
-103
lines changed

3 files changed

+60
-103
lines changed

ioengine/unvme_fio.c

Lines changed: 21 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -12,43 +12,24 @@
1212
#include "fio.h"
1313
#include "optgroup.h" // since fio 2.4
1414

15-
#define TDEBUG(fmt, arg...) //printf("#%s.%d " fmt "\n", __func__, td->thread_number, ##arg)
15+
#define TDEBUG(fmt, arg...) //fprintf(stderr, "#%s.%d " fmt "\n", __func__, td->thread_number, ##arg)
1616
#define FATAL(fmt, arg...) do { warnx(fmt, ##arg); abort(); } while (0)
1717

18-
typedef struct {
19-
struct io_u** iocq;
20-
int head;
21-
int tail;
22-
} unvme_data_t;
23-
18+
/// Context used for thread initialization
2419
typedef struct {
2520
pthread_mutex_t mutex;
2621
const unvme_ns_t* ns;
2722
int ncpus;
28-
u64 rdtsc_timeout;
2923
} unvme_context_t;
3024

25+
/// Thread IO completion queue
26+
typedef struct io_u *unvme_iocq_t;
27+
3128

3229
// Static variables
3330
static unvme_context_t unvme = { .mutex = PTHREAD_MUTEX_INITIALIZER };
3431

3532

36-
/**
37-
* Read tsc.
38-
*/
39-
static inline uint64_t rdtsc(void)
40-
{
41-
union {
42-
uint64_t val;
43-
struct {
44-
uint32_t lo;
45-
uint32_t hi;
46-
};
47-
} tsc;
48-
asm volatile ("rdtsc" : "=a" (tsc.lo), "=d" (tsc.hi));
49-
return tsc.val;
50-
}
51-
5233
/*
5334
* Clean up UNVMe upon exit.
5435
*/
@@ -79,10 +60,6 @@ static int do_unvme_init(struct thread_data *td)
7960
if (td->o.iodepth >= unvme.ns->qsize)
8061
FATAL("iodepth %d greater than queue size", td->o.iodepth);
8162

82-
uint64_t tsc = rdtsc();
83-
usleep(10000);
84-
unvme.rdtsc_timeout = (rdtsc() - tsc) * 100 * 300; // 300 secs timeout
85-
8663
unvme.ncpus = sysconf(_SC_NPROCESSORS_ONLN);
8764
printf("unvme_open %s q=%dx%d ncpus=%d\n",
8865
unvme.ns->device, unvme.ns->qcount, unvme.ns->qsize, unvme.ncpus);
@@ -125,16 +102,9 @@ static int fio_unvme_get_file_size(struct thread_data *td, struct fio_file *f)
125102
*/
126103
static int fio_unvme_init(struct thread_data *td)
127104
{
128-
unvme_data_t* udata = calloc(1, sizeof(unvme_data_t));
129-
if (!udata) return 1;
130-
131-
udata->iocq = calloc(td->o.iodepth + 1, sizeof(void*));
132-
if (!udata->iocq) {
133-
free (udata);
134-
return 1;
135-
}
136-
137-
td->io_ops_data = udata;
105+
unvme_iocq_t* iocq = calloc(td->o.iodepth, sizeof(void*));
106+
if (!iocq) return 1;
107+
td->io_ops_data = iocq;
138108
return 0;
139109
}
140110

@@ -145,11 +115,8 @@ static int fio_unvme_init(struct thread_data *td)
145115
*/
146116
static void fio_unvme_cleanup(struct thread_data *td)
147117
{
148-
unvme_data_t* udata = td->io_ops_data;
149-
if (udata) {
150-
if (udata->iocq) free(udata->iocq);
151-
free(udata);
152-
}
118+
if (td->io_ops_data) free(td->io_ops_data);
119+
td->io_ops_data = NULL;
153120
}
154121

155122
/*
@@ -201,15 +168,9 @@ static void fio_unvme_iomem_free(struct thread_data *td)
201168
*/
202169
static struct io_u* fio_unvme_event(struct thread_data *td, int event)
203170
{
204-
unvme_data_t* udata = td->io_ops_data;
205-
struct io_u* io_u = NULL;
206-
207-
if (udata->head != udata->tail) {
208-
io_u = udata->iocq[udata->head];
209-
TDEBUG("GET.%d %p", udata->head, io_u->buf);
210-
if (++udata->head > td->o.iodepth) udata->head = 0;
211-
}
212-
return io_u;
171+
unvme_iocq_t* iocq = td->io_ops_data;
172+
TDEBUG("GET.%d %p", event, iocq[event]->buf);
173+
return iocq[event];
213174
}
214175

215176
/*
@@ -223,30 +184,27 @@ static int fio_unvme_getevents(struct thread_data *td, unsigned int min,
223184
{
224185
int i;
225186
struct io_u* io_u;
226-
int events = 0;
227-
u64 endtsc = 0;
228-
unvme_data_t* udata = td->io_ops_data;
187+
unvme_iocq_t* iocq = td->io_ops_data;
188+
int ec = 0;
229189

230-
do {
190+
for (;;) {
231191
io_u_qiter(&td->io_u_all, io_u, i) {
232192
if (io_u->engine_data) {
233193
int stat = unvme_apoll(io_u->engine_data, 0);
234194
if (stat == 0) {
235195
io_u->engine_data = NULL;
236-
udata->iocq[udata->tail] = io_u;
237-
TDEBUG("PUT.%d %p", udata->tail, io_u->buf);
238-
if (++udata->tail > td->o.iodepth) udata->tail = 0;
239-
if (++events >= min) return events;
196+
TDEBUG("PUT.%d %p (%d %d)", ec, io_u->buf, min, max);
197+
iocq[ec++] = io_u;
198+
if (ec == max) return ec;
240199
} else if (stat == -1) {
241-
if (endtsc == 0) endtsc = rdtsc() + unvme.rdtsc_timeout;
200+
if (ec >= min) return ec;
242201
} else {
243202
FATAL("\nunvme_apoll return %#x", stat);
244203
}
245204
}
246205
}
247-
} while (rdtsc() < endtsc);
206+
}
248207

249-
FATAL("\nunvme_apoll timeout");
250208
return 0;
251209
}
252210

src/unvme_core.c

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -59,25 +59,26 @@ static unvme_lock_t unvme_lock = 0; ///< session lock
5959
*/
6060
static unvme_desc_t* unvme_desc_get(unvme_queue_t* q)
6161
{
62+
static u32 id = 0;
6263
unvme_desc_t* desc;
6364

6465
if (q->descfree) {
6566
desc = q->descfree;
6667
LIST_DEL(q->descfree, desc);
68+
69+
desc->error = 0;
70+
desc->cidcount = 0;
71+
u64* cidmask = desc->cidmask;
72+
int i = q->masksize >> 3;
73+
while (i--) *cidmask++ = 0;
6774
} else {
6875
desc = zalloc(sizeof(unvme_desc_t) + q->masksize);
76+
desc->id = ++id;
6977
desc->q = q;
7078
}
7179
LIST_ADD(q->desclist, desc);
72-
73-
if (desc == desc->next) {
74-
desc->id = 1;
75-
q->descnext = desc;
76-
} else {
77-
desc->id = desc->prev->id + 1;
78-
}
80+
if (desc == desc->next) q->descpend = desc; // head of pending list
7981
q->desccount++;
80-
8182
return desc;
8283
}
8384

@@ -89,16 +90,14 @@ static void unvme_desc_put(unvme_desc_t* desc)
8990
{
9091
unvme_queue_t* q = desc->q;
9192

92-
if (q->descnext == desc) {
93-
if (desc != desc->next) q->descnext = desc->next;
94-
else q->descnext = NULL;
93+
// if this descriptor is the pending head, move the head to the next descriptor, or clear it when this is the only one
94+
if (desc == q->descpend) {
95+
if (desc != desc->next) q->descpend = desc->next;
96+
else q->descpend = NULL;
9597
}
9698

9799
LIST_DEL(q->desclist, desc);
98-
memset(desc, 0, sizeof(unvme_desc_t) + q->masksize);
99-
desc->q = q;
100100
LIST_ADD(q->descfree, desc);
101-
102101
q->desccount--;
103102
}
104103

@@ -116,19 +115,20 @@ static int unvme_check_completion(unvme_queue_t* q, int timeout, u32* cqe_cs)
116115
u64 endtsc = 0;
117116
do {
118117
cid = nvme_check_completion(q->nvmeq, &err, cqe_cs);
119-
if (cid >= 0 || timeout == 0) break;
118+
if (timeout == 0 || cid >= 0) break;
120119
if (endtsc) sched_yield();
121120
else endtsc = rdtsc() + timeout * q->nvmeq->dev->rdtsec;
122121
} while (rdtsc() < endtsc);
122+
123123
if (cid < 0) return cid;
124124

125125
// find the pending cid in the descriptor list to clear it
126-
unvme_desc_t* desc = q->descnext;
126+
unvme_desc_t* desc = q->descpend;
127127
int b = cid >> 6;
128128
u64 mask = (u64)1 << (cid & 63);
129129
while ((desc->cidmask[b] & mask) == 0) {
130130
desc = desc->next;
131-
if (desc == q->descnext)
131+
if (desc == q->descpend)
132132
FATAL("pending cid %d not found", cid);
133133
}
134134
if (err) desc->error = err;
@@ -142,11 +142,11 @@ static int unvme_check_completion(unvme_queue_t* q, int timeout, u32* cqe_cs)
142142

143143
// if cids are still pending on the queue, advance the pending head past descriptors that have none left
144144
if (q->cidcount) {
145-
while (q->descnext->cidcount == 0) q->descnext = q->descnext->next;
145+
while (q->descpend->cidcount == 0) q->descpend = q->descpend->next;
146146
}
147147
PDEBUG("# c q%d={%d %d %#lx} d={%d %d %#lx} @%d",
148148
q->nvmeq->id, cid, q->cidcount, *q->cidmask,
149-
desc->id, desc->cidcount, *desc->cidmask, q->descnext->id);
149+
desc->id, desc->cidcount, *desc->cidmask, q->descpend->id);
150150
return err;
151151
}
152152

@@ -160,32 +160,31 @@ static u16 unvme_get_cid(unvme_desc_t* desc)
160160
u16 cid;
161161
unvme_queue_t* q = desc->q;
162162
int qsize = q->size;
163-
if ((q->cidcount + 1) < qsize) {
164-
cid = q->cid;
165-
while (q->cidmask[cid >> 6] & ((u64)1 << (cid & 63))) {
166-
if (++cid >= qsize) cid = 0;
167-
}
168-
q->cid = cid;
169-
} else {
170-
// if submission queue is full then process pending in descriptor
171-
unvme_desc_t* desc = q->descnext;
172-
while (desc->cidcount) {
173-
int err = unvme_check_completion(q, UNVME_TIMEOUT, NULL);
174-
if (err) {
175-
if (err == -1) FATAL("q%d timeout", q->nvmeq->id);
176-
else ERROR("q%d error %#x", q->nvmeq->id, err);
177-
}
163+
164+
// if submission queue is full then process completion first
165+
if ((q->cidcount + 1) == qsize) {
166+
int err = unvme_check_completion(q, UNVME_TIMEOUT, NULL);
167+
if (err) {
168+
if (err == -1) FATAL("q%d timeout", q->nvmeq->id);
169+
else ERROR("q%d error %#x", q->nvmeq->id, err);
178170
}
179-
cid = q->cid;
171+
}
172+
173+
// get a free cid
174+
cid = q->cid;
175+
while (q->cidmask[cid >> 6] & ((u64)1L << (cid & 63))) {
176+
if (++cid >= qsize) cid = 0;
180177
}
181178

182179
// set cid bit used
183180
int b = cid >> 6;
184181
u64 mask = (u64)1 << (cid & 63);
185-
q->cidmask[b] |= mask;
186-
q->cidcount++;
187182
desc->cidmask[b] |= mask;
188183
desc->cidcount++;
184+
q->cidmask[b] |= mask;
185+
q->cidcount++;
186+
q->cid = cid;
187+
if (++q->cid >= qsize) q->cid = 0;
189188

190189
return cid;
191190
}
@@ -654,7 +653,7 @@ int unvme_do_poll(unvme_desc_t* desc, int timeout, u32* cqe_cs)
654653
while (desc->cidcount) {
655654
if ((err = unvme_check_completion(desc->q, timeout, cqe_cs)) != 0) break;
656655
}
657-
if (desc->id != 0 && desc->cidcount == 0) unvme_desc_put(desc);
656+
if (desc->cidcount == 0) unvme_desc_put(desc);
658657
PDEBUG("# q%d +%d", desc->q->nvmeq->id, desc->q->desccount);
659658

660659
return err;

src/unvme_core.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,9 @@ typedef struct _unvme_queue {
110110
int desccount; ///< number of pending descriptors
111111
int masksize; ///< bit mask size to allocate
112112
u64* cidmask; ///< cid pending bit mask
113-
unvme_desc_t* desclist; ///< use descriptor list
113+
unvme_desc_t* desclist; ///< used descriptor list
114114
unvme_desc_t* descfree; ///< free descriptor list
115-
unvme_desc_t* descnext; ///< next pending descriptor to process
115+
unvme_desc_t* descpend; ///< pending descriptor list
116116
} unvme_queue_t;
117117

118118
/// Device context

0 commit comments

Comments
 (0)