Skip to content

Commit

Permalink
io_uring/rsrc: optimise registered huge pages
Browse files Browse the repository at this point in the history
When registering huge pages, internally io_uring will split them into
many PAGE_SIZE bvec entries. That's bad for performance as drivers need
to eventually dma-map the data and will do it individually for each bvec
entry. Coalesce huge pages into one large bvec.

Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
isilence authored and axboe committed Feb 22, 2023
1 parent b000ae0 commit 57bebf8
Showing 1 changed file with 32 additions and 6 deletions.
38 changes: 32 additions & 6 deletions io_uring/rsrc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1210,6 +1210,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
unsigned long off;
size_t size;
int ret, nr_pages, i;
struct folio *folio;

*pimu = ctx->dummy_ubuf;
if (!iov->iov_base)
Expand All @@ -1224,6 +1225,21 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
goto done;
}

/* If it's a huge page, try to coalesce them into a single bvec entry */
if (nr_pages > 1) {
folio = page_folio(pages[0]);
for (i = 1; i < nr_pages; i++) {
if (page_folio(pages[i]) != folio) {
folio = NULL;
break;
}
}
if (folio) {
folio_put_refs(folio, nr_pages - 1);
nr_pages = 1;
}
}

imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
if (!imu)
goto done;
Expand All @@ -1236,6 +1252,17 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,

off = (unsigned long) iov->iov_base & ~PAGE_MASK;
size = iov->iov_len;
/* store original address for later verification */
imu->ubuf = (unsigned long) iov->iov_base;
imu->ubuf_end = imu->ubuf + iov->iov_len;
imu->nr_bvecs = nr_pages;
*pimu = imu;
ret = 0;

if (folio) {
bvec_set_page(&imu->bvec[0], pages[0], size, off);
goto done;
}
for (i = 0; i < nr_pages; i++) {
size_t vec_len;

Expand All @@ -1244,12 +1271,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
off = 0;
size -= vec_len;
}
/* store original address for later verification */
imu->ubuf = (unsigned long) iov->iov_base;
imu->ubuf_end = imu->ubuf + iov->iov_len;
imu->nr_bvecs = nr_pages;
*pimu = imu;
ret = 0;
done:
if (ret)
kvfree(imu);
Expand Down Expand Up @@ -1364,6 +1385,11 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
const struct bio_vec *bvec = imu->bvec;

if (offset <= bvec->bv_len) {
/*
* Note, huge pages buffers consists of one large
* bvec entry and should always go this way. The other
* branch doesn't expect non PAGE_SIZE'd chunks.
*/
iter->bvec = bvec;
iter->nr_segs = bvec->bv_len;
iter->count -= offset;
Expand Down

0 comments on commit 57bebf8

Please sign in to comment.