Skip to content

Commit

Permalink
Improve speculative prefetcher for block cloning
Browse files Browse the repository at this point in the history
- Issue prescient prefetches for demand indirect blocks after the
first one.  It should be quite rare for reads/writes, but much more
useful for cloning due to much bigger (up to 1022 blocks) accesses.
It covers the gap during the first couple accesses when we can not
speculate yet, but we know what is needed right now.  It reduces
dbuf_hold() sync read delays in dmu_buf_hold_array_by_dnode().
 - Increase maximum prefetch distance for indirect blocks from 64
to 128MB.  It should cover the maximum 1022 blocks of block cloning
access size in case of default 128KB recordsize used.  In case of
bigger recordsize the above prescient prefetch should also help.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by:	Alexander Motin <[email protected]>
Sponsored by:	iXsystems, Inc.
Closes #16814
  • Loading branch information
amotin authored Dec 4, 2024
1 parent c33a55b commit a01504b
Showing 1 changed file with 29 additions and 5 deletions.
34 changes: 29 additions & 5 deletions module/zfs/dmu_zfetch.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
/* max bytes to prefetch per stream (default 64MB) */
unsigned int zfetch_max_distance = 64 * 1024 * 1024;
#endif
/* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
/* max bytes to prefetch indirects for per stream (default 128MB) */
unsigned int zfetch_max_idistance = 128 * 1024 * 1024;
/* max request reorder distance within a stream (default 16MB) */
unsigned int zfetch_max_reorder = 16 * 1024 * 1024;
/* Max log2 fraction of holes in a stream */
Expand Down Expand Up @@ -472,6 +472,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
zstream_t *zs;
spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
zfs_prefetch_type_t os_prefetch = zf->zf_dnode->dn_objset->os_prefetch;
int64_t ipf_start, ipf_end;

if (zfs_prefetch_disable || os_prefetch == ZFS_PREFETCH_NONE)
return (NULL);
Expand Down Expand Up @@ -571,13 +572,13 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
* This access is not part of any existing stream. Create a new
* stream for it unless we are at the end of file.
*/
ASSERT0P(zs);
if (end_blkid < maxblkid)
dmu_zfetch_stream_create(zf, end_blkid);
mutex_exit(&zf->zf_lock);
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
ZFETCHSTAT_BUMP(zfetchstat_misses);
return (NULL);
ipf_start = 0;
goto prescient;

hit:
nblks = dmu_zfetch_hit(zs, nblks);
Expand Down Expand Up @@ -650,6 +651,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
pf_nblks = zs->zs_ipf_dist >> dbs;
if (zs->zs_ipf_start < zs->zs_pf_end)
zs->zs_ipf_start = zs->zs_pf_end;
ipf_start = zs->zs_ipf_end;
if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
zs->zs_ipf_end = zs->zs_pf_end + pf_nblks;

Expand All @@ -658,8 +660,30 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
zfs_refcount_add(&zs->zs_callers, NULL);
mutex_exit(&zf->zf_lock);

prescient:
/*
* Prefetch the following indirect blocks for this access to reduce
* dbuf_hold() sync read delays in dmu_buf_hold_array_by_dnode().
* This covers the gap during the first couple accesses when we can
* not predict the future yet, but know what is needed right now.
* This should be very rare for reads/writes to need more than one
* indirect, but more useful for cloning due to much bigger accesses.
*/
ipf_start = MAX(ipf_start, blkid + 1);
int epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
ipf_start = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
ipf_end = P2ROUNDUP(end_blkid, 1 << epbs) >> epbs;

int issued = 0;
for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
issued += dbuf_prefetch(zf->zf_dnode, 1, iblk,
ZIO_PRIORITY_SYNC_READ, ARC_FLAG_PRESCIENT_PREFETCH);
}

if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
if (issued)
ZFETCHSTAT_ADD(zfetchstat_io_issued, issued);
return (zs);
}

Expand Down

0 comments on commit a01504b

Please sign in to comment.