From ca1a2a645725709f6f02da243ab34a6fbba5d8e3 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sat, 21 Sep 2024 18:50:29 +0200 Subject: [PATCH] wait for two short reads before uncapping the max read size for disk IO: 1st short read = probably at end of file 2nd short read = confirming that it's indeed EOF --- library/std/src/io/mod.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 2a4262b236780..80eb4f0ce9692 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -398,8 +398,7 @@ where // - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820) // - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads // - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems -// at the same time, i.e. small reads suffer from syscall overhead, all reads incur initialization cost -// proportional to buffer size (#110650) +// at the same time, i.e. small reads suffer from syscall overhead, all reads incur costs proportional to buffer size (#110650) // pub(crate) fn default_read_to_end( r: &mut R, @@ -444,6 +443,8 @@ pub(crate) fn default_read_to_end( } } + let mut consecutive_short_reads = 0; + loop { if buf.len() == buf.capacity() && buf.capacity() == start_cap { // The buffer might be an exact fit. Let's read into a probe buffer @@ -489,6 +490,12 @@ pub(crate) fn default_read_to_end( return Ok(buf.len() - start_len); } + if bytes_read < buf_len { + consecutive_short_reads += 1; + } else { + consecutive_short_reads = 0; + } + // store how much was initialized but not filled initialized = unfilled_but_initialized; @@ -503,7 +510,10 @@ pub(crate) fn default_read_to_end( // The reader is returning short reads but it doesn't call ensure_init(). // In that case we no longer need to restrict read sizes to avoid // initialization costs. - if !was_fully_initialized { + // When reading from disk we usually don't get any short reads except at EOF. + // So we wait for at least 2 short reads before uncapping the read buffer; + // this helps with the Windows issue. + if !was_fully_initialized && consecutive_short_reads > 1 { max_read_size = usize::MAX; }