Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

don't splice from files into pipes in io::copy #108283

Merged
merged 2 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 46 additions & 16 deletions library/std/src/sys/unix/kernel_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@
//! Once it has obtained all necessary pieces and brought any wrapper types into a state where they
//! can be safely bypassed it will attempt to use the `copy_file_range(2)`,
//! `sendfile(2)` or `splice(2)` syscalls to move data directly between file descriptors.
//! Since those syscalls have requirements that cannot be fully checked in advance and
//! gathering additional information about file descriptors would require additional syscalls
//! anyway it simply attempts to use them one after another (guided by inaccurate hints) to
//! figure out which one works and falls back to the generic read-write copy loop if none of them
//! does.
//! Since those syscalls have requirements that cannot be fully checked in advance it attempts
//! to use them one after another (guided by hints) to figure out which one works and
//! falls back to the generic read-write copy loop if none of them does.
//! Once a working syscall is found for a pair of file descriptors it will be called in a loop
//! until the copy operation is completed.
//!
Expand Down Expand Up @@ -84,14 +82,10 @@ pub(crate) fn copy_spec<R: Read + ?Sized, W: Write + ?Sized>(
/// The methods on this type only provide hints, due to `AsRawFd` and `FromRawFd` the inferred
/// type may be wrong.
enum FdMeta {
/// We obtained the FD from a type that can contain any type of `FileType` and queried the metadata
/// because it is cheaper than probing all possible syscalls (reader side)
Metadata(Metadata),
Socket,
Pipe,
/// We don't have any metadata, e.g. because the original type was `File` which can represent
/// any `FileType` and we did not query the metadata either since it did not seem beneficial
/// (writer side)
/// We don't have any metadata because the stat syscall failed
NoneObtained,
}

Expand Down Expand Up @@ -131,6 +125,39 @@ impl FdMeta {
}
}

/// Returns true either if changes made to the source after a sendfile/splice call won't become
/// visible in the sink or the source has explicitly opted into such behavior (e.g. by splicing
/// a file into a pipe, the pipe being the source in this case).
///
/// This will prevent File -> Pipe and File -> Socket splicing/sendfile optimizations to uphold
/// the Read/Write API semantics of io::copy.
///
/// Note: This is not 100% airtight, the caller can use the RawFd conversion methods to turn a
/// regular file into a TcpSocket which will be treated as a socket here without checking.
fn safe_kernel_copy(source: &FdMeta, sink: &FdMeta) -> bool {
match (source, sink) {
// Data arriving from a socket is safe because the sender can't modify the socket buffer.
// Data arriving from a pipe is safe(-ish) because either the sender *copied*
// the bytes into the pipe OR explicitly performed an operation that enables zero-copy,
// thus promising not to modify the data later.
(FdMeta::Socket, _) => true,
(FdMeta::Pipe, _) => true,
(FdMeta::Metadata(meta), _)
if meta.file_type().is_fifo() || meta.file_type().is_socket() =>
{
true
}
// Data going into non-pipes/non-sockets is safe because the "later changes may become visible" issue
// only happens for pages sitting in send buffers or pipes.
(_, FdMeta::Metadata(meta))
if !meta.file_type().is_fifo() && !meta.file_type().is_socket() =>
{
true
}
_ => false,
}
}

struct CopyParams(FdMeta, Option<RawFd>);

struct Copier<'a, 'b, R: Read + ?Sized, W: Write + ?Sized> {
Expand Down Expand Up @@ -186,7 +213,8 @@ impl<R: CopyRead, W: CopyWrite> SpecCopy for Copier<'_, '_, R, W> {
// So we just try and fallback if needed.
// If current file offsets + write sizes overflow it may also fail, we do not try to fix that and instead
// fall back to the generic copy loop.
if input_meta.potential_sendfile_source() {
if input_meta.potential_sendfile_source() && safe_kernel_copy(&input_meta, &output_meta)
{
let result = sendfile_splice(SpliceMode::Sendfile, readfd, writefd, max_write);
result.update_take(reader);

Expand All @@ -197,7 +225,9 @@ impl<R: CopyRead, W: CopyWrite> SpecCopy for Copier<'_, '_, R, W> {
}
}

if input_meta.maybe_fifo() || output_meta.maybe_fifo() {
if (input_meta.maybe_fifo() || output_meta.maybe_fifo())
&& safe_kernel_copy(&input_meta, &output_meta)
{
let result = sendfile_splice(SpliceMode::Splice, readfd, writefd, max_write);
result.update_take(reader);

Expand Down Expand Up @@ -298,13 +328,13 @@ impl CopyRead for &File {

impl CopyWrite for File {
fn properties(&self) -> CopyParams {
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
}
}

impl CopyWrite for &File {
fn properties(&self) -> CopyParams {
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
CopyParams(fd_to_meta(*self), Some(self.as_raw_fd()))
}
}

Expand Down Expand Up @@ -401,13 +431,13 @@ impl CopyRead for StdinLock<'_> {

impl CopyWrite for StdoutLock<'_> {
fn properties(&self) -> CopyParams {
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
}
}

impl CopyWrite for StderrLock<'_> {
fn properties(&self) -> CopyParams {
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
}
}

Expand Down
42 changes: 42 additions & 0 deletions library/std/src/sys/unix/kernel_copy/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,48 @@ fn copies_append_mode_sink() -> Result<()> {
Ok(())
}

#[test]
fn dont_splice_pipes_from_files() -> Result<()> {
// splicing to a pipe and then modifying the source could lead to changes
// becoming visible in an unexpected order.

use crate::io::SeekFrom;
use crate::os::unix::fs::FileExt;
use crate::process::{ChildStdin, ChildStdout};
use crate::sys_common::FromInner;

let (read_end, write_end) = crate::sys::pipe::anon_pipe()?;

let mut read_end = ChildStdout::from_inner(read_end);
let mut write_end = ChildStdin::from_inner(write_end);

let tmp_path = tmpdir();
let file = tmp_path.join("to_be_modified");
let mut file =
crate::fs::OpenOptions::new().create_new(true).read(true).write(true).open(file)?;

const SZ: usize = libc::PIPE_BUF as usize;

// put data in page cache
let mut buf: [u8; SZ] = [0x01; SZ];
file.write_all(&buf).unwrap();

// copy page into pipe
file.seek(SeekFrom::Start(0)).unwrap();
assert!(io::copy(&mut file, &mut write_end).unwrap() == SZ as u64);

// modify file
buf[0] = 0x02;
file.write_at(&buf, 0).unwrap();

// read from pipe
read_end.read_exact(buf.as_mut_slice()).unwrap();

assert_eq!(buf[0], 0x01, "data in pipe should reflect the original, not later modifications");

Ok(())
}

#[bench]
fn bench_file_to_file_copy(b: &mut test::Bencher) {
const BYTES: usize = 128 * 1024;
Expand Down