From 7128c761050908aeae1ce9ebf96941903df1a083 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 10 Jul 2024 10:33:36 -0500 Subject: [PATCH 1/5] refactor(source): Reuse RecursivePathSource loading --- src/cargo/sources/path.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 313159bcbc8..41304dcda07 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -244,12 +244,9 @@ impl<'gctx> RecursivePathSource<'gctx> { /// Returns the packages discovered by this source. It may walk the /// filesystem if package information haven't yet loaded. - pub fn read_packages(&self) -> CargoResult> { - if self.loaded { - Ok(self.packages.clone()) - } else { - self.read_packages_inner() - } + pub fn read_packages(&mut self) -> CargoResult> { + self.load()?; + Ok(self.packages.clone()) } fn read_packages_inner(&self) -> CargoResult> { From 53e5f2bbcaf01d9565ad46d688311383e757bb2b Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 10 Jul 2024 14:14:36 -0500 Subject: [PATCH 2/5] refactor(source): Flatten list_files_walk This prepares the way for moving `walk` from `read_packages` into here. --- src/cargo/sources/path.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 41304dcda07..8924362d63c 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -466,7 +466,9 @@ fn _list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult> return list_files_gix(pkg, &repo, &filter, gctx); } } - list_files_walk(pkg, &filter, gctx) + let mut ret = Vec::new(); + list_files_walk(pkg.root(), &mut ret, true, &filter, gctx)?; + Ok(ret) } /// Returns [`Some(gix::Repository)`](gix::Repository) if the discovered repository @@ -637,7 +639,7 @@ fn list_files_gix( files.extend(list_files_gix(pkg, &sub_repo, filter, gctx)?); } Err(_) => { - walk(&file_path, &mut files, false, filter, gctx)?; + list_files_walk(&file_path, &mut files, false, filter, gctx)?; } } } else if (filter)(&file_path, is_dir) { @@ -656,17 +658,6 @@ fn list_files_gix( /// This is a fallback for [`list_files_gix`] when the package /// is not tracked under a Git repository. fn list_files_walk( - pkg: &Package, - filter: &dyn Fn(&Path, bool) -> bool, - gctx: &GlobalContext, -) -> CargoResult> { - let mut ret = Vec::new(); - walk(pkg.root(), &mut ret, true, filter, gctx)?; - Ok(ret) -} - -/// Helper recursive function for [`list_files_walk`]. -fn walk( path: &Path, ret: &mut Vec, is_root: bool, From 5d43722905f1fd6703b45f61cdf2c36f6eb68f07 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 10 Jul 2024 14:21:23 -0500 Subject: [PATCH 3/5] refactor(source): Pull in read_packages This is tied to the `Source` and only used there. --- src/cargo/ops/cargo_read_manifest.rs | 250 +------------------------- src/cargo/ops/mod.rs | 2 +- src/cargo/sources/path.rs | 253 ++++++++++++++++++++++++++- 3 files changed, 253 insertions(+), 252 deletions(-) diff --git a/src/cargo/ops/cargo_read_manifest.rs b/src/cargo/ops/cargo_read_manifest.rs index 960974ebece..24313956073 100644 --- a/src/cargo/ops/cargo_read_manifest.rs +++ b/src/cargo/ops/cargo_read_manifest.rs @@ -1,15 +1,10 @@ -use std::collections::{HashMap, HashSet}; -use std::fs; -use std::io; -use std::path::{Path, PathBuf}; +use std::path::Path; -use crate::core::{EitherManifest, Manifest, Package, PackageId, SourceId}; +use crate::core::{EitherManifest, Package, SourceId}; use crate::util::errors::CargoResult; -use crate::util::important_paths::find_project_manifest_exact; use crate::util::toml::read_manifest; use crate::util::GlobalContext; -use cargo_util::paths; -use tracing::{info, trace}; +use tracing::trace; pub fn read_package( path: &Path, @@ -33,242 +28,3 @@ pub fn read_package( Ok(Package::new(manifest, path)) } - -#[tracing::instrument(skip(source_id, gctx))] -pub fn read_packages( - path: &Path, - source_id: SourceId, - gctx: &GlobalContext, -) -> CargoResult> { - let mut all_packages = HashMap::new(); - let mut visited = HashSet::::new(); - let mut errors = Vec::::new(); - - trace!( - "looking for root package: {}, source_id={}", - path.display(), - source_id - ); - - walk(path, &mut |dir| { - trace!("looking for child package: {}", dir.display()); - - // Don't recurse into hidden/dot directories unless we're at the toplevel - if dir != path { - let name = dir.file_name().and_then(|s| s.to_str()); - if name.map(|s| s.starts_with('.')) == Some(true) { - return Ok(false); - } - - // Don't automatically discover packages across git submodules - if dir.join(".git").exists() { - return Ok(false); - } - } - - // Don't ever look at target directories - if dir.file_name().and_then(|s| s.to_str()) == Some("target") - && has_manifest(dir.parent().unwrap()) - { - return Ok(false); - } - - if has_manifest(dir) { - read_nested_packages( - dir, - &mut all_packages, - source_id, - gctx, - &mut visited, - &mut errors, - )?; - } - Ok(true) - })?; - - if all_packages.is_empty() { - match errors.pop() { - Some(err) => Err(err), - None => { - if find_project_manifest_exact(path, "cargo.toml").is_ok() { - Err(anyhow::format_err!( - "Could not find Cargo.toml in `{}`, but found cargo.toml please try to rename it to Cargo.toml", - path.display() - )) - } else { - Err(anyhow::format_err!( - "Could not find Cargo.toml in `{}`", - path.display() - )) - } - } - } - } else { - Ok(all_packages.into_iter().map(|(_, v)| v).collect()) - } -} - -fn nested_paths(manifest: &Manifest) -> Vec { - let mut nested_paths = Vec::new(); - let resolved = manifest.resolved_toml(); - let dependencies = resolved - .dependencies - .iter() - .chain(resolved.build_dependencies()) - .chain(resolved.dev_dependencies()) - .chain( - resolved - .target - .as_ref() - .into_iter() - .flat_map(|t| t.values()) - .flat_map(|t| { - t.dependencies - .iter() - .chain(t.build_dependencies()) - .chain(t.dev_dependencies()) - }), - ); - for dep_table in dependencies { - for dep in dep_table.values() { - let cargo_util_schemas::manifest::InheritableDependency::Value(dep) = dep else { - continue; - }; - let cargo_util_schemas::manifest::TomlDependency::Detailed(dep) = dep else { - continue; - }; - let Some(path) = dep.path.as_ref() else { - continue; - }; - nested_paths.push(PathBuf::from(path.as_str())); - } - } - nested_paths -} - -fn walk(path: &Path, callback: &mut dyn FnMut(&Path) -> CargoResult) -> CargoResult<()> { - if !callback(path)? { - trace!("not processing {}", path.display()); - return Ok(()); - } - - // Ignore any permission denied errors because temporary directories - // can often have some weird permissions on them. - let dirs = match fs::read_dir(path) { - Ok(dirs) => dirs, - Err(ref e) if e.kind() == io::ErrorKind::PermissionDenied => return Ok(()), - Err(e) => { - let cx = format!("failed to read directory `{}`", path.display()); - let e = anyhow::Error::from(e); - return Err(e.context(cx)); - } - }; - for dir in dirs { - let dir = dir?; - if dir.file_type()?.is_dir() { - walk(&dir.path(), callback)?; - } - } - Ok(()) -} - -fn has_manifest(path: &Path) -> bool { - find_project_manifest_exact(path, "Cargo.toml").is_ok() -} - -fn read_nested_packages( - path: &Path, - all_packages: &mut HashMap, - source_id: SourceId, - gctx: &GlobalContext, - visited: &mut HashSet, - errors: &mut Vec, -) -> CargoResult<()> { - if !visited.insert(path.to_path_buf()) { - return Ok(()); - } - - let manifest_path = find_project_manifest_exact(path, "Cargo.toml")?; - - let manifest = match read_manifest(&manifest_path, source_id, gctx) { - Err(err) => { - // Ignore malformed manifests found on git repositories - // - // git source try to find and read all manifests from the repository - // but since it's not possible to exclude folders from this search - // it's safer to ignore malformed manifests to avoid - // - // TODO: Add a way to exclude folders? - info!( - "skipping malformed package found at `{}`", - path.to_string_lossy() - ); - errors.push(err.into()); - return Ok(()); - } - Ok(tuple) => tuple, - }; - - let manifest = match manifest { - EitherManifest::Real(manifest) => manifest, - EitherManifest::Virtual(..) => return Ok(()), - }; - let nested = nested_paths(&manifest); - let pkg = Package::new(manifest, &manifest_path); - - let pkg_id = pkg.package_id(); - use std::collections::hash_map::Entry; - match all_packages.entry(pkg_id) { - Entry::Vacant(v) => { - v.insert(pkg); - } - Entry::Occupied(_) => { - // We can assume a package with publish = false isn't intended to be seen - // by users so we can hide the warning about those since the user is unlikely - // to care about those cases. - if pkg.publish().is_none() { - let _ = gctx.shell().warn(format!( - "skipping duplicate package `{}` found at `{}`", - pkg.name(), - path.display() - )); - } - } - } - - // Registry sources are not allowed to have `path=` dependencies because - // they're all translated to actual registry dependencies. - // - // We normalize the path here ensure that we don't infinitely walk around - // looking for crates. By normalizing we ensure that we visit this crate at - // most once. - // - // TODO: filesystem/symlink implications? - if !source_id.is_registry() { - for p in nested.iter() { - let path = paths::normalize_path(&path.join(p)); - let result = - read_nested_packages(&path, all_packages, source_id, gctx, visited, errors); - // Ignore broken manifests found on git repositories. - // - // A well formed manifest might still fail to load due to reasons - // like referring to a "path" that requires an extra build step. - // - // See https://github.com/rust-lang/cargo/issues/6822. - if let Err(err) = result { - if source_id.is_git() { - info!( - "skipping nested package found at `{}`: {:?}", - path.display(), - &err, - ); - errors.push(err); - } else { - return Err(err); - } - } - } - } - - Ok(()) -} diff --git a/src/cargo/ops/mod.rs b/src/cargo/ops/mod.rs index 49dfaf53fc8..b32b2147dc9 100644 --- a/src/cargo/ops/mod.rs +++ b/src/cargo/ops/mod.rs @@ -12,7 +12,7 @@ pub use self::cargo_new::{init, new, NewOptions, NewProjectKind, VersionControl} pub use self::cargo_output_metadata::{output_metadata, ExportInfo, OutputMetadataOptions}; pub use self::cargo_package::{check_yanked, package, package_one, PackageOpts}; pub use self::cargo_pkgid::pkgid; -pub use self::cargo_read_manifest::{read_package, read_packages}; +pub use self::cargo_read_manifest::read_package; pub use self::cargo_run::run; pub use self::cargo_test::{run_benches, run_tests, TestOptions}; pub use self::cargo_uninstall::uninstall; diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 8924362d63c..cae6981498a 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -1,14 +1,21 @@ +use std::collections::{HashMap, HashSet}; use std::fmt::{self, Debug, Formatter}; +use std::fs; +use std::io; use std::path::{Path, PathBuf}; use std::task::Poll; -use crate::core::{Dependency, Package, PackageId, SourceId}; +use crate::core::{Dependency, EitherManifest, Manifest, Package, PackageId, SourceId}; use crate::ops; use crate::sources::source::MaybePackage; use crate::sources::source::QueryKind; use crate::sources::source::Source; use crate::sources::IndexSummary; -use crate::util::{internal, CargoResult, GlobalContext}; +use crate::util::errors::CargoResult; +use crate::util::important_paths::find_project_manifest_exact; +use crate::util::internal; +use crate::util::toml::read_manifest; +use crate::util::GlobalContext; use anyhow::Context as _; use cargo_util::paths; use filetime::FileTime; @@ -16,7 +23,7 @@ use gix::bstr::{BString, ByteVec}; use gix::dir::entry::Status; use gix::index::entry::Stage; use ignore::gitignore::GitignoreBuilder; -use tracing::{debug, trace, warn}; +use tracing::{debug, info, trace, warn}; use walkdir::WalkDir; /// A source that represents a package gathered at the root @@ -250,7 +257,7 @@ impl<'gctx> RecursivePathSource<'gctx> { } fn read_packages_inner(&self) -> CargoResult> { - ops::read_packages(&self.path, self.source_id, self.gctx) + read_packages(&self.path, self.source_id, self.gctx) } /// List all files relevant to building this package inside this source. @@ -750,3 +757,241 @@ fn last_modified_file( trace!("last modified file {}: {}", path.display(), max); Ok((max, max_path)) } + +fn read_packages( + path: &Path, + source_id: SourceId, + gctx: &GlobalContext, +) -> CargoResult> { + let mut all_packages = HashMap::new(); + let mut visited = HashSet::::new(); + let mut errors = Vec::::new(); + + trace!( + "looking for root package: {}, source_id={}", + path.display(), + source_id + ); + + walk(path, &mut |dir| { + trace!("looking for child package: {}", dir.display()); + + // Don't recurse into hidden/dot directories unless we're at the toplevel + if dir != path { + let name = dir.file_name().and_then(|s| s.to_str()); + if name.map(|s| s.starts_with('.')) == Some(true) { + return Ok(false); + } + + // Don't automatically discover packages across git submodules + if dir.join(".git").exists() { + return Ok(false); + } + } + + // Don't ever look at target directories + if dir.file_name().and_then(|s| s.to_str()) == Some("target") + && has_manifest(dir.parent().unwrap()) + { + return Ok(false); + } + + if has_manifest(dir) { + read_nested_packages( + dir, + &mut all_packages, + source_id, + gctx, + &mut visited, + &mut errors, + )?; + } + Ok(true) + })?; + + if all_packages.is_empty() { + match errors.pop() { + Some(err) => Err(err), + None => { + if find_project_manifest_exact(path, "cargo.toml").is_ok() { + Err(anyhow::format_err!( + "Could not find Cargo.toml in `{}`, but found cargo.toml please try to rename it to Cargo.toml", + path.display() + )) + } else { + Err(anyhow::format_err!( + "Could not find Cargo.toml in `{}`", + path.display() + )) + } + } + } + } else { + Ok(all_packages.into_iter().map(|(_, v)| v).collect()) + } +} + +fn nested_paths(manifest: &Manifest) -> Vec { + let mut nested_paths = Vec::new(); + let resolved = manifest.resolved_toml(); + let dependencies = resolved + .dependencies + .iter() + .chain(resolved.build_dependencies()) + .chain(resolved.dev_dependencies()) + .chain( + resolved + .target + .as_ref() + .into_iter() + .flat_map(|t| t.values()) + .flat_map(|t| { + t.dependencies + .iter() + .chain(t.build_dependencies()) + .chain(t.dev_dependencies()) + }), + ); + for dep_table in dependencies { + for dep in dep_table.values() { + let cargo_util_schemas::manifest::InheritableDependency::Value(dep) = dep else { + continue; + }; + let cargo_util_schemas::manifest::TomlDependency::Detailed(dep) = dep else { + continue; + }; + let Some(path) = dep.path.as_ref() else { + continue; + }; + nested_paths.push(PathBuf::from(path.as_str())); + } + } + nested_paths +} + +fn walk(path: &Path, callback: &mut dyn FnMut(&Path) -> CargoResult) -> CargoResult<()> { + if !callback(path)? { + trace!("not processing {}", path.display()); + return Ok(()); + } + + // Ignore any permission denied errors because temporary directories + // can often have some weird permissions on them. + let dirs = match fs::read_dir(path) { + Ok(dirs) => dirs, + Err(ref e) if e.kind() == io::ErrorKind::PermissionDenied => return Ok(()), + Err(e) => { + let cx = format!("failed to read directory `{}`", path.display()); + let e = anyhow::Error::from(e); + return Err(e.context(cx)); + } + }; + for dir in dirs { + let dir = dir?; + if dir.file_type()?.is_dir() { + walk(&dir.path(), callback)?; + } + } + Ok(()) +} + +fn has_manifest(path: &Path) -> bool { + find_project_manifest_exact(path, "Cargo.toml").is_ok() +} + +fn read_nested_packages( + path: &Path, + all_packages: &mut HashMap, + source_id: SourceId, + gctx: &GlobalContext, + visited: &mut HashSet, + errors: &mut Vec, +) -> CargoResult<()> { + if !visited.insert(path.to_path_buf()) { + return Ok(()); + } + + let manifest_path = find_project_manifest_exact(path, "Cargo.toml")?; + + let manifest = match read_manifest(&manifest_path, source_id, gctx) { + Err(err) => { + // Ignore malformed manifests found on git repositories + // + // git source try to find and read all manifests from the repository + // but since it's not possible to exclude folders from this search + // it's safer to ignore malformed manifests to avoid + // + // TODO: Add a way to exclude folders? + info!( + "skipping malformed package found at `{}`", + path.to_string_lossy() + ); + errors.push(err.into()); + return Ok(()); + } + Ok(tuple) => tuple, + }; + + let manifest = match manifest { + EitherManifest::Real(manifest) => manifest, + EitherManifest::Virtual(..) => return Ok(()), + }; + let nested = nested_paths(&manifest); + let pkg = Package::new(manifest, &manifest_path); + + let pkg_id = pkg.package_id(); + use std::collections::hash_map::Entry; + match all_packages.entry(pkg_id) { + Entry::Vacant(v) => { + v.insert(pkg); + } + Entry::Occupied(_) => { + // We can assume a package with publish = false isn't intended to be seen + // by users so we can hide the warning about those since the user is unlikely + // to care about those cases. + if pkg.publish().is_none() { + let _ = gctx.shell().warn(format!( + "skipping duplicate package `{}` found at `{}`", + pkg.name(), + path.display() + )); + } + } + } + + // Registry sources are not allowed to have `path=` dependencies because + // they're all translated to actual registry dependencies. + // + // We normalize the path here ensure that we don't infinitely walk around + // looking for crates. By normalizing we ensure that we visit this crate at + // most once. + // + // TODO: filesystem/symlink implications? + if !source_id.is_registry() { + for p in nested.iter() { + let path = paths::normalize_path(&path.join(p)); + let result = + read_nested_packages(&path, all_packages, source_id, gctx, visited, errors); + // Ignore broken manifests found on git repositories. + // + // A well formed manifest might still fail to load due to reasons + // like referring to a "path" that requires an extra build step. + // + // See https://github.com/rust-lang/cargo/issues/6822. + if let Err(err) = result { + if source_id.is_git() { + info!( + "skipping nested package found at `{}`: {:?}", + path.display(), + &err, + ); + errors.push(err); + } else { + return Err(err); + } + } + } + } + + Ok(()) +} From 526138602af267e917a04bddc79961533b14b61e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 10 Jul 2024 15:36:25 -0500 Subject: [PATCH 4/5] refactor(source): Flatten RecursivePathSource::read_packages_inner --- src/cargo/sources/path.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index cae6981498a..bdb74450ac2 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -256,10 +256,6 @@ impl<'gctx> RecursivePathSource<'gctx> { Ok(self.packages.clone()) } - fn read_packages_inner(&self) -> CargoResult> { - read_packages(&self.path, self.source_id, self.gctx) - } - /// List all files relevant to building this package inside this source. /// /// This function will use the appropriate methods to determine the @@ -293,7 +289,7 @@ impl<'gctx> RecursivePathSource<'gctx> { /// Discovers packages inside this source if it hasn't yet done. pub fn load(&mut self) -> CargoResult<()> { if !self.loaded { - self.packages = self.read_packages_inner()?; + self.packages = read_packages(&self.path, self.source_id, self.gctx)?; self.loaded = true; } From 848dd7f1cd2a503c83b724776e2d50adbd07e09c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 10 Jul 2024 15:45:04 -0500 Subject: [PATCH 5/5] refactor(source): Reuse the package hash map Primary benefit: looking to track more state in `RecursivePathSource` and this is a stepping stone. Minor benefits: - Cleaner - Avoid re-allocating - Faster lookup for `download` --- src/cargo/sources/path.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index bdb74450ac2..43dc794c807 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -226,7 +226,7 @@ pub struct RecursivePathSource<'gctx> { /// Whether this source has loaded all package information it may contain. loaded: bool, /// Packages that this sources has discovered. - packages: Vec, + packages: HashMap, gctx: &'gctx GlobalContext, } @@ -244,7 +244,7 @@ impl<'gctx> RecursivePathSource<'gctx> { source_id, path: root.to_path_buf(), loaded: false, - packages: Vec::new(), + packages: Default::default(), gctx, } } @@ -253,7 +253,7 @@ impl<'gctx> RecursivePathSource<'gctx> { /// filesystem if package information haven't yet loaded. pub fn read_packages(&mut self) -> CargoResult> { self.load()?; - Ok(self.packages.clone()) + Ok(self.packages.iter().map(|(_, v)| v.clone()).collect()) } /// List all files relevant to building this package inside this source. @@ -311,7 +311,7 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { f: &mut dyn FnMut(IndexSummary), ) -> Poll> { self.load()?; - for s in self.packages.iter().map(|p| p.summary()) { + for s in self.packages.values().map(|p| p.summary()) { let matched = match kind { QueryKind::Exact => dep.matches(s), QueryKind::Alternatives => true, @@ -339,7 +339,7 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { fn download(&mut self, id: PackageId) -> CargoResult { trace!("getting packages; id={}", id); self.load()?; - let pkg = self.packages.iter().find(|pkg| pkg.package_id() == id); + let pkg = self.packages.get(&id); pkg.cloned() .map(MaybePackage::Ready) .ok_or_else(|| internal(format!("failed to find {} in path source", id))) @@ -758,7 +758,7 @@ fn read_packages( path: &Path, source_id: SourceId, gctx: &GlobalContext, -) -> CargoResult> { +) -> CargoResult> { let mut all_packages = HashMap::new(); let mut visited = HashSet::::new(); let mut errors = Vec::::new(); @@ -823,7 +823,7 @@ fn read_packages( } } } else { - Ok(all_packages.into_iter().map(|(_, v)| v).collect()) + Ok(all_packages) } }