Skip to content

Commit

Permalink
Add upward searching as an option
Browse files Browse the repository at this point in the history
  • Loading branch information
chipsenkbeil committed May 12, 2023
1 parent bbf74f1 commit 5b19870
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 23 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `distant spawn` is a refactoring of `distant client action proc-spawn`
with `distant client lsp` merged in using the `--lsp` flag
- `distant system-info` is a refactoring of `distant client action system-info`
- Search now supports `upward` as a directional setting to traverse upward
looking for results rather than recursing downward

### Changed

Expand Down
195 changes: 185 additions & 10 deletions distant-core/src/api/local/state/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,21 +297,51 @@ impl SearchQueryExecutor {
return Err(io::Error::new(io::ErrorKind::InvalidInput, "missing paths"));
}

let mut walker_builder = WalkBuilder::new(&query.paths[0]);
for path in &query.paths[1..] {
// Build our list of paths so we can ensure we weed out duplicates
let mut target_paths = Vec::new();
for mut path in query.paths.iter().map(Deref::deref) {
// For each explicit path, we will add it directly UNLESS we are
// searching upward with a max depth other than 0 (including no max
// depth), in which case the walk of its parent already visits it
if !query.options.upward || query.options.max_depth == Some(0) {
target_paths.push(path);
}

// For going in the upward direction, we will add ancestor paths as long
// as the max depth allows it
if query.options.upward {
let mut remaining = query.options.max_depth;
if query.options.max_depth.is_none() || query.options.max_depth > Some(0) {
while let Some(parent) = path.parent() {
// If we have a maximum depth and it has reached zero, we
// don't want to include any more paths
if remaining == Some(0) {
break;
}

path = parent;
target_paths.push(path);

if let Some(x) = remaining.as_mut() {
*x -= 1;
}
}
}
}
}

target_paths.sort_unstable();
target_paths.dedup();

// Construct the walker with our paths
let mut walker_builder = WalkBuilder::new(target_paths[0]);
for path in &target_paths[1..] {
walker_builder.add(path);
}

// Apply common configuration options to our walker
walker_builder
.follow_links(query.options.follow_symbolic_links)
.max_depth(
query
.options
.max_depth
.as_ref()
.copied()
.map(|d| d as usize),
)
.threads(cmp::min(MAXIMUM_SEARCH_THREADS, num_cpus::get()))
.types(
TypesBuilder::new()
Expand All @@ -321,6 +351,24 @@ impl SearchQueryExecutor {
)
.skip_stdout(true);

if query.options.upward {
// If traversing upward, we need to use max depth to determine how many
// path segments to support, break those up, and add them. The max
// depth setting itself should be 1 to avoid searching anything but
// the immediate children of each path component
walker_builder.max_depth(Some(1));
} else {
// Otherwise, we apply max depth like expected
walker_builder.max_depth(
query
.options
.max_depth
.as_ref()
.copied()
.map(|d| d as usize),
);
}

Ok(Self {
id: rand::random(),
query,
Expand Down Expand Up @@ -1895,4 +1943,131 @@ mod tests {

assert_eq!(rx.recv().await, None);
}

/// Verifies that an upward search treats `max_depth` as the number of ancestor
/// directories to climb, and that each visited directory only contributes its
/// immediate entries.
#[test(tokio::test)]
async fn should_support_searching_upward_with_max_depth_applying_in_reverse() {
    /// Runs a single upward path search starting at `start` using `pattern`
    /// as the path regex and `max_depth` as the upward limit, then asserts
    /// that the sorted matched paths equal `expected`, followed by the
    /// search-done notification and the channel closing.
    async fn assert_upward_search(
        start: PathBuf,
        pattern: &str,
        max_depth: Option<u64>,
        expected: Vec<PathBuf>,
    ) {
        let state = SearchState::new();
        let (reply, mut rx) = mpsc::channel(100);

        let options = SearchQueryOptions {
            max_depth,
            upward: true,
            ..Default::default()
        };
        let query = SearchQuery {
            paths: vec![start],
            target: SearchQueryTarget::Path,
            condition: SearchQueryCondition::regex(pattern),
            options,
        };

        let id = state.start(query, Box::new(reply)).await.unwrap();

        // A results message is only awaited when matches are expected; with
        // no expected matches the next message is checked as the done signal
        if !expected.is_empty() {
            let response = rx.recv().await.unwrap();

            let mut found = Vec::new();
            for m in get_matches(response) {
                if let Some(path_match) = m.into_path_match() {
                    found.push(path_match.path);
                }
            }
            found.sort_unstable();

            assert_eq!(found, expected);
        }

        assert_eq!(
            rx.recv().await,
            Some(DistantResponseData::SearchDone { id })
        );

        // Nothing further should arrive once the search has completed
        assert_eq!(rx.recv().await, None);
    }

    let root = setup_dir(vec![
        ("path/to/file1.txt", ""),
        ("path/to/file2.txt", ""),
        ("path/to/child/file1.txt", ""),
        ("path/to/child/file2.txt", ""),
        ("path/file1.txt", ""),
        ("path/file2.txt", ""),
        ("other/file1.txt", ""),
        ("other/file2.txt", ""),
        ("file1.txt", ""),
        ("file2.txt", ""),
    ]);

    // Resolve a relative path against the temporary root directory
    let at = |relative: &str| root.child(make_path(relative)).to_path_buf();

    // Each case: (starting path, regex, max depth, expected sorted matches)
    let cases: Vec<(PathBuf, &str, Option<u64>, Vec<PathBuf>)> = vec![
        // Maximum depth of 0 should only include current file if it matches
        (
            at("path/to/file1.txt"),
            "to",
            Some(0),
            vec![at("path/to/file1.txt")],
        ),
        (at("path/to"), "other", Some(0), vec![]),
        // Maximum depth of 0 will still look through an explicit path's entries
        (
            at("path/to"),
            "to",
            Some(0),
            vec![
                at("path/to"),
                at("path/to/child"),
                at("path/to/file1.txt"),
                at("path/to/file2.txt"),
            ],
        ),
        // Maximum depth of 1 should only include path and its parent directory & entries
        (
            at("path/to/file1.txt"),
            "to",
            Some(1),
            vec![
                at("path/to"),
                at("path/to/child"),
                at("path/to/file1.txt"),
                at("path/to/file2.txt"),
            ],
        ),
        // Maximum depth of 2 should search path, parent, and grandparent
        (
            at("path/to/file1.txt"),
            "file1",
            Some(2),
            vec![at("path/file1.txt"), at("path/to/file1.txt")],
        ),
        // Maximum depth greater than total path elements should just search all of them
        (
            at("path/to/file1.txt"),
            "file1",
            Some(99),
            vec![
                at("file1.txt"),
                at("path/file1.txt"),
                at("path/to/file1.txt"),
            ],
        ),
        // No max depth will also search all ancestor paths
        (
            at("path/to/file1.txt"),
            "file1",
            None,
            vec![
                at("file1.txt"),
                at("path/file1.txt"),
                at("path/to/file1.txt"),
            ],
        ),
    ];

    // Cases run one at a time, in the order listed above
    for (start, pattern, max_depth, expected) in cases {
        assert_upward_search(start, pattern, max_depth, expected).await;
    }
}
}
31 changes: 18 additions & 13 deletions distant-core/src/data/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,26 +170,33 @@ impl FromStr for SearchQueryCondition {
/// Options associated with a search query
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(default)]
pub struct SearchQueryOptions {
/// Restrict search to only these file types (otherwise all are allowed)
#[serde(default)]
/// Restrict search to only these file types (otherwise all are allowed).
pub allowed_file_types: HashSet<FileType>,

/// Regex to use to filter paths being searched to only those that match the include condition
#[serde(default)]
/// Regex to use to filter paths being searched to only those that match the include condition.
pub include: Option<SearchQueryCondition>,

/// Regex to use to filter paths being searched to only those that do not match the exclude
/// Regex to use to filter paths being searched to only those that do not match the exclude
/// condition.
#[serde(default)]
pub exclude: Option<SearchQueryCondition>,

/// Search should follow symbolic links
#[serde(default)]
/// If true, will search upward through parent directories rather than the traditional downward
/// search that recurses through all children directories.
///
/// Note that this will use maximum depth to apply to the reverse direction, and will only look
/// through each ancestor directory's immediate entries. In other words, this will not result
/// in recursing through sibling directories.
///
/// An upward search will ALWAYS search the contents of a directory, so this means providing a
/// path to a directory will search its entries EVEN if the max_depth is 0.
pub upward: bool,

/// Search should follow symbolic links.
pub follow_symbolic_links: bool,

/// Maximum results to return before stopping the query
#[serde(default)]
/// Maximum results to return before stopping the query.
pub limit: Option<u64>,

/// Maximum depth (directories) to search
Expand All @@ -200,12 +207,10 @@ pub struct SearchQueryOptions {
///
/// Note that this will not simply filter the entries of the iterator, but it will actually
/// avoid descending into directories when the depth is exceeded.
#[serde(default)]
pub max_depth: Option<u64>,

/// Amount of results to batch before sending back excluding final submission that will always
/// include the remaining results even if less than pagination request
#[serde(default)]
/// include the remaining results even if less than pagination request.
pub pagination: Option<u64>,
}

Expand Down
13 changes: 13 additions & 0 deletions src/options/common/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ pub struct CliSearchQueryOptions {
#[clap(long)]
pub exclude: Option<CliSearchQueryCondition>,

/// Search upward through parent directories rather than the traditional downward search that
/// recurses through all children directories.
///
/// Note that this will use maximum depth to apply to the reverse direction, and will only look
/// through each ancestor directory's immediate entries. In other words, this will not result
/// in recursing through sibling directories.
///
/// An upward search will ALWAYS search the contents of a directory, so this means providing a
/// path to a directory will search its entries EVEN if the max_depth is 0.
#[clap(long)]
pub upward: bool,

/// Search should follow symbolic links
#[clap(long)]
pub follow_symbolic_links: bool,
Expand Down Expand Up @@ -52,6 +64,7 @@ impl From<CliSearchQueryOptions> for SearchQueryOptions {
allowed_file_types: x.allowed_file_types,
include: x.include,
exclude: x.exclude,
upward: x.upward,
follow_symbolic_links: x.follow_symbolic_links,
limit: x.limit,
max_depth: x.max_depth,
Expand Down

0 comments on commit 5b19870

Please sign in to comment.