diff --git a/Cargo.lock b/Cargo.lock index d9de33f81..cb8bb9264 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2418,6 +2418,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "similar", "tokio", "tree-sitter-facade-sg", ] diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml index d5f400436..823aeb3a0 100644 --- a/crates/util/Cargo.toml +++ b/crates/util/Cargo.toml @@ -32,6 +32,7 @@ serde_json = { version = "1.0.114" } futures = { version = "0.3.29", optional = true } http = { version = "0.2.11" } fs-err = { version = "2.11.0" } +similar = { version = "2.2.1" } napi = { version = "2.16.4", default-features = false, features = [ diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs new file mode 100644 index 000000000..9304e486f --- /dev/null +++ b/crates/util/src/diff_standardizer.rs @@ -0,0 +1,219 @@ +/* ================================================================================ + + getMulti. + +================================================================================ */ + +use anyhow::Result; +use similar::{ChangeTag, TextDiff}; + +/// Given a before and after for a file, edit the *after* to not include any spurious changes +pub fn standardize_rewrite(before: String, after: String) -> Result { + let mut differ = TextDiff::configure(); + differ.algorithm(similar::Algorithm::Myers); + let diff = differ.diff_lines(&before, &after); + let mut standardized_after = String::new(); + + for op in diff.ops() { + match op.tag() { + similar::DiffTag::Equal | similar::DiffTag::Insert => { + for line in diff.iter_changes(op) { + standardized_after.push_str(line.value()); + } + } + similar::DiffTag::Delete => { + // Simply skip deleted lines + } + similar::DiffTag::Replace => { + let mut before_cache = Option::None; + for line in diff.iter_changes(op) { + match line.tag() { + ChangeTag::Delete => { + before_cache = Some(line.value()); + } + ChangeTag::Insert => { + let value = line.value(); + if let Some(before) = before_cache { + if before.trim() == value.trim() { + // skip whitespace-only changes + standardized_after.push_str(before); + } else { + // Otherwise, include the line + standardized_after.push_str(value); + } + } else { + standardized_after.push_str(value); + } + before_cache = None; + } + ChangeTag::Equal => { + standardized_after.push_str(line.value()); + before_cache = None; + } + } + } + } + } + } + + Ok(standardized_after) +} + +#[cfg(test)] +mod tests { + use super::*; + use insta::assert_snapshot; + + #[test] + fn test_basic_rewrite() -> Result<()> { + let before = "Hello world\n".to_string(); + let after = "Hello Rust\n".to_string(); + let result = standardize_rewrite(before, after)?; + assert_eq!(result, "Hello Rust\n"); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_whitespace_handling() -> Result<()> { + let before = "function test() {\n console.bob('test');\n}\n".to_string(); + let after = "function test() {\nconsole.log('test');\n}\n".to_string(); + let after_standard = "function test() {\nconsole.log('test');\n}\n".to_string(); + let result = standardize_rewrite(before, after)?; + assert_eq!(result, after_standard); + Ok(()) + } + + #[test] + fn test_empty_files() -> Result<()> { + let before = "".to_string(); + let after = "".to_string(); + let result = standardize_rewrite(before, after)?; + assert_eq!(result, ""); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_multiline_changes() -> Result<()> { + let before = "line1\nline2\n line3\n".to_string(); + let after1 = "line1\nmodified line2\n line3\nnew line4\n".to_string(); + let after2 = "line1\nmodified line2\n line3\nnew line4\n".to_string(); + let result = standardize_rewrite(before, after1)?; + assert_eq!(result, after2); + Ok(()) + } + + #[test] + fn test_no_changes() -> Result<()> { + let content = "unchanged content\n".to_string(); + let result = standardize_rewrite(content.clone(), content)?; + assert_eq!(result, "unchanged content\n"); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_mixed_changes_in_large_file() -> Result<()> { + let before = r#" +/* ================================================================================ + + getMulti. + +================================================================================ */ +fn second_function() { + let mut total = 0; + for i in 0..10 { + total += i; + } + println!("Total: {}", total); +} + +fn third_function() { + let message = "Hello"; + println!("{}", message); +} +"# + .to_string(); + + let after = r#" +/* ================================================================================ + + getMulti. + +================================================================================ */ +fn second_function() { + let mut total = 0; + for i in 0..10 { + total += i; + } + println!("Total: {}", total); +} + +fn third_function() { + let thing = "Hello"; + debug!("{}", thing); +} +"# + .to_string(); + + let result = standardize_rewrite(before, after)?; + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_code_removal() -> Result<()> { + let before = r#"fn main() { + // First we do some setup + let mut total = 0; + + // Then we do a big calculation + for i in 0..100 { + if i % 2 == 0 { + total += i; + } else { + total -= i; + } + } + + // Finally print the result + println!("The total is: {}", total); +} +"# + .to_string(); + + let after = r#"fn main() { + let mut total = 0; + println!("The total is: {}", total); +} +"# + .to_string(); + + let result = standardize_rewrite(before, after.clone())?; + assert_eq!(result, after); + Ok(()) + } + + #[test] + fn test_remove_early_add_late() -> Result<()> { + let before = r#"fn main() { + let early = "remove me"; + let keep = "stay"; + let middle = "remove me too"; + let end = "keep me"; +}"# + .to_string(); + + let after = r#"fn main() { + let keep = "stay"; + let end = "keep me"; + let new = "add me"; +}"# + .to_string(); + + let result = standardize_rewrite(before, after.clone())?; + assert_eq!(result, after); + Ok(()) + } +} diff --git a/crates/util/src/lib.rs b/crates/util/src/lib.rs index 124ce80a8..1e49b0705 100644 --- a/crates/util/src/lib.rs +++ b/crates/util/src/lib.rs @@ -10,3 +10,7 @@ pub mod print_node; pub mod rich_path; pub mod runtime; pub mod url; + +mod diff_standardizer; + +pub use diff_standardizer::standardize_rewrite; diff --git a/crates/util/src/runtime.rs b/crates/util/src/runtime.rs index 111c49bbc..19353a6f0 100644 --- a/crates/util/src/runtime.rs +++ b/crates/util/src/runtime.rs @@ -56,7 +56,7 @@ pub struct ExecutionContext { pub ignore_limit_pattern: bool, } -#[cfg(not(feature = "network_requests_common"))] +#[cfg(not(any(test, feature = "network_requests_common")))] #[derive(Clone, Debug)] pub struct ExecutionContext { llm_api: Option, diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__basic_rewrite.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__basic_rewrite.snap new file mode 100644 index 000000000..c64f47f38 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__basic_rewrite.snap @@ -0,0 +1,5 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +Hello Rust diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__empty_files.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__empty_files.snap new file mode 100644 index 000000000..dc39acec4 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__empty_files.snap @@ -0,0 +1,5 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- + diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap new file mode 100644 index 000000000..44ad23d49 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -0,0 +1,21 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +/* ================================================================================ + + getMulti. + +================================================================================ */ +fn second_function() { + let mut total = 0; + for i in 0..10 { + total += i; + } + println!("Total: {}", total); +} + +fn third_function() { + let thing = "Hello"; + debug!("{}", thing); +} diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__multiline_changes.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__multiline_changes.snap new file mode 100644 index 000000000..0b7e8fd55 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__multiline_changes.snap @@ -0,0 +1,8 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +line1 +modified line2 +line3 +new line4 diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__no_changes.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__no_changes.snap new file mode 100644 index 000000000..c0e288f76 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__no_changes.snap @@ -0,0 +1,5 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +unchanged content diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap new file mode 100644 index 000000000..871ef5e34 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap @@ -0,0 +1,18 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +// this starts us off + +fn middle_function() { + // This stays the same + println!("Hello"); +} + +fn last_function() { + println!("New line added here"); + println!("And another one"); + println!("Original"); +} + +// This is other content diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__whitespace_handling.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__whitespace_handling.snap new file mode 100644 index 000000000..7e348ec60 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__whitespace_handling.snap @@ -0,0 +1,7 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +function test(){ +console.log('test'); +}