From d7a53b232dc5ef48509741e24dba40aa4dab5791 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:03:24 -0500 Subject: [PATCH 01/21] feat: add diff normalization logic --- crates/util/src/diff_standardizer.rs | 212 +++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 crates/util/src/diff_standardizer.rs diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs new file mode 100644 index 000000000..04db3ca37 --- /dev/null +++ b/crates/util/src/diff_standardizer.rs @@ -0,0 +1,212 @@ +use git2::DiffOptions; +use marzano_util::diff::{parse_modified_ranges, FileDiff}; +use tempfile::tempdir; +use anyhow::Result; + +pub fn parse_unified_diff(contents: String) -> Result> { + let parsed = parse_modified_ranges(&contents) + .map_err(|e| anyhow::anyhow!("{:?}", e))?; + Ok(parsed) +} + +/// Given a before and after for a file, edit the *after* to not include any spurious changes +pub fn standardize_rewrite(before: String, after: String) -> Result { + let mut diff_opts = DiffOptions::new(); + diff_opts.ignore_whitespace(true); + + // Create a temporary directory for the repository + let temp_dir = tempdir() + .map_err(|e| anyhow::anyhow!("Failed to create temp directory: {:?}", e))?; + + // Create blobs for diffing + let repo = git2::Repository::init_opts( + temp_dir.path(), + &git2::RepositoryInitOptions::new() + .bare(true) + .initial_head("main"), + ) + .map_err(|e| anyhow::anyhow!("Failed to create temp repo: {:?}", e))?; + + let left_oid = repo + .blob(before.as_bytes()) + .map_err(|e| anyhow::anyhow!("Failed to create left blob: {:?}", e))?; + let right_oid = repo + .blob(after.as_bytes()) + .map_err(|e| anyhow::anyhow!("Failed to create right blob: {:?}", e))?; + + let left_blob = repo + .find_blob(left_oid) + .map_err(|e| anyhow::anyhow!("Failed to find left blob: {:?}", e))?; + let right_blob = repo + .find_blob(right_oid) + .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; + + // Generate diff + let mut standardized = after.clone(); // Start with the full after content + let mut has_changes = false; + let diff = repo + .diff_blobs( + Some(&left_blob), + None, + Some(&right_blob), + None, + Some(&mut diff_opts), + None, + None, + None, + Some(&mut |delta, _hunk, line| { + has_changes = true; + match line.origin() { + '+' | ' ' => { + if let Ok(_) = std::str::from_utf8(line.content()) { + // Content will be taken from the after string + } + } + '-' => { + // Removed lines are ignored as we're using the after content + } + _ => {} + } + true + }), + ) + .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; + + // If no changes were detected in the diff, return the after content as is + if !has_changes { + standardized = after; + } + + // The temporary directory will be automatically cleaned up when temp_dir is dropped + Ok(standardized) +} + +#[cfg(test)] +mod tests { + use super::*; + use insta::assert_snapshot; + + #[test] + fn test_basic_rewrite() -> Result<()> { + let before = "Hello world\n".to_string(); + let after = "Hello Rust\n".to_string(); + let result = standardize_rewrite(before, after)?; + assert_eq!(result, "Hello Rust\n"); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_whitespace_handling() -> Result<()> { + let before = "function test() {\n console.log('test');\n}\n".to_string(); + let after = "function test(){\nconsole.log('test');\n}\n".to_string(); + let result = standardize_rewrite(before, after)?; + assert_eq!(result, "function test(){\nconsole.log('test');\n}\n"); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_empty_files() -> Result<()> { + let before = "".to_string(); + let after = "".to_string(); + let result = standardize_rewrite(before, after)?; + assert_eq!(result, ""); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_multiline_changes() -> Result<()> { + let before = "line1\nline2\nline3\n".to_string(); + let after = "line1\nmodified line2\nline3\nnew line4\n".to_string(); + let result = standardize_rewrite(before, after)?; + assert_eq!(result, "line1\nmodified line2\nline3\nnew line4\n"); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_no_changes() -> Result<()> { + let content = "unchanged content\n".to_string(); + let result = standardize_rewrite(content.clone(), content)?; + assert_eq!(result, "unchanged content\n"); + assert_snapshot!(result); + Ok(()) + } + + #[test] + fn test_mixed_changes_in_large_file() -> Result<()> { + let before = r#" +// This is a large file with multiple sections + +fn first_function() { + // Some code here + let x = 42; + println!("Value: {}", x); +} + +fn second_function() { + let mut total = 0; + for i in 0..10 { + total += i; + } + + + // Extra whitespace above + println!("Total: {}", total); +} + +fn third_function() { + let message = "Hello"; + println!("{}", message); +} +"# + .to_string(); + + let after = r#" +// This is a large file with multiple sections + +fn first_function() { + // Some code here + let x = 42; + println!("Value: {}", x); +} + +fn second_function() { + let mut total = 0; + for i in 0..10 { + total += i; + } + // No extra whitespace + println!("Total: {}", total); +} + +fn third_function() { + let message = "Hello, World!"; // Changed this line + println!("{}", message); +} +"# + .to_string(); + + let result = standardize_rewrite(before, after)?; + + // The result should: + // 1. Keep first_function exactly the same + // 2. Ignore whitespace changes in second_function + // 3. Include the actual code change in third_function + assert!(result.contains("fn first_function()")); + assert!(result.contains("let x = 42")); + assert!(result.contains("fn second_function()")); + assert!(result.contains("println!(\"Total: {}\", total)")); + assert!(result.contains("let message = \"Hello, World!\"")); + + // Verify whitespace changes were ignored + assert!(!result.contains(" \n \n")); + + // Add snapshot test + assert_snapshot!(result); + + Ok(()) + } +} From 89d85b56591b0ee8c0e90d3f706028ea4b68bbcc Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:20:57 -0500 Subject: [PATCH 02/21] [skip ci] yolo --- Cargo.lock | 2 + crates/util/Cargo.toml | 7 +++ crates/util/src/diff_standardizer.rs | 56 +++++++++---------- crates/util/src/lib.rs | 4 ++ crates/util/src/runtime.rs | 2 +- ...ff_standardizer__tests__basic_rewrite.snap | 5 ++ ...diff_standardizer__tests__empty_files.snap | 5 ++ ...r__tests__mixed_changes_in_large_file.snap | 25 +++++++++ ...tandardizer__tests__multiline_changes.snap | 8 +++ ..._diff_standardizer__tests__no_changes.snap | 5 ++ ...ndardizer__tests__whitespace_handling.snap | 7 +++ 11 files changed, 95 insertions(+), 31 deletions(-) create mode 100644 crates/util/src/snapshots/marzano_util__diff_standardizer__tests__basic_rewrite.snap create mode 100644 crates/util/src/snapshots/marzano_util__diff_standardizer__tests__empty_files.snap create mode 100644 crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap create mode 100644 crates/util/src/snapshots/marzano_util__diff_standardizer__tests__multiline_changes.snap create mode 100644 crates/util/src/snapshots/marzano_util__diff_standardizer__tests__no_changes.snap create mode 100644 crates/util/src/snapshots/marzano_util__diff_standardizer__tests__whitespace_handling.snap diff --git a/Cargo.lock b/Cargo.lock index d9de33f81..568ed054d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2407,6 +2407,7 @@ dependencies = [ "base64 0.21.7", "fs-err", "futures", + "git2", "grit-util", "http 0.2.12", "ignore", @@ -2418,6 +2419,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "tempfile", "tokio", "tree-sitter-facade-sg", ] diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml index d5f400436..958787a2d 100644 --- a/crates/util/Cargo.toml +++ b/crates/util/Cargo.toml @@ -32,6 +32,13 @@ serde_json = { version = "1.0.114" } futures = { version = "0.3.29", optional = true } http = { version = "0.2.11" } fs-err = { version = "2.11.0" } +git2 = { version = "0.19.0", default-features = false, features = [ + "vendored-openssl", + "vendored-libgit2", + "ssh", + "https", +] } +tempfile = { version = "3.1" } napi = { version = "2.16.4", default-features = false, features = [ diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index 04db3ca37..c0a6a7081 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -1,32 +1,11 @@ -use git2::DiffOptions; -use marzano_util::diff::{parse_modified_ranges, FileDiff}; -use tempfile::tempdir; use anyhow::Result; - -pub fn parse_unified_diff(contents: String) -> Result> { - let parsed = parse_modified_ranges(&contents) - .map_err(|e| anyhow::anyhow!("{:?}", e))?; - Ok(parsed) -} +use git2::{DiffOptions, Repository}; +use tempfile::tempdir; /// Given a before and after for a file, edit the *after* to not include any spurious changes -pub fn standardize_rewrite(before: String, after: String) -> Result { +pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Result { let mut diff_opts = DiffOptions::new(); diff_opts.ignore_whitespace(true); - - // Create a temporary directory for the repository - let temp_dir = tempdir() - .map_err(|e| anyhow::anyhow!("Failed to create temp directory: {:?}", e))?; - - // Create blobs for diffing - let repo = git2::Repository::init_opts( - temp_dir.path(), - &git2::RepositoryInitOptions::new() - .bare(true) - .initial_head("main"), - ) - .map_err(|e| anyhow::anyhow!("Failed to create temp repo: {:?}", e))?; - let left_oid = repo .blob(before.as_bytes()) .map_err(|e| anyhow::anyhow!("Failed to create left blob: {:?}", e))?; @@ -86,11 +65,23 @@ mod tests { use super::*; use insta::assert_snapshot; + fn setup_test_repo() -> Result<(Repository, tempfile::TempDir)> { + let temp_dir = tempdir()?; + let repo = Repository::init_opts( + temp_dir.path(), + &git2::RepositoryInitOptions::new() + .bare(true) + .initial_head("main"), + )?; + Ok((repo, temp_dir)) + } + #[test] fn test_basic_rewrite() -> Result<()> { + let (repo, _temp) = setup_test_repo()?; let before = "Hello world\n".to_string(); let after = "Hello Rust\n".to_string(); - let result = standardize_rewrite(before, after)?; + let result = standardize_rewrite(&repo, before, after)?; assert_eq!(result, "Hello Rust\n"); assert_snapshot!(result); Ok(()) @@ -98,9 +89,10 @@ mod tests { #[test] fn test_whitespace_handling() -> Result<()> { + let (repo, _temp) = setup_test_repo()?; let before = "function test() {\n console.log('test');\n}\n".to_string(); let after = "function test(){\nconsole.log('test');\n}\n".to_string(); - let result = standardize_rewrite(before, after)?; + let result = standardize_rewrite(&repo, before, after)?; assert_eq!(result, "function test(){\nconsole.log('test');\n}\n"); assert_snapshot!(result); Ok(()) @@ -108,9 +100,10 @@ mod tests { #[test] fn test_empty_files() -> Result<()> { + let (repo, _temp) = setup_test_repo()?; let before = "".to_string(); let after = "".to_string(); - let result = standardize_rewrite(before, after)?; + let result = standardize_rewrite(&repo, before, after)?; assert_eq!(result, ""); assert_snapshot!(result); Ok(()) @@ -118,9 +111,10 @@ mod tests { #[test] fn test_multiline_changes() -> Result<()> { + let (repo, _temp) = setup_test_repo()?; let before = "line1\nline2\nline3\n".to_string(); let after = "line1\nmodified line2\nline3\nnew line4\n".to_string(); - let result = standardize_rewrite(before, after)?; + let result = standardize_rewrite(&repo, before, after)?; assert_eq!(result, "line1\nmodified line2\nline3\nnew line4\n"); assert_snapshot!(result); Ok(()) @@ -128,8 +122,9 @@ mod tests { #[test] fn test_no_changes() -> Result<()> { + let (repo, _temp) = setup_test_repo()?; let content = "unchanged content\n".to_string(); - let result = standardize_rewrite(content.clone(), content)?; + let result = standardize_rewrite(&repo, content.clone(), content)?; assert_eq!(result, "unchanged content\n"); assert_snapshot!(result); Ok(()) @@ -137,6 +132,7 @@ mod tests { #[test] fn test_mixed_changes_in_large_file() -> Result<()> { + let (repo, _temp) = setup_test_repo()?; let before = r#" // This is a large file with multiple sections @@ -189,7 +185,7 @@ fn third_function() { "# .to_string(); - let result = standardize_rewrite(before, after)?; + let result = standardize_rewrite(&repo, before, after)?; // The result should: // 1. Keep first_function exactly the same diff --git a/crates/util/src/lib.rs b/crates/util/src/lib.rs index 124ce80a8..1e49b0705 100644 --- a/crates/util/src/lib.rs +++ b/crates/util/src/lib.rs @@ -10,3 +10,7 @@ pub mod print_node; pub mod rich_path; pub mod runtime; pub mod url; + +mod diff_standardizer; + +pub use diff_standardizer::standardize_rewrite; diff --git a/crates/util/src/runtime.rs b/crates/util/src/runtime.rs index 111c49bbc..19353a6f0 100644 --- a/crates/util/src/runtime.rs +++ b/crates/util/src/runtime.rs @@ -56,7 +56,7 @@ pub struct ExecutionContext { pub ignore_limit_pattern: bool, } -#[cfg(not(feature = "network_requests_common"))] +#[cfg(not(any(test, feature = "network_requests_common")))] #[derive(Clone, Debug)] pub struct ExecutionContext { llm_api: Option, diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__basic_rewrite.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__basic_rewrite.snap new file mode 100644 index 000000000..c64f47f38 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__basic_rewrite.snap @@ -0,0 +1,5 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +Hello Rust diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__empty_files.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__empty_files.snap new file mode 100644 index 000000000..dc39acec4 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__empty_files.snap @@ -0,0 +1,5 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- + diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap new file mode 100644 index 000000000..6cd1eb639 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -0,0 +1,25 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +// This is a large file with multiple sections + +fn first_function() { + // Some code here + let x = 42; + println!("Value: {}", x); +} + +fn second_function() { + let mut total = 0; + for i in 0..10 { + total += i; + } + // No extra whitespace + println!("Total: {}", total); +} + +fn third_function() { + let message = "Hello, World!"; // Changed this line + println!("{}", message); +} diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__multiline_changes.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__multiline_changes.snap new file mode 100644 index 000000000..0b7e8fd55 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__multiline_changes.snap @@ -0,0 +1,8 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +line1 +modified line2 +line3 +new line4 diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__no_changes.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__no_changes.snap new file mode 100644 index 000000000..c0e288f76 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__no_changes.snap @@ -0,0 +1,5 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +unchanged content diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__whitespace_handling.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__whitespace_handling.snap new file mode 100644 index 000000000..7e348ec60 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__whitespace_handling.snap @@ -0,0 +1,7 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +function test(){ +console.log('test'); +} From 13174f1932b3ccbb8069138f34de96a7db0e45c4 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:23:44 -0500 Subject: [PATCH 03/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 51 ++++++++++------------------ 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index c0a6a7081..814eb0eb8 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -135,21 +135,11 @@ mod tests { let (repo, _temp) = setup_test_repo()?; let before = r#" // This is a large file with multiple sections - -fn first_function() { - // Some code here - let x = 42; - println!("Value: {}", x); -} - fn second_function() { let mut total = 0; for i in 0..10 { total += i; } - - - // Extra whitespace above println!("Total: {}", total); } @@ -162,46 +152,41 @@ fn third_function() { let after = r#" // This is a large file with multiple sections +fn second_function() { + let mut total = 0; + for i in 0..10 { + total += i; + } + println!("Total: {}", total); +} -fn first_function() { - // Some code here - let x = 42; - println!("Value: {}", x); +fn third_function() { + let thing = "Hello"; + debug!("{}", message); } +"# + .to_string(); + let after_standardized = r#" +// This is a large file with multiple sections fn second_function() { let mut total = 0; for i in 0..10 { total += i; } - // No extra whitespace println!("Total: {}", total); } fn third_function() { - let message = "Hello, World!"; // Changed this line - println!("{}", message); + let thing = "Hello"; + debug!("{}", message); } -"# + "# .to_string(); let result = standardize_rewrite(&repo, before, after)?; - // The result should: - // 1. Keep first_function exactly the same - // 2. Ignore whitespace changes in second_function - // 3. Include the actual code change in third_function - assert!(result.contains("fn first_function()")); - assert!(result.contains("let x = 42")); - assert!(result.contains("fn second_function()")); - assert!(result.contains("println!(\"Total: {}\", total)")); - assert!(result.contains("let message = \"Hello, World!\"")); - - // Verify whitespace changes were ignored - assert!(!result.contains(" \n \n")); - - // Add snapshot test - assert_snapshot!(result); + assert_eq!(result, after_standardized); Ok(()) } From 96a0463dc08e8288ebedc58536217ffb8d3d6748 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:28:40 -0500 Subject: [PATCH 04/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 28 ++++--------------- ...r__tests__mixed_changes_in_large_file.snap | 12 ++------ 2 files changed, 7 insertions(+), 33 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index 814eb0eb8..1105d533d 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -1,6 +1,5 @@ use anyhow::Result; use git2::{DiffOptions, Repository}; -use tempfile::tempdir; /// Given a before and after for a file, edit the *after* to not include any spurious changes pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Result { @@ -66,10 +65,10 @@ mod tests { use insta::assert_snapshot; fn setup_test_repo() -> Result<(Repository, tempfile::TempDir)> { - let temp_dir = tempdir()?; + let temp_dir = tempfile::tempdir()?; let repo = Repository::init_opts( temp_dir.path(), - &git2::RepositoryInitOptions::new() + git2::RepositoryInitOptions::new() .bare(true) .initial_head("main"), )?; @@ -152,23 +151,6 @@ fn third_function() { let after = r#" // This is a large file with multiple sections -fn second_function() { - let mut total = 0; - for i in 0..10 { - total += i; - } - println!("Total: {}", total); -} - -fn third_function() { - let thing = "Hello"; - debug!("{}", message); -} -"# - .to_string(); - - let after_standardized = r#" -// This is a large file with multiple sections fn second_function() { let mut total = 0; for i in 0..10 { @@ -179,14 +161,14 @@ fn second_function() { fn third_function() { let thing = "Hello"; - debug!("{}", message); + debug!("{}", thing); } - "# +"# .to_string(); let result = standardize_rewrite(&repo, before, after)?; - assert_eq!(result, after_standardized); + assert_snapshot!(result); Ok(()) } diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index 6cd1eb639..9a5637680 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -3,23 +3,15 @@ source: crates/util/src/diff_standardizer.rs expression: result --- // This is a large file with multiple sections - -fn first_function() { - // Some code here - let x = 42; - println!("Value: {}", x); -} - fn second_function() { let mut total = 0; for i in 0..10 { total += i; } - // No extra whitespace println!("Total: {}", total); } fn third_function() { - let message = "Hello, World!"; // Changed this line - println!("{}", message); + let thing = "Hello"; + debug!("{}", thing); } From 76098f79200261b3fc470f0c83520d19d58154c8 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:30:06 -0500 Subject: [PATCH 05/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index 1105d533d..d5c004bf2 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -135,11 +135,11 @@ mod tests { let before = r#" // This is a large file with multiple sections fn second_function() { - let mut total = 0; + let mut total = 0; for i in 0..10 { total += i; } - println!("Total: {}", total); + println!("Total: {}", total); } fn third_function() { From 5d1d9b7c126689de38fb9cc7d60d1ffd806f9fcd Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:32:00 -0500 Subject: [PATCH 06/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 12 ++++++++++-- ...dardizer__tests__mixed_changes_in_large_file.snap | 6 +++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index d5c004bf2..c3854d8ef 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -133,7 +133,11 @@ mod tests { fn test_mixed_changes_in_large_file() -> Result<()> { let (repo, _temp) = setup_test_repo()?; let before = r#" -// This is a large file with multiple sections +/* ================================================================================ + + getMulti. + +================================================================================ */ fn second_function() { let mut total = 0; for i in 0..10 { @@ -150,7 +154,11 @@ fn third_function() { .to_string(); let after = r#" -// This is a large file with multiple sections +/* ================================================================================ + + getMulti. + +================================================================================ */ fn second_function() { let mut total = 0; for i in 0..10 { diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index 9a5637680..28fd865fa 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -2,7 +2,11 @@ source: crates/util/src/diff_standardizer.rs expression: result --- -// This is a large file with multiple sections +/* ================================================================================ + + getMulti. + +================================================================================ */ fn second_function() { let mut total = 0; for i in 0..10 { From d8a6d600e832cd480adb7b6d4692db6316301ff9 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:32:31 -0500 Subject: [PATCH 07/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index c3854d8ef..2f0ee643d 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -135,7 +135,7 @@ mod tests { let before = r#" /* ================================================================================ - getMulti. + getMulti. ================================================================================ */ fn second_function() { From e14428c960d30811a28333502b9fd11b408cd606 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:44:13 -0500 Subject: [PATCH 08/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 60 ++++++++++++---------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index 2f0ee643d..df5dedf41 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -5,6 +5,7 @@ use git2::{DiffOptions, Repository}; pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Result { let mut diff_opts = DiffOptions::new(); diff_opts.ignore_whitespace(true); + let left_oid = repo .blob(before.as_bytes()) .map_err(|e| anyhow::anyhow!("Failed to create left blob: {:?}", e))?; @@ -20,42 +21,33 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; // Generate diff - let mut standardized = after.clone(); // Start with the full after content - let mut has_changes = false; - let diff = repo - .diff_blobs( - Some(&left_blob), - None, - Some(&right_blob), - None, - Some(&mut diff_opts), - None, - None, - None, - Some(&mut |delta, _hunk, line| { - has_changes = true; + let mut standardized = String::new(); + + // There are changes, build up the new content from the diff + let mut diff_opts = DiffOptions::new(); + repo.diff_blobs( + Some(&left_blob), + None, + Some(&right_blob), + None, + Some(&mut diff_opts), + None, + None, + None, + Some(&mut |_delta, _hunk, line| { + if let Ok(content) = std::str::from_utf8(line.content()) { match line.origin() { - '+' | ' ' => { - if let Ok(_) = std::str::from_utf8(line.content()) { - // Content will be taken from the after string - } - } - '-' => { - // Removed lines are ignored as we're using the after content + ' ' | '+' => { + standardized.push_str(content); } _ => {} } - true - }), - ) - .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; - - // If no changes were detected in the diff, return the after content as is - if !has_changes { - standardized = after; - } + } + true + }), + ) + .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; - // The temporary directory will be automatically cleaned up when temp_dir is dropped Ok(standardized) } @@ -89,11 +81,11 @@ mod tests { #[test] fn test_whitespace_handling() -> Result<()> { let (repo, _temp) = setup_test_repo()?; - let before = "function test() {\n console.log('test');\n}\n".to_string(); + let before = "function test() {\n console.bob('test');\n}\n".to_string(); let after = "function test(){\nconsole.log('test');\n}\n".to_string(); + let after_standard = "function test() {\n console.log('test');\n}\n".to_string(); let result = standardize_rewrite(&repo, before, after)?; - assert_eq!(result, "function test(){\nconsole.log('test');\n}\n"); - assert_snapshot!(result); + assert_eq!(result, after_standard); Ok(()) } From 670c7076da5e075adb58bbb92ecbfe240bc80a24 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 00:51:36 -0500 Subject: [PATCH 09/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 34 +++++++++++++++---- ...r__tests__mixed_changes_in_large_file.snap | 2 +- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index df5dedf41..d252eb38c 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -22,6 +22,8 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> // Generate diff let mut standardized = String::new(); + let mut added_lines = Vec::new(); + let mut current_line = 0; // There are changes, build up the new content from the diff let mut diff_opts = DiffOptions::new(); @@ -37,8 +39,17 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Some(&mut |_delta, _hunk, line| { if let Ok(content) = std::str::from_utf8(line.content()) { match line.origin() { - ' ' | '+' => { + ' ' => { standardized.push_str(content); + current_line += 1; + } + '+' => { + standardized.push_str(content); + added_lines.push(current_line); + current_line += 1; + } + '-' => { + // Skip removed lines but don't increment current_line } _ => {} } @@ -48,6 +59,17 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> ) .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; + // Add any remaining content from the right blob that wasn't encountered + if let Ok(remaining_content) = std::str::from_utf8(right_blob.content()) { + let remaining_lines: Vec<&str> = remaining_content.lines().collect(); + for (i, line) in remaining_lines.iter().enumerate() { + if !added_lines.contains(&i) { + standardized.push_str(line); + standardized.push('\n'); + } + } + } + Ok(standardized) } @@ -82,7 +104,7 @@ mod tests { fn test_whitespace_handling() -> Result<()> { let (repo, _temp) = setup_test_repo()?; let before = "function test() {\n console.bob('test');\n}\n".to_string(); - let after = "function test(){\nconsole.log('test');\n}\n".to_string(); + let after = "function test() {\nconsole.log('test');\n}\n".to_string(); let after_standard = "function test() {\n console.log('test');\n}\n".to_string(); let result = standardize_rewrite(&repo, before, after)?; assert_eq!(result, after_standard); @@ -103,11 +125,11 @@ mod tests { #[test] fn test_multiline_changes() -> Result<()> { let (repo, _temp) = setup_test_repo()?; - let before = "line1\nline2\nline3\n".to_string(); - let after = "line1\nmodified line2\nline3\nnew line4\n".to_string(); + let before = "line1\nline2\n line3\n".to_string(); + let after = "line1\nmodified line2\n\tline3\nnew line4\n".to_string(); let result = standardize_rewrite(&repo, before, after)?; - assert_eq!(result, "line1\nmodified line2\nline3\nnew line4\n"); - assert_snapshot!(result); + let after = "line1\nmodified line2\n line3\nnew line4\n".to_string(); + assert_eq!(result, after); Ok(()) } diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index 28fd865fa..ec69fb155 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -4,7 +4,7 @@ expression: result --- /* ================================================================================ - getMulti. + getMulti. ================================================================================ */ fn second_function() { From 681067bcd55c8bf22fb9fa58139b09791b932b5c Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 01:04:18 -0500 Subject: [PATCH 10/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 59 ++++++------------- ...r__tests__mixed_changes_in_large_file.snap | 2 +- 2 files changed, 20 insertions(+), 41 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index d252eb38c..702d2b424 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -20,57 +20,36 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> .find_blob(right_oid) .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; - // Generate diff - let mut standardized = String::new(); - let mut added_lines = Vec::new(); - let mut current_line = 0; - - // There are changes, build up the new content from the diff - let mut diff_opts = DiffOptions::new(); + // Run the diff to check if there are changes + let mut right_oid = Option::::None; repo.diff_blobs( Some(&left_blob), None, Some(&right_blob), None, Some(&mut diff_opts), + Some(&mut |delta, _progress| { + right_oid = Some(delta.new_file().id()); + true + }), None, None, None, - Some(&mut |_delta, _hunk, line| { - if let Ok(content) = std::str::from_utf8(line.content()) { - match line.origin() { - ' ' => { - standardized.push_str(content); - current_line += 1; - } - '+' => { - standardized.push_str(content); - added_lines.push(current_line); - current_line += 1; - } - '-' => { - // Skip removed lines but don't increment current_line - } - _ => {} - } - } - true - }), ) .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; - // Add any remaining content from the right blob that wasn't encountered - if let Ok(remaining_content) = std::str::from_utf8(right_blob.content()) { - let remaining_lines: Vec<&str> = remaining_content.lines().collect(); - for (i, line) in remaining_lines.iter().enumerate() { - if !added_lines.contains(&i) { - standardized.push_str(line); - standardized.push('\n'); - } - } - } + let Some(right_oid) = right_oid else { + return Ok(before); + }; - Ok(standardized) + let right_blob = repo + .find_blob(right_oid) + .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; + + let content = std::str::from_utf8(right_blob.content()) + .map_err(|e| anyhow::anyhow!("Failed to convert blob content to UTF-8: {:?}", e))?; + + Ok(content.to_string()) } #[cfg(test)] @@ -105,7 +84,7 @@ mod tests { let (repo, _temp) = setup_test_repo()?; let before = "function test() {\n console.bob('test');\n}\n".to_string(); let after = "function test() {\nconsole.log('test');\n}\n".to_string(); - let after_standard = "function test() {\n console.log('test');\n}\n".to_string(); + let after_standard = "function test() {\nconsole.log('test');\n}\n".to_string(); let result = standardize_rewrite(&repo, before, after)?; assert_eq!(result, after_standard); Ok(()) @@ -128,7 +107,7 @@ mod tests { let before = "line1\nline2\n line3\n".to_string(); let after = "line1\nmodified line2\n\tline3\nnew line4\n".to_string(); let result = standardize_rewrite(&repo, before, after)?; - let after = "line1\nmodified line2\n line3\nnew line4\n".to_string(); + let after = "line1\nmodified line2\n\tline3\nnew line4\n".to_string(); assert_eq!(result, after); Ok(()) } diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index ec69fb155..03c7de17d 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -4,7 +4,7 @@ expression: result --- /* ================================================================================ - getMulti. + getMulti. ================================================================================ */ fn second_function() { From 313b36d94d11123656f6cde0191535c0789cd5ff Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 01:05:12 -0500 Subject: [PATCH 11/21] ok --- ...__diff_standardizer__tests__mixed_changes_in_large_file.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index 03c7de17d..ec69fb155 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -4,7 +4,7 @@ expression: result --- /* ================================================================================ - getMulti. + getMulti. ================================================================================ */ fn second_function() { From d682e8a5df43fafa8f68814accc7596dcc49ce0a Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 01:09:32 -0500 Subject: [PATCH 12/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index 702d2b424..b0ddc2a14 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -1,3 +1,9 @@ +/* ================================================================================ + + getMulti. + +================================================================================ */ + use anyhow::Result; use git2::{DiffOptions, Repository}; @@ -5,6 +11,8 @@ use git2::{DiffOptions, Repository}; pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Result { let mut diff_opts = DiffOptions::new(); diff_opts.ignore_whitespace(true); + diff_opts.indent_heuristic(true); + diff_opts.ignore_whitespace_change(true); let left_oid = repo .blob(before.as_bytes()) From 6dc278636524c0667485dd6fae976f80e8e4b4b3 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 01:22:57 -0500 Subject: [PATCH 13/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 47 +++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index b0ddc2a14..7eef6f9b9 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -1,17 +1,16 @@ /* ================================================================================ - getMulti. + getMulti. ================================================================================ */ use anyhow::Result; -use git2::{DiffOptions, Repository}; +use git2::{Delta, DiffOptions, Repository}; /// Given a before and after for a file, edit the *after* to not include any spurious changes pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Result { let mut diff_opts = DiffOptions::new(); diff_opts.ignore_whitespace(true); - diff_opts.indent_heuristic(true); diff_opts.ignore_whitespace_change(true); let left_oid = repo @@ -41,8 +40,14 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> true }), None, - None, - None, + Some(&mut |delta, _progress| { + println!("delta: {:?}, progress: {:?}", delta, _progress); + true + }), + Some(&mut |delta, _hunk, line| { + println!("line: {:?}", line); + true + }), ) .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; @@ -181,4 +186,36 @@ fn third_function() { Ok(()) } + + #[test] + fn test_code_removal() -> Result<()> { + let (repo, _temp) = setup_test_repo()?; + let before = r#"fn main() { + // First we do some setup + let mut total = 0; + + // Then we do a big calculation + for i in 0..100 { + if i % 2 == 0 { + total += i; + } else { + total -= i; + } + } + + // Finally print the result + println!("The total is: {}", total); +}"# + .to_string(); + + let after = r#"fn main() { + let mut total = 0; + println!("The total is: {}", total); +}"# + .to_string(); + + let result = standardize_rewrite(&repo, before, after.clone())?; + assert_eq!(result, after); + Ok(()) + } } From 3ff3e1149fa98e0a51fdfa98bd814dd6187b12f4 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 02:14:55 -0500 Subject: [PATCH 14/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 118 ++++++++++++++---- ...r__tests__mixed_changes_in_large_file.snap | 8 +- ...ardizer__tests__remove_early_add_late.snap | 16 +++ 3 files changed, 112 insertions(+), 30 deletions(-) create mode 100644 crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index 7eef6f9b9..b06c5c2b0 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -5,13 +5,14 @@ ================================================================================ */ use anyhow::Result; -use git2::{Delta, DiffOptions, Repository}; +use git2::{Delta, DiffLineType, DiffOptions, Repository}; /// Given a before and after for a file, edit the *after* to not include any spurious changes pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Result { let mut diff_opts = DiffOptions::new(); diff_opts.ignore_whitespace(true); diff_opts.ignore_whitespace_change(true); + diff_opts.context_lines(0); let left_oid = repo .blob(before.as_bytes()) @@ -27,42 +28,65 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> .find_blob(right_oid) .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; - // Run the diff to check if there are changes - let mut right_oid = Option::::None; + let mut before_lines = before.lines().collect::>(); + let mut standardized_after = String::new(); + let mut before_line_num = 0 as usize; + repo.diff_blobs( Some(&left_blob), None, Some(&right_blob), None, Some(&mut diff_opts), - Some(&mut |delta, _progress| { - right_oid = Some(delta.new_file().id()); - true - }), + // Some(&mut |delta, _progress| { + // right_oid = Some(delta.new_file().id()); + // true + // }), None, - Some(&mut |delta, _progress| { - println!("delta: {:?}, progress: {:?}", delta, _progress); - true - }), - Some(&mut |delta, _hunk, line| { - println!("line: {:?}", line); + None, + // Some(&mut |delta, _progress| { + // println!("delta: {:?}, progress: {:?}", delta, _progress); + // true + // }), + None, + Some(&mut |_delta, hunk, line| { + let hunk = hunk.unwrap(); + // Grab the content between before this hunk and inject it + while before_line_num < hunk.old_start().try_into().unwrap() { + standardized_after.push_str(before_lines[before_line_num]); + standardized_after.push('\n'); + before_line_num += 1; + } + + match line.origin_value() { + DiffLineType::Deletion => { + // Deletion: advance the offset by the length of the removed content + before_line_num += 1; + } + DiffLineType::Addition => { + // println!( + // "INJECTING: {} onto {}", + // std::str::from_utf8(line.content()).unwrap(), + // standardized_after + // ); + // // Addition: inject the new contnent directly + // standardized_after.push_str(std::str::from_utf8(line.content()).unwrap()); + } + _ => {} + } true }), ) .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; - let Some(right_oid) = right_oid else { - return Ok(before); - }; - - let right_blob = repo - .find_blob(right_oid) - .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; - - let content = std::str::from_utf8(right_blob.content()) - .map_err(|e| anyhow::anyhow!("Failed to convert blob content to UTF-8: {:?}", e))?; + // Finally, add the rest of the content + while before_line_num < before_lines.len() { + standardized_after.push_str(before_lines[before_line_num]); + standardized_after.push('\n'); + before_line_num += 1; + } - Ok(content.to_string()) + Ok(standardized_after) } #[cfg(test)] @@ -97,7 +121,7 @@ mod tests { let (repo, _temp) = setup_test_repo()?; let before = "function test() {\n console.bob('test');\n}\n".to_string(); let after = "function test() {\nconsole.log('test');\n}\n".to_string(); - let after_standard = "function test() {\nconsole.log('test');\n}\n".to_string(); + let after_standard = "function test() {console.log('test');\n}\n".to_string(); let result = standardize_rewrite(&repo, before, after)?; assert_eq!(result, after_standard); Ok(()) @@ -218,4 +242,48 @@ fn third_function() { assert_eq!(result, after); Ok(()) } + + #[test] + fn test_remove_early_add_late() -> Result<()> { + let (repo, _td) = setup_test_repo()?; + + let before = r#" +// this starts us off + +fn first_function() { + println!("This will be removed"); + println!("This will also be removed"); +} + +fn middle_function() { + // This stays the same + println!("Hello"); +} + +fn last_function() { + println!("Original"); +} + +// This is other content"#; + + let after = r#" +// this starts us off + +fn middle_function() { + // This stays the same + println!("Hello"); +} + +fn last_function() { + println!("Original"); + println!("New line added here"); + println!("And another one"); +} + +// This is other content"#; + + let result = standardize_rewrite(&repo, before.to_string(), after.to_string())?; + assert_snapshot!(result); + Ok(()) + } } diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index ec69fb155..c86c49170 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -8,14 +8,12 @@ expression: result ================================================================================ */ fn second_function() { - let mut total = 0; + let mut total = 0; for i in 0..10 { total += i; } - println!("Total: {}", total); + println!("Total: {}", total); } fn third_function() { - let thing = "Hello"; - debug!("{}", thing); -} + let message = "Hello"; diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap new file mode 100644 index 000000000..5bb490ba3 --- /dev/null +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap @@ -0,0 +1,16 @@ +--- +source: crates/util/src/diff_standardizer.rs +expression: result +--- +// this starts us off + +fn first_function() { + // This stays the same + println!("Hello"); +} + +fn last_function() { + println!("Original"); +} + +// This is other content From 676b1b31fdd6ec35d5e34fa12f48ae5d241f223c Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 02:27:20 -0500 Subject: [PATCH 15/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 25 +++++++++++-------- ...r__tests__mixed_changes_in_large_file.snap | 4 ++- ...ardizer__tests__remove_early_add_late.snap | 4 ++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index b06c5c2b0..da7cfc7dc 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -30,7 +30,7 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> let mut before_lines = before.lines().collect::>(); let mut standardized_after = String::new(); - let mut before_line_num = 0 as usize; + let mut before_line_num_zero = 0_usize; repo.diff_blobs( Some(&left_blob), @@ -50,18 +50,20 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> // }), None, Some(&mut |_delta, hunk, line| { + println!("line: {:?}", line); let hunk = hunk.unwrap(); // Grab the content between before this hunk and inject it - while before_line_num < hunk.old_start().try_into().unwrap() { - standardized_after.push_str(before_lines[before_line_num]); + while (before_line_num_zero + 1) < hunk.old_start().try_into().unwrap() { + println!("Pushing: {}", before_lines[before_line_num_zero]); + standardized_after.push_str(before_lines[before_line_num_zero]); standardized_after.push('\n'); - before_line_num += 1; + before_line_num_zero += 1; } match line.origin_value() { DiffLineType::Deletion => { // Deletion: advance the offset by the length of the removed content - before_line_num += 1; + before_line_num_zero += line.num_lines() as usize; } DiffLineType::Addition => { // println!( @@ -69,8 +71,8 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> // std::str::from_utf8(line.content()).unwrap(), // standardized_after // ); - // // Addition: inject the new contnent directly - // standardized_after.push_str(std::str::from_utf8(line.content()).unwrap()); + // // Addition: inject the new content directly + standardized_after.push_str(std::str::from_utf8(line.content()).unwrap()); } _ => {} } @@ -80,10 +82,10 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; // Finally, add the rest of the content - while before_line_num < before_lines.len() { - standardized_after.push_str(before_lines[before_line_num]); + while before_line_num_zero < before_lines.len() { + standardized_after.push_str(before_lines[before_line_num_zero]); standardized_after.push('\n'); - before_line_num += 1; + before_line_num_zero += 1; } Ok(standardized_after) @@ -235,7 +237,8 @@ fn third_function() { let after = r#"fn main() { let mut total = 0; println!("The total is: {}", total); -}"# +} + "# .to_string(); let result = standardize_rewrite(&repo, before, after.clone())?; diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index c86c49170..44ad23d49 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -16,4 +16,6 @@ fn second_function() { } fn third_function() { - let message = "Hello"; + let thing = "Hello"; + debug!("{}", thing); +} diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap index 5bb490ba3..871ef5e34 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__remove_early_add_late.snap @@ -4,12 +4,14 @@ expression: result --- // this starts us off -fn first_function() { +fn middle_function() { // This stays the same println!("Hello"); } fn last_function() { + println!("New line added here"); + println!("And another one"); println!("Original"); } From ed5bc9dcdeeb4b1c51a8f0b824f91657322e7cc4 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 02:52:13 -0500 Subject: [PATCH 16/21] [skip ci] yolo --- crates/util/src/diff_standardizer.rs | 47 ++++++++++++++++++---------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index da7cfc7dc..f4829fb6b 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -30,7 +30,7 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> let mut before_lines = before.lines().collect::>(); let mut standardized_after = String::new(); - let mut before_line_num_zero = 0_usize; + let mut before_line_num = 1_usize; repo.diff_blobs( Some(&left_blob), @@ -50,20 +50,29 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> // }), None, Some(&mut |_delta, hunk, line| { - println!("line: {:?}", line); let hunk = hunk.unwrap(); + println!( + "offset: {}, hunk: {}, line: {:?}", + before_line_num, + hunk.old_start(), + line, + ); // Grab the content between before this hunk and inject it - while (before_line_num_zero + 1) < hunk.old_start().try_into().unwrap() { - println!("Pushing: {}", before_lines[before_line_num_zero]); - standardized_after.push_str(before_lines[before_line_num_zero]); + while before_line_num < hunk.old_start().try_into().unwrap() { + let this_line = before_lines[before_line_num - 1]; + println!("Pushing: {}", this_line); + standardized_after.push_str(this_line); standardized_after.push('\n'); - before_line_num_zero += 1; + before_line_num += 1; } + // Now advance the cursor by the number of old lines we covered + // before_line_num += (hunk.old_lines() as usize); + match line.origin_value() { DiffLineType::Deletion => { - // Deletion: advance the offset by the length of the removed content - before_line_num_zero += line.num_lines() as usize; + // Deletion: we don't actually need to do anything with deleted content + before_line_num += line.num_lines() as usize; } DiffLineType::Addition => { // println!( @@ -74,7 +83,13 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> // // Addition: inject the new content directly standardized_after.push_str(std::str::from_utf8(line.content()).unwrap()); } - _ => {} + // DiffLineType::Context => { + // // Context: do nothing + // println!("Context: {}", std::str::from_utf8(line.content()).unwrap()); + // } + _ => { + println!("fuck you!: {}", line.num_lines()); + } } true }), @@ -82,10 +97,10 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; // Finally, add the rest of the content - while before_line_num_zero < before_lines.len() { - standardized_after.push_str(before_lines[before_line_num_zero]); + while before_line_num < before_lines.len() { + standardized_after.push_str(before_lines[before_line_num - 1]); standardized_after.push('\n'); - before_line_num_zero += 1; + before_line_num += 1; } Ok(standardized_after) @@ -144,10 +159,10 @@ mod tests { fn test_multiline_changes() -> Result<()> { let (repo, _temp) = setup_test_repo()?; let before = "line1\nline2\n line3\n".to_string(); - let after = "line1\nmodified line2\n\tline3\nnew line4\n".to_string(); - let result = standardize_rewrite(&repo, before, after)?; - let after = "line1\nmodified line2\n\tline3\nnew line4\n".to_string(); - assert_eq!(result, after); + let after1 = "line1\nmodified line2\n line3\nnew line4\n".to_string(); + let after2 = "line1\nmodified line2\n line3\nnew line4\n".to_string(); + let result = standardize_rewrite(&repo, before, after1)?; + assert_eq!(result, after2); Ok(()) } From d545b698d4e0488687c92ed7cf4d2536964e9aa9 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 03:17:18 -0500 Subject: [PATCH 17/21] checkpoint where I give up on git, I hate libgit2 --- crates/util/src/diff_standardizer.rs | 104 +++++++++++++++++---------- 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index f4829fb6b..1dbf162eb 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -28,9 +28,10 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> .find_blob(right_oid) .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; - let mut before_lines = before.lines().collect::>(); + let before_lines = before.lines().collect::>(); let mut standardized_after = String::new(); let mut before_line_num = 1_usize; + let mut pending_additions: Vec<(usize, String)> = Vec::new(); repo.diff_blobs( Some(&left_blob), @@ -38,69 +39,100 @@ pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Some(&right_blob), None, Some(&mut diff_opts), - // Some(&mut |delta, _progress| { - // right_oid = Some(delta.new_file().id()); - // true - // }), None, None, - // Some(&mut |delta, _progress| { - // println!("delta: {:?}, progress: {:?}", delta, _progress); - // true - // }), None, Some(&mut |_delta, hunk, line| { let hunk = hunk.unwrap(); + println!("\n=== Processing Hunk ==="); println!( - "offset: {}, hunk: {}, line: {:?}", + "Current before_line_num: {}, Hunk old_start: {}, old_lines: {}, new_lines: {}", before_line_num, hunk.old_start(), - line, + hunk.old_lines(), + hunk.new_lines() ); - // Grab the content between before this hunk and inject it - while before_line_num < hunk.old_start().try_into().unwrap() { + println!( + "Hunk header: {}", + std::str::from_utf8(hunk.header()).unwrap() + ); + + let hunk_start: usize = hunk.old_start().try_into().unwrap(); + + // Copy any unchanged lines up to this hunk + while before_line_num < hunk_start { let this_line = before_lines[before_line_num - 1]; - println!("Pushing: {}", this_line); + println!( + "Injecting unchanged line {}: '{}'", + before_line_num, this_line + ); standardized_after.push_str(this_line); standardized_after.push('\n'); before_line_num += 1; } - // Now advance the cursor by the number of old lines we covered - // before_line_num += (hunk.old_lines() as usize); - match line.origin_value() { DiffLineType::Deletion => { - // Deletion: we don't actually need to do anything with deleted content - before_line_num += line.num_lines() as usize; + println!( + "Deletion at line {}: '{}'", + before_line_num, + before_lines[before_line_num - 1] + ); + before_line_num += 1; } DiffLineType::Addition => { - // println!( - // "INJECTING: {} onto {}", - // std::str::from_utf8(line.content()).unwrap(), - // standardized_after - // ); - // // Addition: inject the new content directly - standardized_after.push_str(std::str::from_utf8(line.content()).unwrap()); + let new_content = std::str::from_utf8(line.content()).unwrap().to_string(); + let target_position = hunk.new_start() as usize; + println!( + "Queueing addition at position {}: '{}'", + target_position, new_content + ); + // Insert into pending_additions in sorted order by position + let insert_pos = + pending_additions.partition_point(|(pos, _)| *pos <= target_position); + pending_additions.insert(insert_pos, (target_position, new_content)); } - // DiffLineType::Context => { - // // Context: do nothing - // println!("Context: {}", std::str::from_utf8(line.content()).unwrap()); - // } _ => { - println!("fuck you!: {}", line.num_lines()); + println!( + "Other line type: {:?}, num_lines: {}", + line.origin_value(), + line.num_lines() + ); } } + + // Process any pending additions that should come before the next hunk + let mut current_line = before_line_num; + while let Some((pos, content)) = pending_additions.first() { + if *pos <= current_line { + standardized_after.push_str(&content); + pending_additions.remove(0); + } else { + break; + } + } + true }), ) .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; - // Finally, add the rest of the content - while before_line_num < before_lines.len() { - standardized_after.push_str(before_lines[before_line_num - 1]); - standardized_after.push('\n'); - before_line_num += 1; + // Add any remaining content and pending additions + while before_line_num <= before_lines.len() || !pending_additions.is_empty() { + let next_pending = pending_additions.first().map(|(pos, _)| *pos); + let should_add_original = match next_pending { + Some(pos) => before_line_num < pos, + None => true, + }; + + if should_add_original && before_line_num <= before_lines.len() { + standardized_after.push_str(before_lines[before_line_num - 1]); + standardized_after.push('\n'); + before_line_num += 1; + } else if let Some((_, content)) = pending_additions.first() { + standardized_after.push_str(content); + pending_additions.remove(0); + } } Ok(standardized_after) From 7146df5bad085d5971ac5c7b68d6d46b351211d3 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 03:27:08 -0500 Subject: [PATCH 18/21] [skip ci] yolo --- Cargo.lock | 1 + crates/util/Cargo.toml | 1 + crates/util/src/diff_standardizer.rs | 218 +++--------------- ...r__tests__mixed_changes_in_large_file.snap | 6 +- 4 files changed, 36 insertions(+), 190 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 568ed054d..ea1bef644 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2419,6 +2419,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "similar", "tempfile", "tokio", "tree-sitter-facade-sg", diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml index 958787a2d..98cc279d2 100644 --- a/crates/util/Cargo.toml +++ b/crates/util/Cargo.toml @@ -39,6 +39,7 @@ git2 = { version = "0.19.0", default-features = false, features = [ "https", ] } tempfile = { version = "3.1" } +similar = { version = "2.2.1" } napi = { version = "2.16.4", default-features = false, features = [ diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index 1dbf162eb..a15a303a9 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -5,133 +5,19 @@ ================================================================================ */ use anyhow::Result; -use git2::{Delta, DiffLineType, DiffOptions, Repository}; +use similar::{ChangeTag, TextDiff}; /// Given a before and after for a file, edit the *after* to not include any spurious changes -pub fn standardize_rewrite(repo: &Repository, before: String, after: String) -> Result { - let mut diff_opts = DiffOptions::new(); - diff_opts.ignore_whitespace(true); - diff_opts.ignore_whitespace_change(true); - diff_opts.context_lines(0); - - let left_oid = repo - .blob(before.as_bytes()) - .map_err(|e| anyhow::anyhow!("Failed to create left blob: {:?}", e))?; - let right_oid = repo - .blob(after.as_bytes()) - .map_err(|e| anyhow::anyhow!("Failed to create right blob: {:?}", e))?; - - let left_blob = repo - .find_blob(left_oid) - .map_err(|e| anyhow::anyhow!("Failed to find left blob: {:?}", e))?; - let right_blob = repo - .find_blob(right_oid) - .map_err(|e| anyhow::anyhow!("Failed to find right blob: {:?}", e))?; - - let before_lines = before.lines().collect::>(); +pub fn standardize_rewrite(before: String, after: String) -> Result { + let diff = TextDiff::from_lines(&before, &after); let mut standardized_after = String::new(); - let mut before_line_num = 1_usize; - let mut pending_additions: Vec<(usize, String)> = Vec::new(); - - repo.diff_blobs( - Some(&left_blob), - None, - Some(&right_blob), - None, - Some(&mut diff_opts), - None, - None, - None, - Some(&mut |_delta, hunk, line| { - let hunk = hunk.unwrap(); - println!("\n=== Processing Hunk ==="); - println!( - "Current before_line_num: {}, Hunk old_start: {}, old_lines: {}, new_lines: {}", - before_line_num, - hunk.old_start(), - hunk.old_lines(), - hunk.new_lines() - ); - println!( - "Hunk header: {}", - std::str::from_utf8(hunk.header()).unwrap() - ); - - let hunk_start: usize = hunk.old_start().try_into().unwrap(); - // Copy any unchanged lines up to this hunk - while before_line_num < hunk_start { - let this_line = before_lines[before_line_num - 1]; - println!( - "Injecting unchanged line {}: '{}'", - before_line_num, this_line - ); - standardized_after.push_str(this_line); - standardized_after.push('\n'); - before_line_num += 1; + for change in diff.iter_all_changes() { + match change.tag() { + ChangeTag::Delete => {} // Skip deleted lines + ChangeTag::Equal | ChangeTag::Insert => { + standardized_after.push_str(change.value()); } - - match line.origin_value() { - DiffLineType::Deletion => { - println!( - "Deletion at line {}: '{}'", - before_line_num, - before_lines[before_line_num - 1] - ); - before_line_num += 1; - } - DiffLineType::Addition => { - let new_content = std::str::from_utf8(line.content()).unwrap().to_string(); - let target_position = hunk.new_start() as usize; - println!( - "Queueing addition at position {}: '{}'", - target_position, new_content - ); - // Insert into pending_additions in sorted order by position - let insert_pos = - pending_additions.partition_point(|(pos, _)| *pos <= target_position); - pending_additions.insert(insert_pos, (target_position, new_content)); - } - _ => { - println!( - "Other line type: {:?}, num_lines: {}", - line.origin_value(), - line.num_lines() - ); - } - } - - // Process any pending additions that should come before the next hunk - let mut current_line = before_line_num; - while let Some((pos, content)) = pending_additions.first() { - if *pos <= current_line { - standardized_after.push_str(&content); - pending_additions.remove(0); - } else { - break; - } - } - - true - }), - ) - .map_err(|e| anyhow::anyhow!("Failed to generate diff: {:?}", e))?; - - // Add any remaining content and pending additions - while before_line_num <= before_lines.len() || !pending_additions.is_empty() { - let next_pending = pending_additions.first().map(|(pos, _)| *pos); - let should_add_original = match next_pending { - Some(pos) => before_line_num < pos, - None => true, - }; - - if should_add_original && before_line_num <= before_lines.len() { - standardized_after.push_str(before_lines[before_line_num - 1]); - standardized_after.push('\n'); - before_line_num += 1; - } else if let Some((_, content)) = pending_additions.first() { - standardized_after.push_str(content); - pending_additions.remove(0); } } @@ -143,23 +29,11 @@ mod tests { use super::*; use insta::assert_snapshot; - fn setup_test_repo() -> Result<(Repository, tempfile::TempDir)> { - let temp_dir = tempfile::tempdir()?; - let repo = Repository::init_opts( - temp_dir.path(), - git2::RepositoryInitOptions::new() - .bare(true) - .initial_head("main"), - )?; - Ok((repo, temp_dir)) - } - #[test] fn test_basic_rewrite() -> Result<()> { - let (repo, _temp) = setup_test_repo()?; let before = "Hello world\n".to_string(); let after = "Hello Rust\n".to_string(); - let result = standardize_rewrite(&repo, before, after)?; + let result = standardize_rewrite(before, after)?; assert_eq!(result, "Hello Rust\n"); assert_snapshot!(result); Ok(()) @@ -167,21 +41,19 @@ mod tests { #[test] fn test_whitespace_handling() -> Result<()> { - let (repo, _temp) = setup_test_repo()?; let before = "function test() {\n console.bob('test');\n}\n".to_string(); let after = "function test() {\nconsole.log('test');\n}\n".to_string(); - let after_standard = "function test() {console.log('test');\n}\n".to_string(); - let result = standardize_rewrite(&repo, before, after)?; + let after_standard = "function test() {\nconsole.log('test');\n}\n".to_string(); + let result = standardize_rewrite(before, after)?; assert_eq!(result, after_standard); Ok(()) } #[test] fn test_empty_files() -> Result<()> { - let (repo, _temp) = setup_test_repo()?; let before = "".to_string(); let after = "".to_string(); - let result = standardize_rewrite(&repo, before, after)?; + let result = standardize_rewrite(before, after)?; assert_eq!(result, ""); assert_snapshot!(result); Ok(()) @@ -189,20 +61,18 @@ mod tests { #[test] fn test_multiline_changes() -> Result<()> { - let (repo, _temp) = setup_test_repo()?; let before = "line1\nline2\n line3\n".to_string(); let after1 = "line1\nmodified line2\n line3\nnew line4\n".to_string(); let after2 = "line1\nmodified line2\n line3\nnew line4\n".to_string(); - let result = standardize_rewrite(&repo, before, after1)?; + let result = standardize_rewrite(before, after1)?; assert_eq!(result, after2); Ok(()) } #[test] fn test_no_changes() -> Result<()> { - let (repo, _temp) = setup_test_repo()?; let content = "unchanged content\n".to_string(); - let result = standardize_rewrite(&repo, content.clone(), content)?; + let result = standardize_rewrite(content.clone(), content)?; assert_eq!(result, "unchanged content\n"); assert_snapshot!(result); Ok(()) @@ -210,7 +80,6 @@ mod tests { #[test] fn test_mixed_changes_in_large_file() -> Result<()> { - let (repo, _temp) = setup_test_repo()?; let before = r#" /* ================================================================================ @@ -253,16 +122,13 @@ fn third_function() { "# .to_string(); - let result = standardize_rewrite(&repo, before, after)?; - + let result = standardize_rewrite(before, after)?; assert_snapshot!(result); - Ok(()) } #[test] fn test_code_removal() -> Result<()> { - let (repo, _temp) = setup_test_repo()?; let before = r#"fn main() { // First we do some setup let mut total = 0; @@ -288,52 +154,30 @@ fn third_function() { "# .to_string(); - let result = standardize_rewrite(&repo, before, after.clone())?; + let result = standardize_rewrite(before, after.clone())?; assert_eq!(result, after); Ok(()) } #[test] fn test_remove_early_add_late() -> Result<()> { - let (repo, _td) = setup_test_repo()?; - - let before = r#" -// this starts us off - -fn first_function() { - println!("This will be removed"); - println!("This will also be removed"); -} - -fn middle_function() { - // This stays the same - println!("Hello"); -} - -fn last_function() { - println!("Original"); -} - -// This is other content"#; - - let after = r#" -// this starts us off - -fn middle_function() { - // This stays the same - println!("Hello"); -} - -fn last_function() { - println!("Original"); - println!("New line added here"); - println!("And another one"); -} + let before = r#"fn main() { + let early = "remove me"; + let keep = "stay"; + let middle = "remove me too"; + let end = "keep me"; +}"# + .to_string(); -// This is other content"#; + let after = r#"fn main() { + let keep = "stay"; + let end = "keep me"; + let new = "add me"; +}"# + .to_string(); - let result = standardize_rewrite(&repo, before.to_string(), after.to_string())?; - assert_snapshot!(result); + let result = standardize_rewrite(before, after.clone())?; + assert_eq!(result, after); Ok(()) } } diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index 44ad23d49..03c7de17d 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -4,15 +4,15 @@ expression: result --- /* ================================================================================ - getMulti. + getMulti. ================================================================================ */ fn second_function() { - let mut total = 0; + let mut total = 0; for i in 0..10 { total += i; } - println!("Total: {}", total); + println!("Total: {}", total); } fn third_function() { From ecc8112b3b4e0a2da532d0f78b1ed335d898c0ef Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 03:27:59 -0500 Subject: [PATCH 19/21] repair one test --- ...ff_standardizer__tests__mixed_changes_in_large_file.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap index 03c7de17d..44ad23d49 100644 --- a/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap +++ b/crates/util/src/snapshots/marzano_util__diff_standardizer__tests__mixed_changes_in_large_file.snap @@ -4,15 +4,15 @@ expression: result --- /* ================================================================================ - getMulti. + getMulti. ================================================================================ */ fn second_function() { - let mut total = 0; + let mut total = 0; for i in 0..10 { total += i; } - println!("Total: {}", total); + println!("Total: {}", total); } fn third_function() { From 684ae32b2717295ebf289e1fe2c72bf3111a1863 Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 03:59:29 -0500 Subject: [PATCH 20/21] run the tests --- crates/util/src/diff_standardizer.rs | 52 +++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/crates/util/src/diff_standardizer.rs b/crates/util/src/diff_standardizer.rs index a15a303a9..9304e486f 100644 --- a/crates/util/src/diff_standardizer.rs +++ b/crates/util/src/diff_standardizer.rs @@ -9,14 +9,49 @@ use similar::{ChangeTag, TextDiff}; /// Given a before and after for a file, edit the *after* to not include any spurious changes pub fn standardize_rewrite(before: String, after: String) -> Result { - let diff = TextDiff::from_lines(&before, &after); + let mut differ = TextDiff::configure(); + differ.algorithm(similar::Algorithm::Myers); + let diff = differ.diff_lines(&before, &after); let mut standardized_after = String::new(); - for change in diff.iter_all_changes() { - match change.tag() { - ChangeTag::Delete => {} // Skip deleted lines - ChangeTag::Equal | ChangeTag::Insert => { - standardized_after.push_str(change.value()); + for op in diff.ops() { + match op.tag() { + similar::DiffTag::Equal | similar::DiffTag::Insert => { + for line in diff.iter_changes(op) { + standardized_after.push_str(line.value()); + } + } + similar::DiffTag::Delete => { + // Simply skip deleted lines + } + similar::DiffTag::Replace => { + let mut before_cache = Option::None; + for line in diff.iter_changes(op) { + match line.tag() { + ChangeTag::Delete => { + before_cache = Some(line.value()); + } + ChangeTag::Insert => { + let value = line.value(); + if let Some(before) = before_cache { + if before.trim() == value.trim() { + // skip whitespace-only changes + standardized_after.push_str(before); + } else { + // Otherwise, include the line + standardized_after.push_str(value); + } + } else { + standardized_after.push_str(value); + } + before_cache = None; + } + ChangeTag::Equal => { + standardized_after.push_str(line.value()); + before_cache = None; + } + } + } } } } @@ -144,14 +179,15 @@ fn third_function() { // Finally print the result println!("The total is: {}", total); -}"# +} +"# .to_string(); let after = r#"fn main() { let mut total = 0; println!("The total is: {}", total); } - "# +"# .to_string(); let result = standardize_rewrite(before, after.clone())?; From 1bebf5018bb2ddc9fe983644317c6b2124fa8b0c Mon Sep 17 00:00:00 2001 From: Morgante Pell Date: Sun, 17 Nov 2024 04:01:55 -0500 Subject: [PATCH 21/21] run the tests --- Cargo.lock | 2 -- crates/util/Cargo.toml | 7 ------- 2 files changed, 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea1bef644..cb8bb9264 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2407,7 +2407,6 @@ dependencies = [ "base64 0.21.7", "fs-err", "futures", - "git2", "grit-util", "http 0.2.12", "ignore", @@ -2420,7 +2419,6 @@ dependencies = [ "serde_json", "sha2", "similar", - "tempfile", "tokio", "tree-sitter-facade-sg", ] diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml index 98cc279d2..823aeb3a0 100644 --- a/crates/util/Cargo.toml +++ b/crates/util/Cargo.toml @@ -32,13 +32,6 @@ serde_json = { version = "1.0.114" } futures = { version = "0.3.29", optional = true } http = { version = "0.2.11" } fs-err = { version = "2.11.0" } -git2 = { version = "0.19.0", default-features = false, features = [ - "vendored-openssl", - "vendored-libgit2", - "ssh", - "https", -] } -tempfile = { version = "3.1" } similar = { version = "2.2.1" }