From 2f684987df3444e6c381a7d974bccaca32bd96ba Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Wed, 3 Apr 2024 02:42:36 +0200 Subject: [PATCH 1/2] fix: child slice selectors only selecting first matching index - Fixed a bug where the compiler would erronously mark states with a single slice transition as unitary, even though such transitions could match more than one index. --- crates/rsonpath-lib/src/automaton.rs | 9 +++ .../rsonpath-lib/src/automaton/minimizer.rs | 64 +++++++++++++++++-- .../rsonpath-test/documents/toml/lists.toml | 28 ++++++++ 3 files changed, 97 insertions(+), 4 deletions(-) diff --git a/crates/rsonpath-lib/src/automaton.rs b/crates/rsonpath-lib/src/automaton.rs index 38ba9b5b..74c639dc 100644 --- a/crates/rsonpath-lib/src/automaton.rs +++ b/crates/rsonpath-lib/src/automaton.rs @@ -122,6 +122,15 @@ impl ArrayTransitionLabel { Self::Slice(s) => s.contains(index), } } + + fn matches_at_most_once(&self) -> bool { + match self { + Self::Index(_) => true, + Self::Slice(slice) => { + slice.step == JsonUInt::ZERO && slice.end.map_or(false, |end| slice.start.as_u64() + 1 >= end.as_u64()) + } + } + } } impl From for ArrayTransitionLabel { diff --git a/crates/rsonpath-lib/src/automaton/minimizer.rs b/crates/rsonpath-lib/src/automaton/minimizer.rs index ace0d8fc..37247fae 100644 --- a/crates/rsonpath-lib/src/automaton/minimizer.rs +++ b/crates/rsonpath-lib/src/automaton/minimizer.rs @@ -188,10 +188,7 @@ impl<'q> Minimizer<'q> { debug!("{id} is rejecting"); attrs = attrs.rejecting(); } - if array_transitions.len() + member_transitions.len() == 1 && fallback == Self::rejecting_state() { - debug!("{id} is unitary"); - attrs = attrs.unitary(); - } + if self.accepting.contains(fallback.0) || array_transitions .iter() @@ -213,6 +210,23 @@ impl<'q> Minimizer<'q> { attrs = attrs.has_array_transition_to_accepting(); } + // Unitarity check: + // 1. Fallback rejects. + // 2. Only one transition that can match at most one element in a JSON, either: + // a) member transition; or + // b) array transition that matches only one index. + let is_unitary = { + fallback == Self::rejecting_state() + && ((member_transitions.len() == 1 && array_transitions.is_empty()) + || (array_transitions.len() == 1 + && member_transitions.is_empty() + && array_transitions[0].label.matches_at_most_once())) + }; + if is_unitary { + debug!("{id} is unitary"); + attrs = attrs.unitary(); + } + attrs.into() } @@ -1260,6 +1274,48 @@ mod tests { assert_eq!(result, expected); } + #[test] + fn direct_slice() { + // Query = $[1:3] + let label = SimpleSlice::new(1.into(), Some(3.into()), 1.into()); + + let nfa = NondeterministicAutomaton { + ordered_states: vec![ + NfaState::Direct(nfa::Transition::Array(label.into())), + NfaState::Accepting, + ], + }; + + let mut result = minimize(nfa).unwrap(); + make_canonical(&mut result); + let expected = Automaton { + states: vec![ + StateTable { + array_transitions: smallvec![], + member_transitions: smallvec![], + fallback_state: State(0), + attributes: StateAttributes::REJECTING, + }, + StateTable { + array_transitions: smallvec![ArrayTransition::new(ArrayTransitionLabel::Slice(label), State(2)),], + member_transitions: smallvec![], + fallback_state: State(0), + attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING + | StateAttributes::HAS_ARRAY_TRANSITION + | StateAttributes::HAS_ARRAY_TRANSITION_TO_ACCEPTING, + }, + StateTable { + array_transitions: smallvec![], + member_transitions: smallvec![], + fallback_state: State(0), + attributes: StateAttributes::ACCEPTING, + }, + ], + }; + + assert_eq!(result, expected); + } + /// DFA creation is unstable - it can result in many different isomorphic automaton structures. /// This function relabels the states in a canonical way so that they can be compared for equality. fn make_canonical(dfa: &mut Automaton) { diff --git a/crates/rsonpath-test/documents/toml/lists.toml b/crates/rsonpath-test/documents/toml/lists.toml index d04f76f9..2d465fdb 100644 --- a/crates/rsonpath-test/documents/toml/lists.toml +++ b/crates/rsonpath-test/documents/toml/lists.toml @@ -377,3 +377,31 @@ nodes = ['''[ 0 ] ]'''] + +[[queries]] +query = "$[1:3]" +description = "select the second and third elements" + +[queries.results] +count = 2 +spans = [[31, 33], [39, 278]] +nodes = [ + '[]', + '''[ + [], + [ + [ + [], + 0 + ], + [ + [], + 0 + ], + [ + [], + 0 + ] + ] + ]''' +] \ No newline at end of file From d683e83d4900eb0d2108c10455c863a27c2ace90 Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Wed, 3 Apr 2024 03:09:05 +0200 Subject: [PATCH 2/2] chore(ci): fix cache and timeout related CI issues --- .github/workflows/book.yml | 2 -- .github/workflows/clusterfuzzlite-cron.yml | 4 ++-- .github/workflows/rust.yml | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml index 14bd89c1..5ffeb42b 100644 --- a/.github/workflows/book.yml +++ b/.github/workflows/book.yml @@ -65,7 +65,6 @@ jobs: target/ key: book-cargo-${{ hashFiles('**/Cargo.toml') }} - name: cargo install mdbook - if: steps.cache-restore-cargo.outputs.cache-hit != 'true' uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1 with: # Name of the crate to install @@ -73,7 +72,6 @@ jobs: env: CARGO_TARGET_DIR: target/ - name: cargo install mdbook-katex - if: steps.cache-restore-cargo.outputs.cache-hit != 'true' uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1 with: # Name of the crate to install diff --git a/.github/workflows/clusterfuzzlite-cron.yml b/.github/workflows/clusterfuzzlite-cron.yml index 86e5f907..b980598f 100644 --- a/.github/workflows/clusterfuzzlite-cron.yml +++ b/.github/workflows/clusterfuzzlite-cron.yml @@ -38,7 +38,7 @@ jobs: uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: github-token: ${{ secrets.GITHUB_TOKEN }} - fuzz-seconds: 600 + fuzz-seconds: 1200 mode: 'prune' output-sarif: true storage-repo: https://${{ secrets.CLUSTERFUZZLITE_STORAGE_TOKEN }}@github.com/rsonquery/rsonpath-fuzz-storage.git @@ -64,7 +64,7 @@ jobs: uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1 with: github-token: ${{ secrets.GITHUB_TOKEN }} - fuzz-seconds: 600 + fuzz-seconds: 1200 mode: 'coverage' sanitizer: 'coverage' storage-repo: https://${{ secrets.CLUSTERFUZZLITE_STORAGE_TOKEN }}@github.com/rsonquery/rsonpath-fuzz-storage.git diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 277434ea..c57201d1 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -138,7 +138,6 @@ jobs: target/ key: ${{ matrix.toolchain }}-${{ matrix.target_triple }}-cargo-${{ hashFiles('**/Cargo.toml') }} - name: cargo install cargo-hack - if: steps.cache-restore-cargo.outputs.cache-hit != 'true' uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1 with: # Name of the crate to install