Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: child slice selectors only selecting first matching index #499

Merged
merged 2 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/book.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,13 @@ jobs:
target/
key: book-cargo-${{ hashFiles('**/Cargo.toml') }}
- name: cargo install mdbook
if: steps.cache-restore-cargo.outputs.cache-hit != 'true'
uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1
with:
# Name of the crate to install
crate: mdbook
env:
CARGO_TARGET_DIR: target/
- name: cargo install mdbook-katex
if: steps.cache-restore-cargo.outputs.cache-hit != 'true'
uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1
with:
# Name of the crate to install
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/clusterfuzzlite-cron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 600
fuzz-seconds: 1200
mode: 'prune'
output-sarif: true
storage-repo: https://${{ secrets.CLUSTERFUZZLITE_STORAGE_TOKEN }}@github.com/rsonquery/rsonpath-fuzz-storage.git
Expand All @@ -64,7 +64,7 @@ jobs:
uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 600
fuzz-seconds: 1200
mode: 'coverage'
sanitizer: 'coverage'
storage-repo: https://${{ secrets.CLUSTERFUZZLITE_STORAGE_TOKEN }}@github.com/rsonquery/rsonpath-fuzz-storage.git
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ jobs:
target/
key: ${{ matrix.toolchain }}-${{ matrix.target_triple }}-cargo-${{ hashFiles('**/Cargo.toml') }}
- name: cargo install cargo-hack
if: steps.cache-restore-cargo.outputs.cache-hit != 'true'
uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1
with:
# Name of the crate to install
Expand Down
9 changes: 9 additions & 0 deletions crates/rsonpath-lib/src/automaton.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ impl ArrayTransitionLabel {
Self::Slice(s) => s.contains(index),
}
}

fn matches_at_most_once(&self) -> bool {
match self {
Self::Index(_) => true,
Self::Slice(slice) => {
slice.step == JsonUInt::ZERO && slice.end.map_or(false, |end| slice.start.as_u64() + 1 >= end.as_u64())
}
}
}
}

impl From<JsonUInt> for ArrayTransitionLabel {
Expand Down
64 changes: 60 additions & 4 deletions crates/rsonpath-lib/src/automaton/minimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,7 @@ impl<'q> Minimizer<'q> {
debug!("{id} is rejecting");
attrs = attrs.rejecting();
}
if array_transitions.len() + member_transitions.len() == 1 && fallback == Self::rejecting_state() {
debug!("{id} is unitary");
attrs = attrs.unitary();
}

if self.accepting.contains(fallback.0)
|| array_transitions
.iter()
Expand All @@ -213,6 +210,23 @@ impl<'q> Minimizer<'q> {
attrs = attrs.has_array_transition_to_accepting();
}

// Unitarity check:
// 1. Fallback rejects.
// 2. Only one transition that can match at most one element in a JSON, either:
// a) member transition; or
// b) array transition that matches only one index.
let is_unitary = {
fallback == Self::rejecting_state()
&& ((member_transitions.len() == 1 && array_transitions.is_empty())
|| (array_transitions.len() == 1
&& member_transitions.is_empty()
&& array_transitions[0].label.matches_at_most_once()))
};
if is_unitary {
debug!("{id} is unitary");
attrs = attrs.unitary();
}

attrs.into()
}

Expand Down Expand Up @@ -1260,6 +1274,48 @@ mod tests {
assert_eq!(result, expected);
}

#[test]
fn direct_slice() {
// Query = $[1:3]
let label = SimpleSlice::new(1.into(), Some(3.into()), 1.into());

let nfa = NondeterministicAutomaton {
ordered_states: vec![
NfaState::Direct(nfa::Transition::Array(label.into())),
NfaState::Accepting,
],
};

let mut result = minimize(nfa).unwrap();
make_canonical(&mut result);
let expected = Automaton {
states: vec![
StateTable {
array_transitions: smallvec![],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::REJECTING,
},
StateTable {
array_transitions: smallvec![ArrayTransition::new(ArrayTransitionLabel::Slice(label), State(2)),],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING
| StateAttributes::HAS_ARRAY_TRANSITION
| StateAttributes::HAS_ARRAY_TRANSITION_TO_ACCEPTING,
},
StateTable {
array_transitions: smallvec![],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::ACCEPTING,
},
],
};

assert_eq!(result, expected);
}

/// DFA creation is unstable - it can result in many different isomorphic automaton structures.
/// This function relabels the states in a canonical way so that they can be compared for equality.
fn make_canonical(dfa: &mut Automaton) {
Expand Down
28 changes: 28 additions & 0 deletions crates/rsonpath-test/documents/toml/lists.toml
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,31 @@ nodes = ['''[
0
]
]''']

[[queries]]
query = "$[1:3]"
description = "select the second and third elements"

[queries.results]
count = 2
spans = [[31, 33], [39, 278]]
nodes = [
'[]',
'''[
[],
[
[
[],
0
],
[
[],
0
],
[
[],
0
]
]
]'''
]
Loading