Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added comparable row-oriented representation of a collection of [Array]. #1287

Merged
merged 6 commits into from
Nov 4, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions src/compute/sort/row/interner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use std::hash::BuildHasher;
use std::hash::{BuildHasher, Hash, Hasher};
use std::num::NonZeroU32;
use std::ops::Index;

Expand Down Expand Up @@ -43,6 +43,16 @@ pub struct OrderPreservingInterner {
lookup: HashMap<Interned, (), ()>,
}

/// Hashes a single value with a fresh hasher built from `hasher` and
/// returns the resulting 64-bit digest.
#[inline]
fn hash_one<T: Hash>(hasher: &ahash::RandomState, x: T) -> u64 {
    // Stand-in for `BuildHasher::hash_one`
    // (https://github.com/rust-lang/rust/issues/86161); replace the body with
    // `hasher.hash_one(x)` once that method can be used on the supported toolchain.
    let mut state = hasher.build_hasher();
    x.hash(&mut state);
    state.finish()
}

impl OrderPreservingInterner {
/// Interns an iterator of values returning a list of [`Interned`] which can be
/// used with [`Self::normalized_key`] to retrieve the normalized keys with a
Expand Down Expand Up @@ -70,7 +80,7 @@ impl OrderPreservingInterner {
};

let v = value.as_ref();
let hash = self.hasher.hash_one(v);
let hash = hash_one(&self.hasher, v);
let entry = self
.lookup
.raw_entry_mut()
Expand Down Expand Up @@ -113,9 +123,10 @@ impl OrderPreservingInterner {
self.keys.values.push(0);
let interned = self.keys.append();

let hasher = &mut self.hasher;
let values = &self.values;
v.insert_with_hasher(hash, interned, (), |key| hasher.hash_one(&values[*key]));
v.insert_with_hasher(hash, interned, (), |key| {
hash_one(&self.hasher, &values[*key])
});
out[idx] = Some(interned);
}
}
Expand Down Expand Up @@ -155,6 +166,12 @@ impl OrderPreservingInterner {
let slot_idx = normalized_key[len - 2].checked_sub(2)?;
Some(bucket.slots.get(slot_idx as usize)?.value)
}

/// Looks up the bytes stored for `key` in the interner's value buffer.
///
/// Only compiled for tests.
#[cfg(test)]
pub fn value(&self, key: Interned) -> &[u8] {
    &self.values[key]
}
}

/// A buffer of `[u8]` indexed by `[Interned]`
Expand Down
12 changes: 10 additions & 2 deletions src/compute/sort/row/variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,18 @@ pub const EMPTY_SENTINEL: u8 = 1;
/// Indicates a non-empty string
pub const NON_EMPTY_SENTINEL: u8 = 2;

/// Divides `value` by `divisor`, rounding the result up to the next integer.
///
/// # Panics
///
/// Panics if `divisor` is zero.
#[inline]
fn div_ceil(value: usize, divisor: usize) -> usize {
    // Stand-in for `usize::div_ceil`
    // (https://github.com/rust-lang/rust/issues/88581); replace with
    // `value.div_ceil(divisor)` once that method can be used on the supported toolchain.
    let quotient = value / divisor;
    let remainder = value % divisor;
    if remainder == 0 {
        quotient
    } else {
        // A non-zero remainder means one extra, partially filled unit is needed.
        quotient + 1
    }
}

/// Returns the length of the encoded representation of a byte array, including the null byte.
///
/// `None` takes a single byte. `Some(a)` takes one leading byte plus
/// `BLOCK_SIZE + 1` bytes for every `BLOCK_SIZE`-sized block of `a`, where a
/// trailing partial block is counted as a whole block; an empty slice
/// therefore also takes a single byte.
pub fn encoded_len(a: Option<&[u8]>) -> usize {
match a {
Some(a) => 1 + a.len().div_ceil(BLOCK_SIZE) * (BLOCK_SIZE + 1),
Some(a) => 1 + div_ceil(a.len(), BLOCK_SIZE) * (BLOCK_SIZE + 1),
None => 1,
}
}
Expand All @@ -61,7 +69,7 @@ pub fn encode<'a, I: Iterator<Item = Option<&'a [u8]>>>(out: &mut Rows, i: I, op
*offset += 1;
}
Some(val) => {
let block_count = val.len().div_ceil(BLOCK_SIZE);
let block_count = div_ceil(val.len(), BLOCK_SIZE);
let end_offset = *offset + 1 + block_count * (BLOCK_SIZE + 1);
let to_write = &mut out.buffer[*offset..end_offset];

Expand Down
2 changes: 0 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
#![allow(clippy::type_complexity)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(feature = "simd", feature(portable_simd))]
#![feature(build_hasher_simple_hash_one)]
RinChanNOWWW marked this conversation as resolved.
Show resolved Hide resolved
#![feature(int_roundings)]

#[macro_use]
pub mod array;
Expand Down