Skip to content

Commit

Permalink
feat: dynamically sized full-object speeds up diff-based object count…
Browse files Browse the repository at this point in the history
…ing… (#67)

…which is what happens when counting objects for fetches where only
changed objects should be sent.
  • Loading branch information
Byron committed Sep 21, 2021
1 parent 80c6994 commit d6c44e6
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 1 deletion.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ check: ## Build all code in suitable configurations
cd git-pack && cargo check --features serde1 \
&& cargo check --features pack-cache-lru-static \
&& cargo check --features pack-cache-lru-dynamic \
&& cargo check --features object-cache-dynamic \
&& cargo check
cd git-packetline && cargo check \
&& cargo check --features blocking-io \
Expand Down
2 changes: 2 additions & 0 deletions cargo-features.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ The library powering the command-line interface.
for the LRU-cache itself low.
* **pack-cache-lru-dynamic**
* Provide a hash-map based LRU cache whose eviction is based on a memory cap calculated from object data.
* **object-cache-dynamic**
* If set, select algorithms may additionally use a full-object cache which is queried before the pack itself.

### git-actor

Expand Down
1 change: 1 addition & 0 deletions git-pack/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ doctest = false
[features]
pack-cache-lru-static = ["uluru"]
pack-cache-lru-dynamic = ["clru"]
object-cache-dynamic = ["clru"]
serde1 = ["serde", "git-object/serde1"]
internal-testing-git-features-parallel = ["git-features/parallel"]
internal-testing-to-avoid-being-run-by-cargo-test-all = []
Expand Down
2 changes: 2 additions & 0 deletions git-pack/src/cache/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,7 @@ impl<T: DecodeEntry + ?Sized> DecodeEntry for Box<T> {
#[cfg(any(feature = "pack-cache-lru-dynamic", feature = "pack-cache-lru-static"))]
pub mod lru;

pub mod object;

///
pub(crate) mod delta;
107 changes: 107 additions & 0 deletions git-pack/src/cache/object.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//! # Note
//!
//! This module is a bit 'misplaced' if spelled out like 'git_pack::cache::object::*' but is best placed here for code re-use and
//! general usefulness.
#[cfg(feature = "object-cache-dynamic")]
mod memory {
    use std::num::NonZeroUsize;

    use clru::WeightScale;

    /// A cached object: a private copy of its bytes along with the kind of object they represent.
    struct Entry {
        data: Vec<u8>,
        kind: git_object::Kind,
    }

    /// Objects are looked up by their id.
    type Key = git_hash::ObjectId;

    /// Determines how much each entry counts towards the memory cap of the cache.
    struct CustomScale;

    impl WeightScale<Key, Entry> for CustomScale {
        /// The weight is the length of the object data plus the fixed size of the `Entry` itself
        /// and the amount of bytes in the key.
        fn weight(&self, key: &Key, value: &Entry) -> usize {
            value.data.len() + std::mem::size_of::<Entry>() + key.as_bytes().len()
        }
    }

    /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
    pub struct MemoryCappedHashmap {
        /// The actual cache, weighing its entries with `CustomScale`.
        inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>,
        /// Buffers handed back by `inner` during `put(…)`, kept around so their allocation can be reused.
        free_list: Vec<Vec<u8>>,
        /// Keeps track of puts, hits and misses.
        debug: git_features::cache::Debug,
    }

    impl MemoryCappedHashmap {
        /// The amount of bytes we can hold in total, or the value we saw in `new(…)`.
        pub fn capacity(&self) -> usize {
            self.inner.capacity()
        }

        /// Return a new instance which evicts least recently used items if it uses more than `memory_cap_in_bytes`
        /// object data.
        ///
        /// # Panics
        ///
        /// If `memory_cap_in_bytes` is zero.
        pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
            MemoryCappedHashmap {
                inner: clru::CLruCache::with_config(
                    clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
                        .with_scale(CustomScale),
                ),
                free_list: Vec::new(),
                debug: git_features::cache::Debug::new(format!("MemoryCappedObjectHashmap({}B)", memory_cap_in_bytes)),
            }
        }

        /// Put the object going by `id` of `kind` with `data` into the cache.
        ///
        /// `data` is copied, preferably into a buffer recycled from a previous call.
        pub fn put(&mut self, id: git_hash::ObjectId, kind: git_object::Kind, data: &[u8]) {
            self.debug.put();
            let buf = match self.free_list.pop() {
                Some(mut recycled) => {
                    // `clear()` + `extend_from_slice()` reuses the allocation without zero-filling it first.
                    recycled.clear();
                    recycled.extend_from_slice(data);
                    recycled
                }
                None => Vec::from(data),
            };
            match self.inner.put_with_weight(id, Entry { data: buf, kind }) {
                Ok(Some(previous_entry)) => self.free_list.push(previous_entry.data),
                Ok(None) => {}
                // The cache refused the entry and handed it back: keep the buffer instead of dropping it.
                Err((_id, rejected_entry)) => self.free_list.push(rejected_entry.data),
            }
        }

        /// Try to retrieve the object named `id` and place its data into `out` if available and return `Some(kind)` if found.
        ///
        /// `out` is left untouched if the object is not in the cache.
        pub fn get(&mut self, id: &git_hash::ObjectId, out: &mut Vec<u8>) -> Option<git_object::Kind> {
            let res = self.inner.get(id).map(|entry| {
                out.clear();
                out.extend_from_slice(&entry.data);
                entry.kind
            });
            if res.is_some() {
                self.debug.hit();
            } else {
                self.debug.miss();
            }
            res
        }
    }
}
#[cfg(feature = "object-cache-dynamic")]
pub use memory::MemoryCappedHashmap;

/// A cache implementation that doesn't do any caching.
///
/// It offers the same `put(…)` and `get(…)` methods as the memory-capped object cache, so it can
/// take its place where no object cache is desired.
#[derive(Debug, Default, Clone, Copy)]
pub struct Never;

impl Never {
    /// Noop, the object is not stored.
    pub fn put(&mut self, _id: git_hash::ObjectId, _kind: git_object::Kind, _data: &[u8]) {}

    /// Noop, always returns `None` and leaves `_out` untouched.
    pub fn get(&mut self, _id: &git_hash::ObjectId, _out: &mut Vec<u8>) -> Option<git_object::Kind> {
        None
    }
}
17 changes: 16 additions & 1 deletion git-pack/src/data/output/count/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ where
let mut traverse_delegate = tree::traverse::AllUnseen::new(seen_objs);
let mut changes_delegate = tree::changes::AllNew::new(seen_objs);
let mut outcome = Outcome::default();
#[cfg(feature = "object-cache-dynamic")]
let mut obj_cache = crate::cache::object::MemoryCappedHashmap::new(10 * 1024 * 1024); // TODO: make configurable
#[cfg(not(feature = "object-cache-dynamic"))]
let mut obj_cache = crate::cache::object::Never;

let stats = &mut outcome;
for id in oids.into_iter() {
if should_interrupt.load(Ordering::Relaxed) {
Expand Down Expand Up @@ -265,7 +270,17 @@ where
&mut tree_diff_state,
|oid, buf| {
stats.decoded_objects += 1;
db.find_tree_iter(oid, buf, cache).ok()
let id = oid.to_owned();
match obj_cache.get(&id, buf) {
Some(_kind) => git_object::TreeRefIter::from_bytes(buf).into(),
None => match db.find_tree_iter(oid, buf, cache).ok() {
Some(_) => {
obj_cache.put(id, git_object::Kind::Tree, buf);
git_object::TreeRefIter::from_bytes(buf).into()
}
None => None,
},
}
},
&mut changes_delegate,
)
Expand Down

0 comments on commit d6c44e6

Please sign in to comment.