Always compress values when using a sink while exploding packs
Looks like IO is a major bottleneck, followed by gzip, which only manages
about 20MB/s per core.

Of course there is also the cost of extracting the objects in the first
place, but the OUTPUT part seems particularly slow.

Git is a slow writer because of this, but done right, I would think
100MB/s on my machine is possible when writing directly into packs.
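To put the gzip figure in context, here is a minimal single-core throughput sketch, not part of this commit: it times the `flate2` crate's DeflateEncoder writing into io::sink(). The buffer contents and sizes are arbitrary assumptions; real git objects will compress differently.

// Hedged micro-benchmark sketch, not from this commit: times DeflateEncoder
// from the `flate2` crate on one core to sanity-check the ~20MB/s figure.
use std::io::{self, Write};
use std::time::Instant;

use flate2::{write::DeflateEncoder, Compression};

fn main() -> io::Result<()> {
    // Fill 1 MiB with cheap pseudo-random bytes so deflate does real work;
    // an all-zero buffer would compress unrealistically fast.
    let mut chunk = vec![0u8; 1 << 20];
    let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
    for byte in chunk.iter_mut() {
        state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
        *byte = (state >> 56) as u8;
    }

    let total_mib = 256;
    let mut encoder = DeflateEncoder::new(io::sink(), Compression::default());
    let start = Instant::now();
    for _ in 0..total_mib {
        encoder.write_all(&chunk)?;
    }
    encoder.finish()?;

    let elapsed = start.elapsed().as_secs_f64();
    println!("~{:.1} MiB/s on one core", total_mib as f64 / elapsed);
    Ok(())
}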
Byron committed Jul 28, 2020
1 parent 105c845 commit 70562fa
Showing 2 changed files with 28 additions and 13 deletions.
2 changes: 1 addition & 1 deletion git-odb/src/sink.rs
@@ -11,7 +11,7 @@ pub struct Sink {
 }
 
 impl Sink {
-    pub fn compress(&mut self, enable: bool) -> &mut Self {
+    pub fn compress(mut self, enable: bool) -> Self {
         if enable {
             self.compressor = Some(RefCell::new(DeflateWriter::new(io::sink())));
         } else {
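A note on the signature change above: with `&mut self`, the result of `git_odb::sink().compress(true)` is a mutable reference into a temporary and cannot be stored, while the by-value builder lets a compressing sink be built in one expression, which is exactly what `OutputWriter::new` below relies on. A usage line under the changed API shown above:

// Construct and configure the sink in a single expression.
let sink = git_odb::sink().compress(true);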
39 changes: 27 additions & 12 deletions gitoxide-core/src/pack/explode.rs
@@ -86,27 +86,40 @@ quick_error! {
     }
 }
 
-struct OutputWriter(Option<loose::Db>);
+enum OutputWriter {
+    Loose(loose::Db),
+    Sink(git_odb::Sink),
+}
 
 impl git_odb::Write for OutputWriter {
     type Error = Error;
 
+    fn write_buf(&self, kind: git_object::Kind, from: &[u8], hash: HashKind) -> Result<owned::Id, Self::Error> {
+        match self {
+            OutputWriter::Loose(db) => db.write_buf(kind, from, hash).map_err(Into::into),
+            OutputWriter::Sink(db) => db.write_buf(kind, from, hash).map_err(Into::into),
+        }
+    }
+
     fn write_stream(
         &self,
         kind: git_object::Kind,
         size: u64,
         from: impl Read,
         hash: HashKind,
     ) -> Result<owned::Id, Self::Error> {
-        match self.0.as_ref() {
-            Some(db) => db.write_stream(kind, size, from, hash).map_err(Into::into),
-            None => git_odb::sink().write_stream(kind, size, from, hash).map_err(Into::into),
+        match self {
+            OutputWriter::Loose(db) => db.write_stream(kind, size, from, hash).map_err(Into::into),
+            OutputWriter::Sink(db) => db.write_stream(kind, size, from, hash).map_err(Into::into),
         }
     }
-    fn write_buf(&self, kind: git_object::Kind, from: &[u8], hash: HashKind) -> Result<owned::Id, Self::Error> {
-        match self.0.as_ref() {
-            Some(db) => db.write_buf(kind, from, hash).map_err(Into::into),
-            None => git_odb::sink().write_buf(kind, from, hash).map_err(Into::into),
-        }
-    }
+}
+
+impl OutputWriter {
+    fn new(path: Option<impl AsRef<Path>>) -> Self {
+        match path {
+            Some(path) => OutputWriter::Loose(loose::Db::at(path.as_ref())),
+            None => OutputWriter::Sink(git_odb::sink().compress(true)),
+        }
+    }
 }
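Design note on the hunk above: the old `Option<loose::Db>` constructed a fresh, uncompressed `git_odb::sink()` on every write in the `None` branch; the enum builds the sink once, with compression enabled, and dispatches statically via `match`. A usage sketch under this diff's own definitions (the path is illustrative):

// Writes loose objects to disk when a path is given...
let to_disk = OutputWriter::new(Some("objects"));
// ...and discards through a compressing sink otherwise.
let to_sink = OutputWriter::new(None::<&std::path::Path>);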
@@ -138,7 +151,6 @@ where
         ));
     }
 
-    let out = OutputWriter(object_path.map(|path| loose::Db::at(path.as_ref())));
     let mut progress = bundle.index.traverse(
         &bundle.pack,
         pack::index::traverse::Context {
@@ -147,8 +159,11 @@
             check: check.into(),
         },
         progress,
-        || {
-            |object_kind, buf, index_entry, _entry_stats, progress| {
+        {
+            let object_path = object_path.map(|p| p.as_ref().to_owned());
+            move || {
+                let out = OutputWriter::new(object_path.clone());
+                move |object_kind, buf, index_entry, _entry_stats, progress| {
                 let written_id = out
                     .write_buf(object_kind, buf, HashKind::Sha1)
                     .map_err(|err| Error::Write(Box::new(err) as Box<dyn std::error::Error + Send + Sync>, object_kind, index_entry.oid))
@@ -162,7 +177,7 @@ where
                 }
                 Ok(())
             }
-        },
+        }},
         pack::cache::DecodeEntryLRU::default,
     ).map(|(_,_,c)|progress::DoOrDiscard::from(c)).with_context(|| "Failed to explode the entire pack - some loose objects may have been created nonetheless")?;
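The nested closures above form a state-factory pattern: the outer `move ||` closure runs once per traversal worker and builds that worker's own `OutputWriter`, so the inner per-object closure owns its writer and needs no locking. A self-contained sketch of the same pattern using plain std threads; the names are illustrative and this is not gitoxide code:

use std::thread;

fn main() {
    let object_path: Option<String> = None; // e.g. Some("objects".into())

    // Outer closure: invoked once per worker to build per-thread state,
    // like `OutputWriter::new(object_path.clone())` in the diff above.
    let make_handler = || {
        let out = object_path.clone();
        // Inner closure: owns `out` and handles one item at a time.
        move |item: usize| match &out {
            Some(path) => println!("write object {item} to {path}"),
            None => println!("write object {item} to compressing sink"),
        }
    };

    thread::scope(|scope| {
        for t in 0..4 {
            let handle = make_handler();
            scope.spawn(move || {
                for item in (t..20).step_by(4) {
                    handle(item);
                }
            });
        }
    });
}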

