Skip to content

Commit

Permalink
Merge pull request #4031 from wasmerio/remove-cache-compression
Browse files Browse the repository at this point in the history
Speed up the module cache 6x by removing LZW compression
  • Loading branch information
Michael Bryan authored Jun 26, 2023
2 parents 77898a7 + 4223625 commit bf5b569
Showing 1 changed file with 74 additions and 42 deletions.
116 changes: 74 additions & 42 deletions lib/wasix/src/runtime/module_cache/filesystem.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use std::path::{Path, PathBuf};
use std::{
io::{BufWriter, Write},
path::{Path, PathBuf},
};

use tempfile::NamedTempFile;
use wasmer::{Engine, Module};
Expand Down Expand Up @@ -43,10 +46,9 @@ impl ModuleCache for FileSystemCache {
// background.
// https://github.com/wasmerio/wasmer/issues/3851

let uncompressed = read_compressed(&path)?;
let bytes = read_file(&path)?;

let res = unsafe { Module::deserialize(&engine, uncompressed) };
match res {
match deserialize(&bytes, engine) {
Ok(m) => {
tracing::debug!("Cache hit!");
Ok(m)
Expand All @@ -68,7 +70,7 @@ impl ModuleCache for FileSystemCache {
);
}

Err(CacheError::Deserialize(e))
Err(e)
}
}
}
Expand Down Expand Up @@ -101,58 +103,64 @@ impl ModuleCache for FileSystemCache {

// Note: We save to a temporary file and persist() it at the end so
// concurrent readers won't see a partially written module.
let mut f = NamedTempFile::new_in(parent).map_err(CacheError::other)?;
let mut temp = NamedTempFile::new_in(parent).map_err(CacheError::other)?;
let serialized = module.serialize()?;

if let Err(e) = save_compressed(&mut f, &serialized) {
return Err(CacheError::FileWrite { path, error: e });
if let Err(error) = BufWriter::new(&mut temp).write_all(&serialized) {
return Err(CacheError::FileWrite { path, error });
}

f.persist(&path).map_err(CacheError::other)?;
temp.persist(&path).map_err(CacheError::other)?;

Ok(())
}
}

fn save_compressed(writer: impl std::io::Write, data: &[u8]) -> Result<(), std::io::Error> {
let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8);
encoder
.into_stream(writer)
.encode_all(std::io::Cursor::new(data))
.status?;

Ok(())
/// Reads the whole file at `path` into memory.
///
/// # Errors
///
/// A missing file is reported as [`CacheError::NotFound`]. Any other I/O
/// failure is reported as [`CacheError::FileRead`], carrying a copy of the
/// path together with the underlying error.
fn read_file(path: &Path) -> Result<Vec<u8>, CacheError> {
    std::fs::read(path).map_err(|error| {
        // "Not found" gets its own variant; everything else keeps the
        // original I/O error alongside the path that was read.
        if error.kind() == std::io::ErrorKind::NotFound {
            CacheError::NotFound
        } else {
            CacheError::FileRead {
                path: path.to_path_buf(),
                error,
            }
        }
    })
}

fn read_compressed(path: &Path) -> Result<Vec<u8>, CacheError> {
let compressed = match std::fs::read(path) {
Ok(bytes) => bytes,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
return Err(CacheError::NotFound);
fn deserialize(bytes: &[u8], engine: &Engine) -> Result<Module, CacheError> {
// We used to compress our compiled modules using LZW encoding in the past.
// This was removed because it has a negative impact on startup times for
// "wasmer run", so all new compiled modules should be saved directly to
// disk.
//
// For perspective, compiling php.wasm with cranelift took about 4.75
// seconds on an M1 Mac.
//
// Without LZW compression:
// - ModuleCache::save(): 408ms, 142MB binary
// - ModuleCache::load(): 155ms
// With LZW compression:
// - ModuleCache::save(): 2.4s, 72MB binary
// - ModuleCache::load(): 822ms

match unsafe { Module::deserialize(engine, bytes) } {
// The happy case
Ok(m) => Ok(m),
Err(wasmer::DeserializeError::Incompatible(_)) => {
let bytes = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8)
.decode(bytes)
.map_err(CacheError::other)?;

let m = unsafe { Module::deserialize(engine, bytes)? };

Ok(m)
}
Err(error) => {
return Err(CacheError::FileRead {
path: path.to_path_buf(),
error,
});
}
};

let mut uncompressed = Vec::new();
let mut decoder = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8);
decoder
.into_vec(&mut uncompressed)
.decode_all(&compressed)
.status
.map_err(CacheError::other)?;

Ok(uncompressed)
Err(e) => Err(CacheError::Deserialize(e)),
}
}

#[cfg(test)]
mod tests {
use std::fs::File;

use tempfile::TempDir;

use super::*;
Expand Down Expand Up @@ -218,7 +226,31 @@ mod tests {
let expected_path = cache.path(key, engine.deterministic_id());
std::fs::create_dir_all(expected_path.parent().unwrap()).unwrap();
let serialized = module.serialize().unwrap();
save_compressed(File::create(&expected_path).unwrap(), &serialized).unwrap();
std::fs::write(&expected_path, &serialized).unwrap();

let module = cache.load(key, &engine).await.unwrap();

let exports: Vec<_> = module
.exports()
.map(|export| export.name().to_string())
.collect();
assert_eq!(exports, ["add"]);
}

/// For backwards compatibility, make sure we can still work with LZW
/// compressed modules.
#[tokio::test]
async fn can_still_load_lzw_compressed_binaries() {
let temp = TempDir::new().unwrap();
let engine = Engine::default();
let module = Module::new(&engine, ADD_WAT).unwrap();
let key = ModuleHash::from_bytes([0; 32]);
let cache = FileSystemCache::new(temp.path());
let expected_path = cache.path(key, engine.deterministic_id());
std::fs::create_dir_all(expected_path.parent().unwrap()).unwrap();
let serialized = module.serialize().unwrap();
let mut encoder = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8);
std::fs::write(&expected_path, encoder.encode(&serialized).unwrap()).unwrap();

let module = cache.load(key, &engine).await.unwrap();

Expand Down

0 comments on commit bf5b569

Please sign in to comment.