Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement deflate_if conditional compression #26

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ default = ["deflate", "zstd"]
deflate = ["include-flate-compress/deflate"]
zstd = ["include-flate-compress/zstd"]
no-compression-warnings = ["include-flate-codegen/no-compression-warnings"]

[[example]]
name = "flate"
1 change: 1 addition & 0 deletions assets/hello-world.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello, World!🌅
1 change: 1 addition & 0 deletions assets/one.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
232 changes: 196 additions & 36 deletions codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,43 @@ use std::io::{Read, Seek};
use std::path::PathBuf;
use std::str::{from_utf8, FromStr};

use include_flate_compress::{apply_compression, CompressionMethod};
use include_flate_compress::{apply_compression, compression_ratio, CompressionMethod};
use proc_macro::TokenStream;
use proc_macro2::Span;
use proc_macro_error::{emit_warning, proc_macro_error};
use quote::quote;
use syn::{Error, LitByteStr};
use syn::{Error, LitByteStr, LitInt, Token};

/// This macro evaluates, at compile time, to `true` if the file should be compressed and to `false` otherwise.
/// Useful for conditional compilation, because the decision adds no runtime cost.
///
/// Please note that, contrary to what the macro name suggests, this macro does **not** actually compress the file.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is only expected for internal use, should we #[doc(hidden)] the re-export? And shall we rename this to should_deflate, since this does not actually do the imperative deflate as the name suggests?

///
/// # Parameters
/// This macro accepts custom compression methods and threshold conditions.
///
/// # Returns
/// This macro expands to a `bool` literal that indicates whether the file should be compressed.
/// If no condition is specified, this macro always returns `true`.
#[proc_macro]
#[proc_macro_error]
pub fn deflate_if(ts: TokenStream) -> TokenStream {
    // Non-UTF-8 variant: the file contents are not validated as text.
    // A failure is surfaced as a `compile_error!` expansion instead of a panic.
    deflate_if_inner(ts, false)
        .map(|tokens| tokens.into())
        .unwrap_or_else(|err| err.to_compile_error().into())
}

/// UTF-8 validating counterpart of `deflate_if!()`.
///
/// Behaves exactly like `deflate_if!`, except that the file contents are additionally
/// required to be valid UTF-8. See `deflate_if!` for more details.
#[proc_macro]
#[proc_macro_error]
pub fn deflate_utf8_if(ts: TokenStream) -> TokenStream {
    // A failure is surfaced as a `compile_error!` expansion instead of a panic.
    deflate_if_inner(ts, true)
        .map(|tokens| tokens.into())
        .unwrap_or_else(|err| err.to_compile_error().into())
}

/// `deflate_file!("file")` is equivalent to `include_bytes!("file.gz")`.
///
Expand All @@ -46,7 +77,7 @@ use syn::{Error, LitByteStr};
#[proc_macro]
#[proc_macro_error]
pub fn deflate_file(ts: TokenStream) -> TokenStream {
match inner(ts, false) {
match deflate_inner(ts, false) {
Ok(ts) => ts.into(),
Err(err) => err.to_compile_error().into(),
}
Expand All @@ -60,7 +91,7 @@ pub fn deflate_file(ts: TokenStream) -> TokenStream {
#[proc_macro]
#[proc_macro_error]
pub fn deflate_utf8_file(ts: TokenStream) -> TokenStream {
match inner(ts, true) {
match deflate_inner(ts, true) {
Ok(ts) => ts.into(),
Err(err) => err.to_compile_error().into(),
}
Expand All @@ -72,55 +103,183 @@ pub fn deflate_utf8_file(ts: TokenStream) -> TokenStream {
/// flate!(pub static DATA: [u8] from "assets/009f.dat"); // default, DEFLATE
/// flate!(pub static DATA: [u8] from "assets/009f.dat" with zstd); // Use Zstd for this file specifically
/// flate!(pub static DATA: [u8] from "assets/009f.dat" with deflate); // Explicitly use DEFLATE.
///
/// flate!(pub static DATA: [u8] from "assets/009f.dat" if always); // Always compress regardless of compression ratio.
/// flate!(pub static DATA: [u8] from "assets/009f.dat" if less_than_original); // Compress only if the compressed size is smaller than the original size.
/// flate!(pub static DATA: [u8] from "assets/009f.dat" if compression_ratio_more_than 10%); // Compress only if the compression ratio is higher than 10%.
/// ```
struct FlateArgs {
/// String literal naming the file to embed. The macro implementations join it onto
/// `CARGO_MANIFEST_DIR` and reject absolute paths.
path: syn::LitStr,
/// Compression method selected with the `deflate` / `zstd` keyword.
/// `None` means no method was given; callers then fall back to DEFLATE.
algorithm: Option<CompressionMethodTy>,
/// Optional threshold condition (`always`, `less_than_original` or
/// `compression_ratio_more_than N%`). `None` means no condition was given.
threshold: Option<ThresholdCondition>,
}

impl syn::parse::Parse for FlateArgs {
fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
let path = input.parse()?;

let algorithm = if input.is_empty() {
None
} else {
let mut algorithm = None;
let mut threshold = None;

while !input.is_empty() {
let lookahead = input.lookahead1();
if lookahead.peek(kw::deflate) {
input.parse::<kw::deflate>()?;
Some(CompressionMethodTy(CompressionMethod::Deflate))
} else if lookahead.peek(kw::zstd) {
input.parse::<kw::zstd>()?;
Some(CompressionMethodTy(CompressionMethod::Zstd))
if lookahead.peek(kw::deflate) || lookahead.peek(kw::zstd) {
algorithm = if lookahead.peek(kw::deflate) {
input.parse::<kw::deflate>()?;
Some(CompressionMethodTy(CompressionMethod::Deflate))
} else {
input.parse::<kw::zstd>()?;
Some(CompressionMethodTy(CompressionMethod::Zstd))
};
} else if lookahead.peek(kw::always)
|| lookahead.peek(kw::less_than_original)
|| (lookahead.peek(kw::compression_ratio_more_than)
&& input.peek2(syn::LitInt)
&& input.peek3(Token![%]))
{
threshold = Some(input.parse()?);
} else {
return Err(lookahead.error());
}
};
}

Ok(Self {
path,
algorithm,
threshold,
})
}
}

/// A threshold condition for compression.
///
/// Parsed from the trailing keywords of the macro input (`always`,
/// `less_than_original`, `compression_ratio_more_than N%`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ThresholdCondition {
    /// Always compress regardless of compression ratio.
    /// This is the default behaviour.
    Always,
    /// Compress only if the compressed size is smaller than the original size.
    LessThanOriginal,
    /// Compress only if the compression ratio satisfies the given threshold.
    ///
    /// The payload is the integer written before `%` in the macro input, i.e. a
    /// percentage.
    CompressionRatioMoreThan(u64),
}

impl syn::parse::Parse for ThresholdCondition {
    /// Parses one of `always`, `less_than_original`, or
    /// `compression_ratio_more_than <integer>%`.
    ///
    /// If the next token is none of these keywords, the lookahead's error
    /// (listing the keywords that were tried) is returned.
    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
        let lookahead = input.lookahead1();

        if lookahead.peek(kw::always) {
            input.parse::<kw::always>()?;
            return Ok(Self::Always);
        }

        if lookahead.peek(kw::less_than_original) {
            input.parse::<kw::less_than_original>()?;
            return Ok(Self::LessThanOriginal);
        }

        if !lookahead.peek(kw::compression_ratio_more_than) {
            return Err(lookahead.error());
        }

        // `compression_ratio_more_than` must be followed by `<integer> %`.
        input.parse::<kw::compression_ratio_more_than>()?;
        let percent = input.parse::<LitInt>()?;
        input.parse::<Token![%]>()?;
        let threshold: u64 = percent.base10_parse()?;
        Ok(Self::CompressionRatioMoreThan(threshold))
    }
}

Ok(Self { path, algorithm })
impl Into<u64> for ThresholdCondition {
fn into(self) -> u64 {
match self {
Self::Always => 0,
Self::LessThanOriginal => 100,
Self::CompressionRatioMoreThan(threshold) => threshold,
}
}
}

/// Custom keywords recognised by the argument parsers of the proc-macros in this crate.
mod kw {
// `deflate` is a keyword that indicates that the file should be compressed with DEFLATE.
syn::custom_keyword!(deflate);
// `zstd` is a keyword that indicates that the file should be compressed with Zstd.
syn::custom_keyword!(zstd);

// `always` is a keyword that indicates that the file should always be compressed.
syn::custom_keyword!(always);
// `less_than_original` is a keyword that indicates that the file should be compressed only if
// the compressed size is smaller than the original size (see `ThresholdCondition::LessThanOriginal`).
syn::custom_keyword!(less_than_original);
// `compression_ratio_more_than N%` is a keyword that indicates that the file should be compressed
// only if the compression ratio does not exceed the given threshold. The ratio used by this crate is
// the compressed size expressed as a percentage of the original size.
// For example, `compression_ratio_more_than 10%` means that the file should be compressed only if the
// compressed size is at most 10% of the original size.
syn::custom_keyword!(compression_ratio_more_than);
}

/// Newtype wrapper around [`CompressionMethod`]; this is the type stored in
/// `FlateArgs::algorithm` by the macro argument parser.
#[derive(Debug)]
struct CompressionMethodTy(CompressionMethod);

fn compression_ratio(original_size: u64, compressed_size: u64) -> f64 {
(compressed_size as f64 / original_size as f64) * 100.0
/// Converts any displayable error into a `syn::Error` whose span is the macro call site,
/// so it can be propagated with `?` and later rendered as a compile error.
fn emap<E: std::fmt::Display>(error: E) -> Error {
Error::new(Span::call_site(), error)
}

fn inner(ts: TokenStream, utf8: bool) -> syn::Result<impl Into<TokenStream>> {
fn emap<E: std::fmt::Display>(error: E) -> Error {
Error::new(Span::call_site(), error)
/// Decides, at macro-expansion time, whether the file named in `ts` should be compressed.
///
/// The file is resolved relative to `CARGO_MANIFEST_DIR`, compressed in memory with the
/// requested method (DEFLATE when none is given), and the compressed size is compared with
/// the on-disk size according to the optional threshold condition.
///
/// # Parameters
/// * `ts` - macro input, parsed as `FlateArgs`.
/// * `utf8` - when `true`, the file contents must additionally be valid UTF-8.
///
/// # Returns
/// Tokens for the literal `true` (compress) or `false` (do not compress). With no
/// condition, or with `always`, the result is `true`.
///
/// # Errors
/// Returns a `syn::Error` if the input cannot be parsed, the path is absolute, the file
/// cannot be read, UTF-8 validation fails, or compression fails.
fn deflate_if_inner(ts: TokenStream, utf8: bool) -> syn::Result<impl Into<TokenStream>> {
    let dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").map_err(emap)?);

    // `ts` is owned and not used again, so it can be converted without cloning.
    let args = syn::parse2::<FlateArgs>(ts.into())?;
    let path = PathBuf::from_str(&args.path.value()).map_err(emap)?;
    let algo = args
        .algorithm
        .unwrap_or(CompressionMethodTy(CompressionMethod::Deflate));

    if path.is_absolute() {
        return Err(emap("absolute paths are not supported"));
    }

    let target = dir.join(&path);
    let mut file = File::open(&target).map_err(emap)?;

    // Only populated in UTF-8 mode; do NOT use its length as the original file size.
    let mut vec = Vec::<u8>::new();
    if utf8 {
        std::io::copy(&mut file, &mut vec).map_err(emap)?;
        from_utf8(&vec).map_err(emap)?;
    }

    let mut compressed_buffer = Vec::<u8>::new();

    {
        let mut compressed_cursor = std::io::Cursor::new(&mut compressed_buffer);
        let mut source: Box<dyn Read> = if utf8 {
            // The file was already consumed by the validation pass; reuse the buffer.
            Box::new(std::io::Cursor::new(&vec))
        } else {
            file.seek(std::io::SeekFrom::Start(0)).map_err(emap)?;
            Box::new(&file)
        };

        apply_compression(&mut source, &mut compressed_cursor, algo.0).map_err(emap)?;
    }

    // Take the original size from the file system so that it is correct in both modes.
    // Comparing against `vec.len()` would see 0 whenever `utf8` is `false`.
    let original_size = fs::metadata(&target).map_err(emap)?.len();
    let compressed_size = compressed_buffer.len() as u64;

    // `true` if the file should be compressed, `false` otherwise.
    let should_compress = match args.threshold {
        None | Some(ThresholdCondition::Always) => true,
        Some(ThresholdCondition::LessThanOriginal) => compressed_size <= original_size,
        Some(ThresholdCondition::CompressionRatioMoreThan(threshold)) => {
            compression_ratio(original_size, compressed_size) <= threshold as f64
        }
    };

    Ok(if should_compress {
        quote!(true)
    } else {
        quote!(false)
    })
}

fn deflate_inner(ts: TokenStream, utf8: bool) -> syn::Result<impl Into<TokenStream>> {
let dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").map_err(emap)?);

let args: FlateArgs = syn::parse2::<FlateArgs>(ts.to_owned().into())?;
let args = syn::parse2::<FlateArgs>(ts.to_owned().into())?;
let path = PathBuf::from_str(&args.path.value()).map_err(emap)?;
let algo = args
.algorithm
Expand All @@ -131,9 +290,7 @@ fn inner(ts: TokenStream, utf8: bool) -> syn::Result<impl Into<TokenStream>> {
}

let target = dir.join(&path);

let mut file = File::open(&target).map_err(emap)?;

let mut vec = Vec::<u8>::new();
if utf8 {
std::io::copy(&mut file, &mut vec).map_err(emap)?;
Expand All @@ -145,7 +302,7 @@ fn inner(ts: TokenStream, utf8: bool) -> syn::Result<impl Into<TokenStream>> {
{
let mut compressed_cursor = std::io::Cursor::new(&mut compressed_buffer);
let mut source: Box<dyn Read> = if utf8 {
Box::new(std::io::Cursor::new(vec))
Box::new(std::io::Cursor::new(&vec))
} else {
file.seek(std::io::SeekFrom::Start(0)).map_err(emap)?;
Box::new(&file)
Expand All @@ -157,21 +314,24 @@ fn inner(ts: TokenStream, utf8: bool) -> syn::Result<impl Into<TokenStream>> {
let bytes = LitByteStr::new(&compressed_buffer, Span::call_site());
let result = quote!(#bytes);

let compression_ratio = compression_ratio(
fs::metadata(&target).map_err(emap)?.len(),
compressed_buffer.len() as u64,
);

// Default to 10% threshold
let threshold: u64 = args.threshold.map_or(10, |cond| cond.into());

#[cfg(not(feature = "no-compression-warnings"))]
{
let compression_ratio = compression_ratio(
fs::metadata(&target).map_err(emap)?.len(),
compressed_buffer.len() as u64,
);

if compression_ratio < 10.0f64 {
if compression_ratio < threshold as f64 {
emit_warning!(
&args.path,
"Detected low compression ratio ({:.2}%) for file {:?} with `{:?}`. Consider using other compression methods.",
compression_ratio,
path.display(),
algo.0,
);
&args.path,
"Detected low compression ratio ({:.2}%) for file {:?} with `{:?}`. Consider using other compression methods.",
compression_ratio,
path.display(),
algo.0,
);
}
}

Expand Down
4 changes: 4 additions & 0 deletions compress/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,7 @@ where
io::copy(&mut decoder, writer)?;
Ok(())
}

/// Returns the compressed size expressed as a percentage of the original size.
///
/// A result of `100.0` means the compressed data is exactly as large as the original;
/// smaller values mean better compression.
///
/// No special handling is done for `original_size == 0`: the result then follows
/// floating-point division rules (infinite, or NaN when both sizes are zero).
pub fn compression_ratio(original_size: u64, compressed_size: u64) -> f64 {
    let fraction = compressed_size as f64 / original_size as f64;
    fraction * 100.0
}
22 changes: 22 additions & 0 deletions examples/flate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// include-flate
// Copyright (C) SOFe, kkent030315
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use include_flate::flate;

flate!(pub static HELLO_WORLD: str from "assets/hello-world.txt" with deflate if always);

/// Prints the contents of the embedded `HELLO_WORLD` asset followed by a newline.
fn main() {
    let message = &*HELLO_WORLD;
    println!("{}", message);
}
Loading
Loading