Skip to content

Commit

Permalink
Merge pull request #27 from keszybz/pyc-zero-mtime
Browse files Browse the repository at this point in the history
New handler to set mtime embedded in pyc file to 0
  • Loading branch information
keszybz authored Jul 18, 2024
2 parents 2e0ee62 + 5ffca30 commit f02368f
Show file tree
Hide file tree
Showing 9 changed files with 282 additions and 37 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,20 @@ and cleans up unused "flag references".
It is a Rust reimplementation of
the [MarshalParser Python module](https://github.com/fedora-python/marshalparser).

### `pyc-zero-mtime`

Accepts `*.pyc`.

This handler sets the internal timestamp in the `.pyc` file header to 0,
and sets the mtime on the corresponding source `.py` file to 0.
This is intended to be used on [OSTree](https://github.com/ostreedev/ostree)
systems where mtimes are discarded,
causing a mismatch between the timestamp embedded in the `.pyc` file
and the filesystem metadata of the `.py` file.

This handler is not enabled by default and must be explicitly requested
via `--handlers pyc-zero-mtime`.

## Notes

This project is inspired by
Expand Down
15 changes: 8 additions & 7 deletions src/handlers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,23 +176,24 @@ impl Stats {

pub type HandlerBoxed = fn(&Rc<options::Config>) -> Box<dyn Processor>;

pub const HANDLERS: &[(&str, HandlerBoxed)] = &[
("ar", ar::Ar::boxed),
("jar", jar::Jar::boxed),
("javadoc", javadoc::Javadoc::boxed),
("pyc", pyc::Pyc::boxed),
pub const HANDLERS: &[(&str, bool, HandlerBoxed)] = &[
("ar", true, ar::Ar::boxed ),
("jar", true, jar::Jar::boxed ),
("javadoc", true, javadoc::Javadoc::boxed ),
("pyc", true, pyc::Pyc::boxed ),
("pyc-zero-mtime", false, pyc::PycZeroMtime::boxed),
];

pub fn handler_names() -> Vec<&'static str> {
HANDLERS.iter()
.map(|(name, _)| *name)
.map(|(name, _, _)| *name)
.collect()
}

pub fn make_handlers(config: &Rc<options::Config>) -> Result<Vec<Box<dyn Processor>>> {
let mut handlers: Vec<Box<dyn Processor>> = vec![];

for (name, func) in HANDLERS {
for (name, _, func) in HANDLERS {
if config.handler_names.contains(name) {
let mut handler = func(config);
match handler.initialize() {
Expand Down
182 changes: 162 additions & 20 deletions src/handlers/pyc.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */

use anyhow::Result;
use anyhow::{bail, Result};
use log::debug;
use std::fs::File;
use std::io;
use std::io::{Read, Write};
use std::iter;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::str;
use std::time;

use itertools::Itertools;
use num_bigint_dig::{BigInt, ToBigInt};

use crate::handlers::InputOutputHelper;
use crate::handlers::{InputOutputHelper, unwrap_os_string};
use crate::options;

const PYC_MAGIC: &[u8] = &[0x0D, 0x0A];
Expand Down Expand Up @@ -296,16 +299,6 @@ pub fn pyc_python_version(buf: &[u8; 4]) -> Result<((u32, u32), usize)> {
}
}

pub struct Pyc {
config: Rc<options::Config>,
}

impl Pyc {
pub fn boxed(config: &Rc<options::Config>) -> Box<dyn super::Processor> {
Box::new(Self { config: config.clone() })
}
}

#[derive(Debug)]
#[allow(dead_code)] // Right now, we only use dbg! to print the object.
enum Object {
Expand Down Expand Up @@ -382,14 +375,55 @@ impl PycParser {
let mut data = Vec::from(&buf);
input.read_to_end(&mut data)?;

Ok(PycParser {
if data.len() < header_length {
return Err(super::Error::Other(
format!("pyc file is too short ({} < {})", data.len(), header_length)
).into());
}

let pyc = PycParser {
input_path: input_path.to_path_buf(),
version,
data,
read_offset: header_length,
irefs: Vec::new(),
flag_refs: Vec::new(),
})
};

let mtime = pyc.py_content_mtime();
debug!("{}: from py with mtime={} ({}), size={} bytes, {}",
input_path.display(),
mtime,
chrono::DateTime::from_timestamp(mtime as i64, 0).unwrap(),
pyc.py_content_size(),
match pyc.py_content_hash() {
None | Some(0) => "no hash invalidation".to_string(),
Some(hash) => format!("hash={hash}"),
}
);

Ok(pyc)
}

/// Returns the non-zero 32-bit little-endian word stored at byte offset 4
/// of the header, or `None` when that word is zero or when the file
/// predates Python 3.7.
///
/// NOTE(review): PEP 552 describes the word at offset 4 as a flags bit
/// field (bit 0 = hash-based pyc), with the 64-bit source hash following
/// at offset 8. Confirm that callers want the flags word here.
pub fn py_content_hash(&self) -> Option<u32> {
    // Python 3.7 is the first version supporting PEP 552.
    if self.version < (3, 7) {
        return None;
    }

    // Zero is always reported as `None`.
    Some(self._read_long_at(4)).filter(|&word| word != 0)
}

/// Returns the 32-bit little-endian source mtime field of the header.
///
/// The field sits at byte offset 4 for versions before 3.7 and at byte
/// offset 8 from 3.7 onwards.
pub fn py_content_mtime(&self) -> u32 {
    let has_extra_word = self.version >= (3, 7);
    self._read_long_at(if has_extra_word { 8 } else { 4 })
}

/// Returns the 32-bit little-endian source size field of the header.
///
/// The field sits at byte offset 8 for versions before 3.7 and at byte
/// offset 12 from 3.7 onwards.
pub fn py_content_size(&self) -> u32 {
    let has_extra_word = self.version >= (3, 7);
    self._read_long_at(if has_extra_word { 12 } else { 8 })
}

fn take(&mut self, count: usize) -> Result<usize> {
Expand Down Expand Up @@ -530,10 +564,14 @@ impl PycParser {
})
}

/// Decodes the four bytes of `self.data` starting at `offset` as a
/// little-endian `u32`, without touching the read cursor.
///
/// Panics if `offset + 4` is past the end of the data.
fn _read_long_at(&self, offset: usize) -> u32 {
    let mut word = [0u8; 4];
    word.copy_from_slice(&self.data[offset .. offset + 4]);
    u32::from_le_bytes(word)
}

fn _read_long(&mut self) -> Result<u32> {
let offset = self.take(4)?;
let bytes = &self.data[offset .. offset + 4];
Ok(u32::from_le_bytes(bytes.try_into().unwrap()))
Ok(self._read_long_at(offset))
}

fn _read_long_signed(&mut self) -> Result<i32> {
Expand Down Expand Up @@ -652,7 +690,7 @@ impl PycParser {
Ok(Object::Dict(dict))
}

fn clear_unused_flag_refs(&mut self) -> Result<(bool, Vec<u8>)> {
fn clear_unused_flag_refs(&mut self) -> Result<bool> {
// Sequence of flag_refs and irefs ordered by number of byte in a file
let final_list =
iter::zip(self.flag_refs.iter(), iter::repeat(true))
Expand Down Expand Up @@ -698,7 +736,36 @@ impl PycParser {

debug!("{}: removed {} unused FLAG_REFs", self.input_path.display(), removed_count);
assert_eq!(data == self.data, removed_count == 0);
Ok((removed_count > 0, data))
if removed_count > 0 {
self.data = data;
}

Ok(removed_count > 0)
}

fn set_zero_mtime(&mut self) -> Result<bool> {
// Set the embedded mtime timestamp of the source .py file to 0 in the header.

if self.py_content_mtime() == 0 {
return Ok(false);
}

let offset = if self.version < (3, 7) { 4 } else { 8 };
self.data[offset..offset+4].fill(0);
assert!(self.py_content_mtime() == 0);

Ok(true)
}
}


/// Handler for `.pyc` files; holds the shared run configuration.
pub struct Pyc {
    config: Rc<options::Config>,
}

impl Pyc {
    /// Builds the handler as a boxed `Processor` that shares `config`.
    pub fn boxed(config: &Rc<options::Config>) -> Box<dyn super::Processor> {
        let handler = Self { config: Rc::clone(config) };
        Box::new(handler)
    }
}

Expand All @@ -721,16 +788,91 @@ impl super::Processor for Pyc {

parser.read_object()?;

let (have_mod, data) = parser.clear_unused_flag_refs()?;
let have_mod = parser.clear_unused_flag_refs()?;
if have_mod {
io.open_output()?;
io.output.as_mut().unwrap().write_all(&data)?;
io.output.as_mut().unwrap().write_all(&parser.data)?;
}

io.finalize(have_mod)
}
}


pub struct PycZeroMtime {
config: Rc<options::Config>,
}

impl PycZeroMtime {
pub fn boxed(config: &Rc<options::Config>) -> Box<dyn super::Processor> {
Box::new(Self { config: config.clone() })
}

fn set_zero_mtime_on_py_file(&self, input_path: &Path) -> Result<()> {
let input_file_name = unwrap_os_string(input_path.file_name().unwrap())?;
let base = input_file_name.split('.').nth(0).unwrap();
let py_path = input_path.with_file_name(format!("{base}.py"));
debug!("Looking at {}…", py_path.display());

let py_file = match File::open(&py_path) {
Ok(some) => some,
Err(e) => {
if e.kind() == io::ErrorKind::NotFound {
debug!("{}: not found, ignoring", py_path.display());
return Ok(());
} else {
bail!("{}: cannot open: {}", py_path.display(), e);
}
}
};

let orig = py_file.metadata()?;
if !orig.file_type().is_file() {
debug!("{}: not a file, ignoring", py_path.display());
} else if orig.modified()? == time::UNIX_EPOCH {
debug!("{}: mtime is already 0", py_path.display());
} else if self.config.check {
debug!("{}: not touching mtime in --check mode", py_path.display());
} else {
py_file.set_modified(time::UNIX_EPOCH)?;
debug!("{}: mtime set to 0", py_path.display());
}

Ok(())
}
}

impl super::Processor for PycZeroMtime {
    /// Name under which this handler is registered.
    fn name(&self) -> &str {
        "pyc-zero-mtime"
    }

    /// Accepts paths whose extension is exactly `pyc`.
    fn filter(&self, path: &Path) -> Result<bool> {
        let is_pyc = matches!(path.extension(), Some(ext) if ext == "pyc");
        Ok(is_pyc)
    }

    /// Zeroes the mtime embedded in the pyc header and, if the header was
    /// changed, also the filesystem mtime of the matching `.py` file.
    fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
        let (mut io, input) = InputOutputHelper::open(input_path, self.config.check)?;
        let mut parser = PycParser::from_file(input_path, input)?;

        let modified = parser.set_zero_mtime()?;
        if modified {
            io.open_output()?;
            let output = io.output.as_mut().unwrap();
            output.write_all(&parser.data)?;
        }

        let outcome = io.finalize(modified)?;

        // The source file is only touched after the pyc has been finalized,
        // and only when the pyc header actually changed.
        if modified {
            self.set_zero_mtime_on_py_file(input_path)?;
        }

        Ok(outcome)
    }
}


#[cfg(test)]
mod tests {
use super::*;
Expand Down
39 changes: 30 additions & 9 deletions src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub struct Config {
pub strict_handlers: bool,
}

fn filter_by_name(name: &str, filter: &[&str]) -> bool {
fn filter_by_name(name: &str, enabled_by_default: bool, filter: &[&str]) -> bool {
let mut negative_filter = true;

for f in filter.iter().rev() {
Expand All @@ -90,7 +90,7 @@ fn filter_by_name(name: &str, filter: &[&str]) -> bool {
}
}

negative_filter
enabled_by_default && negative_filter
}

pub fn requested_handlers(filter: &[&str]) -> Result<(Vec<&'static str>, bool)> {
Expand All @@ -109,8 +109,8 @@ pub fn requested_handlers(filter: &[&str]) -> Result<(Vec<&'static str>, bool)>

let list: Vec<&'static str> = handlers::HANDLERS
.iter()
.filter(|(name, _)| filter_by_name(name, filter))
.map(|(name, _)| *name)
.filter(|(name, enabled_by_default, _)| filter_by_name(name, *enabled_by_default, filter))
.map(|(name, _, _)| *name)
.collect();

if list.is_empty() {
Expand Down Expand Up @@ -213,10 +213,31 @@ mod tests {

#[test]
fn test_filter_by_name() {
assert_eq!(filter_by_name("x", &vec!["x", "y"]), true);
assert_eq!(filter_by_name("x", &vec!["x"]), true);
assert_eq!(filter_by_name("x", &vec![]), true);
assert_eq!(filter_by_name("x", &vec!["-x"]), false);
assert_eq!(filter_by_name("x", &vec!["-y"]), true);
assert_eq!(filter_by_name("x", true, &vec!["x", "y"]), true);
assert_eq!(filter_by_name("x", true, &vec!["x"]), true);
assert_eq!(filter_by_name("x", true, &vec![]), true);
assert_eq!(filter_by_name("x", true, &vec!["-x"]), false);
assert_eq!(filter_by_name("x", true, &vec!["-y"]), true);

assert_eq!(filter_by_name("x", false, &vec!["x", "y"]), true);
assert_eq!(filter_by_name("x", false, &vec!["x"]), true);
assert_eq!(filter_by_name("x", false, &vec![]), false);
assert_eq!(filter_by_name("x", false, &vec!["-x"]), false);
assert_eq!(filter_by_name("x", false, &vec!["-y"]), false);
}

#[test]
fn test_requested_handlers() {
    // No filter: only the handlers enabled by default, non-strict.
    let (names, strict) = requested_handlers(&[]).unwrap();
    assert_eq!(names, ["ar", "jar", "javadoc", "pyc"]);
    assert!(!strict);

    // Explicit positive list: exactly the named handlers, strict.
    let (names, strict) = requested_handlers(&["ar", "pyc-zero-mtime"]).unwrap();
    assert_eq!(names, ["ar", "pyc-zero-mtime"]);
    assert!(strict);

    // Negative filter on an opt-in handler: defaults remain, strict.
    let (names, strict) = requested_handlers(&["-pyc-zero-mtime"]).unwrap();
    assert_eq!(names, ["ar", "jar", "javadoc", "pyc"]);
    assert!(strict);
}
}
Binary file added tests/cases/adapters.cpython-311~mtime.pyc
Binary file not shown.
Binary file added tests/cases/adapters.cpython-36~mtime.pyc
Binary file not shown.
1 change: 1 addition & 0 deletions tests/test_handlers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod test_ar;
mod test_javadoc;
mod test_pyc;
mod test_pyc_zero_mtime;

use anyhow::Result;
use std::fs;
Expand Down
Loading

0 comments on commit f02368f

Please sign in to comment.