Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New handler to set mtime embedded in pyc file to 0 #27

Merged
merged 5 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,20 @@ and cleans up unused "flag references".
It is a Rust reimplementation of
the [MarshalParser Python module](https://github.com/fedora-python/marshalparser).

### `pyc-zero-mtime`

Accepts `*.pyc`.

This handler sets the internal timestamp in the `.pyc` file header to 0,
and sets the mtime of the corresponding source `.py` file to 0.
This is intended to be used on [OSTree](https://github.com/ostreedev/ostree)
systems where mtimes are discarded,
causing a mismatch between the timestamp embedded in the `.pyc` file
and the filesystem metadata of the `.py` file.

This handler is not enabled by default and must be explicitly requested
via `--handlers pyc-zero-mtime`.

## Notes

This project is inspired by
Expand Down
15 changes: 8 additions & 7 deletions src/handlers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,23 +176,24 @@ impl Stats {

pub type HandlerBoxed = fn(&Rc<options::Config>) -> Box<dyn Processor>;

pub const HANDLERS: &[(&str, HandlerBoxed)] = &[
("ar", ar::Ar::boxed),
("jar", jar::Jar::boxed),
("javadoc", javadoc::Javadoc::boxed),
("pyc", pyc::Pyc::boxed),
pub const HANDLERS: &[(&str, bool, HandlerBoxed)] = &[
("ar", true, ar::Ar::boxed ),
("jar", true, jar::Jar::boxed ),
("javadoc", true, javadoc::Javadoc::boxed ),
("pyc", true, pyc::Pyc::boxed ),
("pyc-zero-mtime", false, pyc::PycZeroMtime::boxed),
];

pub fn handler_names() -> Vec<&'static str> {
HANDLERS.iter()
.map(|(name, _)| *name)
.map(|(name, _, _)| *name)
.collect()
}

pub fn make_handlers(config: &Rc<options::Config>) -> Result<Vec<Box<dyn Processor>>> {
let mut handlers: Vec<Box<dyn Processor>> = vec![];

for (name, func) in HANDLERS {
for (name, _, func) in HANDLERS {
if config.handler_names.contains(name) {
let mut handler = func(config);
match handler.initialize() {
Expand Down
182 changes: 162 additions & 20 deletions src/handlers/pyc.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */

use anyhow::Result;
use anyhow::{bail, Result};
use log::debug;
use std::fs::File;
use std::io;
use std::io::{Read, Write};
use std::iter;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::str;
use std::time;

use itertools::Itertools;
use num_bigint_dig::{BigInt, ToBigInt};

use crate::handlers::InputOutputHelper;
use crate::handlers::{InputOutputHelper, unwrap_os_string};
use crate::options;

const PYC_MAGIC: &[u8] = &[0x0D, 0x0A];
Expand Down Expand Up @@ -296,16 +299,6 @@ pub fn pyc_python_version(buf: &[u8; 4]) -> Result<((u32, u32), usize)> {
}
}

/// Handler registered under the name "pyc" in `HANDLERS`.
pub struct Pyc {
// Shared run configuration handed in by `boxed`.
config: Rc<options::Config>,
}

impl Pyc {
/// Builds the handler as a boxed `Processor`, storing a clone of the
/// `Rc` handle to `config`.
pub fn boxed(config: &Rc<options::Config>) -> Box<dyn super::Processor> {
Box::new(Self { config: config.clone() })
}
}

#[derive(Debug)]
#[allow(dead_code)] // Right now, we only use dbg! to print the object.
enum Object {
Expand Down Expand Up @@ -382,14 +375,55 @@ impl PycParser {
let mut data = Vec::from(&buf);
input.read_to_end(&mut data)?;

Ok(PycParser {
if data.len() < header_length {
return Err(super::Error::Other(
format!("pyc file is too short ({} < {})", data.len(), header_length)
).into());
}

let pyc = PycParser {
input_path: input_path.to_path_buf(),
version,
data,
read_offset: header_length,
irefs: Vec::new(),
flag_refs: Vec::new(),
})
};

let mtime = pyc.py_content_mtime();
debug!("{}: from py with mtime={} ({}), size={} bytes, {}",
input_path.display(),
mtime,
chrono::DateTime::from_timestamp(mtime as i64, 0).unwrap(),
pyc.py_content_size(),
match pyc.py_content_hash() {
None | Some(0) => "no hash invalidation".to_string(),
Some(hash) => format!("hash={hash}"),
}
);

Ok(pyc)
}

/// Returns the 32-bit little-endian header word stored at bytes 4..8 for
/// Python 3.7 and later, reporting a zero word as `None`.
///
/// For interpreters older than 3.7 (which predate PEP 552) the result is
/// always `None` and the header is not read.
///
/// NOTE(review): PEP 552 describes this word as a flags bit field rather
/// than the hash itself; confirm the naming against the callers.
pub fn py_content_hash(&self) -> Option<u32> {
    // PEP 552 support starts with Python 3.7.
    if self.version < (3, 7) {
        return None;
    }

    // A zero word is always folded into `None`.
    Some(self._read_long_at(4)).filter(|&word| word != 0)
}

/// Reads the 32-bit source-mtime field from the header.
///
/// The field is read at byte 4 before Python 3.7 and at byte 8 from 3.7 on.
pub fn py_content_mtime(&self) -> u32 {
    let field_start = if self.version >= (3, 7) { 8 } else { 4 };
    self._read_long_at(field_start)
}

/// Reads the 32-bit source-size field from the header.
///
/// The field is read at byte 8 before Python 3.7 and at byte 12 from 3.7 on.
pub fn py_content_size(&self) -> u32 {
    let field_start = if self.version >= (3, 7) { 12 } else { 8 };
    self._read_long_at(field_start)
}

fn take(&mut self, count: usize) -> Result<usize> {
Expand Down Expand Up @@ -530,10 +564,14 @@ impl PycParser {
})
}

/// Decodes the four bytes starting at `offset` in `self.data` as a
/// little-endian `u32`, without moving the read cursor.
///
/// # Panics
///
/// Panics if `offset + 4` is past the end of `self.data`.
fn _read_long_at(&self, offset: usize) -> u32 {
    let mut word = [0u8; 4];
    // The slice is exactly four bytes long, so the copy cannot fail.
    word.copy_from_slice(&self.data[offset..offset + 4]);
    u32::from_le_bytes(word)
}

fn _read_long(&mut self) -> Result<u32> {
let offset = self.take(4)?;
let bytes = &self.data[offset .. offset + 4];
Ok(u32::from_le_bytes(bytes.try_into().unwrap()))
Ok(self._read_long_at(offset))
}

fn _read_long_signed(&mut self) -> Result<i32> {
Expand Down Expand Up @@ -652,7 +690,7 @@ impl PycParser {
Ok(Object::Dict(dict))
}

fn clear_unused_flag_refs(&mut self) -> Result<(bool, Vec<u8>)> {
fn clear_unused_flag_refs(&mut self) -> Result<bool> {
// Sequence of flag_refs and irefs ordered by number of byte in a file
let final_list =
iter::zip(self.flag_refs.iter(), iter::repeat(true))
Expand Down Expand Up @@ -698,7 +736,36 @@ impl PycParser {

debug!("{}: removed {} unused FLAG_REFs", self.input_path.display(), removed_count);
assert_eq!(data == self.data, removed_count == 0);
Ok((removed_count > 0, data))
if removed_count > 0 {
self.data = data;
}

Ok(removed_count > 0)
}

fn set_zero_mtime(&mut self) -> Result<bool> {
// Set the embedded mtime timestamp of the source .py file to 0 in the header.

if self.py_content_mtime() == 0 {
return Ok(false);
}

let offset = if self.version < (3, 7) { 4 } else { 8 };
self.data[offset..offset+4].fill(0);
assert!(self.py_content_mtime() == 0);

Ok(true)
}
}


/// Handler registered under the name "pyc" in `HANDLERS`.
pub struct Pyc {
    config: Rc<options::Config>,
}

impl Pyc {
    /// Builds the handler as a boxed `Processor` that shares `config`.
    pub fn boxed(config: &Rc<options::Config>) -> Box<dyn super::Processor> {
        let handler = Self { config: Rc::clone(config) };
        Box::new(handler)
    }
}

Expand All @@ -721,16 +788,91 @@ impl super::Processor for Pyc {

parser.read_object()?;

let (have_mod, data) = parser.clear_unused_flag_refs()?;
let have_mod = parser.clear_unused_flag_refs()?;
if have_mod {
io.open_output()?;
io.output.as_mut().unwrap().write_all(&data)?;
io.output.as_mut().unwrap().write_all(&parser.data)?;
}

io.finalize(have_mod)
}
}


/// Handler that zeroes the source timestamp embedded in `.pyc` headers and
/// the filesystem mtime of the matching `.py` file.
pub struct PycZeroMtime {
    config: Rc<options::Config>,
}

impl PycZeroMtime {
    /// Builds the handler as a boxed `Processor` that shares `config`.
    pub fn boxed(config: &Rc<options::Config>) -> Box<dyn super::Processor> {
        Box::new(Self { config: config.clone() })
    }

    /// Sets the mtime of the `.py` source belonging to `input_path` to the
    /// Unix epoch.
    ///
    /// The source name is the part of the pyc file name before the first
    /// `.`, with `.py` appended. It is looked up next to the pyc file, or
    /// one directory up when the pyc file is inside a `__pycache__`
    /// directory (the PEP 3147 layout).
    ///
    /// A missing source, a non-regular file, an mtime that is already 0 and
    /// `--check` mode are all logged and treated as success.
    ///
    /// # Errors
    ///
    /// Fails if `input_path` has no file name, if the name cannot be
    /// converted by `unwrap_os_string`, if the source exists but cannot be
    /// opened, or if reading or updating its metadata fails.
    fn set_zero_mtime_on_py_file(&self, input_path: &Path) -> Result<()> {
        let Some(file_name) = input_path.file_name() else {
            bail!("{}: path has no file name", input_path.display());
        };
        let input_file_name = unwrap_os_string(file_name)?;
        // `split` always yields at least one item, so the fallback is never used.
        let base = input_file_name.split('.').next().unwrap_or_default();
        let py_name = format!("{base}.py");

        // PEP 3147: pyc files under `__pycache__/` belong to a source file
        // in the directory that contains `__pycache__`.
        let py_path = match input_path.parent() {
            Some(dir) if dir.file_name().is_some_and(|x| x == "__pycache__") => {
                dir.with_file_name(&py_name)
            }
            _ => input_path.with_file_name(&py_name),
        };
        debug!("Looking at {}…", py_path.display());

        let py_file = match File::open(&py_path) {
            Ok(some) => some,
            Err(e) => {
                if e.kind() == io::ErrorKind::NotFound {
                    debug!("{}: not found, ignoring", py_path.display());
                    return Ok(());
                } else {
                    bail!("{}: cannot open: {}", py_path.display(), e);
                }
            }
        };

        let orig = py_file.metadata()?;
        if !orig.file_type().is_file() {
            debug!("{}: not a file, ignoring", py_path.display());
        } else if orig.modified()? == time::UNIX_EPOCH {
            debug!("{}: mtime is already 0", py_path.display());
        } else if self.config.check {
            debug!("{}: not touching mtime in --check mode", py_path.display());
        } else {
            py_file.set_modified(time::UNIX_EPOCH)?;
            debug!("{}: mtime set to 0", py_path.display());
        }

        Ok(())
    }
}

impl super::Processor for PycZeroMtime {
    /// Name under which this handler is selected.
    fn name(&self) -> &str {
        "pyc-zero-mtime"
    }

    /// Accepts paths whose extension is exactly `pyc`.
    fn filter(&self, path: &Path) -> Result<bool> {
        let is_pyc = matches!(path.extension(), Some(ext) if ext == "pyc");
        Ok(is_pyc)
    }

    /// Zeroes the timestamp in the pyc header and, when the header was
    /// changed, the mtime of the matching `.py` file as well.
    fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
        let (mut io, input) = InputOutputHelper::open(input_path, self.config.check)?;

        let mut parser = PycParser::from_file(input_path, input)?;
        let changed = parser.set_zero_mtime()?;

        // Only open and write the output when the header was modified.
        if changed {
            io.open_output()?;
            let out = io.output.as_mut().unwrap();
            out.write_all(&parser.data)?;
        }

        let outcome = io.finalize(changed)?;

        // The source file is touched only after the pyc has been finalized.
        if changed {
            self.set_zero_mtime_on_py_file(input_path)?;
        }

        Ok(outcome)
    }
}


#[cfg(test)]
mod tests {
use super::*;
Expand Down
39 changes: 30 additions & 9 deletions src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub struct Config {
pub strict_handlers: bool,
}

fn filter_by_name(name: &str, filter: &[&str]) -> bool {
fn filter_by_name(name: &str, enabled_by_default: bool, filter: &[&str]) -> bool {
let mut negative_filter = true;

for f in filter.iter().rev() {
Expand All @@ -90,7 +90,7 @@ fn filter_by_name(name: &str, filter: &[&str]) -> bool {
}
}

negative_filter
enabled_by_default && negative_filter
}

pub fn requested_handlers(filter: &[&str]) -> Result<(Vec<&'static str>, bool)> {
Expand All @@ -109,8 +109,8 @@ pub fn requested_handlers(filter: &[&str]) -> Result<(Vec<&'static str>, bool)>

let list: Vec<&'static str> = handlers::HANDLERS
.iter()
.filter(|(name, _)| filter_by_name(name, filter))
.map(|(name, _)| *name)
.filter(|(name, enabled_by_default, _)| filter_by_name(name, *enabled_by_default, filter))
.map(|(name, _, _)| *name)
.collect();

if list.is_empty() {
Expand Down Expand Up @@ -213,10 +213,31 @@ mod tests {

#[test]
fn test_filter_by_name() {
assert_eq!(filter_by_name("x", &vec!["x", "y"]), true);
assert_eq!(filter_by_name("x", &vec!["x"]), true);
assert_eq!(filter_by_name("x", &vec![]), true);
assert_eq!(filter_by_name("x", &vec!["-x"]), false);
assert_eq!(filter_by_name("x", &vec!["-y"]), true);
assert_eq!(filter_by_name("x", true, &vec!["x", "y"]), true);
assert_eq!(filter_by_name("x", true, &vec!["x"]), true);
assert_eq!(filter_by_name("x", true, &vec![]), true);
assert_eq!(filter_by_name("x", true, &vec!["-x"]), false);
assert_eq!(filter_by_name("x", true, &vec!["-y"]), true);

assert_eq!(filter_by_name("x", false, &vec!["x", "y"]), true);
assert_eq!(filter_by_name("x", false, &vec!["x"]), true);
assert_eq!(filter_by_name("x", false, &vec![]), false);
assert_eq!(filter_by_name("x", false, &vec!["-x"]), false);
assert_eq!(filter_by_name("x", false, &vec!["-y"]), false);
}

#[test]
fn test_requested_handlers() {
    // No filter: only the handlers enabled by default, non-strict.
    let (selected, strict) = requested_handlers(&[]).unwrap();
    assert_eq!(selected, ["ar", "jar", "javadoc", "pyc"]);
    assert!(!strict);

    // Explicit positive filter: exactly the named handlers, strict.
    let (selected, strict) = requested_handlers(&["ar", "pyc-zero-mtime"]).unwrap();
    assert_eq!(selected, ["ar", "pyc-zero-mtime"]);
    assert!(strict);

    // Negative filter on an opt-in handler: defaults stay, strict.
    let (selected, strict) = requested_handlers(&["-pyc-zero-mtime"]).unwrap();
    assert_eq!(selected, ["ar", "jar", "javadoc", "pyc"]);
    assert!(strict);
}
}
Binary file added tests/cases/adapters.cpython-311~mtime.pyc
Binary file not shown.
Binary file added tests/cases/adapters.cpython-36~mtime.pyc
Binary file not shown.
1 change: 1 addition & 0 deletions tests/test_handlers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod test_ar;
mod test_javadoc;
mod test_pyc;
mod test_pyc_zero_mtime;

use anyhow::Result;
use std::fs;
Expand Down
Loading
Loading