From ea42424da93f22d6b8c1d5fd25fe04cd73d18a8d Mon Sep 17 00:00:00 2001 From: Eashwar Ranganathan Date: Fri, 21 Nov 2025 13:54:29 -0500 Subject: [PATCH 1/2] Pass an optional `mtime` param to `HeaderMode::Deterministic` This `mtime` value will be used instead of `DETERMINISTIC_TIMESTAMP` when provided. This is useful e.g. to pass in `$SOURCE_DATE_EPOCH` to support reproducible builds. --- src/header.rs | 16 ++++++++++------ tests/all.rs | 2 +- tests/header/mod.rs | 5 ++++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/header.rs b/src/header.rs index fd4a1deb..a4e26ff3 100644 --- a/src/header.rs +++ b/src/header.rs @@ -47,7 +47,11 @@ pub enum HeaderMode { /// Only metadata that is directly relevant to the identity of a file will /// be included. In particular, ownership and mod/access times are excluded. - Deterministic, + Deterministic { + ///If the `mtime` param is `Some(..)`, the value will be used instead of + /// [DETERMINISTIC_TIMESTAMP] + mtime: Option, + }, } /// Representation of the header of an entry in an archive @@ -777,12 +781,12 @@ impl Header { self.set_gid(meta.gid() as u64); self.set_mode(meta.mode()); } - HeaderMode::Deterministic => { - // We could in theory set the mtime to zero here, but not all tools seem to behave + HeaderMode::Deterministic { mtime } => { + // We could in theory default the mtime to zero here, but not all tools seem to behave // well when ingesting files with a 0 timestamp. // For example, rust-lang/cargo#9512 shows that lldb doesn't ingest files with a // zero timestamp correctly. 
- self.set_mtime(DETERMINISTIC_TIMESTAMP); + self.set_mtime(mtime.unwrap_or(DETERMINISTIC_TIMESTAMP)); self.set_uid(0); self.set_gid(0); @@ -847,10 +851,10 @@ impl Header { }; self.set_mode(fs_mode); } - HeaderMode::Deterministic => { + HeaderMode::Deterministic { mtime } => { self.set_uid(0); self.set_gid(0); - self.set_mtime(DETERMINISTIC_TIMESTAMP); // see above in unix + self.set_mtime(mtime.unwrap_or(DETERMINISTIC_TIMESTAMP)); // see above in unix let fs_mode = if meta.is_dir() { 0o755 } else { 0o644 }; self.set_mode(fs_mode); } diff --git a/tests/all.rs b/tests/all.rs index 987179ce..a2aa63bb 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -804,7 +804,7 @@ fn zero_file_times() { let td = TempBuilder::new().prefix("tar-rs").tempdir().unwrap(); let mut ar = Builder::new(Vec::new()); - ar.mode(HeaderMode::Deterministic); + ar.mode(HeaderMode::Deterministic { mtime: None }); let path = td.path().join("tmpfile"); File::create(&path).unwrap(); ar.append_path_with_name(&path, "a").unwrap(); diff --git a/tests/header/mod.rs b/tests/header/mod.rs index dbbed991..d9b8cd79 100644 --- a/tests/header/mod.rs +++ b/tests/header/mod.rs @@ -188,7 +188,10 @@ fn set_metadata_deterministic() { perms.set_readonly(readonly); fs::set_permissions(path, perms).unwrap(); let mut h = Header::new_ustar(); - h.set_metadata_in_mode(&path.metadata().unwrap(), HeaderMode::Deterministic); + h.set_metadata_in_mode( + &path.metadata().unwrap(), + HeaderMode::Deterministic { mtime: None }, + ); Ok(h) } From 8b6917193aa0d07de5462fccb1ce7ba5a802869e Mon Sep 17 00:00:00 2001 From: Eashwar Ranganathan Date: Sun, 23 Nov 2025 22:26:51 -0500 Subject: [PATCH 2/2] Add a new HeaderMode variant with specific overrides --- src/header.rs | 228 ++++++++++++++++++++++++++++++++------------ tests/all.rs | 2 +- tests/header/mod.rs | 5 +- 3 files changed, 169 insertions(+), 66 deletions(-) diff --git a/src/header.rs b/src/header.rs index a4e26ff3..7eadeebe 100644 --- a/src/header.rs +++ b/src/header.rs @@ -4,6 
+4,7 @@ use std::os::unix::prelude::*; use std::os::windows::prelude::*; use std::borrow::Cow; +use std::cmp; use std::fmt; use std::fs; use std::io; @@ -47,11 +48,84 @@ pub enum HeaderMode { /// Only metadata that is directly relevant to the identity of a file will /// be included. In particular, ownership and mod/access times are excluded. - Deterministic { - ///If the `mtime` param is `Some(..)`, the value will be used instead of - /// [DETERMINISTIC_TIMESTAMP] - mtime: Option, - }, + Deterministic, + + /// Preserves all the original metadata except for the provided overrides. + /// The default is effectively the same as Complete. + Override(HeaderModeOverrides), +} + +#[cfg(all(any(unix, windows), not(target_arch = "wasm32")))] +impl HeaderMode { + fn as_override(&self) -> HeaderModeOverrides { + match self { + Self::Complete => HeaderModeOverrides::default(), + Self::Deterministic => HeaderModeOverrides::default() + .with_uid(0) + .with_gid(0) + .override_mtime(DETERMINISTIC_TIMESTAMP) + .with_deterministic_mode(), + Self::Override(overrides) => *overrides, + } + } +} + +/// Declares the specific attributes of the header to override when filling a Header +/// in [HeaderMode::Override] +#[non_exhaustive] +#[derive(Default, Clone, Copy, PartialEq, Eq, Debug)] +pub struct HeaderModeOverrides { + uid: Option, + gid: Option, + mtime: Option, + mode: Option, +} + +impl HeaderModeOverrides { + /// Override the Header `uid` to the given value + pub fn with_uid(mut self, uid: u64) -> Self { + self.uid = Some(uid); + self + } + + /// Override the Header `gid` to the given value + pub fn with_gid(mut self, gid: u64) -> Self { + self.gid = Some(gid); + self + } + + /// Configures the `mtime` to be set to the specific value + pub fn override_mtime(mut self, mtime: u64) -> Self { + self.mtime = Some(MtimeOverride::Override(mtime)); + self + } + + /// Configures the `mtime` to be set to the minimum of either + /// the actual mtime or the provided value + pub fn 
clamp_mtime(mut self, mtime: u64) -> Self { + self.mtime = Some(MtimeOverride::Clamp(mtime)); + self + } + + /// Configures the file permissions to be set in the same manner + /// as [HeaderMode::Deterministic] + pub fn with_deterministic_mode(mut self) -> Self { + self.mode = Some(ModeOverride::Deterministic); + self + } +} + +#[non_exhaustive] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum MtimeOverride { + Override(u64), + Clamp(u64), +} + +#[non_exhaustive] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum ModeOverride { + Deterministic, } /// Representation of the header of an entry in an archive @@ -774,32 +848,7 @@ impl Header { #[cfg(all(unix, not(target_arch = "wasm32")))] fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) { - match mode { - HeaderMode::Complete => { - self.set_mtime(meta.mtime() as u64); - self.set_uid(meta.uid() as u64); - self.set_gid(meta.gid() as u64); - self.set_mode(meta.mode()); - } - HeaderMode::Deterministic { mtime } => { - // We could in theory default the mtime to zero here, but not all tools seem to behave - // well when ingesting files with a 0 timestamp. - // For example, rust-lang/cargo#9512 shows that lldb doesn't ingest files with a - // zero timestamp correctly. - self.set_mtime(mtime.unwrap_or(DETERMINISTIC_TIMESTAMP)); - - self.set_uid(0); - self.set_gid(0); - - // Use a default umask value, but propagate the (user) execute bit. - let fs_mode = if meta.is_dir() || (0o100 & meta.mode() == 0o100) { - 0o755 - } else { - 0o644 - }; - self.set_mode(fs_mode); - } - } + self.fill_platform_from_overrides(meta, mode.as_override()); // Note that if we are a GNU header we *could* set atime/ctime, except // the `tar` utility doesn't do that by default and it causes problems @@ -829,36 +878,7 @@ impl Header { #[cfg(windows)] fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) { // There's no concept of a file mode on Windows, so do a best approximation here. 
- match mode { - HeaderMode::Complete => { - self.set_uid(0); - self.set_gid(0); - // The dates listed in tarballs are always seconds relative to - // January 1, 1970. On Windows, however, the timestamps are returned as - // dates relative to January 1, 1601 (in 100ns intervals), so we need to - // add in some offset for those dates. - let mtime = (meta.last_write_time() / (1_000_000_000 / 100)) - 11644473600; - self.set_mtime(mtime); - let fs_mode = { - const FILE_ATTRIBUTE_READONLY: u32 = 0x00000001; - let readonly = meta.file_attributes() & FILE_ATTRIBUTE_READONLY; - match (meta.is_dir(), readonly != 0) { - (true, false) => 0o755, - (true, true) => 0o555, - (false, false) => 0o644, - (false, true) => 0o444, - } - }; - self.set_mode(fs_mode); - } - HeaderMode::Deterministic { mtime } => { - self.set_uid(0); - self.set_gid(0); - self.set_mtime(mtime.unwrap_or(DETERMINISTIC_TIMESTAMP)); // see above in unix - let fs_mode = if meta.is_dir() { 0o755 } else { 0o644 }; - self.set_mode(fs_mode); - } - } + self.fill_platform_from_overrides(meta, mode.as_override()); let ft = meta.file_type(); self.set_entry_type(if ft.is_dir() { @@ -872,6 +892,52 @@ impl Header { }); } + #[cfg(all(unix, not(target_arch = "wasm32")))] + fn fill_platform_from_overrides( + &mut self, + meta: &fs::Metadata, + overrides: HeaderModeOverrides, + ) { + let mtime = match overrides.mtime { + Some(MtimeOverride::Override(mtime)) => mtime, + Some(MtimeOverride::Clamp(mtime)) => cmp::min(meta.mtime() as u64, mtime), + None => meta.mtime() as u64, + }; + self.set_mtime(mtime); + + self.set_uid(overrides.uid.unwrap_or_else(|| meta.uid() as u64)); + self.set_gid(overrides.gid.unwrap_or_else(|| meta.gid() as u64)); + + let mode = match overrides.mode { + Some(ModeOverride::Deterministic) => deterministic_mode(meta), + None => meta.mode(), + }; + self.set_mode(mode); + } + + #[cfg(windows)] + fn fill_platform_from_overrides( + &mut self, + meta: &fs::Metadata, + overrides: HeaderModeOverrides, + ) { + 
+ self.set_uid(overrides.uid.unwrap_or(0)); /* uid/gid are not read from `meta` on Windows: honor the caller's override, else 0 */ + self.set_gid(overrides.gid.unwrap_or(0)); + + let mtime = match overrides.mtime { + Some(MtimeOverride::Override(mtime)) => mtime, + Some(MtimeOverride::Clamp(mtime)) => cmp::min(extract_mtime_windows(meta), mtime), + None => extract_mtime_windows(meta), + }; + self.set_mtime(mtime); + + let mode = match overrides.mode { + Some(ModeOverride::Deterministic) => deterministic_mode(meta), + None => extract_mode_windows(meta), + }; + self.set_mode(mode); + } + fn debug_fields(&self, b: &mut fmt::DebugStruct) { if let Ok(entry_size) = self.entry_size() { b.field("entry_size", &entry_size); @@ -1732,3 +1798,43 @@ pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { fn invalid_utf8(_: T) -> io::Error { io::Error::new(io::ErrorKind::InvalidData, "Invalid utf-8") } + +#[cfg(windows)] +fn extract_mtime_windows(meta: &fs::Metadata) -> u64 { + // The dates listed in tarballs are always seconds relative to + // January 1, 1970. On Windows, however, the timestamps are returned as + // dates relative to January 1, 1601 (in 100ns intervals), so we need to + // add in some offset for those dates. + (meta.last_write_time() / (1_000_000_000 / 100)) - 11644473600 +} + +#[cfg(windows)] +fn extract_mode_windows(meta: &fs::Metadata) -> u32 { + const FILE_ATTRIBUTE_READONLY: u32 = 0x00000001; + let readonly = meta.file_attributes() & FILE_ATTRIBUTE_READONLY; + match (meta.is_dir(), readonly != 0) { + (true, false) => 0o755, + (true, true) => 0o555, + (false, false) => 0o644, + (false, true) => 0o444, + } +} + +#[cfg(all(unix, not(target_arch = "wasm32")))] +fn deterministic_mode(meta: &fs::Metadata) -> u32 { + // Use a default umask value, but propagate the (user) execute bit. 
+ if meta.is_dir() || (0o100 & meta.mode() == 0o100) { + 0o755 + } else { + 0o644 + } +} + +#[cfg(windows)] +fn deterministic_mode(meta: &fs::Metadata) -> u32 { + if meta.is_dir() { + 0o755 + } else { + 0o644 + } +} diff --git a/tests/all.rs b/tests/all.rs index a2aa63bb..987179ce 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -804,7 +804,7 @@ fn zero_file_times() { let td = TempBuilder::new().prefix("tar-rs").tempdir().unwrap(); let mut ar = Builder::new(Vec::new()); - ar.mode(HeaderMode::Deterministic { mtime: None }); + ar.mode(HeaderMode::Deterministic); let path = td.path().join("tmpfile"); File::create(&path).unwrap(); ar.append_path_with_name(&path, "a").unwrap(); diff --git a/tests/header/mod.rs b/tests/header/mod.rs index d9b8cd79..dbbed991 100644 --- a/tests/header/mod.rs +++ b/tests/header/mod.rs @@ -188,10 +188,7 @@ fn set_metadata_deterministic() { perms.set_readonly(readonly); fs::set_permissions(path, perms).unwrap(); let mut h = Header::new_ustar(); - h.set_metadata_in_mode( - &path.metadata().unwrap(), - HeaderMode::Deterministic { mtime: None }, - ); + h.set_metadata_in_mode(&path.metadata().unwrap(), HeaderMode::Deterministic); Ok(h) }