Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion fuzz/fuzz_targets/differential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
//! character/block devices, symbolic links, hard links) but whose `size`
//! field is non-zero. tar-rs silently accepts such archives and treats the
//! non-zero size as content bytes, which can lead to stream desynchronisation.
//!
//! - **GNU LongName/LongLink NUL truncation**: tar-core truncates the resolved
//! path/link-target at the first NUL byte, matching GNU tar's C-string
//! convention. tar-rs returns the full content without truncation. This is
//! normalized in `parse_tar_rs` (in testutil) before comparison, not treated
//! as a hard error.

#![no_main]

Expand Down Expand Up @@ -58,7 +64,8 @@ fn dump_headers(data: &[u8]) {
}

/// Returns true if the error is a known behavioral difference where
/// tar-core is intentionally stricter than tar-rs.
/// tar-core is intentionally stricter than tar-rs in ways that produce
/// hard errors (not just output normalization).
///
/// When this returns true, tar-rs may have parsed more entries than
/// tar-core, and that's expected.
Expand All @@ -84,6 +91,11 @@ fn is_allowlisted_divergence(err: &ParseError) -> bool {
/// all-null numeric fields are accepted as 0), so we only require that
/// tar-core parses *at least* as many entries as tar-rs and that those
/// entries match.
///
/// Note: paths and link_targets from tar-rs are pre-normalized by
/// `truncate_at_nul` in `parse_tar_rs` to account for tar-core correctly
/// truncating GNU LongName/LongLink content at the first NUL byte (matching
/// GNU tar's C-string convention) while tar-rs does not.
fn compare_entries(
data: &[u8],
tar_rs_entries: &[OwnedEntry],
Expand Down
81 changes: 77 additions & 4 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1006,14 +1006,20 @@ impl Parser {
let content_end = content_start + size as usize;
let mut data: &'a [u8] = &input[content_start..content_end];

// Strip trailing null for GNU long name/link
// Truncate GNU long name/link content at the first NUL byte.
//
// The GNU tar format specifies that LongName/LongLink content is a
// NUL-terminated C string. Stopping at the first NUL (not just
// stripping a trailing one) is correct: content that is padded with
// zeros to fill a block boundary, or that has an embedded NUL due to
// archive corruption or overlay, should not contribute bytes to the
// path beyond the terminator. This matches what GNU tar, Python
// tarfile, and Go archive/tar all do.
if matches!(
kind,
ExtensionKind::GnuLongName | ExtensionKind::GnuLongLink
) {
if let Some(trimmed) = data.strip_suffix(&[0]) {
data = trimmed;
}
data = crate::truncate_null(data);
self.limits.check_path_len(data.len())?;
}

Expand Down Expand Up @@ -1959,6 +1965,73 @@ mod tests {
// GNU long name tests
// =========================================================================

/// Assemble a GNU long-name (`L`) extension record from `raw_content`.
///
/// The payload is copied exactly as given -- no terminating NUL is appended --
/// and is then zero-padded up to the next multiple of `HEADER_SIZE`. This lets
/// tests place NUL bytes anywhere inside the declared content.
fn make_gnu_long_name_raw(raw_content: &[u8]) -> Vec<u8> {
    let content_len = raw_content.len();
    let padded_len = content_len.next_multiple_of(HEADER_SIZE);

    // One header block followed by the padded payload.
    let mut record = Vec::with_capacity(HEADER_SIZE + padded_len);
    record.extend_from_slice(&make_header(b"././@LongLink", content_len as u64, b'L'));
    record.extend_from_slice(raw_content);
    record.extend(zeroes(padded_len - content_len));
    record
}

/// An embedded NUL inside GNU LongName content terminates the path, exactly
/// as a trailing NUL would: the content is a NUL-terminated string. GNU tar,
/// Python tarfile, and Go archive/tar behave the same way.
#[test]
fn test_parser_gnu_long_name_embedded_nul_truncates() {
    // "safe\x00evil": everything from the first NUL onwards must be dropped.
    let long_name_content: &[u8] = b"safe\x00evil";

    // Long-name record, then the real entry header, then the end-of-archive
    // zero blocks.
    let mut archive = make_gnu_long_name_raw(long_name_content);
    archive.extend_from_slice(&make_header(b"placeholder", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let event = parser.parse(&archive).unwrap();
    match event {
        ParseEvent::Entry { entry, .. } => assert_eq!(
            entry.path.as_ref(),
            b"safe",
            "embedded NUL should truncate LongName path"
        ),
        unexpected => panic!("Expected Entry, got {:?}", unexpected),
    }
}

/// LongName content that is NUL-padded out to 100 bytes and then followed by
/// mode-like ASCII bytes, all within the declared size. This is the shape
/// produced by the overlay mutation strategy. The resolved path must consist
/// only of the bytes preceding the first NUL.
#[test]
fn test_parser_gnu_long_name_nul_padded_header_bytes() {
    // Layout: "safe", 96 NUL bytes (the width of a ustar name field), then
    // "0000644\x00".
    let mut long_name_content: Vec<u8> = Vec::with_capacity(108);
    long_name_content.extend_from_slice(b"safe");
    long_name_content.resize(100, 0u8);
    long_name_content.extend_from_slice(b"0000644\x00");
    assert_eq!(long_name_content.len(), 108);

    // Long-name record, then the real entry header, then the end-of-archive
    // zero blocks.
    let mut archive = make_gnu_long_name_raw(&long_name_content);
    archive.extend_from_slice(&make_header(b"placeholder", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let event = parser.parse(&archive).unwrap();
    match event {
        ParseEvent::Entry { entry, .. } => assert_eq!(
            entry.path.as_ref(),
            b"safe",
            "NUL padding after short name must truncate, not include mode-field bytes"
        ),
        unexpected => panic!("Expected Entry, got {:?}", unexpected),
    }
}

#[test]
fn test_parser_gnu_long_name() {
// Create archive with GNU long name entry followed by actual file
Expand Down
24 changes: 22 additions & 2 deletions testutil/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,20 @@ pub fn parse_tar_core_detailed(data: &[u8], limits: Limits) -> TarCoreParseResul
}
}

/// Truncate an owned byte buffer at the first NUL byte, if any.
///
/// GNU LongName/LongLink content is NUL-terminated (C-string convention).
/// tar-core truncates at the first NUL when resolving these extension headers,
/// matching GNU tar and POSIX filesystem semantics (NUL is not a valid filename
/// character). tar-rs does not perform this truncation, so we normalize its
/// output here before comparison.
///
/// Returns the bytes preceding the first NUL, or the input unchanged when it
/// contains no NUL. The buffer is shortened in place, so no second allocation
/// or copy is made.
fn truncate_at_nul(mut bytes: Vec<u8>) -> Vec<u8> {
    if let Some(pos) = bytes.iter().position(|&b| b == 0) {
        // `Vec::truncate` keeps the existing allocation and only drops the tail.
        bytes.truncate(pos);
    }
    bytes
}

/// Parse a tar archive with the `tar` crate, returning owned entries.
pub fn parse_tar_rs(data: &[u8]) -> Vec<OwnedEntry> {
let mut results = Vec::new();
Expand All @@ -190,7 +204,11 @@ pub fn parse_tar_rs(data: &[u8]) -> Vec<OwnedEntry> {
let header = entry.header().clone();
let entry_type = header.entry_type().as_byte();

let path = entry.path_bytes().into_owned();
// Normalize NUL-termination: tar-rs does not truncate GNU LongName/
// LongLink content at the first NUL byte; tar-core does (matching the
// C-string convention used by GNU tar). Truncate here so we compare
// equivalent representations.
let path = truncate_at_nul(entry.path_bytes().into_owned());
let size = entry.size();

// Require that numeric fields parse successfully. tar-core
Expand Down Expand Up @@ -229,10 +247,12 @@ pub fn parse_tar_rs(data: &[u8]) -> Vec<OwnedEntry> {
}
// entry.link_name_bytes() applies PAX linkpath and GNU long link
// overrides, unlike header.link_name_bytes() which is raw.
// Also truncate at the first NUL to match tar-core's behavior for
// GNU LongLink content (same NUL-termination normalization as path).
let link_target = entry
.link_name_bytes()
.filter(|b| !b.is_empty())
.map(|b| b.to_vec());
.map(|b| truncate_at_nul(b.to_vec()));

// Extract PAX-overridden uname/gname and xattrs from PAX extensions.
// tar-rs does not expose PAX uname/gname through entry-level methods,
Expand Down