Skip to content

Commit 575c024

Browse files
committed
tarfile: validate the link target that is actually written
The data filter rewrote linknames with normpath() but ran the containment check against the un-normalised value, and computed a symlink's directory before stripping trailing slashes. Both let a crafted archive create links pointing outside the destination. Also reject link members that resolve to the destination directory itself, which could otherwise replace it with a symlink and redirect all subsequent members.
1 parent 72f29dc commit 575c024

3 files changed

Lines changed: 99 additions & 9 deletions

File tree

Lib/tarfile.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -830,16 +830,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
830830
if member.islnk() or member.issym():
831831
if os.path.isabs(member.linkname):
832832
raise AbsoluteLinkError(member)
833+
# A link member that resolves to the destination directory itself
834+
# would replace it with a (sym)link, redirecting the destination
835+
# for all subsequent members.
836+
if target_path == dest_path:
837+
raise OutsideDestinationError(member, target_path)
833838
normalized = os.path.normpath(member.linkname)
834839
if normalized != member.linkname:
835840
new_attrs['linkname'] = normalized
836841
if member.issym():
837-
target_path = os.path.join(dest_path,
838-
os.path.dirname(name),
839-
member.linkname)
842+
# The symlink is created at `name` with trailing separators
843+
# stripped, so its target is relative to the directory
844+
# containing that path.
845+
link_dir = os.path.dirname(name.rstrip('/' + os.sep))
846+
target_path = os.path.join(dest_path, link_dir, normalized)
840847
else:
841-
target_path = os.path.join(dest_path,
842-
member.linkname)
848+
target_path = os.path.join(dest_path, normalized)
843849
target_path = os.path.realpath(target_path,
844850
strict=os.path.ALLOW_MISSING)
845851
if os.path.commonpath([target_path, dest_path]) != dest_path:

Lib/test/test_tarfile.py

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3911,10 +3911,19 @@ def test_parent_symlink(self):
39113911
+ "which is outside the destination")
39123912

39133913
with self.check_context(arc.open(), 'data'):
3914-
self.expect_exception(
3915-
tarfile.LinkOutsideDestinationError,
3916-
"""'parent' would link to ['"].*outerdir['"], """
3917-
+ "which is outside the destination")
3914+
if self.dotdot_resolves_early:
3915+
# 'current/../..' normalises to '..', which is rejected.
3916+
self.expect_exception(
3917+
tarfile.LinkOutsideDestinationError,
3918+
"""'parent' would link to ['"].*outerdir['"], """
3919+
+ "which is outside the destination")
3920+
else:
3921+
# 'current/..' normalises to '.'; the rewritten link is
3922+
# created and 'parent/evil' lands harmlessly inside the
3923+
# destination.
3924+
self.expect_file('current', symlink_to='.')
3925+
self.expect_file('parent', symlink_to='.')
3926+
self.expect_file('evil')
39183927

39193928
else:
39203929
# No symlink support. The symlinks are ignored.
@@ -4174,6 +4183,76 @@ def test_sly_relative2(self):
41744183
+ """['"].*moo['"], which is outside the """
41754184
+ "destination")
41764185

4186+
@symlink_test
4187+
@os_helper.skip_unless_symlink
4188+
def test_normpath_realpath_mismatch(self):
4189+
# The link-target check must validate the value that will actually
4190+
# be written to disk (the normalised linkname), not the original.
4191+
# Here 'a' is a symlink to a deep nonexistent path, so realpath()
4192+
# of 'a/../../...' stays inside the destination while normpath()
4193+
# collapses 'a/..' lexically and escapes.
4194+
depth = len(self.destdir.parts) + 5
4195+
deep = '/'.join(f'p{i}' for i in range(depth))
4196+
sneaky = 'a/' + '../' * depth + 'flag'
4197+
for kind in 'symlink_to', 'hardlink_to':
4198+
with self.subTest(kind):
4199+
with ArchiveMaker() as arc:
4200+
arc.add('a', symlink_to=deep)
4201+
arc.add('escape', **{kind: sneaky})
4202+
with self.check_context(arc.open(), 'data'):
4203+
self.expect_exception(
4204+
tarfile.LinkOutsideDestinationError)
4205+
4206+
@symlink_test
4207+
@os_helper.skip_unless_symlink
4208+
def test_symlink_trailing_slash(self):
4209+
# A trailing slash on a symlink member's name must not cause the
4210+
# link target to be resolved relative to the wrong directory.
4211+
with ArchiveMaker() as arc:
4212+
t = tarfile.TarInfo('x/')
4213+
t.type = tarfile.SYMTYPE
4214+
t.linkname = '..'
4215+
arc.tar_w.addfile(t)
4216+
arc.add('x/escaped', content='hi')
4217+
4218+
with self.check_context(arc.open(), 'data'):
4219+
self.expect_exception(tarfile.LinkOutsideDestinationError)
4220+
4221+
@symlink_test
4222+
@os_helper.skip_unless_symlink
4223+
def test_link_at_destination(self):
4224+
# A link member whose name resolves to the destination directory
4225+
# itself must be rejected: otherwise the destination is replaced
4226+
# by a symlink and later members can be redirected through it.
4227+
for name in '', '.', './':
4228+
with ArchiveMaker() as arc:
4229+
t = tarfile.TarInfo(name)
4230+
t.type = tarfile.SYMTYPE
4231+
t.linkname = '.'
4232+
arc.tar_w.addfile(t)
4233+
4234+
with self.check_context(arc.open(), 'data'):
4235+
self.expect_exception(tarfile.OutsideDestinationError)
4236+
4237+
@symlink_test
4238+
@os_helper.skip_unless_symlink
4239+
def test_empty_name_symlink_chain(self):
4240+
# Regression test for a chain of empty-named symlinks that
4241+
# incrementally redirects the destination outwards.
4242+
with ArchiveMaker() as arc:
4243+
for name, target in [('', ''), ('a/', '..'),
4244+
('', 'dummy'), ('', 'a'),
4245+
('b/', '..'),
4246+
('', 'dummy'), ('', 'a/b')]:
4247+
t = tarfile.TarInfo(name)
4248+
t.type = tarfile.SYMTYPE
4249+
t.linkname = target
4250+
arc.tar_w.addfile(t)
4251+
arc.add('escaped', content='hi')
4252+
4253+
with self.check_context(arc.open(), 'data'):
4254+
self.expect_exception(tarfile.FilterError)
4255+
41774256
@symlink_test
41784257
def test_deep_symlink(self):
41794258
# Test that symlinks and hardlinks inside a directory
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:func:`tarfile.data_filter` now validates link targets using the same
2+
normalised value that is written to disk, strips trailing separators from
3+
the member name when resolving a symlink's directory, and rejects link
4+
members that would replace the destination directory itself. This closes
5+
several path-traversal bypasses of the ``data`` extraction filter.

0 commit comments

Comments
 (0)