Skip to content

Commit 756e1ee

Browse files
Make pymupdf embed-extract safe by default.
Fixes #4767.
1 parent 4c76a5c commit 756e1ee

File tree

4 files changed

+100
-1
lines changed

4 files changed

+100
-1
lines changed

changes.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ Change Log
77
Other:
88

99
* Retrospectively mark `4756 <https://github.com/pymupdf/PyMuPDF/issues/4756>`_ as fixed in 1.26.6.
10+
* Improved safety of `pymupdf embed-extract`. This now refuses to write to
11+
an existing file or outside current directory, unless `-output` or new flag
12+
`-unsafe` is specified.
1013

1114

1215
**Changes in version 1.26.6** (2025-11-05)

docs/module.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ Extraction
299299
Extract an embedded file like this::
300300

301301
pymupdf embed-extract -h
302-
usage: pymupdf embed-extract [-h] -name NAME [-password PASSWORD] [-output OUTPUT]
302+
usage: pymupdf embed-extract [-h] -name NAME [-password PASSWORD] [-unsafe] [-output OUTPUT]
303303
input
304304

305305
---------------------- extract embedded file to disk ----------------------
@@ -311,6 +311,7 @@ Extract an embedded file like this::
311311
-h, --help show this help message and exit
312312
-name NAME name of entry
313313
-password PASSWORD password
314+
-unsafe allow write to stored name even if an existing file or outside current directory
314315
-output OUTPUT output filename, default is stored name
315316

316317
For details consult :meth:`Document.embfile_get`. Example (refer to previous section)::

src/__main__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,12 @@ def embedded_get(args):
350350
except (ValueError, pymupdf.mupdf.FzErrorBase) as e:
351351
sys.exit(f'no such embedded file {args.name!r}: {e}')
352352
filename = args.output if args.output else d["filename"]
353+
if not args.unsafe and not args.output:
354+
if os.path.exists(filename):
355+
sys.exit(f'refusing to overwrite existing file with stored name: {filename}')
356+
filename_abs = os.path.abspath(filename)
357+
if not filename_abs.startswith(os.getcwd() + os.sep):
358+
sys.exit(f'refusing to write stored name outside current directory: {filename}')
353359
with open(filename, "wb") as output:
354360
output.write(stream)
355361
pymupdf.message("saved entry '%s' as '%s'" % (args.name, filename))
@@ -1024,6 +1030,9 @@ def main():
10241030
ps_embed_extract.add_argument("input", type=str, help="PDF filename")
10251031
ps_embed_extract.add_argument("-name", required=True, help="name of entry")
10261032
ps_embed_extract.add_argument("-password", help="password")
1033+
ps_embed_extract.add_argument("-unsafe", default=False, action="store_true",
1034+
help="allow write to stored name even if an existing file or outside current directory"
1035+
)
10271036
ps_embed_extract.add_argument(
10281037
"-output", help="output filename, default is stored name"
10291038
)

tests/test_4767.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import shutil
2+
import os
3+
import pymupdf
4+
import subprocess
5+
import sys
6+
7+
8+
def test_4767():
9+
'''
10+
Check handling of unsafe paths in `pymupdf embed-extract`.
11+
'''
12+
with pymupdf.open() as document:
13+
document.new_page()
14+
document.embfile_add(
15+
'evil_entry',
16+
b'poc:traversal test\n',
17+
filename="../../test.txt",
18+
ufilename="../../test.txt",
19+
desc="poc",
20+
)
21+
document.embfile_add(
22+
'evil_entry2',
23+
b'poc:traversal test\n',
24+
filename="test2.txt",
25+
ufilename="test2.txt",
26+
desc="poc",
27+
)
28+
path = os.path.abspath(f'{__file__}/../../tests/test_4767.pdf')
29+
document.save(path)
30+
testdir = os.path.abspath(f'{__file__}/../../tests/test_4767_dir').replace('\\', '/')
31+
shutil.rmtree(testdir, ignore_errors=1)
32+
os.makedirs(f'{testdir}/one/two', exist_ok=1)
33+
34+
def run(command, *, check=0, capture=1):
35+
print(f'Running: {command}')
36+
cp = subprocess.run(
37+
command, shell=1,
38+
text=1,
39+
check=check,
40+
stdout=subprocess.PIPE if capture else None,
41+
stderr=subprocess.STDOUT if capture else None,
42+
)
43+
print(cp.stdout)
44+
return cp
45+
46+
def get_paths():
47+
paths = list()
48+
for dirpath, dirnames, filenames in os.walk(testdir):
49+
for filename in filenames:
50+
path = f'{dirpath}/{filename}'.replace('\\', '/')
51+
paths.append(path)
52+
return paths
53+
54+
cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry')
55+
print(cp.stdout)
56+
assert cp.returncode
57+
assert cp.stdout == 'refusing to write stored name outside current directory: ../../test.txt\n'
58+
assert not get_paths()
59+
60+
cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry -unsafe')
61+
assert cp.returncode == 0
62+
assert cp.stdout == "saved entry 'evil_entry' as '../../test.txt'\n"
63+
paths = get_paths()
64+
print(f'{paths=}')
65+
assert paths == [f'{testdir}/test.txt']
66+
67+
cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry2')
68+
assert not cp.returncode
69+
assert cp.stdout == "saved entry 'evil_entry2' as 'test2.txt'\n"
70+
paths = get_paths()
71+
print(f'{paths=}')
72+
assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']
73+
74+
cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry2')
75+
assert cp.returncode
76+
assert cp.stdout == "refusing to overwrite existing file with stored name: test2.txt\n"
77+
paths = get_paths()
78+
print(f'{paths=}')
79+
assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']
80+
81+
cp = run(f'cd {testdir}/one/two && {sys.executable} -m pymupdf embed-extract {path} -name evil_entry2 -unsafe')
82+
assert not cp.returncode
83+
assert cp.stdout == "saved entry 'evil_entry2' as 'test2.txt'\n"
84+
paths = get_paths()
85+
print(f'{paths=}')
86+
assert paths == [f'{testdir}/test.txt', f'{testdir}/one/two/test2.txt']

0 commit comments

Comments
 (0)