Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion Lib/multiprocessing/resource_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# this resource tracker process, "killall python" would probably leave unlinked
# resources.

import base64
import os
import signal
import sys
Expand Down Expand Up @@ -248,6 +249,12 @@ def _write(self, msg):
assert nbytes == len(msg), f"{nbytes=} != {len(msg)=}"

def _send(self, cmd, name, rtype):
# Encode shared_memory names as they are created by the user and may contain
# colons or newlines.
if rtype == "shared_memory":
b = name.encode('utf-8', 'surrogateescape')
name = base64.urlsafe_b64encode(b).decode('ascii')

msg = f"{cmd}:{name}:{rtype}\n".encode("ascii")
if len(msg) > 512:
# posix guarantees that writes to a pipe of less than PIPE_BUF
Expand Down Expand Up @@ -285,7 +292,13 @@ def main(fd):
with open(fd, 'rb') as f:
for line in f:
try:
cmd, name, rtype = line.strip().decode('ascii').split(':')
cmd, enc_name, rtype = line.rstrip(b'\n').decode('ascii').split(':', 2)
if rtype == "shared_memory":
name = base64.urlsafe_b64decode(enc_name.encode('ascii')).decode('utf-8', 'surrogateescape')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Decoding is OK with strings.

Suggested change
name = base64.urlsafe_b64decode(enc_name.encode('ascii')).decode('utf-8', 'surrogateescape')
name = base64.urlsafe_b64decode(enc_name).decode('utf-8', 'surrogateescape')

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your comment. I like your suggestion to consider using JSON to encode the message.

Also, in the original implementation there was a check that the message is not longer than 512 bytes, since writes shorter than PIPE_BUF (512 bytes on POSIX) are guaranteed to be atomic. But there was no check on the name length.

Now I check that the name is at most 255 bytes long, which is the value of NAME_MAX on Linux (including the leading slash that POSIX requires in shared memory and semaphore names).

I still encode the name using Base64, because json.dumps(..., ensure_ascii=True) would otherwise expand each non-ASCII byte into a 6-character escape like \uDC80. Using Base64 ensures that a 255-byte name becomes at most 340 bytes long, so the total JSON message always remains well below 512 bytes.

As a result, the previous runtime check for the message length is now replaced by an assert.

What do you think?

else:
# Semaphore names are generated internally, so no encoding is needed.
name = enc_name

cleanup_func = _CLEANUP_FUNCS.get(rtype, None)
if cleanup_func is None:
raise ValueError(
Expand Down
43 changes: 43 additions & 0 deletions Lib/test/_test_multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7335,3 +7335,46 @@ def test_forkpty(self):
res = assert_python_failure("-c", code, PYTHONWARNINGS='error')
self.assertIn(b'DeprecationWarning', res.err)
self.assertIn(b'is multi-threaded, use of forkpty() may lead to deadlocks in the child', res.err)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

This should have been a double line gap. I'll fix it in the backports.

@unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory")
class TestSharedMemoryNames(unittest.TestCase):
def test_that_shared_memory_name_with_colons_has_no_resource_tracker_errors(self):
# Test script that creates and cleans up shared memory with colon in name
test_script = textwrap.dedent("""
import sys
from multiprocessing import shared_memory
import time

# Test various patterns of colons in names
test_names = [
"a:b",
"a:b:c",
"test:name:with:many:colons",
":starts:with:colon",
"ends:with:colon:",
"::double::colons::",
"name\\nwithnewline",
"name-with-trailing-newline\\n",
"\\nname-starts-with-newline",
"colons:and\\nnewlines:mix",
"multi\\nline\\nname",
]

for name in test_names:
try:
shm = shared_memory.SharedMemory(create=True, size=100, name=name)
shm.buf[:5] = b'hello' # Write something to the shared memory
shm.close()
shm.unlink()

except Exception as e:
print(f"Error with name '{name}': {e}", file=sys.stderr)
sys.exit(1)

print("SUCCESS")
""")

rc, out, err = assert_python_ok("-c", test_script)
self.assertIn(b"SUCCESS", out)
self.assertNotIn(b"traceback", err.lower(), err)
self.assertNotIn(b"resource_tracker.py", err, err)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix a failure in multiprocessing resource_tracker when SharedMemory names contain colons.
Patch by Rani Pinchuk.
Loading