From 25166b1f36daf53b35eaf726913c82ab32d51e7a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 23 May 2024 11:29:46 +0300 Subject: [PATCH 1/6] gh-119451: Fix OOM vulnerability in http.client Reading the whole body of the HTTP response could cause OOM if the Content-Length value is too large even if the server does not send a large amount of data. Now the HTTP client reads large data by chunks, therefore the amount of consumed memory is proportional to the amount of sent data. --- Lib/http/client.py | 16 +++++++-- Lib/test/test_httplib.py | 33 +++++++++++++++++++ ...-05-23-11-47-48.gh-issue-119451.qkJe9-.rst | 3 ++ 3 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index a353716a8506e6..c6f9e5db86039a 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -111,6 +111,11 @@ _MAXLINE = 65536 _MAXHEADERS = 100 +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_SAFE_BUF_SIZE = 1 << 20 + + # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) # # VCHAR = %x21-7E @@ -637,9 +642,14 @@ def _safe_read(self, amt): reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - data = self.fp.read(amt) - if len(data) < amt: - raise IncompleteRead(data, amt-len(data)) + cursize = min(amt, _SAFE_BUF_SIZE) + data = self.fp.read(cursize) + while len(data) < amt: + if len(data) < cursize: + raise IncompleteRead(data, amt-len(data)) + delta = min(cursize, amt - cursize) + data += self.fp.read(cursize) + cursize += delta return data def _safe_readinto(self, b): diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 9d853d254db7c6..d405f38ab01166 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1436,6 +1436,39 @@ def run_server(): thread.join() self.assertEqual(result, b"proxied data\n") + def test_large_content_length(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + while True: + [conn, address] = serv.accept() + with conn: + conn.recv(1024) + if not size: + break + body = b"HTTP/1.1 200 Ok\r\nContent-Length: %d\r\n\r\nText" % size + conn.sendall(body) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(18, 65): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertRaises(client.IncompleteRead, response.read) + conn.close() + finally: + conn.close() + size = 0 + conn.request("GET", "/") + conn.close() + thread.join() + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst new file mode 100644 index 00000000000000..3fc6f6f59e86d7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst @@ -0,0 +1,3 @@ +Fix OOM vulnerability in :mod:`http.client`, when reading the whole body of +a specially prepared small HTTP response could cause consuming an arbitrary +amount of memory. From f097fada1be22d44ca59cf4bb0f74bcb5f331c66 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 23 May 2024 14:25:18 +0300 Subject: [PATCH 2/6] Add also test for non-truncated large body. --- Lib/test/test_httplib.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index d405f38ab01166..f26831ffaf5f5b 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1440,6 +1440,36 @@ def test_large_content_length(self): serv = socket.create_server((HOST, 0)) self.addCleanup(serv.close) + def run_server(): + [conn, address] = serv.accept() + with conn: + while conn.recv(1024): + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" % size) + conn.sendall(b'A' * (size//3)) + conn.sendall(b'B' * (size - size//3)) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(15, 27): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertEqual(len(response.read()), size) + finally: + conn.close() + thread.join(1.0) + + def test_large_content_length_truncated(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + def run_server(): while True: [conn, address] = serv.accept() @@ -1447,8 +1477,11 @@ def run_server(): conn.recv(1024) if not size: break - body = b"HTTP/1.1 200 Ok\r\nContent-Length: %d\r\n\r\nText" % size - conn.sendall(body) + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" + b"Text" % size) thread = threading.Thread(target=run_server) thread.start() @@ -1467,7 +1500,7 @@ def run_server(): size = 0 conn.request("GET", "/") conn.close() - thread.join() + thread.join(1.0) def test_putrequest_override_domain_validation(self): """ From 83cc548d615bf68ad365703e014f2827f0b28627 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 18 Nov 2025 15:44:26 +0200 Subject: [PATCH 3/6] Update 2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst --- .../2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst index 3fc6f6f59e86d7..dc26036b4901db 100644 --- a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst +++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst @@ -1,3 +1,6 @@ -Fix OOM vulnerability in :mod:`http.client`, when reading the whole body of -a specially prepared small HTTP response could cause consuming an arbitrary -amount of memory. +Fix a potential denial of service in the :mod:`http.client` module. +When connecting to a malicious server, it could cause +an arbitrary amount of memory to be allocated. +In best case this could lead to a :exc:`MemoryError` or other process crash. +In worst case it could lead to swapping which would dramatically slow down the +whole system and make it less responcible. From 4559a5ee0052566871fa15bf6d2a0db4d17bbd74 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Dec 2025 14:48:11 +0200 Subject: [PATCH 4/6] Address review comments. --- Lib/http/client.py | 6 ++++-- .../2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst | 7 +++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/http/client.py b/Lib/http/client.py index 4027454df769c8..00f567a9da34bf 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -113,7 +113,7 @@ # Data larger than this will be read in chunks, to prevent extreme # overallocation. -_SAFE_BUF_SIZE = 1 << 20 +_MIN_READ_BUF_SIZE = 1 << 20 # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) @@ -647,11 +647,13 @@ def _safe_read(self, amt): reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - cursize = min(amt, _SAFE_BUF_SIZE) + cursize = min(amt, _MIN_READ_BUF_SIZE) data = self.fp.read(cursize) while len(data) < amt: if len(data) < cursize: raise IncompleteRead(data, amt-len(data)) + # This is a geometric increase in read size (never more than + # doubling out the current length of data per loop iteration). delta = min(cursize, amt - cursize) data += self.fp.read(cursize) cursize += delta diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst index dc26036b4901db..6d6f25cd2f8bf7 100644 --- a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst +++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst @@ -1,6 +1,5 @@ -Fix a potential denial of service in the :mod:`http.client` module. +Fix a potential memory denial of service in the :mod:`http.client` module. When connecting to a malicious server, it could cause an arbitrary amount of memory to be allocated. -In best case this could lead to a :exc:`MemoryError` or other process crash. -In worst case it could lead to swapping which would dramatically slow down the -whole system and make it less responcible. +This could have led to symptoms including a :exc:`MemoryError`, swapping, out +of memory (OOM) killed processes or containers, or even system crashes. From 1ef9b5af980333200498f39328b7d6ff0d8d2ade Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Dec 2025 15:30:07 +0200 Subject: [PATCH 5/6] Fix error. --- Lib/http/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/http/client.py b/Lib/http/client.py index 00f567a9da34bf..626aa35a85ac1c 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -655,7 +655,7 @@ def _safe_read(self, amt): # This is a geometric increase in read size (never more than # doubling out the current length of data per loop iteration). delta = min(cursize, amt - cursize) - data += self.fp.read(cursize) + data += self.fp.read(delta) cursize += delta return data From 5aaf8a57a8fda897384f7e82d30fcbc3c7b20765 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Dec 2025 15:58:49 +0200 Subject: [PATCH 6/6] Optimization. --- Lib/http/client.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Lib/http/client.py b/Lib/http/client.py index 626aa35a85ac1c..73c3256734a64f 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -649,15 +649,23 @@ def _safe_read(self, amt): """ cursize = min(amt, _MIN_READ_BUF_SIZE) data = self.fp.read(cursize) - while len(data) < amt: - if len(data) < cursize: - raise IncompleteRead(data, amt-len(data)) + if len(data) >= amt: + return data + if len(data) < cursize: + raise IncompleteRead(data, amt - len(data)) + + data = io.BytesIO(data) + data.seek(0, 2) + while True: # This is a geometric increase in read size (never more than # doubling out the current length of data per loop iteration). delta = min(cursize, amt - cursize) - data += self.fp.read(delta) + data.write(self.fp.read(delta)) + if data.tell() >= amt: + return data.getvalue() cursize += delta - return data + if data.tell() < cursize: + raise IncompleteRead(data.getvalue(), amt - data.tell()) def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer."""