From f6e2dab1bdfc199143189fb84494c25e151dd6be Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 27 Nov 2025 12:45:14 +0200 Subject: [PATCH 1/2] gh-141713: Prevent possible memory denial of service when reading Make read() and similar methods that read a given number of bytes use a progressively growing buffer instead of allocating the maximum size buffer at once. This helps prevent certain kinds of memory denial of service issues when the number of bytes to read within a specific protocol or format is received from an untrusted source. --- Lib/_pyio.py | 26 +++++- Lib/test/support/__init__.py | 10 +++ Lib/test/test_io/test_bufferedio.py | 12 +++ Lib/test/test_io/test_fileio.py | 10 +++ Lib/test/test_io/test_general.py | 5 ++ Lib/test/test_os/test_os.py | 18 +--- Lib/test/test_os/test_posix.py | 14 ++++ ...-11-27-12-45-05.gh-issue-141713.54_uaA.rst | 8 ++ Modules/_io/_iomodule.h | 4 + Modules/_io/bufferedio.c | 57 +++++++++---- Modules/_io/clinic/iobase.c.h | 10 +-- Modules/_io/fileio.c | 42 +++++++--- Modules/_io/iobase.c | 81 +++++++++++++----- Modules/clinic/posixmodule.c.h | 12 +-- Modules/posixmodule.c | 84 +++++++++++++------ 15 files changed, 291 insertions(+), 102 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-11-27-12-45-05.gh-issue-141713.54_uaA.rst diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 69a088df8fc987..331ead2818eca1 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -27,6 +27,10 @@ # when the device block size is available. DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_MIN_READ_BUF_SIZE = 1 << 20 + # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want # to inherit the C implementations. 
@@ -611,15 +615,29 @@ def read(self, size=-1): """ if size is None: size = -1 + size = size.__index__() if size < 0: return self.readall() - b = bytearray(size.__index__()) + b = bytearray(min(size, _MIN_READ_BUF_SIZE)) n = self.readinto(b) if n is None: return None - if n < 0 or n > len(b): - raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") - del b[n:] + written = 0 + while True: + if n != len(b) - written: + if n < 0 or n > len(b) - written: + raise ValueError(f"readinto returned {n} outside buffer size {len(b) - written}") + written += n + break + written += n + if written >= size: + break + b.resize(min(2*len(b), size)) + with memoryview(b) as m: + n = self.readinto(m[written:]) + if n is None: + break + del b[written:] return b.take_bytes() def readall(self): diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 0a50912ff0ea8c..71fe6ff825f7ac 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -3184,3 +3184,13 @@ def linked_to_musl(): return _linked_to_musl _linked_to_musl = tuple(map(int, version.split('.'))) return _linked_to_musl + + +def itersize(start, stop): + # Produce geometrical increasing sequence from start to stop + # (inclusively) for tests. 
+ size = start + while size < stop: + yield size + size <<= 1 + yield stop diff --git a/Lib/test/test_io/test_bufferedio.py b/Lib/test/test_io/test_bufferedio.py index 3278665bdc9dd3..52f18bda0c36bd 100644 --- a/Lib/test/test_io/test_bufferedio.py +++ b/Lib/test/test_io/test_bufferedio.py @@ -433,6 +433,18 @@ def test_read_all(self): self.assertEqual(b"abcdefg", bufio.read()) + def test_large_read_from_small_file(self): + for size in support.itersize(1 << 20, sys.maxsize): + rawio = self.MockRawIO((b'abc',)) + bufio = self.tp(rawio) + self.assertEqual(bufio.read(size), b'abc') + + def test_large_read1_from_small_file(self): + for size in support.itersize(1 << 20, sys.maxsize): + rawio = self.MockRawIO((b'abc',)) + bufio = self.tp(rawio) + self.assertEqual(bufio.read1(size), b'abc') + @threading_helper.requires_working_threading() @support.requires_resource('cpu') def test_threads(self): diff --git a/Lib/test/test_io/test_fileio.py b/Lib/test/test_io/test_fileio.py index e53c4749f58cf2..dc964b1a868e05 100644 --- a/Lib/test/test_io/test_fileio.py +++ b/Lib/test/test_io/test_fileio.py @@ -9,6 +9,7 @@ from weakref import proxy from functools import wraps +from test import support from test.support import ( cpython_only, swap_attr, gc_collect, is_wasi, infinite_recursion, strace_helper @@ -730,6 +731,15 @@ def __setattr__(self, name, value): self.assertRaises(MyException, MyFileIO, fd) os.close(fd) # should not raise OSError(EBADF) + def test_large_read_from_small_file(self): + self.addCleanup(os.remove, TESTFN) + data = b'abc' + with self.FileIO(TESTFN, 'wb') as f: + f.write(data) + for size in support.itersize(1 << 20, sys.maxsize): + with self.FileIO(TESTFN, 'rb') as f: + self.assertEqual(f.read(size), data) + class COtherFileTests(OtherFileTests, unittest.TestCase): FileIO = _io.FileIO diff --git a/Lib/test/test_io/test_general.py b/Lib/test/test_io/test_general.py index 085ed3ea6a95ee..f0e939663519d3 100644 --- a/Lib/test/test_io/test_general.py +++ 
b/Lib/test/test_io/test_general.py @@ -628,6 +628,11 @@ def readinto(self, buf): rawio = RawIOKeepsReference([b"1234"]) rawio.read(4) + def test_RawIOBase_large_read_from_small_file(self): + for size in support.itersize(1 << 20, sys.maxsize): + rawio = self.MockRawIOWithoutRead((b"abc",)) + self.assertEqual(rawio.read(size), b'abc') + def test_types_have_dict(self): test = ( self.IOBase(), diff --git a/Lib/test/test_os/test_os.py b/Lib/test/test_os/test_os.py index ddb8a63095bce5..d5d9005358fd2f 100644 --- a/Lib/test/test_os/test_os.py +++ b/Lib/test/test_os/test_os.py @@ -321,25 +321,15 @@ def test_readinto_badarg(self): self.assertEqual(s, 4) self.assertEqual(buffer, b"spam") - @support.cpython_only - # Skip the test on 32-bit platforms: the number of bytes must fit in a - # Py_ssize_t type - @unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX, - "needs INT_MAX < PY_SSIZE_T_MAX") - @support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False) - def test_large_read(self, size): + def test_large_read_from_small_file(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) create_file(os_helper.TESTFN, b'test') # Issue #21932: Make sure that os.read() does not raise an # OverflowError for size larger than INT_MAX - with open(os_helper.TESTFN, "rb") as fp: - data = os.read(fp.fileno(), size) - - # The test does not try to read more than 2 GiB at once because the - # operating system is free to return less bytes than requested. 
- self.assertEqual(data, b'test') - + for size in support.itersize(1 << 20, sys.maxsize): + with open(os_helper.TESTFN, "rb") as fp: + self.assertEqual(os.read(fp.fileno(), size), b'test') @support.cpython_only # Skip the test on 32-bit platforms: the number of bytes must fit in a diff --git a/Lib/test/test_os/test_posix.py b/Lib/test/test_os/test_posix.py index 37da293a441e46..d6a5eeb27e3bf8 100644 --- a/Lib/test/test_os/test_posix.py +++ b/Lib/test/test_os/test_posix.py @@ -304,6 +304,20 @@ def test_pread(self): finally: os.close(fd) + @unittest.skipUnless(hasattr(posix, 'pread'), "test needs posix.pread()") + def test_large_pread_from_small_file(self): + fd = os.open(os_helper.TESTFN, os.O_WRONLY | os.O_CREAT) + try: + os.write(fd, b'test') + finally: + os.close(fd) + fd = os.open(os_helper.TESTFN, os.O_RDONLY) + try: + for size in support.itersize(1 << 20, sys.maxsize): + self.assertEqual(posix.pread(fd, size, 1), b'est') + finally: + os.close(fd) + @unittest.skipUnless(hasattr(posix, 'preadv'), "test needs posix.preadv()") def test_preadv(self): fd = os.open(os_helper.TESTFN, os.O_RDWR | os.O_CREAT) diff --git a/Misc/NEWS.d/next/Security/2025-11-27-12-45-05.gh-issue-141713.54_uaA.rst b/Misc/NEWS.d/next/Security/2025-11-27-12-45-05.gh-issue-141713.54_uaA.rst new file mode 100644 index 00000000000000..359a84060947af --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-11-27-12-45-05.gh-issue-141713.54_uaA.rst @@ -0,0 +1,8 @@ +Fix a potential memory denial of service when reading a large specified +number of bytes from a file, a file descriptor or a buffered stream. +This affects the :func:`os.read` and :func:`os.pread` functions and +the :meth:`!read` and :meth:`!read1` methods of various :mod:`io` classes. +When the number of bytes to read is received from an untrusted source, it could +cause an arbitrary amount of memory to be allocated. 
+This could have led to symptoms including a :exc:`MemoryError`, swapping, +out of memory (OOM) killed processes or containers, or even system crashes. diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 4ae487c8e2adf7..0eb6a3e59bbbcf 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -80,6 +80,10 @@ extern int _PyIO_trap_eintr(void); #define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */ +// Data larger than this will be read in chunks, to prevent extreme +// overallocation. +#define MIN_READ_BUF_SIZE (1 << 20) + /* * Offset type for positioning. */ diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 4602f2b42a6017..259bee28945447 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -407,8 +407,8 @@ _enter_buffered_busy(buffered *self) #define MINUS_LAST_BLOCK(self, size) \ (self->buffer_mask ? \ - (size & ~self->buffer_mask) : \ - (self->buffer_size * (size / self->buffer_size))) + ((size) & ~self->buffer_mask) : \ + (self->buffer_size * ((size) / self->buffer_size))) static int @@ -1071,6 +1071,7 @@ _io__Buffered_read1_impl(buffered *self, Py_ssize_t n) } _bufferedreader_reset_buf(self); + n = Py_MIN(n, self->buffer_size); PyBytesWriter *writer = PyBytesWriter_Create(n); if (writer == NULL) { return NULL; @@ -1795,25 +1796,32 @@ _bufferedreader_read_fast(buffered *self, Py_ssize_t n) * or until an EOF occurs or until read() would block. 
*/ static PyObject * -_bufferedreader_read_generic(buffered *self, Py_ssize_t n) +_bufferedreader_read_generic(buffered *self, Py_ssize_t size) { - Py_ssize_t current_size, remaining, written; + Py_ssize_t current_size, written; current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); - if (n <= current_size) - return _bufferedreader_read_fast(self, n); + if (size <= current_size) + return _bufferedreader_read_fast(self, size); - PyBytesWriter *writer = PyBytesWriter_Create(n); + Py_ssize_t chunksize = self->buffer_size; + if (chunksize < MIN_READ_BUF_SIZE) { + chunksize = MINUS_LAST_BLOCK(self, MIN_READ_BUF_SIZE); + } + Py_ssize_t allocated = size, resize_after = size; + if (size - current_size > chunksize) { + allocated = current_size + chunksize; + resize_after = allocated - Py_MAX(self->buffer_size, chunksize/4); + } + PyBytesWriter *writer = PyBytesWriter_Create(allocated); if (writer == NULL) { goto error; } char *out = PyBytesWriter_GetData(writer); - remaining = n; written = 0; if (current_size > 0) { memcpy(out, self->buffer + self->pos, current_size); - remaining -= current_size; written += current_size; self->pos += current_size; } @@ -1825,12 +1833,27 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t n) Py_DECREF(r); } _bufferedreader_reset_buf(self); - while (remaining > 0) { + while (written < size) { /* We want to read a whole block at the end into buffer. - If we had readv() we could do this in one pass. */ - Py_ssize_t r = MINUS_LAST_BLOCK(self, remaining); - if (r == 0) + If we had readv() we could do this in one pass for the last chunk. 
*/ + if (written > resize_after) { + if (size - allocated > chunksize) { + allocated += chunksize; + resize_after = allocated - Py_MAX(self->buffer_size, chunksize/4); + chunksize += Py_MIN(chunksize, size - allocated - chunksize); + } + else { + resize_after = allocated = size; + } + if (PyBytesWriter_Resize(writer, allocated) < 0) { + PyBytesWriter_Discard(writer); + goto error; + } + } + Py_ssize_t r = MINUS_LAST_BLOCK(self, allocated - written); + if (r == 0) { break; + } r = _bufferedreader_raw_read(self, out + written, r); if (r == -1) goto error; @@ -1842,13 +1865,19 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t n) PyBytesWriter_Discard(writer); Py_RETURN_NONE; } - remaining -= r; written += r; } + Py_ssize_t remaining = size - written; assert(remaining <= self->buffer_size); self->pos = 0; self->raw_pos = 0; self->read_end = 0; + if (allocated < size) { + if (PyBytesWriter_Resize(writer, size) < 0) { + PyBytesWriter_Discard(writer); + goto error; + } + } /* NOTE: when the read is satisfied, we avoid issuing any additional reads, which could block indefinitely (e.g. on a socket). See issue #9550. 
*/ diff --git a/Modules/_io/clinic/iobase.c.h b/Modules/_io/clinic/iobase.c.h index 402448545dfc51..b065743a5111b4 100644 --- a/Modules/_io/clinic/iobase.c.h +++ b/Modules/_io/clinic/iobase.c.h @@ -393,13 +393,13 @@ PyDoc_STRVAR(_io__RawIOBase_read__doc__, {"read", _PyCFunction_CAST(_io__RawIOBase_read), METH_FASTCALL, _io__RawIOBase_read__doc__}, static PyObject * -_io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n); +_io__RawIOBase_read_impl(PyObject *self, Py_ssize_t size); static PyObject * _io__RawIOBase_read(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - Py_ssize_t n = -1; + Py_ssize_t size = -1; if (!_PyArg_CheckPositional("read", nargs, 0, 1)) { goto exit; @@ -417,10 +417,10 @@ _io__RawIOBase_read(PyObject *self, PyObject *const *args, Py_ssize_t nargs) if (ival == -1 && PyErr_Occurred()) { goto exit; } - n = ival; + size = ival; } skip_optional: - return_value = _io__RawIOBase_read_impl(self, n); + return_value = _io__RawIOBase_read_impl(self, size); exit: return return_value; @@ -443,4 +443,4 @@ _io__RawIOBase_readall(PyObject *self, PyObject *Py_UNUSED(ignored)) { return _io__RawIOBase_readall_impl(self); } -/*[clinic end generated code: output=9359e74d95534bef input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f5511c22b1ea321a input=a9049054013a1b77]*/ diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 5d7741fdd830a5..e8635ae0f05649 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -886,25 +886,41 @@ _io_FileIO_read_impl(fileio *self, PyTypeObject *cls, Py_ssize_t size) size = _PY_READ_MAX; } - PyBytesWriter *writer = PyBytesWriter_Create(size); + Py_ssize_t allocated = Py_MIN(size, MIN_READ_BUF_SIZE); + Py_ssize_t written = 0; + + PyBytesWriter *writer = PyBytesWriter_Create(allocated); if (writer == NULL) { return NULL; } - char *ptr = PyBytesWriter_GetData(writer); - - Py_ssize_t n = _Py_read(self->fd, ptr, size); - if (n == -1) { - // copy errno because 
PyBytesWriter_Discard() can indirectly modify it - int err = errno; - PyBytesWriter_Discard(writer); - if (err == EAGAIN) { - PyErr_Clear(); - Py_RETURN_NONE; + while (1) { + char *ptr = PyBytesWriter_GetData(writer); + Py_ssize_t n = _Py_read(self->fd, ptr + written, allocated - written); + if (n == -1) { + if (errno == EAGAIN) { + PyErr_Clear(); /* _Py_read() set OSError; drop it on both EAGAIN paths */ + if (!written) { + // Nothing was read yet -- return None. + PyBytesWriter_Discard(writer); + Py_RETURN_NONE; + } + break; + } + PyBytesWriter_Discard(writer); + return NULL; + } + written += n; + if (written < allocated || allocated >= size) { + break; + } + allocated += Py_MIN(allocated, size - allocated); + if (PyBytesWriter_Resize(writer, allocated) < 0) { + PyBytesWriter_Discard(writer); + return NULL; } - return NULL; } - return PyBytesWriter_FinishWithSize(writer, n); + return PyBytesWriter_FinishWithSize(writer, written); } /*[clinic input] diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index f036ea503b11e8..83415d525f0c40 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -913,42 +913,81 @@ PyDoc_STRVAR(rawiobase_doc, /*[clinic input] _io._RawIOBase.read - size as n: Py_ssize_t = -1 + size: Py_ssize_t = -1 / [clinic start generated code]*/ static PyObject * -_io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) -/*[clinic end generated code: output=6cdeb731e3c9f13c input=b6d0dcf6417d1374]*/ +_io__RawIOBase_read_impl(PyObject *self, Py_ssize_t size) +/*[clinic end generated code: output=25e2e6fa930c6cbe input=af5eafca0450e62a]*/ { - PyObject *b, *res; + PyObject *res; + Py_ssize_t written = 0; + Py_buffer buf; - if (n < 0) { + if (size < 0) { return PyObject_CallMethodNoArgs(self, &_Py_ID(readall)); } - b = PyByteArray_FromStringAndSize(NULL, n); + Py_ssize_t allocated = Py_MIN(size, MIN_READ_BUF_SIZE); + Py_ssize_t chunksize = allocated; + + PyObject *b = PyByteArray_FromStringAndSize(NULL, allocated); if (b == NULL) { return NULL; } - res = PyObject_CallMethodObjArgs(self, 
&_Py_ID(readinto), b, NULL); - if (res == NULL || res == Py_None) { - goto cleanup; - } + PyObject *buffer = b; // borrowed reference + while (1) { + res = PyObject_CallMethodObjArgs(self, &_Py_ID(readinto), buffer, NULL); + if (buffer != b) { // memoryview + Py_DECREF(buffer); + } + if (res == NULL) { + goto cleanup; + } + if (res == Py_None) { + if (!written) { + // Nothing was read yet -- return None. + goto cleanup; + } + Py_CLEAR(res); + break; + } - Py_ssize_t bytes_filled = PyNumber_AsSsize_t(res, PyExc_ValueError); - Py_CLEAR(res); - if (bytes_filled == -1 && PyErr_Occurred()) { - goto cleanup; - } - if (bytes_filled < 0 || bytes_filled > n) { - PyErr_Format(PyExc_ValueError, - "readinto returned %zd outside buffer size %zd", - bytes_filled, n); - goto cleanup; + Py_ssize_t n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_CLEAR(res); + if (n == -1 && PyErr_Occurred()) { + goto cleanup; + } + if (n < 0 || n > chunksize) { + PyErr_Format(PyExc_ValueError, + "readinto returned %zd outside buffer size %zd", + n, chunksize); + goto cleanup; + } + written += n; + if (written < allocated || allocated >= size) { + break; + } + chunksize = Py_MIN(allocated, size - allocated); + allocated += chunksize; + if (PyByteArray_Resize(b, allocated) < 0) { + goto cleanup; + } + + if (PyBuffer_FillInfo(&buf, NULL, + PyByteArray_AS_STRING(b) + written, + chunksize, 0, PyBUF_CONTIG) < 0) + { + goto cleanup; + } + buffer = PyMemoryView_FromBuffer(&buf); + if (buffer == NULL) { + goto cleanup; /* res is NULL here; release b like the other failure paths */ + } } - if (PyByteArray_Resize(b, bytes_filled) < 0) { + if (PyByteArray_Resize(b, written) < 0) { goto cleanup; } res = PyObject_CallMethodNoArgs(b, &_Py_ID(take_bytes)); diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index d880fc52bb370d..b4c26480c72c63 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -7726,7 +7726,7 @@ os_lseek(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } PyDoc_STRVAR(os_read__doc__, 
-"read($module, fd, length, /)\n" +"read($module, fd, size, /)\n" "--\n" "\n" "Read from a file descriptor. Returns a bytes object."); @@ -7735,14 +7735,14 @@ PyDoc_STRVAR(os_read__doc__, {"read", _PyCFunction_CAST(os_read), METH_FASTCALL, os_read__doc__}, static PyObject * -os_read_impl(PyObject *module, int fd, Py_ssize_t length); +os_read_impl(PyObject *module, int fd, Py_ssize_t size); static PyObject * os_read(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; int fd; - Py_ssize_t length; + Py_ssize_t size; if (!_PyArg_CheckPositional("read", nargs, 2, 2)) { goto exit; @@ -7761,9 +7761,9 @@ os_read(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (ival == -1 && PyErr_Occurred()) { goto exit; } - length = ival; + size = ival; } - return_value = os_read_impl(module, fd, length); + return_value = os_read_impl(module, fd, size); exit: return return_value; @@ -13610,4 +13610,4 @@ os__emscripten_log(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py #ifndef OS__EMSCRIPTEN_LOG_METHODDEF #define OS__EMSCRIPTEN_LOG_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_LOG_METHODDEF) */ -/*[clinic end generated code: output=82f60940338c70e4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c5099bda73ce7aa4 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index fc609b2707c6c6..e49ca4ed3006fd 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -619,6 +619,9 @@ static const unsigned int _Py_STATX_KNOWN = (STATX_BASIC_STATS | STATX_BTIME # define HAVE_PTSNAME_R_RUNTIME 1 #endif +// Data larger than this will be read in chunks, to prevent extreme +// overallocation. 
+#define MIN_READ_BUF_SIZE (1 << 20) // --- os module ------------------------------------------------------------ @@ -11940,35 +11943,53 @@ os_lseek_impl(PyObject *module, int fd, Py_off_t position, int how) /*[clinic input] os.read fd: int - length: Py_ssize_t + size: Py_ssize_t / Read from a file descriptor. Returns a bytes object. [clinic start generated code]*/ static PyObject * -os_read_impl(PyObject *module, int fd, Py_ssize_t length) -/*[clinic end generated code: output=dafbe9a5cddb987b input=1df2eaa27c0bf1d3]*/ +os_read_impl(PyObject *module, int fd, Py_ssize_t size) +/*[clinic end generated code: output=418a4484921f48ac input=74aab5415dcf1c3b]*/ { - if (length < 0) { + if (size < 0) { errno = EINVAL; return posix_error(); } - length = Py_MIN(length, _PY_READ_MAX); + size = Py_MIN(size, _PY_READ_MAX); + Py_ssize_t allocated = Py_MIN(size, MIN_READ_BUF_SIZE); + Py_ssize_t written = 0; - PyBytesWriter *writer = PyBytesWriter_Create(length); + PyBytesWriter *writer = PyBytesWriter_Create(allocated); if (writer == NULL) { return NULL; } - Py_ssize_t n = _Py_read(fd, PyBytesWriter_GetData(writer), length); - if (n == -1) { - PyBytesWriter_Discard(writer); - return NULL; + while (1) { + char *ptr = PyBytesWriter_GetData(writer); /* re-fetch: Resize may move the buffer */ + Py_ssize_t n = _Py_read(fd, ptr + written, allocated - written); + if (n == -1) { + if (written && errno == EAGAIN) { + PyErr_Clear(); /* return the data read so far instead of OSError */ + break; + } + PyBytesWriter_Discard(writer); + return NULL; + } + written += n; + if (written < allocated || allocated >= size) { + break; + } + allocated += Py_MIN(allocated, size - allocated); + if (PyBytesWriter_Resize(writer, allocated) < 0) { + PyBytesWriter_Discard(writer); + return NULL; + } } - return PyBytesWriter_FinishWithSize(writer, n); + return PyBytesWriter_FinishWithSize(writer, written); } /*[clinic input] @@ -12151,27 +12172,42 @@ os_pread_impl(PyObject *module, int fd, Py_ssize_t length, Py_off_t offset) errno = EINVAL; return posix_error(); } - PyBytesWriter *writer = PyBytesWriter_Create(length); + Py_ssize_t allocated = Py_MIN(length, 
MIN_READ_BUF_SIZE); + Py_ssize_t written = 0; + PyBytesWriter *writer = PyBytesWriter_Create(allocated); if (writer == NULL) { return NULL; } - do { - Py_BEGIN_ALLOW_THREADS - _Py_BEGIN_SUPPRESS_IPH - n = pread(fd, PyBytesWriter_GetData(writer), length, offset); - _Py_END_SUPPRESS_IPH - Py_END_ALLOW_THREADS - } while (n < 0 && errno == EINTR && !(async_err = PyErr_CheckSignals())); + while (1) { + do { + Py_BEGIN_ALLOW_THREADS + _Py_BEGIN_SUPPRESS_IPH + n = pread(fd, PyBytesWriter_GetData(writer) + written, + allocated - written, offset); + _Py_END_SUPPRESS_IPH + Py_END_ALLOW_THREADS + } while (n < 0 && errno == EINTR && !(async_err = PyErr_CheckSignals())); - if (n < 0) { - if (!async_err) { - posix_error(); + if (n < 0) { + if (!async_err) { + posix_error(); + } + PyBytesWriter_Discard(writer); + return NULL; } - PyBytesWriter_Discard(writer); - return NULL; + written += n; + if (written < allocated || allocated >= length) { + break; + } + allocated += Py_MIN(allocated, length - allocated); + if (PyBytesWriter_Resize(writer, allocated) < 0) { + PyBytesWriter_Discard(writer); + return NULL; + } + offset += n; } - return PyBytesWriter_FinishWithSize(writer, n); + return PyBytesWriter_FinishWithSize(writer, written); } #endif /* HAVE_PREAD */ From fd2f0ef04118294709c486f346a4e35930672949 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 27 Nov 2025 23:24:19 +0200 Subject: [PATCH 2/2] Fix Buffered.read(). 
--- Lib/test/test_io/test_bufferedio.py | 7 +++++++ Modules/_io/bufferedio.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/Lib/test/test_io/test_bufferedio.py b/Lib/test/test_io/test_bufferedio.py index 52f18bda0c36bd..891039e392d2cf 100644 --- a/Lib/test/test_io/test_bufferedio.py +++ b/Lib/test/test_io/test_bufferedio.py @@ -439,6 +439,13 @@ def test_large_read_from_small_file(self): bufio = self.tp(rawio) self.assertEqual(bufio.read(size), b'abc') + def test_large_read_from_large_file(self): + data = b'abc' * ((5 << 20) + 54321) + for size in (len(data), sys.maxsize): + rawio = self.MockFileIO(data) + bufio = self.tp(rawio) + self.assertEqual(bufio.read(size), data) + def test_large_read1_from_small_file(self): for size in support.itersize(1 << 20, sys.maxsize): rawio = self.MockRawIO((b'abc',)) diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 259bee28945447..3a30b5e5a91702 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1849,6 +1849,7 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t size) PyBytesWriter_Discard(writer); goto error; } + out = PyBytesWriter_GetData(writer); } Py_ssize_t r = MINUS_LAST_BLOCK(self, allocated - written); if (r == 0) { @@ -1877,6 +1878,7 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t size) PyBytesWriter_Discard(writer); goto error; } + out = PyBytesWriter_GetData(writer); } /* NOTE: when the read is satisfied, we avoid issuing any additional reads, which could block indefinitely (e.g. on a socket).