From b4baad1d59aea27d6aceef623fa30dcba0f530b3 Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Sat, 7 Feb 2026 21:50:32 -0800 Subject: [PATCH 1/2] Fix GzipDecompressor to handle concatenated gzip members The GzipDecompressor class now properly handles concatenated gzip members by checking for unused_data after each decompression and creating a new decompressor to continue processing subsequent members. This prevents silent data loss when decompressing multi-member gzip streams. Changes: - Added loop in decompress() to process all concatenated gzip members - Added _flushed flag to prevent decompress() calls after flush() - Added comprehensive test cases for single, double, and triple member streams - Added test to verify RuntimeError after flush() Fixes #3560 --- tornado/test/util_test.py | 56 +++++++++++++++++++++++++++++++++++++++ tornado/util.py | 36 +++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/tornado/test/util_test.py b/tornado/test/util_test.py index 83b78e5cf..081b4a1e7 100644 --- a/tornado/test/util_test.py +++ b/tornado/test/util_test.py @@ -1,4 +1,5 @@ import datetime +import gzip import re import sys import textwrap @@ -15,6 +16,7 @@ raise_exc_info, re_unescape, timedelta_to_seconds, + GzipDecompressor, ) @@ -366,3 +368,57 @@ def test_version_info_compatible(self): def test_current_version(self): self.assert_version_info_compatible(tornado.version, tornado.version_info) + + +class GzipDecompressorTest(unittest.TestCase): + def test_concatenated_gzip_members(self): + """Test that concatenated gzip members are fully decompressed.""" + data1 = b"First gzip member content." + data2 = b"Second gzip member content." + + member1 = gzip.compress(data1) + member2 = gzip.compress(data2) + + concatenated = member1 + member2 + decompressor = GzipDecompressor() + result = decompressor.decompress(concatenated) + + expected = data1 + data2 + self.assertEqual( + result, expected, "Concatenated gzip members should be fully decompressed" + ) + + def test_single_gzip_member(self): + """Test that single gzip member is decompressed correctly.""" + data = b"This is some example data that will be compressed using gzip." + compressed = gzip.compress(data) + + decompressor = GzipDecompressor() + result = decompressor.decompress(compressed) + + self.assertEqual(result, data) + + def test_multiple_concatenated_members(self): + """Test that three or more concatenated gzip members are fully decompressed.""" + data1 = b"First member." + data2 = b"Second member." + data3 = b"Third member." + + concatenated = gzip.compress(data1) + gzip.compress(data2) + gzip.compress(data3) + decompressor = GzipDecompressor() + result = decompressor.decompress(concatenated) + + expected = data1 + data2 + data3 + self.assertEqual(result, expected) + + def test_decompress_after_flush_raises(self): + """Test that decompress() raises RuntimeError after flush().""" + data = b"Test data" + compressed = gzip.compress(data) + + decompressor = GzipDecompressor() + decompressor.decompress(compressed) + decompressor.flush() + + with self.assertRaises(RuntimeError): + decompressor.decompress(gzip.compress(b"More data")) diff --git a/tornado/util.py b/tornado/util.py index 810732a67..483f335c7 100644 --- a/tornado/util.py +++ b/tornado/util.py @@ -70,6 +70,7 @@ def __init__(self) -> None: # http://stackoverflow.com/questions/1838699/how-can-i-decompress-a-gzip-stream-with-zlib # This works on cpython and pypy, but not jython. self.decompressobj = zlib.decompressobj(16 + zlib.MAX_WBITS) + self._flushed = False def decompress(self, value: bytes, max_length: int = 0) -> bytes: """Decompress a chunk, returning newly-available data. @@ -82,7 +83,36 @@ def decompress(self, value: bytes, max_length: int = 0) -> bytes: in ``unconsumed_tail``; you must retrieve this value and pass it back to a future call to `decompress` if it is not empty. """ - return self.decompressobj.decompress(value, max_length) + if self._flushed: + raise RuntimeError("Cannot call decompress() after flush()") + + data = value + out = bytearray() + remaining = max_length + + while True: + if remaining: + chunk = self.decompressobj.decompress(data, remaining) + else: + chunk = self.decompressobj.decompress(data) + + out.extend(chunk) + + if remaining: + remaining = max(0, max_length - len(out)) + if remaining == 0: + break + + # Handle concatenated gzip members + unused = getattr(self.decompressobj, "unused_data", b"") + if unused: + data = unused + self.decompressobj = zlib.decompressobj(16 + zlib.MAX_WBITS) + continue + + break + + return bytes(out) @property def unconsumed_tail(self) -> bytes: @@ -95,7 +125,9 @@ def flush(self) -> bytes: Also checks for errors such as truncated input. No other methods may be called on this object after `flush`. """ - return self.decompressobj.flush() + result = self.decompressobj.flush() + self._flushed = True + return result def import_object(name: str) -> Any: From 21364c9fbfae284345ba7c44502347fe4ab1bcac Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Tue, 17 Mar 2026 21:48:25 -0700 Subject: [PATCH 2/2] Address review feedback on GzipDecompressor - Remove unnecessary getattr() guard on unused_data since it's a documented member of zlib.Decompress - Simplify decompress loop by always passing remaining to decompressobj.decompress() (0 means unlimited) - Fix remaining calculation to not clamp negative values to 0, which would incorrectly mean "no limit" --- tornado/util.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tornado/util.py b/tornado/util.py index 483f335c7..51caeac7a 100644 --- a/tornado/util.py +++ b/tornado/util.py @@ -91,20 +91,16 @@ def decompress(self, value: bytes, max_length: int = 0) -> bytes: remaining = max_length while True: - if remaining: - chunk = self.decompressobj.decompress(data, remaining) - else: - chunk = self.decompressobj.decompress(data) - + chunk = self.decompressobj.decompress(data, remaining) out.extend(chunk) - if remaining: - remaining = max(0, max_length - len(out)) - if remaining == 0: + if max_length: + remaining = max_length - len(out) + if remaining <= 0: break # Handle concatenated gzip members - unused = getattr(self.decompressobj, "unused_data", b"") + unused = self.decompressobj.unused_data if unused: data = unused self.decompressobj = zlib.decompressobj(16 + zlib.MAX_WBITS)