Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changes

## Unreleased

* Fix #112 - Reading an empty compressed file via a system-level decompressor (gzip/pigz) no longer raises a spurious `EOFError`. System decompressors exit non-zero on empty input, which surfaced as a nondeterministic failure depending on a process-exit race (more frequent on single-CPU machines).

## v4.4.1 (2020.12.06)

* Fix #41 - Windows does not support SIGPIPE
Expand Down
21 changes: 21 additions & 0 deletions tests/test_xphyle.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from unittest import TestCase, skipIf
from . import *
import gzip
import os
from io import BytesIO, IOBase
from xphyle import *
from xphyle.paths import TempDir, STDIN, STDOUT, STDERR, EXECUTABLE_CACHE
Expand Down Expand Up @@ -194,8 +195,15 @@ def test_xopen_file(self):
with self.assertRaises(IOError):
xopen("foobar", "r")
path = self.root.make_file(suffix=".gz")
# Write a real gzip stream so the read path decompresses actual data
# rather than an empty file (see issue #112: system decompressors exit
# non-zero on empty input, which made this test fail nondeterministically
# depending on a process-exit race).
with gzip.open(path, "wt") as o:
o.write("bar")
with xopen(path, "rU", context_wrapper=True) as i:
assert "rt" == i.mode
assert i.read() == "bar"
with xopen(path, "w", compression=True, context_wrapper=True) as o:
assert cast(FileLikeWrapper, o).compression == "gzip"
o.write("foo")
Expand All @@ -211,6 +219,19 @@ def test_xopen_file(self):
with xopen(existing_file, "wt", overwrite=False):
pass

def test_xopen_empty_compressed_file(self):
    # Regression test for issue #112. A zero-byte compressed file read
    # through the system-level decompressor must yield an empty stream
    # instead of raising. gzip/pigz exit non-zero on empty input, and that
    # exit code used to surface as an EOFError whose occurrence depended on
    # a process-exit race (seen far more often on single-CPU machines).
    empty_gz = self.root.make_file(suffix=".gz")
    size = os.path.getsize(empty_gz)
    assert size == 0
    # use_system=True forces the system-level read path. Reading the stream
    # to the end is what makes the subprocess exit-code check deterministic.
    with xopen(
        empty_gz, "rb", use_system=True, context_wrapper=True
    ) as stream:
        payload = stream.read()
        assert payload == b""

def test_xopen_fileobj(self):
path = self.root.make_file(suffix=".gz")
with open(path, "wb") as out1:
Expand Down
16 changes: 15 additions & 1 deletion xphyle/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,28 @@ def __iter__(self) -> Iterator:
def _raise_if_error(self) -> None:
    """Raise EOFError if the process has exited with a non-zero exit code.

    Nothing is raised while the process is still running (``poll()``
    returns ``None``) or when it exited with code 0.

    An empty source file is a special case: system decompressors such as
    ``gzip``/``pigz`` exit non-zero on empty input ("unexpected end of
    file"), whereas the Python implementations treat an empty file as a
    valid, empty stream. To keep the system- and library-level read paths
    consistent, a non-zero exit code is ignored when the source file is
    empty.

    Raises:
        EOFError: If the process exited with a non-zero code and the source
            file is not empty.
    """
    retcode = self.process.poll()
    if retcode is None or retcode == 0:
        # Still running, or finished cleanly: nothing to report.
        return
    # Only consult the file system once a failure has actually been seen.
    if self._source_is_empty():
        return
    raise EOFError(
        f"{self.executable_name} process returned non-zero exit code "
        f"{retcode}. Is the input file truncated or corrupt?"
    )

def _source_is_empty(self) -> bool:
    """Return True if the source file exists and is zero bytes.

    Any failure to determine the size is reported as "not empty" (False),
    so that the caller's own error handling proceeds unchanged.
    """
    try:
        return os.path.getsize(self._name) == 0
    except (OSError, TypeError, ValueError):  # pragma: no-cover
        # OSError: the path is missing or inaccessible.
        # TypeError/ValueError: ``_name`` is not a usable path (e.g. None or
        # a string with an embedded NUL). This helper runs while an error is
        # being diagnosed, so it must not raise a different exception itself.
        return False

def read(self, *args) -> bytes:
"""Read bytes from the stream. Arguments are passed through to the
subprocess ``read`` method.
Expand Down