From fca1d61ae0b4646b6dee0ed077a134e2ebfe4205 Mon Sep 17 00:00:00 2001 From: Greg Brandt Date: Sat, 14 Feb 2026 15:53:36 -0800 Subject: [PATCH] Bugfixes - Fix size 0 for empty file on GCS - Download/seek to end in append mode - Add version variable for pydoc --- avrokit/__init__.py | 4 ++++ avrokit/url/google.py | 8 +++++--- avrokit/url/s3.py | 6 ++++-- pyproject.toml | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/avrokit/__init__.py b/avrokit/__init__.py index 77d7356..4aefcc0 100644 --- a/avrokit/__init__.py +++ b/avrokit/__init__.py @@ -77,6 +77,10 @@ ... writer.roll() # Create a new partition file """ +from importlib.metadata import version + +__version__ = version("avrokit") + from .url import URL, FileURL, parse_url, create_url_mapping, flatten_urls from .io import ( Appendable, diff --git a/avrokit/url/google.py b/avrokit/url/google.py index de2eee7..ef317fd 100644 --- a/avrokit/url/google.py +++ b/avrokit/url/google.py @@ -107,7 +107,7 @@ def size(self) -> int: if not blob.exists(): return 0 blob.reload() # N.b. loads metadata including size - return blob.size + return blob.size if blob.size is not None else 0 @override def open(self) -> IO[Any]: @@ -119,11 +119,13 @@ def open(self) -> IO[Any]: self._current_remote = blob self._current_local = tmpfile self._current_local_stream = self._current_local - # Download to file if r or "rb" mode - if "r" in self.mode: + # Download to file if r/rb mode, or if append mode (to preserve existing content on failure) + if "r" in self.mode or "a" in self.mode: # N.b. always writes in binary mode blob.download_to_file(tmpfile) tmpfile.seek(0) + if "a" in self.mode: + tmpfile.seek(0, 2) # Seek to end for append mode if "b" not in self.mode: # So if the user wants to read text, we need to decode it self._current_local_stream = io.TextIOWrapper(tmpfile, encoding="utf-8") diff --git a/avrokit/url/s3.py b/avrokit/url/s3.py index 16cd157..2edeee4 100644 --- a/avrokit/url/s3.py +++ b/avrokit/url/s3.py @@ -94,11 +94,13 @@ def open(self) -> IO[Any]: self._current_client = client self._current_local = tmpfile self._current_local_stream = self._current_local - # Download to file if "r" or "rb" mode - if "r" in self.mode: + # Download to file if "r"/"rb" mode, or if append mode (to preserve existing content on failure) + if "r" in self.mode or "a" in self.mode: # N.b. always writes in binary mode client.download_fileobj(self.bucket, self.path, tmpfile) tmpfile.seek(0) + if "a" in self.mode: + tmpfile.seek(0, 2) # Seek to end for append mode if "b" not in self.mode: # So if the user wants to read text, we need to decode it self._current_local_stream = io.TextIOWrapper(tmpfile, encoding="utf-8") diff --git a/pyproject.toml b/pyproject.toml index 124f850..84ce178 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [tool.poetry] name = "avrokit" -version = "0.0.2" +version = "0.0.3" description = "Python utilities for working with Avro data files" authors = ["Greg Brandt "] license = "Apache-2.0"