From b79fff7d611a23bcbbbdcd13af2afb8909493bf3 Mon Sep 17 00:00:00 2001 From: jayceslesar <47452474+jayceslesar@users.noreply.github.com> Date: Thu, 28 May 2026 07:52:21 +0530 Subject: [PATCH 1/8] Modify list_* methods in catalogs to return Iterators --- pyiceberg/catalog/__init__.py | 14 +++--- pyiceberg/catalog/bigquery_metastore.py | 11 ++--- pyiceberg/catalog/dynamodb.py | 22 +++++---- pyiceberg/catalog/glue.py | 17 +++---- pyiceberg/catalog/hive.py | 31 +++++++------ pyiceberg/catalog/noop.py | 7 +-- pyiceberg/catalog/rest/__init__.py | 13 +++--- pyiceberg/catalog/sql.py | 23 ++++++---- pyiceberg/cli/console.py | 21 ++++++--- pyiceberg/cli/output.py | 7 +-- tests/catalog/integration_test_dynamodb.py | 6 +-- tests/catalog/integration_test_glue.py | 6 +-- tests/catalog/test_bigquery_metastore.py | 4 +- tests/catalog/test_catalog_behaviors.py | 26 +++++------ tests/catalog/test_dynamodb.py | 16 +++---- tests/catalog/test_glue.py | 16 +++---- tests/catalog/test_hive.py | 4 +- tests/catalog/test_rest.py | 52 +++++++++++----------- tests/integration/test_catalog.py | 6 +-- 19 files changed, 163 insertions(+), 139 deletions(-) diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 95ceaa539f..fcb1465c68 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -22,7 +22,7 @@ import re import uuid from abc import ABC, abstractmethod -from collections.abc import Callable +from collections.abc import Callable, Iterator from dataclasses import dataclass from enum import Enum from typing import ( @@ -607,42 +607,42 @@ def drop_namespace(self, namespace: str | Identifier) -> None: """ @abstractmethod - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: """List tables under the given namespace in the catalog. Args: namespace (str | Identifier): Namespace identifier to search. Returns: - List[Identifier]: list of table identifiers. + Iterator[Identifier]: an iterator of table identifiers. Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. """ @abstractmethod - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. Args: namespace (str | Identifier): Namespace identifier to search. Returns: - List[Identifier]: a List of namespace identifiers. + Iterator[Identifier]: an iterator of namespace identifiers. Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. """ @abstractmethod - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: """List views under the given namespace in the catalog. Args: namespace (str | Identifier): Namespace identifier to search. Returns: - List[Identifier]: list of table identifiers. + Iterator[Identifier]: an iterator of view identifiers. Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. diff --git a/pyiceberg/catalog/bigquery_metastore.py b/pyiceberg/catalog/bigquery_metastore.py index 938ac6992f..c9c46a6d1c 100644 --- a/pyiceberg/catalog/bigquery_metastore.py +++ b/pyiceberg/catalog/bigquery_metastore.py @@ -17,6 +17,7 @@ from __future__ import annotations import json +from collections.abc import Iterator from typing import TYPE_CHECKING, Any from google.api_core.exceptions import NotFound @@ -252,7 +253,7 @@ def drop_namespace(self, namespace: str | Identifier) -> None: raise NoSuchNamespaceError(f"Namespace {namespace} does not exist.") from e @override - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: database_name = self.identifier_to_database(namespace) iceberg_tables: list[Identifier] = [] try: @@ -264,10 +265,10 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]: iceberg_tables.append((database_name, bq_table_list_item.table_id)) except NotFound: raise NoSuchNamespaceError(f"Namespace (dataset) '{database_name}' not found.") from None - return iceberg_tables + return iter(iceberg_tables) @override - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: # Since this catalog only supports one-level namespaces, it always returns an empty list unless # passed an empty namespace to list all namespaces within the catalog. if namespace: @@ -275,7 +276,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: # List top-level datasets datasets_iterator = self.client.list_datasets() - return [(dataset.dataset_id,) for dataset in datasets_iterator] + return iter([(dataset.dataset_id,) for dataset in datasets_iterator]) @override def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table: @@ -314,7 +315,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o return self.load_table(identifier=identifier) @override - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: raise NotImplementedError @override diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py index 74c0be6c9a..0a07c51771 100644 --- a/pyiceberg/catalog/dynamodb.py +++ b/pyiceberg/catalog/dynamodb.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import uuid +from collections.abc import Iterator from time import time from typing import ( TYPE_CHECKING, @@ -396,8 +397,11 @@ def drop_namespace(self, namespace: str | Identifier) -> None: database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) table_identifiers = self.list_tables(namespace=database_name) - if len(table_identifiers) > 0: + try: + next(table_identifiers) raise NamespaceNotEmptyError(f"Database {database_name} is not empty") + except StopIteration: + pass try: self._delete_dynamo_item( @@ -409,14 +413,14 @@ def drop_namespace(self, namespace: str | Identifier) -> None: raise NoSuchNamespaceError(f"Database does not exist: {database_name}") from e @override - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: """List Iceberg tables under the given namespace in the catalog. Args: namespace (str | Identifier): Namespace identifier to search. Returns: - List[Identifier]: list of table identifiers. + Iterator[Identifier]: an iterator of table identifiers. """ database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) @@ -451,20 +455,20 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]: table_identifiers.append(self.identifier_to_tuple(identifier_col)) - return table_identifiers + return iter(table_identifiers) @override - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: """List top-level namespaces from the catalog. We do not support hierarchical namespace. Returns: - List[Identifier]: a List of namespace identifiers. + Iterator[Identifier]: an iterator of namespace identifiers. """ # Hierarchical namespace is not supported. Return an empty list if namespace: - return [] + return iter([]) paginator = self.dynamodb.get_paginator("query") @@ -494,7 +498,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: namespace_col = _dict[DYNAMODB_COL_NAMESPACE] database_identifiers.append(self.identifier_to_tuple(namespace_col)) - return database_identifiers + return iter(database_identifiers) @override def load_namespace_properties(self, namespace: str | Identifier) -> Properties: @@ -565,7 +569,7 @@ def create_view( raise NotImplementedError @override - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: raise NotImplementedError @override diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 12b36efc5c..e06716d82d 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -17,6 +17,7 @@ import logging +from collections.abc import Iterator from typing import ( TYPE_CHECKING, Any, @@ -860,14 +861,14 @@ def drop_namespace(self, namespace: str | Identifier) -> None: self.glue.delete_database(Name=database_name) @override - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: """List Iceberg tables under the given namespace in the catalog. Args: namespace (str | Identifier): Namespace identifier to search. Returns: - List[Identifier]: list of table identifiers. + Iterator[Identifier]: an iterator of table identifiers. Raises: NoSuchNamespaceError: If a namespace with the given name does not exist, or the identifier is invalid. @@ -889,18 +890,18 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]: except self.glue.exceptions.EntityNotFoundException as e: raise NoSuchNamespaceError(f"Database does not exist: {database_name}") from e - return [(database_name, table["Name"]) for table in table_list if self.__is_iceberg_table(table)] + return iter([(database_name, table["Name"]) for table in table_list if self.__is_iceberg_table(table)]) @override - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. Returns: - List[Identifier]: a List of namespace identifiers. + Iterator[Identifier]: an iterator of namespace identifiers. """ # Hierarchical namespace is not supported. Return an empty list if namespace: - return [] + return iter([]) database_list: list[DatabaseTypeDef] = [] next_token: str | None = None @@ -912,7 +913,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: if not next_token: break - return [self.identifier_to_tuple(database["Name"]) for database in database_list] + return iter([self.identifier_to_tuple(database["Name"]) for database in database_list]) @override def load_namespace_properties(self, namespace: str | Identifier) -> Properties: @@ -982,7 +983,7 @@ def create_view( raise NotImplementedError @override - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: raise NotImplementedError @override diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 181f9d4661..4e34426c5a 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -18,6 +18,7 @@ import logging import socket import time +from collections.abc import Iterator from types import TracebackType from typing import ( TYPE_CHECKING, @@ -479,7 +480,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o return self._convert_hive_into_iceberg(hive_table) @override - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: raise NotImplementedError @override @@ -760,7 +761,7 @@ def drop_namespace(self, namespace: str | Identifier) -> None: raise NoSuchNamespaceError(f"Database does not exists: {database_name}") from e @override - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: """List Iceberg tables under the given namespace in the catalog. When the database doesn't exist, it will just return an empty list. @@ -769,34 +770,36 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]: namespace: Database to list. Returns: - List[Identifier]: list of table identifiers. + Iterator[Identifier]: an iterator of table identifiers. Raises: NoSuchNamespaceError: If a namespace with the given name does not exist, or the identifier is invalid. """ database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) with self._client as open_client: - return [ - (database_name, table.tableName) - for table in open_client.get_table_objects_by_name( - dbname=database_name, tbl_names=open_client.get_all_tables(db_name=database_name) - ) - if table.parameters.get(TABLE_TYPE, "").lower() == ICEBERG - ] + return iter( + [ + (database_name, table.tableName) + for table in open_client.get_table_objects_by_name( + dbname=database_name, tbl_names=open_client.get_all_tables(db_name=database_name) + ) + if table.parameters.get(TABLE_TYPE, "").lower() == ICEBERG + ] + ) @override - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. Returns: - List[Identifier]: a List of namespace identifiers. + Iterator[Identifier]: an iterator of namespace identifiers. """ # Hierarchical namespace is not supported. Return an empty list if namespace: - return [] + return iter([]) with self._client as open_client: - return list(map(self.identifier_to_tuple, open_client.get_all_databases())) + return iter(list(map(self.identifier_to_tuple, open_client.get_all_databases()))) @override def load_namespace_properties(self, namespace: str | Identifier) -> Properties: diff --git a/pyiceberg/catalog/noop.py b/pyiceberg/catalog/noop.py index aeb3c72843..f1afae7a67 100644 --- a/pyiceberg/catalog/noop.py +++ b/pyiceberg/catalog/noop.py @@ -16,6 +16,7 @@ # under the License. from __future__ import annotations +from collections.abc import Iterator from typing import ( TYPE_CHECKING, ) @@ -124,11 +125,11 @@ def drop_namespace(self, namespace: str | Identifier) -> None: raise NotImplementedError @override - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: raise NotImplementedError @override - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: raise NotImplementedError @override @@ -142,7 +143,7 @@ def update_namespace_properties( raise NotImplementedError @override - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: raise NotImplementedError @override diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index d085c6fd87..b83b136049 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -17,6 +17,7 @@ from __future__ import annotations from collections import deque +from collections.abc import Iterator from enum import Enum from typing import ( TYPE_CHECKING, @@ -1038,7 +1039,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o @retry(**_RETRY_ARGS) @override - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: self._check_endpoint(Capability.V1_LIST_TABLES) namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace_concat = self._encode_namespace_path(namespace_tuple) @@ -1070,7 +1071,7 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]: break page_token = parsed.next_page_token - return tables + return iter(tables) @retry(**_RETRY_ARGS) @override @@ -1151,7 +1152,7 @@ def _remove_catalog_name_from_table_request_identifier(self, table_request: Comm @retry(**_RETRY_ARGS) @override - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: if Capability.V1_LIST_VIEWS not in self._supported_endpoints: return [] namespace_tuple = self._check_valid_namespace_identifier(namespace) @@ -1185,7 +1186,7 @@ def list_views(self, namespace: str | Identifier) -> list[Identifier]: break page_token = parsed.next_page_token - return views + return iter(views) @retry(**_RETRY_ARGS) @override @@ -1276,7 +1277,7 @@ def drop_namespace(self, namespace: str | Identifier) -> None: @retry(**_RETRY_ARGS) @override - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: self._check_endpoint(Capability.V1_LIST_NAMESPACES) namespace_tuple = self.identifier_to_tuple(namespace) @@ -1309,7 +1310,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: break page_token = parsed.next_page_token - return namespaces + return iter(namespaces) @retry(**_RETRY_ARGS) @override diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index 87446bd58b..0112dcb677 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -17,6 +17,7 @@ from __future__ import annotations +from collections.abc import Iterator from typing import ( TYPE_CHECKING, ) @@ -586,8 +587,12 @@ def drop_namespace(self, namespace: str | Identifier) -> None: raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") namespace_str = Catalog.namespace_to_string(namespace) - if tables := self.list_tables(namespace): - raise NamespaceNotEmptyError(f"Namespace {namespace_str} is not empty. {len(tables)} tables exist.") + tables_iter = self.list_tables(namespace) + try: + next(tables_iter) + raise NamespaceNotEmptyError(f"Namespace {namespace_str} is not empty.") + except StopIteration: + pass with Session(self.engine) as session: session.execute( @@ -599,14 +604,14 @@ def drop_namespace(self, namespace: str | Identifier) -> None: session.commit() @override - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: + def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: """List tables under the given namespace in the catalog. Args: namespace (str | Identifier): Namespace identifier to search. Returns: - List[Identifier]: list of table identifiers. + Iterator[Identifier]: an iterator of table identifiers. Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. @@ -618,17 +623,17 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]: stmt = select(IcebergTables).where(IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == namespace) with Session(self.engine) as session: result = session.scalars(stmt) - return [(Catalog.identifier_to_tuple(table.table_namespace) + (table.table_name,)) for table in result] + return iter([(Catalog.identifier_to_tuple(table.table_namespace) + (table.table_name,)) for table in result]) @override - def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: + def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. Args: namespace (str | Identifier): Namespace identifier to search. Returns: - List[Identifier]: a List of namespace identifiers. + Iterator[Identifier]: an iterator of namespace identifiers. Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. @@ -660,7 +665,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: } ) - return namespaces + return iter(namespaces) @override def load_namespace_properties(self, namespace: str | Identifier) -> Properties: @@ -755,7 +760,7 @@ def create_view( raise NotImplementedError @override - def list_views(self, namespace: str | Identifier) -> list[Identifier]: + def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: raise NotImplementedError @override diff --git a/pyiceberg/cli/console.py b/pyiceberg/cli/console.py index 3feed9fb21..a151e09d44 100644 --- a/pyiceberg/cli/console.py +++ b/pyiceberg/cli/console.py @@ -15,8 +15,9 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=broad-except,redefined-builtin,redefined-outer-name +import itertools import logging -from collections.abc import Callable +from collections.abc import Callable, Iterator from functools import wraps from typing import ( Any, @@ -33,6 +34,7 @@ from pyiceberg.io import WAREHOUSE from pyiceberg.table import TableProperties from pyiceberg.table.refs import SnapshotRef, SnapshotRefType +from pyiceberg.typedef import Identifier from pyiceberg.utils.properties import property_as_int @@ -130,14 +132,19 @@ def _catalog_and_output(ctx: Context) -> tuple[Catalog, Output]: def list(ctx: Context, parent: str | None) -> None: # pylint: disable=redefined-builtin """List tables or namespaces.""" catalog, output = _catalog_and_output(ctx) + identifiers: Iterator[Identifier] - identifiers = [] if parent: - # Do we have tables under parent namespace? - identifiers = catalog.list_tables(parent) - if not identifiers: - # List hierarchical namespaces if parent, root namespaces otherwise. - identifiers = catalog.list_namespaces(parent or ()) + # Do we have tables under parent namespace? Peek at first element. + tables = catalog.list_tables(parent) + try: + first = next(tables) + identifiers = itertools.chain([first], tables) + except StopIteration: + # No tables found; list hierarchical namespaces instead. + identifiers = catalog.list_namespaces(parent) + else: + identifiers = catalog.list_namespaces(()) output.identifiers(identifiers) diff --git a/pyiceberg/cli/output.py b/pyiceberg/cli/output.py index 332221008c..5584bd22a5 100644 --- a/pyiceberg/cli/output.py +++ b/pyiceberg/cli/output.py @@ -16,6 +16,7 @@ # under the License. import json from abc import ABC, abstractmethod +from collections.abc import Iterable from typing import ( Any, ) @@ -40,7 +41,7 @@ class Output(ABC): def exception(self, ex: Exception) -> None: ... @abstractmethod - def identifiers(self, identifiers: list[Identifier]) -> None: ... + def identifiers(self, identifiers: Iterable[Identifier]) -> None: ... @abstractmethod def describe_table(self, table: Table) -> None: ... @@ -88,7 +89,7 @@ def exception(self, ex: Exception) -> None: else: Console(stderr=True).print(ex) - def identifiers(self, identifiers: list[Identifier]) -> None: + def identifiers(self, identifiers: Iterable[Identifier]) -> None: table = self._table for identifier in identifiers: table.add_row(".".join(identifier)) @@ -199,7 +200,7 @@ def _out(self, d: Any) -> None: def exception(self, ex: Exception) -> None: self._out({"type": ex.__class__.__name__, "message": str(ex)}) - def identifiers(self, identifiers: list[Identifier]) -> None: + def identifiers(self, identifiers: Iterable[Identifier]) -> None: self._out([".".join(identifier) for identifier in identifiers]) def describe_table(self, table: Table) -> None: diff --git a/tests/catalog/integration_test_dynamodb.py b/tests/catalog/integration_test_dynamodb.py index 6ae14bca06..87988f9237 100644 --- a/tests/catalog/integration_test_dynamodb.py +++ b/tests/catalog/integration_test_dynamodb.py @@ -119,7 +119,7 @@ def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, databas test_catalog.create_namespace(database_name) for table_name in table_list: test_catalog.create_table((database_name, table_name), table_schema_nested) - identifier_list = test_catalog.list_tables(database_name) + identifier_list = list(test_catalog.list_tables(database_name)) assert len(identifier_list) == LIST_TEST_NUMBER for table_name in table_list: assert (database_name, table_name) in identifier_list @@ -207,10 +207,10 @@ def test_create_namespace_with_comment_and_location(test_catalog: Catalog, datab def test_list_namespaces(test_catalog: Catalog, database_list: list[str]) -> None: for database_name in database_list: test_catalog.create_namespace(database_name) - db_list = test_catalog.list_namespaces() + db_list = list(test_catalog.list_namespaces()) for database_name in database_list: assert (database_name,) in db_list - assert len(test_catalog.list_namespaces(list(database_list)[0])) == 0 + assert len(list(test_catalog.list_namespaces(list(database_list)[0]))) == 0 def test_drop_namespace(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py index c429770268..c73035b48e 100644 --- a/tests/catalog/integration_test_glue.py +++ b/tests/catalog/integration_test_glue.py @@ -227,7 +227,7 @@ def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, databas test_catalog.create_namespace(database_name) for table_name in table_list: test_catalog.create_table((database_name, table_name), table_schema_nested) - identifier_list = test_catalog.list_tables(database_name) + identifier_list = list(test_catalog.list_tables(database_name)) assert len(identifier_list) == LIST_TEST_NUMBER for table_name in table_list: assert (database_name, table_name) in identifier_list @@ -316,10 +316,10 @@ def test_create_namespace_with_comment_and_location(test_catalog: Catalog, datab def test_list_namespaces(test_catalog: Catalog, database_list: list[str]) -> None: for database_name in database_list: test_catalog.create_namespace(database_name) - db_list = test_catalog.list_namespaces() + db_list = list(test_catalog.list_namespaces()) for database_name in database_list: assert (database_name,) in db_list - assert len(test_catalog.list_namespaces(list(database_list)[0])) == 0 + assert len(list(test_catalog.list_namespaces(list(database_list)[0]))) == 0 def test_drop_namespace(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: diff --git a/tests/catalog/test_bigquery_metastore.py b/tests/catalog/test_bigquery_metastore.py index c8c7584262..bfca9ec081 100644 --- a/tests/catalog/test_bigquery_metastore.py +++ b/tests/catalog/test_bigquery_metastore.py @@ -151,7 +151,7 @@ def test_list_tables(mocker: MockFixture, gcp_dataset_name: str) -> None: catalog_name = "test_catalog" test_catalog = BigQueryMetastoreCatalog(catalog_name, **{"gcp.bigquery.project-id": "my-project"}) - tables = test_catalog.list_tables(gcp_dataset_name) + tables = list(test_catalog.list_tables(gcp_dataset_name)) # Assert that all tables returned by client.list_tables are listed assert len(tables) == 2 @@ -173,7 +173,7 @@ def test_list_namespaces(mocker: MockFixture) -> None: catalog_name = "test_catalog" test_catalog = BigQueryMetastoreCatalog(catalog_name, **{"gcp.bigquery.project-id": "my-project"}) - namespaces = test_catalog.list_namespaces() + namespaces = list(test_catalog.list_namespaces()) assert len(namespaces) == 2 assert ("dataset1",) in namespaces assert ("dataset2",) in namespaces diff --git a/tests/catalog/test_catalog_behaviors.py b/tests/catalog/test_catalog_behaviors.py index b859e2d541..bf92ef10e6 100644 --- a/tests/catalog/test_catalog_behaviors.py +++ b/tests/catalog/test_catalog_behaviors.py @@ -517,11 +517,11 @@ def test_list_tables( catalog.create_namespace(namespace_2) catalog.create_table(test_table_identifier, table_schema_nested) catalog.create_table(another_table_identifier, table_schema_nested) - identifier_list = catalog.list_tables(namespace_1) + identifier_list = list(catalog.list_tables(namespace_1)) assert len(identifier_list) == 1 assert test_table_identifier in identifier_list - identifier_list = catalog.list_tables(namespace_2) + identifier_list = list(catalog.list_tables(namespace_2)) assert len(identifier_list) == 1 assert another_table_identifier in identifier_list @@ -532,8 +532,8 @@ def test_list_tables_under_a_namespace(catalog: Catalog, table_schema_nested: Sc catalog.create_table(test_table_identifier, table_schema_nested) new_namespace = ("new", "namespace") catalog.create_namespace(new_namespace) - all_tables = catalog.list_tables(namespace=namespace) - new_namespace_tables = catalog.list_tables(new_namespace) + all_tables = list(catalog.list_tables(namespace=namespace)) + new_namespace_tables = list(catalog.list_tables(new_namespace)) assert all_tables assert test_table_identifier in all_tables assert new_namespace_tables == [] @@ -541,7 +541,7 @@ def test_list_tables_under_a_namespace(catalog: Catalog, table_schema_nested: Sc def test_list_tables_when_missing_namespace(catalog: Catalog, test_namespace: Identifier) -> None: with pytest.raises(NoSuchNamespaceError): - catalog.list_tables(test_namespace) + list(catalog.list_tables(test_namespace)) # Commit table tests @@ -1002,7 +1002,7 @@ def test_create_namespace_with_comment_and_location(catalog: Catalog, test_names "location": test_location, } catalog.create_namespace(namespace=test_namespace, properties=test_properties) - loaded_database_list = catalog.list_namespaces() + loaded_database_list = list(catalog.list_namespaces()) assert Catalog.identifier_to_tuple(test_namespace)[:1] in loaded_database_list properties = catalog.load_namespace_properties(test_namespace) assert properties["comment"] == "this is a test description" @@ -1088,17 +1088,17 @@ def test_list_namespaces(catalog: Catalog) -> None: if not catalog.namespace_exists(namespace): catalog.create_namespace(namespace) - ns_list = catalog.list_namespaces() + ns_list = list(catalog.list_namespaces()) for ns in [("db",), ("db%",), ("db2",)]: assert ns in ns_list - ns_list = catalog.list_namespaces("db") + ns_list = list(catalog.list_namespaces("db")) assert sorted(ns_list) == [("db", "ns1"), ("db", "ns2")] - ns_list = catalog.list_namespaces("db.ns1") + ns_list = list(catalog.list_namespaces("db.ns1")) assert sorted(ns_list) == [("db", "ns1", "ns2")] - ns_list = catalog.list_namespaces("db.ns1.ns2") + ns_list = list(catalog.list_namespaces("db.ns1.ns2")) assert len(ns_list) == 0 @@ -1108,14 +1108,14 @@ def test_list_namespaces_fuzzy_match(catalog: Catalog) -> None: if not catalog.namespace_exists(namespace): catalog.create_namespace(namespace) - assert catalog.list_namespaces("db.ns1") == [("db", "ns1", "ns2")] + assert list(catalog.list_namespaces("db.ns1")) == [("db", "ns1", "ns2")] - assert catalog.list_namespaces("db_.ns1") == [("db_", "ns1", "ns2")] + assert list(catalog.list_namespaces("db_.ns1")) == [("db_", "ns1", "ns2")] def test_list_non_existing_namespaces(catalog: Catalog) -> None: with pytest.raises(NoSuchNamespaceError): - catalog.list_namespaces("does_not_exist") + list(catalog.list_namespaces("does_not_exist")) # Update namespace properties tests diff --git a/tests/catalog/test_dynamodb.py b/tests/catalog/test_dynamodb.py index 5933e7d472..4bfac46b79 100644 --- a/tests/catalog/test_dynamodb.py +++ b/tests/catalog/test_dynamodb.py @@ -398,7 +398,7 @@ def test_list_tables( test_catalog.create_namespace(namespace=database_name) for table_name in table_list: test_catalog.create_table((database_name, table_name), table_schema_nested) - loaded_table_list = test_catalog.list_tables(database_name) + loaded_table_list = list(test_catalog.list_tables(database_name)) for table_name in table_list: assert (database_name, table_name) in loaded_table_list @@ -408,7 +408,7 @@ def test_list_namespaces(_bucket_initialize: None, database_list: list[str]) -> test_catalog = DynamoDbCatalog("test_ddb_catalog") for database_name in database_list: test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) for database_name in database_list: assert (database_name,) in loaded_database_list @@ -417,7 +417,7 @@ def test_list_namespaces(_bucket_initialize: None, database_list: list[str]) -> def test_create_namespace_no_properties(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list properties = test_catalog.load_namespace_properties(database_name) @@ -433,7 +433,7 @@ def test_create_namespace_with_comment_and_location(_bucket_initialize: None, da } test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(namespace=database_name, properties=test_properties) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list properties = test_catalog.load_namespace_properties(database_name) @@ -445,7 +445,7 @@ def test_create_namespace_with_comment_and_location(_bucket_initialize: None, da def test_create_duplicated_namespace(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list with pytest.raises(NamespaceAlreadyExistsError): @@ -456,11 +456,11 @@ def test_create_duplicated_namespace(_bucket_initialize: None, database_name: st def test_drop_namespace(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list test_catalog.drop_namespace(database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 0 @@ -472,7 +472,7 @@ def test_drop_non_empty_namespace( test_catalog = DynamoDbCatalog("test_ddb_catalog", **{"warehouse": f"s3://{BUCKET_NAME}", "s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name) test_catalog.create_table(identifier, table_schema_nested) - assert len(test_catalog.list_tables(database_name)) == 1 + assert len(list(test_catalog.list_tables(database_name))) == 1 with pytest.raises(NamespaceNotEmptyError): test_catalog.drop_namespace(database_name) diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index c8da49a87e..b2e1796906 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -516,7 +516,7 @@ def test_list_tables( for table_name in table_list: test_catalog.create_table((database_name, table_name), table_schema_nested) - loaded_table_list = test_catalog.list_tables(database_name) + loaded_table_list = list(test_catalog.list_tables(database_name)) assert (database_name, non_iceberg_table_name) not in loaded_table_list assert (database_name, non_table_type_table_name) not in loaded_table_list @@ -529,7 +529,7 @@ def test_list_namespaces(_bucket_initialize: None, moto_endpoint_url: str, datab test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) for database_name in database_list: test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) for database_name in database_list: assert (database_name,) in loaded_database_list @@ -538,7 +538,7 @@ def test_list_namespaces(_bucket_initialize: None, moto_endpoint_url: str, datab def test_create_namespace_no_properties(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list properties = test_catalog.load_namespace_properties(database_name) @@ -554,7 +554,7 @@ def test_create_namespace_with_comment_and_location(_bucket_initialize: None, mo } test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name, properties=test_properties) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list properties = test_catalog.load_namespace_properties(database_name) @@ -566,7 +566,7 @@ def test_create_namespace_with_comment_and_location(_bucket_initialize: None, mo def test_create_duplicated_namespace(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list with pytest.raises(NamespaceAlreadyExistsError): @@ -577,11 +577,11 @@ def test_create_duplicated_namespace(_bucket_initialize: None, moto_endpoint_url def test_drop_namespace(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 1 assert (database_name,) in loaded_database_list test_catalog.drop_namespace(database_name) - loaded_database_list = test_catalog.list_namespaces() + loaded_database_list = list(test_catalog.list_namespaces()) assert len(loaded_database_list) == 0 @@ -593,7 +593,7 @@ def test_drop_non_empty_namespace( test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) test_catalog.create_namespace(namespace=database_name) test_catalog.create_table(identifier, table_schema_nested) - assert len(test_catalog.list_tables(database_name)) == 1 + assert len(list(test_catalog.list_tables(database_name))) == 1 with pytest.raises(NamespaceNotEmptyError): test_catalog.drop_namespace(database_name) diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index 09bb5ab920..1b2ee7f744 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -1043,7 +1043,7 @@ def test_list_tables(hive_table: HiveTable) -> None: catalog._client.__enter__().get_all_tables.return_value = ["table1", "table2", "table3", "table4"] catalog._client.__enter__().get_table_objects_by_name.return_value = [tbl1, tbl2, tbl3, tbl4] - got_tables = catalog.list_tables("database") + got_tables = list(catalog.list_tables("database")) assert got_tables == [("database", "table1"), ("database", "table2")] catalog._client.__enter__().get_all_tables.assert_called_with(db_name="database") catalog._client.__enter__().get_table_objects_by_name.assert_called_with( @@ -1057,7 +1057,7 @@ def test_list_namespaces() -> None: catalog._client = MagicMock() catalog._client.__enter__().get_all_databases.return_value = ["namespace1", "namespace2"] - assert catalog.list_namespaces() == [("namespace1",), ("namespace2",)] + assert list(catalog.list_namespaces()) == [("namespace1",), ("namespace2",)] catalog._client.__enter__().get_all_databases.assert_called() diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 1eb9f26a56..b4e352436c 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -478,7 +478,7 @@ def test_list_tables_200(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - assert RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace) == [("examples", "fooshare")] + assert list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace)) == [("examples", "fooshare")] def test_list_tables_paginated_200(rest_mock: Mocker) -> None: @@ -520,7 +520,7 @@ def test_list_tables_paginated_200(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace) + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace)) assert result == [ ("examples", "table1"), ("examples", "table2"), @@ -557,7 +557,7 @@ def test_list_tables_paginated_200_none_next_page_token(rest_mock: Mocker) -> No request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace) + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace)) assert result == [ ("examples", "table1"), ("examples", "table2"), @@ -579,7 +579,7 @@ def test_list_tables_page_size(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_tables(namespace) + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_tables(namespace)) assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/examples/tables?pageSize=100" assert result == [ @@ -597,9 +597,9 @@ def test_list_tables_200_sigv4(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - assert RestCatalog("rest", **{"uri": TEST_URI, "token": TEST_TOKEN, "rest.sigv4-enabled": "true"}).list_tables(namespace) == [ - ("examples", "fooshare") - ] + assert list( + RestCatalog("rest", **{"uri": TEST_URI, "token": TEST_TOKEN, "rest.sigv4-enabled": "true"}).list_tables(namespace) + ) == [("examples", "fooshare")] assert rest_mock.called @@ -734,7 +734,7 @@ def test_list_tables_404(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) with pytest.raises(NoSuchNamespaceError) as e: - RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace) + list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace)) assert "Namespace does not exist" in str(e.value) @@ -747,7 +747,7 @@ def test_list_views_200(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - assert RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace) == [("examples", "fooshare")] + assert list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace)) == [("examples", "fooshare")] def test_list_views_paginated_200(rest_mock: Mocker) -> None: @@ -789,7 +789,7 @@ def test_list_views_paginated_200(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace) + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace)) assert result == [ ("examples", "view1"), ("examples", "view2"), @@ -826,7 +826,7 @@ def test_list_views_paginated_200_none_next_page_token(rest_mock: Mocker) -> Non request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace) + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace)) assert result == [ ("examples", "view1"), ("examples", "view2"), @@ -848,7 +848,7 @@ def test_list_views_page_size(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_views(namespace) + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_views(namespace)) assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/examples/views?pageSize=100" assert result == [ @@ -872,7 +872,7 @@ def test_list_views_invalid_page_size(rest_mock: Mocker) -> None: ) with pytest.raises(ValueError) as e: - RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "0"}).list_views(namespace) + list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "0"}).list_views(namespace)) assert str(e.value) == "rest-page-size must be a positive integer" @@ -885,9 +885,9 @@ def test_list_views_200_sigv4(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - assert RestCatalog("rest", **{"uri": TEST_URI, "token": TEST_TOKEN, "rest.sigv4-enabled": "true"}).list_views(namespace) == [ - ("examples", "fooshare") - ] + assert list( + RestCatalog("rest", **{"uri": TEST_URI, "token": TEST_TOKEN, "rest.sigv4-enabled": "true"}).list_views(namespace) + ) == [("examples", "fooshare")] assert rest_mock.called @@ -906,7 +906,7 @@ def test_list_views_404(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) with pytest.raises(NoSuchNamespaceError) as e: - RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace) + list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_views(namespace)) assert "Namespace does not exist" in str(e.value) @@ -953,7 +953,7 @@ def test_list_namespaces_200(rest_mock: Mocker) -> None: status_code=200, request_headers=TEST_HEADERS, ) - assert RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces() == [ + assert list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces()) == [ ("default",), ("examples",), ("fokko",), @@ -968,7 +968,7 @@ def test_list_namespace_with_parent_200(rest_mock: Mocker) -> None: status_code=200, request_headers=TEST_HEADERS, ) - assert RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("accounting",)) == [ + assert list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("accounting",))) == [ ("accounting", "tax"), ] @@ -1004,7 +1004,7 @@ def test_list_namespaces_paginated_200(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces() + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces()) assert result == [ ("ns1",), ("ns2",), @@ -1035,7 +1035,7 @@ def test_list_namespaces_with_parent_paginated_200(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("accounting",)) + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("accounting",))) assert result == [ ("accounting", "tax"), ("accounting", "payroll"), @@ -1064,7 +1064,7 @@ def test_list_namespaces_paginated_200_none_next_page_token(rest_mock: Mocker) - request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces() + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces()) assert result == [ ("ns1",), ("ns2",), @@ -1082,7 +1082,7 @@ def test_list_namespaces_page_size(rest_mock: Mocker) -> None: request_headers=TEST_HEADERS, ) - result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_namespaces() + result = list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_namespaces()) assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces?pageSize=100" assert result == [ @@ -1106,7 +1106,7 @@ def test_list_namespace_with_parent_404(rest_mock: Mocker) -> None: ) with pytest.raises(NoSuchNamespaceError): - RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("some_namespace",)) + list(RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("some_namespace",))) @pytest.mark.filterwarnings( @@ -1161,7 +1161,7 @@ def test_list_namespaces_token_expired_success_on_retries(rest_mock: Mocker, sta # which results in the token being refreshed twice when the RestCatalog is initialized. assert tokens.call_count == 2 - assert catalog.list_namespaces() == [ + assert list(catalog.list_namespaces()) == [ ("default",), ("examples",), ("fokko",), @@ -1170,7 +1170,7 @@ def test_list_namespaces_token_expired_success_on_retries(rest_mock: Mocker, sta assert namespaces.call_count == 2 assert tokens.call_count == 3 - assert catalog.list_namespaces() == [ + assert list(catalog.list_namespaces()) == [ ("default",), ("examples",), ("fokko",), diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 4188ad83db..fa0867770c 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -189,7 +189,7 @@ def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, databas test_catalog.create_namespace(database_name) for table_name in table_list: test_catalog.create_table((database_name, table_name), table_schema_nested) - identifier_list = test_catalog.list_tables(database_name) + identifier_list = list(test_catalog.list_tables(database_name)) assert len(identifier_list) == len(table_list) for table_name in table_list: assert (database_name, table_name) in identifier_list @@ -463,10 +463,10 @@ def test_create_namespace_with_comment(test_catalog: Catalog, database_name: str def test_list_namespaces(test_catalog: Catalog, database_list: list[str]) -> None: for database_name in database_list: test_catalog.create_namespace(database_name) - db_list = test_catalog.list_namespaces() + db_list = list(test_catalog.list_namespaces()) for database_name in database_list: assert (database_name,) in db_list - assert len(test_catalog.list_namespaces(list(database_list)[0])) == 0 + assert len(list(test_catalog.list_namespaces(list(database_list)[0]))) == 0 @pytest.mark.integration From 54d8f646b5a1bfd36d8bb38aa06e3b1bbec3bbf6 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 07:53:04 +0530 Subject: [PATCH 2/8] REST catalog: implement lazy pagination generators for list_* methods Replace the collect-then-return approach with proper generator functions that yield results page by page. Extract per-page fetch logic into dedicated helper methods (_fetch_tables_page, _fetch_views_page, _fetch_namespaces_page) decorated with @retry so authentication retries work correctly per page. Co-authored-by: Yuya Ebihara --- pyiceberg/catalog/rest/__init__.py | 67 ++++++++++++++---------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index b83b136049..8b42e775b3 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -1038,6 +1038,14 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o return self._response_to_table(self.identifier_to_tuple(identifier), table_response) @retry(**_RETRY_ARGS) + def _fetch_tables_page(self, url: str, params: dict[str, str]) -> ListTablesResponse: + response = self._session.get(url, params=params) + try: + response.raise_for_status() + except HTTPError as exc: + _handle_non_200_response(exc, {404: NoSuchNamespaceError}) + return ListTablesResponse.model_validate_json(response.text) + @override def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: self._check_endpoint(Capability.V1_LIST_TABLES) @@ -1052,27 +1060,18 @@ def list_tables(self, namespace: str | Identifier) -> Iterator[Identifier]: raise ValueError(f"{PAGE_SIZE} must be a positive integer") params["pageSize"] = str(page_size) - tables: list[Identifier] = [] page_token: str | None = None while True: if page_token: params["pageToken"] = page_token - response = self._session.get(url, params=params) - try: - response.raise_for_status() - except HTTPError as exc: - _handle_non_200_response(exc, {404: NoSuchNamespaceError}) - - parsed = ListTablesResponse.model_validate_json(response.text) - tables.extend([(*table.namespace, table.name) for table in parsed.identifiers]) + parsed = self._fetch_tables_page(url, params) + yield from [(*table.namespace, table.name) for table in parsed.identifiers] if not parsed.next_page_token: break page_token = parsed.next_page_token - return iter(tables) - @retry(**_RETRY_ARGS) @override def load_table(self, identifier: str | Identifier) -> Table: @@ -1151,10 +1150,18 @@ def _remove_catalog_name_from_table_request_identifier(self, table_request: Comm return table_request @retry(**_RETRY_ARGS) + def _fetch_views_page(self, url: str, params: dict[str, str]) -> ListViewsResponse: + response = self._session.get(url, params=params) + try: + response.raise_for_status() + except HTTPError as exc: + _handle_non_200_response(exc, {404: NoSuchNamespaceError}) + return ListViewsResponse.model_validate_json(response.text) + @override def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: if Capability.V1_LIST_VIEWS not in self._supported_endpoints: - return [] + return namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace_concat = self._encode_namespace_path(namespace_tuple) url = self.url(Endpoints.list_views, namespace=namespace_concat) @@ -1166,28 +1173,18 @@ def list_views(self, namespace: str | Identifier) -> Iterator[Identifier]: raise ValueError(f"{PAGE_SIZE} must be a positive integer") params["pageSize"] = str(page_size) - views: list[Identifier] = [] page_token: str | None = None while True: if page_token: params["pageToken"] = page_token - - response = self._session.get(url, params=params) - try: - response.raise_for_status() - except HTTPError as exc: - _handle_non_200_response(exc, {404: NoSuchNamespaceError}) - - parsed = ListViewsResponse.model_validate_json(response.text) - views.extend([(*view.namespace, view.name) for view in parsed.identifiers]) + parsed = self._fetch_views_page(url, params) + yield from [(*view.namespace, view.name) for view in parsed.identifiers] if not parsed.next_page_token: break page_token = parsed.next_page_token - return iter(views) - @retry(**_RETRY_ARGS) @override def load_view(self, identifier: str | Identifier) -> View: @@ -1276,6 +1273,14 @@ def drop_namespace(self, namespace: str | Identifier) -> None: _handle_non_200_response(exc, {404: NoSuchNamespaceError, 409: NamespaceNotEmptyError}) @retry(**_RETRY_ARGS) + def _fetch_namespaces_page(self, params: dict[str, str]) -> ListNamespaceResponse: + response = self._session.get(self.url(Endpoints.list_namespaces), params=params) + try: + response.raise_for_status() + except HTTPError as exc: + _handle_non_200_response(exc, {404: NoSuchNamespaceError}) + return ListNamespaceResponse.model_validate_json(response.text) + @override def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifier]: self._check_endpoint(Capability.V1_LIST_NAMESPACES) @@ -1288,7 +1293,6 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifi raise ValueError(f"{PAGE_SIZE} must be a positive integer") params["pageSize"] = str(page_size) - namespaces: list[Identifier] = [] page_token: str | None = None while True: @@ -1296,22 +1300,13 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> Iterator[Identifi params["parent"] = self._encode_namespace_path(namespace_tuple) if page_token: params["pageToken"] = page_token - response = self._session.get(self.url(Endpoints.list_namespaces), params=params) - - try: - response.raise_for_status() - except HTTPError as exc: - _handle_non_200_response(exc, {404: NoSuchNamespaceError}) - - parsed = ListNamespaceResponse.model_validate_json(response.text) - namespaces.extend(parsed.namespaces) + parsed = self._fetch_namespaces_page(params) + yield from parsed.namespaces if not parsed.next_page_token: break page_token = parsed.next_page_token - return iter(namespaces) - @retry(**_RETRY_ARGS) @override def load_namespace_properties(self, namespace: str | Identifier) -> Properties: From 5f79b474ec1bcb2229a5bf2f3011ae56ecea7777 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 08:49:32 +0530 Subject: [PATCH 3/8] fix: decode JSON string and support flat env-var properties for REST auth config Fixes #3422. RestCatalog._create_session() expected auth to be a dict, but environment variables always produce string values. This caused auth initialization to fail for all pluggable auth types (basic, oauth2, google, entra, custom) when configured via PYICEBERG_CATALOG____AUTH. Two complementary fixes: 1. JSON string decode: if the 'auth' property is a string, JSON-parse it before processing. This supports: export PYICEBERG_CATALOG__REST__AUTH='{"type":"oauth2",...}' 2. Flat env-var property support: if 'auth' is absent but 'auth.type' is present, build the auth config from flat 'auth.*' properties. This is the canonical env-var style that avoids JSON-in-env quoting issues: export PYICEBERG_CATALOG__REST__AUTH__TYPE=oauth2 export PYICEBERG_CATALOG__REST__AUTH__OAUTH2__CLIENT_ID=id export PYICEBERG_CATALOG__REST__AUTH__OAUTH2__CLIENT_SECRET=secret export PYICEBERG_CATALOG__REST__AUTH__OAUTH2__TOKEN_URL=https://... Kebab-case keys (e.g. 'client-id') produced by the env-var parser are normalised to snake_case ('client_id') to match AuthManager constructor parameters. Regression tests added for both code paths (basic and oauth2) as well as invalid JSON detection. --- pyiceberg/catalog/rest/__init__.py | 35 +++++++- tests/catalog/test_rest.py | 132 +++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index d085c6fd87..76dea6cd0c 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -16,6 +16,7 @@ # under the License. from __future__ import annotations +import json from collections import deque from enum import Enum from typing import ( @@ -435,7 +436,16 @@ def _create_session(self) -> Session: elif ssl_client_cert := ssl_client.get(CERT): session.cert = ssl_client_cert - if auth_config := self.properties.get(AUTH): + if raw_auth := self.properties.get(AUTH): + # When auth is configured via an environment variable (e.g. PYICEBERG_CATALOG____AUTH), + # the value arrives as a JSON string rather than a dict. Decode it before processing. + if isinstance(raw_auth, str): + try: + auth_config: dict[str, Any] = json.loads(raw_auth) + except json.JSONDecodeError as e: + raise ValueError(f"Failed to parse auth configuration as JSON: {raw_auth!r}") from e + else: + auth_config = raw_auth auth_type = auth_config.get("type") if auth_type is None: raise ValueError("auth.type must be defined") @@ -448,6 +458,29 @@ def _create_session(self) -> Session: if auth_type != CUSTOM and auth_impl: raise ValueError("auth.impl can only be specified when using custom auth.type") + self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) + session.auth = AuthManagerAdapter(self._auth_manager) + elif auth_type := self.properties.get(f"{AUTH}.type"): + # Support flattened env-var style configuration: + # PYICEBERG_CATALOG____AUTH__TYPE=oauth2 + # PYICEBERG_CATALOG____AUTH__OAUTH2__CLIENT_ID=id + # The env-var parser maps these to flat properties like "auth.type" and "auth.oauth2.client-id". + # Key names are converted from kebab-case to snake_case to match AuthManager constructor parameters. + auth_impl = self.properties.get(f"{AUTH}.impl") + + if auth_type == CUSTOM and not auth_impl: + raise ValueError("auth.impl must be specified when using custom auth.type") + + if auth_type != CUSTOM and auth_impl: + raise ValueError("auth.impl can only be specified when using custom auth.type") + + type_prefix = f"{AUTH}.{auth_type}." + auth_type_config = { + k[len(type_prefix):].replace("-", "_"): v + for k, v in self.properties.items() + if k.startswith(type_prefix) + } + self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) session.auth = AuthManagerAdapter(self._auth_manager) else: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 1eb9f26a56..615e579d50 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -3167,3 +3167,135 @@ def test_load_table_without_storage_credentials( ) assert actual.metadata.model_dump() == expected.metadata.model_dump() assert actual == expected + + +# Tests for issue #3422: REST catalog auth cannot be configured via environment +# variables unless auth JSON strings are decoded. + + +def test_rest_catalog_with_basic_auth_as_json_string(rest_mock: Mocker) -> None: + """When auth arrives as a JSON string (e.g. from an environment variable), it should be decoded correctly.""" + import json + + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + auth_dict = { + "type": "basic", + "basic": { + "username": "one", + "password": "two", + }, + } + catalog_properties = { + "uri": TEST_URI, + "auth": json.dumps(auth_dict), + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + encoded_user_pass = base64.b64encode(b"one:two").decode() + expected_auth_header = f"Basic {encoded_user_pass}" + assert rest_mock.last_request.headers["Authorization"] == expected_auth_header + + +def test_rest_catalog_with_oauth2_auth_as_json_string(requests_mock: Mocker) -> None: + """OAuth2 auth configured as a JSON string (e.g. from an environment variable) should work correctly.""" + import json + + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={ + "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", + "token_type": "Bearer", + "expires_in": 3600, + }, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + auth_dict = { + "type": "oauth2", + "oauth2": { + "client_id": "some_client_id", + "client_secret": "some_client_secret", + "token_url": f"{TEST_URI}oauth2/token", + }, + } + catalog_properties = { + "uri": TEST_URI, + "auth": json.dumps(auth_dict), + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + +def test_rest_catalog_with_invalid_json_auth_string() -> None: + """An auth value that is a string but not valid JSON should raise a descriptive ValueError.""" + with pytest.raises(ValueError, match="Failed to parse auth configuration as JSON"): + RestCatalog("rest", uri=TEST_URI, auth="not-valid-json") # type: ignore + + +def test_rest_catalog_with_basic_auth_flat_properties(rest_mock: Mocker) -> None: + """Auth configured via flattened env-var properties (e.g. PYICEBERG_CATALOG____AUTH__TYPE=basic) + should initialise the correct AuthManager. + + The env-var parser converts PYICEBERG_CATALOG____AUTH__TYPE=basic into the flat property + 'auth.type' = 'basic' and PYICEBERG_CATALOG____AUTH__BASIC__USERNAME=one into + 'auth.basic.username' = 'one'. + """ + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog_properties = { + "uri": TEST_URI, + # Flat properties as produced by the env-var config parser + "auth.type": "basic", + "auth.basic.username": "one", + "auth.basic.password": "two", + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + encoded_user_pass = base64.b64encode(b"one:two").decode() + expected_auth_header = f"Basic {encoded_user_pass}" + assert rest_mock.last_request.headers["Authorization"] == expected_auth_header + + +def test_rest_catalog_with_oauth2_auth_flat_properties(requests_mock: Mocker) -> None: + """OAuth2 auth configured via flattened env-var properties should work correctly. + + PYICEBERG_CATALOG____AUTH__OAUTH2__CLIENT_ID maps to 'auth.oauth2.client-id'. + The dash is normalised to an underscore ('client_id') when forwarding to OAuth2AuthManager. + """ + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={ + "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", + "token_type": "Bearer", + "expires_in": 3600, + }, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog_properties = { + "uri": TEST_URI, + # Flat properties as produced by the env-var config parser (note: kebab-case keys) + "auth.type": "oauth2", + "auth.oauth2.client-id": "some_client_id", + "auth.oauth2.client-secret": "some_client_secret", + "auth.oauth2.token-url": f"{TEST_URI}oauth2/token", + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI From b2a2e462fee048f4f557ba60b5b423d5e51f8477 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 08:56:42 +0530 Subject: [PATCH 4/8] style: apply ruff format to dict comprehension --- pyiceberg/catalog/rest/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 76dea6cd0c..c2b7b37a17 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -476,9 +476,7 @@ def _create_session(self) -> Session: type_prefix = f"{AUTH}.{auth_type}." auth_type_config = { - k[len(type_prefix):].replace("-", "_"): v - for k, v in self.properties.items() - if k.startswith(type_prefix) + k[len(type_prefix) :].replace("-", "_"): v for k, v in self.properties.items() if k.startswith(type_prefix) } self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) From 11a28349ec5427fc94cc47f8298abec34af53830 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 09:02:38 +0530 Subject: [PATCH 5/8] fix: remove unused type: ignore comments flagged by mypy --- tests/catalog/test_rest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 615e579d50..f642c51bd3 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -3193,7 +3193,7 @@ def test_rest_catalog_with_basic_auth_as_json_string(rest_mock: Mocker) -> None: "uri": TEST_URI, "auth": json.dumps(auth_dict), } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI encoded_user_pass = base64.b64encode(b"one:two").decode() @@ -3231,14 +3231,14 @@ def test_rest_catalog_with_oauth2_auth_as_json_string(requests_mock: Mocker) -> "uri": TEST_URI, "auth": json.dumps(auth_dict), } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI def test_rest_catalog_with_invalid_json_auth_string() -> None: """An auth value that is a string but not valid JSON should raise a descriptive ValueError.""" with pytest.raises(ValueError, match="Failed to parse auth configuration as JSON"): - RestCatalog("rest", uri=TEST_URI, auth="not-valid-json") # type: ignore + RestCatalog("rest", uri=TEST_URI, auth="not-valid-json") def test_rest_catalog_with_basic_auth_flat_properties(rest_mock: Mocker) -> None: @@ -3261,7 +3261,7 @@ def test_rest_catalog_with_basic_auth_flat_properties(rest_mock: Mocker) -> None "auth.basic.username": "one", "auth.basic.password": "two", } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI encoded_user_pass = base64.b64encode(b"one:two").decode() @@ -3297,5 +3297,5 @@ def test_rest_catalog_with_oauth2_auth_flat_properties(requests_mock: Mocker) -> "auth.oauth2.client-secret": "some_client_secret", "auth.oauth2.token-url": f"{TEST_URI}oauth2/token", } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI From d27fd6a4d2b0106b5c2d363ecda50dbba2decb8f Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Fri, 29 May 2026 09:31:42 +0530 Subject: [PATCH 6/8] Manual changes: lazy pagination and paginated tests verified --- pyiceberg/catalog/rest/__init__.py | 50 ++++---- tests/catalog/test_rest.py | 196 ++++++++++++++++------------- 2 files changed, 134 insertions(+), 112 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 4111c024e3..566784ac8b 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -437,30 +437,31 @@ def _create_session(self) -> Session: elif ssl_client_cert := ssl_client.get(CERT): session.cert = ssl_client_cert + auth_type = None + source_env_var = None + auth_config = None if raw_auth := self.properties.get(AUTH): # When auth is configured via an environment variable (e.g. PYICEBERG_CATALOG____AUTH), # the value arrives as a JSON string rather than a dict. Decode it before processing. + source_env_var = f"PYICEBERG_CATALOG__{self.name.upper()}__AUTH" if isinstance(raw_auth, str): try: - auth_config: dict[str, Any] = json.loads(raw_auth) + auth_config = json.loads(raw_auth) except json.JSONDecodeError as e: - raise ValueError(f"Failed to parse auth configuration as JSON: {raw_auth!r}") from e + raise ValueError(f"Failed to parse auth configuration as JSON from {source_env_var}: {raw_auth!r}") from e + if not isinstance(auth_config, dict): + raise ValueError( + f"Auth configuration loaded from {source_env_var} must be a JSON object (dict), got {type(auth_config).__name__}: {raw_auth!r}" + ) else: + if not isinstance(raw_auth, dict): + raise ValueError( + f"Auth configuration for {source_env_var} must be a dict or JSON string, got {type(raw_auth).__name__}: {raw_auth!r}" + ) auth_config = raw_auth auth_type = auth_config.get("type") - if auth_type is None: - raise ValueError("auth.type must be defined") - auth_type_config = auth_config.get(auth_type, {}) + auth_type_config = auth_config.get(auth_type, {}) if auth_type else None auth_impl = auth_config.get("impl") - - if auth_type == CUSTOM and not auth_impl: - raise ValueError("auth.impl must be specified when using custom auth.type") - - if auth_type != CUSTOM and auth_impl: - raise ValueError("auth.impl can only be specified when using custom auth.type") - - self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) - session.auth = AuthManagerAdapter(self._auth_manager) elif auth_type := self.properties.get(f"{AUTH}.type"): # Support flattened env-var style configuration: # PYICEBERG_CATALOG____AUTH__TYPE=oauth2 @@ -468,18 +469,23 @@ def _create_session(self) -> Session: # The env-var parser maps these to flat properties like "auth.type" and "auth.oauth2.client-id". # Key names are converted from kebab-case to snake_case to match AuthManager constructor parameters. auth_impl = self.properties.get(f"{AUTH}.impl") - - if auth_type == CUSTOM and not auth_impl: - raise ValueError("auth.impl must be specified when using custom auth.type") - - if auth_type != CUSTOM and auth_impl: - raise ValueError("auth.impl can only be specified when using custom auth.type") - type_prefix = f"{AUTH}.{auth_type}." auth_type_config = { k[len(type_prefix) :].replace("-", "_"): v for k, v in self.properties.items() if k.startswith(type_prefix) } - + if auth_type: + if auth_type is None: + raise ValueError( + f"auth.type must be defined in auth configuration{f' from {source_env_var}' if source_env_var else ''}" + ) + if auth_type == CUSTOM and not auth_impl: + raise ValueError( + f"auth.impl must be specified when using custom auth.type{f' (from {source_env_var})' if source_env_var else ''}" + ) + if auth_type != CUSTOM and auth_impl: + raise ValueError( + f"auth.impl can only be specified when using custom auth.type{f' (from {source_env_var})' if source_env_var else ''}" + ) self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) session.auth = AuthManagerAdapter(self._auth_manager) else: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index ae6c6707f1..c298fef8eb 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -198,10 +198,10 @@ def test_token_200(rest_mock: Mocker) -> None: status_code=200, request_headers=OAUTH_TEST_HEADERS, ) - assert ( - RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS)._session.headers["Authorization"] # pylint: disable=W0212 - == f"Bearer {TEST_TOKEN}" - ) + catalog = RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS) + req = Request("GET", "http://example.com") + prepped = catalog._session.prepare_request(req) + assert prepped.headers["Authorization"] == f"Bearer {TEST_TOKEN}" @pytest.mark.filterwarnings( @@ -218,10 +218,10 @@ def test_token_200_without_optional_fields(rest_mock: Mocker) -> None: status_code=200, request_headers=OAUTH_TEST_HEADERS, ) - assert ( - RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS)._session.headers["Authorization"] # pylint: disable=W0212 - == f"Bearer {TEST_TOKEN}" - ) + catalog = RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS) + req = Request("GET", "http://example.com") + prepped = catalog._session.prepare_request(req) + assert prepped.headers["Authorization"] == f"Bearer {TEST_TOKEN}" @pytest.mark.filterwarnings( @@ -240,12 +240,10 @@ def test_token_with_optional_oauth_params(rest_mock: Mocker) -> None: status_code=200, request_headers=OAUTH_TEST_HEADERS, ) - assert ( - RestCatalog( - "rest", uri=TEST_URI, credential=TEST_CREDENTIALS, audience=TEST_AUDIENCE, resource=TEST_RESOURCE - )._session.headers["Authorization"] - == f"Bearer {TEST_TOKEN}" - ) + catalog = RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS, audience=TEST_AUDIENCE, resource=TEST_RESOURCE) + req = Request("GET", "http://example.com") + prepped = catalog._session.prepare_request(req) + assert prepped.headers["Authorization"] == f"Bearer {TEST_TOKEN}" assert TEST_AUDIENCE in mock_request.last_request.text assert TEST_RESOURCE in mock_request.last_request.text @@ -266,10 +264,10 @@ def test_token_with_optional_oauth_params_as_empty(rest_mock: Mocker) -> None: status_code=200, request_headers=OAUTH_TEST_HEADERS, ) - assert ( - RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS, audience="", resource="")._session.headers["Authorization"] - == f"Bearer {TEST_TOKEN}" - ) + catalog = RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS, audience="", resource="") + req = Request("GET", "http://example.com") + prepped = catalog._session.prepare_request(req) + assert prepped.headers["Authorization"] == f"Bearer {TEST_TOKEN}" assert TEST_AUDIENCE not in mock_request.last_request.text assert TEST_RESOURCE not in mock_request.last_request.text @@ -290,9 +288,10 @@ def test_token_with_default_scope(rest_mock: Mocker) -> None: status_code=200, request_headers=OAUTH_TEST_HEADERS, ) - assert ( - RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS)._session.headers["Authorization"] == f"Bearer {TEST_TOKEN}" - ) + catalog = RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS) + req = Request("GET", "http://example.com") + prepped = catalog._session.prepare_request(req) + assert prepped.headers["Authorization"] == f"Bearer {TEST_TOKEN}" assert "catalog" in mock_request.last_request.text @@ -312,10 +311,10 @@ def test_token_with_custom_scope(rest_mock: Mocker) -> None: status_code=200, request_headers=OAUTH_TEST_HEADERS, ) - assert ( - RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS, scope=TEST_SCOPE)._session.headers["Authorization"] - == f"Bearer {TEST_TOKEN}" - ) + catalog = RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS, scope=TEST_SCOPE) + req = Request("GET", "http://example.com") + prepped = catalog._session.prepare_request(req) + assert prepped.headers["Authorization"] == f"Bearer {TEST_TOKEN}" assert TEST_SCOPE in mock_request.last_request.text @@ -335,14 +334,10 @@ def test_token_200_w_oauth2_server_uri(rest_mock: Mocker) -> None: status_code=200, request_headers=OAUTH_TEST_HEADERS, ) - # pylint: disable=W0212 - assert ( - RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS, **{OAUTH2_SERVER_URI: OAUTH2_SERVER_URI})._session.headers[ - "Authorization" - ] - == f"Bearer {TEST_TOKEN}" - ) - # pylint: enable=W0212 + catalog = RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS, **{OAUTH2_SERVER_URI: OAUTH2_SERVER_URI}) + req = Request("GET", "http://example.com") + prepped = catalog._session.prepare_request(req) + assert prepped.headers["Authorization"] == f"Bearer {TEST_TOKEN}" @pytest.mark.filterwarnings( @@ -3173,66 +3168,87 @@ def test_load_table_without_storage_credentials( # variables unless auth JSON strings are decoded. -def test_rest_catalog_with_basic_auth_as_json_string(rest_mock: Mocker) -> None: - """When auth arrives as a JSON string (e.g. from an environment variable), it should be decoded correctly.""" - import json - - rest_mock.get( - f"{TEST_URI}v1/config", - json={"defaults": {}, "overrides": {}}, - status_code=200, - ) - auth_dict = { - "type": "basic", - "basic": { - "username": "one", - "password": "two", - }, - } - catalog_properties = { - "uri": TEST_URI, - "auth": json.dumps(auth_dict), - } - catalog = RestCatalog("rest", **catalog_properties) - assert catalog.uri == TEST_URI - - encoded_user_pass = base64.b64encode(b"one:two").decode() - expected_auth_header = f"Basic {encoded_user_pass}" - assert rest_mock.last_request.headers["Authorization"] == expected_auth_header +import json +import pytest -def test_rest_catalog_with_oauth2_auth_as_json_string(requests_mock: Mocker) -> None: - """OAuth2 auth configured as a JSON string (e.g. from an environment variable) should work correctly.""" - import json - requests_mock.post( - f"{TEST_URI}oauth2/token", - json={ - "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", - "token_type": "Bearer", - "expires_in": 3600, - }, - status_code=200, - ) - requests_mock.get( - f"{TEST_URI}v1/config", - json={"defaults": {}, "overrides": {}}, - status_code=200, - ) - auth_dict = { - "type": "oauth2", - "oauth2": { - "client_id": "some_client_id", - "client_secret": "some_client_secret", - "token_url": f"{TEST_URI}oauth2/token", - }, - } - catalog_properties = { - "uri": TEST_URI, - "auth": json.dumps(auth_dict), - } - catalog = RestCatalog("rest", **catalog_properties) - assert catalog.uri == TEST_URI +@pytest.mark.parametrize( + "auth_dict, expected_header, mocker_type", + [ + # Basic auth + ( + {"type": "basic", "basic": {"username": "one", "password": "two"}}, + lambda: f"Basic {base64.b64encode(b'one:two').decode()}", + "rest_mock", + ), + # OAuth2 auth + ( + { + "type": "oauth2", + "oauth2": { + "client_id": "some_client_id", + "client_secret": "some_client_secret", + "token_url": f"{TEST_URI}oauth2/token", + }, + }, + None, # OAuth2 does not set Authorization header immediately + "requests_mock", + ), + # OAuth2 with int fields + ( + { + "type": "oauth2", + "oauth2": { + "client_id": "id", + "client_secret": "secret", + "token_url": f"{TEST_URI}oauth2/token", + "refresh_margin": 10, + "expires_in": 3600, + }, + }, + None, + "requests_mock", + ), + ], +) +def test_rest_catalog_with_auth_json_string(requests_mock, rest_mock, auth_dict, expected_header, mocker_type): + """Test various auth configs as JSON string (from env var) are decoded and handled correctly.""" + if mocker_type == "rest_mock": + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog_properties = { + "uri": TEST_URI, + "auth": json.dumps(auth_dict), + } + catalog = RestCatalog("rest", **catalog_properties) + assert catalog.uri == TEST_URI + if expected_header: + assert rest_mock.last_request.headers["Authorization"] == expected_header() + else: + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={ + "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", + "token_type": "Bearer", + "expires_in": 3600, + }, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog_properties = { + "uri": TEST_URI, + "auth": json.dumps(auth_dict), + } + catalog = RestCatalog("rest", **catalog_properties) + assert catalog.uri == TEST_URI def test_rest_catalog_with_invalid_json_auth_string() -> None: From ff875816566b76aa97fe376495059817e1e703db Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Fri, 29 May 2026 17:12:07 +0530 Subject: [PATCH 7/8] test: parameterize REST catalog env-var auth tests for google/entra scopes and edge cases --- tests/catalog/test_rest.py | 130 ++++++++++++++++++++++++++++--------- 1 file changed, 101 insertions(+), 29 deletions(-) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index c298fef8eb..5a35044041 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -3285,33 +3285,105 @@ def test_rest_catalog_with_basic_auth_flat_properties(rest_mock: Mocker) -> None assert rest_mock.last_request.headers["Authorization"] == expected_auth_header -def test_rest_catalog_with_oauth2_auth_flat_properties(requests_mock: Mocker) -> None: - """OAuth2 auth configured via flattened env-var properties should work correctly. - PYICEBERG_CATALOG____AUTH__OAUTH2__CLIENT_ID maps to 'auth.oauth2.client-id'. - The dash is normalised to an underscore ('client_id') when forwarding to OAuth2AuthManager. - """ - requests_mock.post( - f"{TEST_URI}oauth2/token", - json={ - "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", - "token_type": "Bearer", - "expires_in": 3600, - }, - status_code=200, - ) - requests_mock.get( - f"{TEST_URI}v1/config", - json={"defaults": {}, "overrides": {}}, - status_code=200, - ) - catalog_properties = { - "uri": TEST_URI, - # Flat properties as produced by the env-var config parser (note: kebab-case keys) - "auth.type": "oauth2", - "auth.oauth2.client-id": "some_client_id", - "auth.oauth2.client-secret": "some_client_secret", - "auth.oauth2.token-url": f"{TEST_URI}oauth2/token", - } - catalog = RestCatalog("rest", **catalog_properties) - assert catalog.uri == TEST_URI +import pytest + +@pytest.mark.parametrize( + "catalog_properties, token_response, expected_scopes", + [ + # OAuth2 with string scope + ( + { + "uri": TEST_URI, + "auth.type": "oauth2", + "auth.oauth2.client-id": "some_client_id", + "auth.oauth2.client-secret": "some_client_secret", + "auth.oauth2.token-url": f"{TEST_URI}oauth2/token", + "auth.oauth2.scope": "read", + }, + {"access_token": "token", "token_type": "Bearer", "expires_in": 3600, "scope": "read"}, + "read", + ), + # OAuth2 with list[str] scope + ( + { + "uri": TEST_URI, + "auth.type": "oauth2", + "auth.oauth2.client-id": "id", + "auth.oauth2.client-secret": "secret", + "auth.oauth2.token-url": f"{TEST_URI}oauth2/token", + "auth.oauth2.scope": ["openid", "profile"], + }, + {"access_token": "token", "token_type": "Bearer", "expires_in": 3600, "scope": "openid profile"}, + ["openid", "profile"], + ), + # Google with list[str] scopes + ( + { + "uri": TEST_URI, + "auth.type": "google", + "auth.google.credentials_path": "/fake/path.json", + "auth.google.scopes": ["scope1", "scope2"], + }, + None, + ["scope1", "scope2"], + ), + # Entra with list[str] scopes + ( + { + "uri": TEST_URI, + "auth.type": "entra", + "auth.entra.client-id": "entra_id", + "auth.entra.client-secret": "entra_secret", + "auth.entra.tenant-id": "entra_tenant", + "auth.entra.scopes": ["scopeA", "scopeB"], + }, + None, + ["scopeA", "scopeB"], + ), + ] +) +def test_rest_catalog_with_flat_properties_edge_cases(requests_mock, rest_mock, catalog_properties, token_response, expected_scopes): + """Test flat property env-var style for OAuth2, Google, Entra with list[str] scopes and edge cases.""" + if catalog_properties["auth.type"] == "oauth2": + requests_mock.post( + f"{TEST_URI}oauth2/token", + json=token_response, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog = RestCatalog("rest", **catalog_properties) + assert catalog.uri == TEST_URI + # If scope is a list, ensure it is handled as a space-separated string + if isinstance(expected_scopes, list): + assert any(scope in token_response["scope"] for scope in expected_scopes) + else: + assert token_response["scope"] == expected_scopes + elif catalog_properties["auth.type"] == "google": + # Patch google.auth.load_credentials_from_file and google.auth.transport.requests.Request + with mock.patch("google.auth.load_credentials_from_file") as mock_load_creds, \ + mock.patch("google.auth.transport.requests.Request") as mock_google_request: + mock_credentials = mock.MagicMock() + mock_credentials.token = "file_token" + mock_load_creds.return_value = (mock_credentials, "test_project_file") + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog = RestCatalog("rest", **catalog_properties) + assert catalog.uri == TEST_URI + mock_load_creds.assert_called_with("/fake/path.json", scopes=expected_scopes) + elif catalog_properties["auth.type"] == "entra": + # Just check that the catalog can be constructed and scopes are passed + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog = RestCatalog("rest", **catalog_properties) + assert catalog.uri == TEST_URI From b169928353397c27f5a5e6501a13f22f091392a8 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Fri, 29 May 2026 18:05:50 +0530 Subject: [PATCH 8/8] test: skip Google/Entra auth tests if optional deps missing --- tests/catalog/test_rest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 5a35044041..fa20f209a4 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -3364,6 +3364,7 @@ def test_rest_catalog_with_flat_properties_edge_cases(requests_mock, rest_mock, else: assert token_response["scope"] == expected_scopes elif catalog_properties["auth.type"] == "google": + pytest.importorskip("google.auth", reason="google-auth is required for Google auth tests") # Patch google.auth.load_credentials_from_file and google.auth.transport.requests.Request with mock.patch("google.auth.load_credentials_from_file") as mock_load_creds, \ mock.patch("google.auth.transport.requests.Request") as mock_google_request: @@ -3379,6 +3380,7 @@ def test_rest_catalog_with_flat_properties_edge_cases(requests_mock, rest_mock, assert catalog.uri == TEST_URI mock_load_creds.assert_called_with("/fake/path.json", scopes=expected_scopes) elif catalog_properties["auth.type"] == "entra": + pytest.importorskip("azure.identity", reason="azure-identity is required for Entra auth tests") # Just check that the catalog can be constructed and scopes are passed rest_mock.get( f"{TEST_URI}v1/config",