Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions pyiceberg/environment_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from importlib.metadata import version


class EnvironmentContext:
_PROPERTIES: dict[str, str] = {
"engine-name": "pyiceberg",
"engine-version": version("pyiceberg"),
}

def __init__(self) -> None:
raise NotImplementedError("EnvironmentContext is a utility class and cannot be instantiated.")

@classmethod
def get(cls) -> dict[str, str]:
"""Return a read-only copy of all properties."""
return cls._PROPERTIES.copy()

@classmethod
def put(cls, key: str, value: str) -> None:
"""Will add the given key/value pair in a global properties map."""
cls._PROPERTIES[key] = value

@classmethod
def remove(cls, key: str) -> str | None:
"""Remove the key from the global properties map."""
return cls._PROPERTIES.pop(key, None)
4 changes: 4 additions & 0 deletions pyiceberg/table/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from pydantic import Field, PrivateAttr, model_serializer

from pyiceberg.environment_context import EnvironmentContext
from pyiceberg.io import FileIO
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
Expand Down Expand Up @@ -402,6 +403,9 @@ def _update_totals(total_property: str, added_property: str, removed_property: s
removed_property=REMOVED_EQUALITY_DELETES,
)

for key, value in EnvironmentContext.get().items():
summary.__setitem__(key, value)

return summary


Expand Down
3 changes: 3 additions & 0 deletions tests/integration/test_deletes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pyspark.sql import SparkSession

from pyiceberg.catalog.rest import RestCatalog
from pyiceberg.environment_context import EnvironmentContext
from pyiceberg.exceptions import NoSuchTableError
from pyiceberg.expressions import AlwaysTrue, EqualTo, LessThanOrEqual
from pyiceberg.manifest import ManifestEntryStatus
Expand Down Expand Up @@ -480,6 +481,8 @@ def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSessio
"total-files-size": snapshots[2].summary["total-files-size"],
"total-position-deletes": "1",
"total-records": "4",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
},
)

Expand Down
5 changes: 5 additions & 0 deletions tests/integration/test_inspect_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pytest_lazy_fixtures import lf

from pyiceberg.catalog import Catalog
from pyiceberg.environment_context import EnvironmentContext
from pyiceberg.exceptions import NoSuchTableError
from pyiceberg.expressions import (
And,
Expand Down Expand Up @@ -277,6 +278,8 @@ def test_inspect_snapshots(
("total-files-size", str(file_size)),
("total-position-deletes", "0"),
("total-equality-deletes", "0"),
("engine-name", "pyiceberg"),
("engine-version", EnvironmentContext.get().get("engine-version")),
]

# Delete
Expand All @@ -290,6 +293,8 @@ def test_inspect_snapshots(
("total-files-size", "0"),
("total-position-deletes", "0"),
("total-equality-deletes", "0"),
("engine-name", "pyiceberg"),
("engine-version", EnvironmentContext.get().get("engine-version")),
]

lhs = spark.table(f"{identifier}.snapshots").toPandas()
Expand Down
15 changes: 15 additions & 0 deletions tests/integration/test_writes/test_partitioned_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pyspark.sql import SparkSession

from pyiceberg.catalog import Catalog
from pyiceberg.environment_context import EnvironmentContext
from pyiceberg.exceptions import NoSuchTableError
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
Expand Down Expand Up @@ -498,6 +499,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
"total-files-size": str(file_size),
"total-position-deletes": "0",
"total-records": "3",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

assert summaries[1] == {
Expand All @@ -511,6 +514,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
"total-files-size": str(file_size * 2),
"total-position-deletes": "0",
"total-records": "6",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}
assert summaries[2] == {
"removed-files-size": str(file_size * 2),
Expand All @@ -523,6 +528,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
"total-files-size": "0",
"total-data-files": "0",
"total-records": "0",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}
assert summaries[3] == {
"changed-partition-count": "3",
Expand All @@ -535,6 +542,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
"total-files-size": str(file_size),
"total-data-files": "3",
"total-records": "3",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}
assert summaries[4] == {
"changed-partition-count": "3",
Expand All @@ -547,6 +556,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
"total-files-size": str(file_size * 2),
"total-data-files": "6",
"total-records": "6",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}
assert "removed-files-size" in summaries[5]
assert "total-files-size" in summaries[5]
Expand All @@ -561,6 +572,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
"total-files-size": summaries[5]["total-files-size"],
"total-data-files": "2",
"total-records": "2",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}
assert "added-files-size" in summaries[6]
assert "total-files-size" in summaries[6]
Expand All @@ -575,6 +588,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
"total-files-size": summaries[6]["total-files-size"],
"total-data-files": "4",
"total-records": "4",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}


Expand Down
25 changes: 25 additions & 0 deletions tests/integration/test_writes/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.catalog.hive import HiveCatalog
from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.environment_context import EnvironmentContext
from pyiceberg.exceptions import CommitFailedException, NoSuchTableError
from pyiceberg.expressions import And, EqualTo, GreaterThanOrEqual, In, LessThan, Not
from pyiceberg.io.pyarrow import UnsupportedPyArrowTypeException, _dataframe_to_data_files
Expand Down Expand Up @@ -231,6 +232,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
"total-files-size": str(file_size),
"total-position-deletes": "0",
"total-records": "3",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

# Append
Expand All @@ -244,6 +247,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
"total-files-size": str(file_size * 2),
"total-position-deletes": "0",
"total-records": "6",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

# Delete
Expand All @@ -257,6 +262,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
"total-files-size": "0",
"total-position-deletes": "0",
"total-records": "0",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

# Append
Expand All @@ -270,6 +277,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
"total-files-size": str(file_size),
"total-position-deletes": "0",
"total-records": "3",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}


Expand Down Expand Up @@ -326,6 +335,8 @@ def test_summaries_partial_overwrite(spark: SparkSession, session_catalog: Catal
"total-files-size": summaries[0]["total-files-size"],
"total-position-deletes": "0",
"total-records": "5",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}
# Java produces:
# {
Expand Down Expand Up @@ -367,6 +378,8 @@ def test_summaries_partial_overwrite(spark: SparkSession, session_catalog: Catal
"total-files-size": summaries[1]["total-files-size"],
"total-position-deletes": "0",
"total-records": "4",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}
assert len(tbl.scan().to_pandas()) == 4

Expand Down Expand Up @@ -831,6 +844,8 @@ def test_summaries_with_only_nulls(
"total-files-size": "0",
"total-position-deletes": "0",
"total-records": "0",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

assert summaries[1] == {
Expand All @@ -843,6 +858,8 @@ def test_summaries_with_only_nulls(
"total-files-size": str(file_size),
"total-position-deletes": "0",
"total-records": "2",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

assert summaries[2] == {
Expand All @@ -855,6 +872,8 @@ def test_summaries_with_only_nulls(
"total-files-size": "0",
"total-position-deletes": "0",
"total-records": "0",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

assert summaries[3] == {
Expand All @@ -864,6 +883,8 @@ def test_summaries_with_only_nulls(
"total-files-size": "0",
"total-position-deletes": "0",
"total-records": "0",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}


Expand Down Expand Up @@ -1156,6 +1177,8 @@ def test_inspect_snapshots(
("total-files-size", str(file_size)),
("total-position-deletes", "0"),
("total-equality-deletes", "0"),
("engine-name", "pyiceberg"),
("engine-version", EnvironmentContext.get().get("engine-version")),
]

# Delete
Expand All @@ -1169,6 +1192,8 @@ def test_inspect_snapshots(
("total-files-size", "0"),
("total-position-deletes", "0"),
("total-equality-deletes", "0"),
("engine-name", "pyiceberg"),
("engine-version", EnvironmentContext.get().get("engine-version")),
]

lhs = spark.table(f"{identifier}.snapshots").toPandas()
Expand Down
7 changes: 7 additions & 0 deletions tests/table/test_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import pytest

from pyiceberg.environment_context import EnvironmentContext
from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, ManifestFile
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
Expand Down Expand Up @@ -315,6 +316,8 @@ def test_merge_snapshot_summaries_empty() -> None:
"total-files-size": "0",
"total-position-deletes": "0",
"total-equality-deletes": "0",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
},
)

Expand Down Expand Up @@ -349,6 +352,8 @@ def test_merge_snapshot_summaries_new_summary() -> None:
"total-files-size": "4",
"total-position-deletes": "5",
"total-equality-deletes": "3",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
},
)

Expand Down Expand Up @@ -391,6 +396,8 @@ def test_merge_snapshot_summaries_overwrite_summary() -> None:
"total-files-size": "5",
"total-position-deletes": "6",
"total-equality-deletes": "4",
"engine-name": "pyiceberg",
"engine-version": EnvironmentContext.get().get("engine-version"),
}

assert actual.additional_properties == expected
Expand Down
34 changes: 34 additions & 0 deletions tests/test_environment_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import re

from pyiceberg.environment_context import EnvironmentContext


def test_default_value() -> None:
actual = EnvironmentContext.get()
assert len(actual) == 2
assert actual["engine-name"] == "pyiceberg"
assert re.match(r"^\d+\.\d+\.\d+", actual["engine-version"])


def test_put_and_remove() -> None:
EnvironmentContext.put("test-key", "test-value")
assert EnvironmentContext.get()["test-key"] == "test-value"

EnvironmentContext.remove("test-key")
assert "test-key" not in EnvironmentContext.get()