Skip to content

Commit 4125818

Browse files
geruhkevinjqliu
andauthored
fix: Add check table UUID to detect table replacement (#2890)
# Rationale for this change This PR adds table UUID validation on refresh and commit to detect when a table has been replaced. For example, if a table is dropped and recreated with the same name, this prevents accidentally operating on a different table than expected. Modeled after the Java implementation. https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java#L202-L209 Python was missing this check. ## Are these changes tested? Added some tests at the table and catalog level ## Are there any user-facing changes? no --------- Co-authored-by: Kevin Liu <kevinjqliu@users.noreply.github.com>
1 parent 097b458 commit 4125818

3 files changed

Lines changed: 119 additions & 0 deletions

File tree

pyiceberg/table/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,6 +1133,7 @@ def refresh(self) -> Table:
11331133
An updated instance of the same Iceberg table
11341134
"""
11351135
fresh = self.catalog.load_table(self._identifier)
1136+
self._check_uuid(self.metadata, fresh.metadata)
11361137
self.metadata = fresh.metadata
11371138
self.io = fresh.io
11381139
self.metadata_location = fresh.metadata_location
@@ -1513,9 +1514,21 @@ def refs(self) -> dict[str, SnapshotRef]:
15131514
"""Return the snapshot references in the table."""
15141515
return self.metadata.refs
15151516

1517+
@staticmethod
1518+
def _check_uuid(current_metadata: TableMetadata, new_metadata: TableMetadata) -> None:
1519+
"""Validate that the table UUID matches after refresh."""
1520+
current = current_metadata.table_uuid
1521+
refreshed = new_metadata.table_uuid
1522+
1523+
if current != refreshed:
1524+
raise ValueError(f"Table UUID does not match: current={current} != refreshed={refreshed}")
1525+
15161526
def _do_commit(self, updates: tuple[TableUpdate, ...], requirements: tuple[TableRequirement, ...]) -> None:
15171527
response = self.catalog.commit_table(self, requirements, updates)
15181528

1529+
# Ensure table uuid has not changed
1530+
self._check_uuid(self.metadata, response.metadata)
1531+
15191532
# https://github.com/apache/iceberg/blob/f6faa58/core/src/main/java/org/apache/iceberg/CatalogUtil.java#L527
15201533
# delete old metadata if METADATA_DELETE_AFTER_COMMIT_ENABLED is set to true and uses
15211534
# TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many previous versions to keep -

tests/catalog/test_rest.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
from pyiceberg.table.sorting import SortField, SortOrder
5858
from pyiceberg.transforms import IdentityTransform, TruncateTransform
5959
from pyiceberg.typedef import RecursiveDict
60+
from pyiceberg.types import StringType
6061
from pyiceberg.utils.config import Config
6162

6263
TEST_URI = "https://iceberg-test-catalog/"
@@ -1165,6 +1166,9 @@ def test_create_staged_table_200(
11651166
example_table_metadata_with_no_location: dict[str, Any],
11661167
example_table_metadata_no_snapshot_v1_rest_json: dict[str, Any],
11671168
) -> None:
1169+
expected_table_uuid = example_table_metadata_with_no_location["metadata"]["table-uuid"]
1170+
example_table_metadata_no_snapshot_v1_rest_json["metadata"]["table-uuid"] = expected_table_uuid
1171+
11681172
rest_mock.post(
11691173
f"{TEST_URI}v1/namespaces/fokko/tables",
11701174
json=example_table_metadata_with_no_location,
@@ -2226,3 +2230,87 @@ def test_view_endpoints_enabled_with_config(self, requests_mock: Mocker) -> None
22262230
# View endpoints should be supported when enabled
22272231
catalog._check_endpoint(Capability.V1_LIST_VIEWS)
22282232
catalog._check_endpoint(Capability.V1_DELETE_VIEW)
2233+
2234+
2235+
def test_table_uuid_check_on_commit(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
2236+
"""Test that UUID mismatch is detected on commit response (matches Java RESTTableOperations behavior)."""
2237+
original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
2238+
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
2239+
metadata_location = "s3://warehouse/database/table/metadata.json"
2240+
2241+
rest_mock.get(
2242+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2243+
json={
2244+
"metadata-location": metadata_location,
2245+
"metadata": example_table_metadata_v2,
2246+
"config": {},
2247+
},
2248+
status_code=200,
2249+
request_headers=TEST_HEADERS,
2250+
)
2251+
2252+
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
2253+
table = catalog.load_table(("namespace", "table_name"))
2254+
2255+
assert str(table.metadata.table_uuid) == original_uuid
2256+
2257+
metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
2258+
2259+
rest_mock.post(
2260+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2261+
json={
2262+
"metadata-location": metadata_location,
2263+
"metadata": metadata_with_different_uuid,
2264+
},
2265+
status_code=200,
2266+
request_headers=TEST_HEADERS,
2267+
)
2268+
2269+
with pytest.raises(ValueError) as exc_info:
2270+
table.update_schema().add_column("new_col", StringType()).commit()
2271+
2272+
assert "Table UUID does not match" in str(exc_info.value)
2273+
assert f"current={original_uuid}" in str(exc_info.value)
2274+
assert f"refreshed={different_uuid}" in str(exc_info.value)
2275+
2276+
2277+
def test_table_uuid_check_on_refresh(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
2278+
original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
2279+
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
2280+
metadata_location = "s3://warehouse/database/table/metadata.json"
2281+
2282+
rest_mock.get(
2283+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2284+
json={
2285+
"metadata-location": metadata_location,
2286+
"metadata": example_table_metadata_v2,
2287+
"config": {},
2288+
},
2289+
status_code=200,
2290+
request_headers=TEST_HEADERS,
2291+
)
2292+
2293+
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
2294+
table = catalog.load_table(("namespace", "table_name"))
2295+
2296+
assert str(table.metadata.table_uuid) == original_uuid
2297+
2298+
metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
2299+
2300+
rest_mock.get(
2301+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2302+
json={
2303+
"metadata-location": metadata_location,
2304+
"metadata": metadata_with_different_uuid,
2305+
"config": {},
2306+
},
2307+
status_code=200,
2308+
request_headers=TEST_HEADERS,
2309+
)
2310+
2311+
with pytest.raises(ValueError) as exc_info:
2312+
table.refresh()
2313+
2314+
assert "Table UUID does not match" in str(exc_info.value)
2315+
assert f"current={original_uuid}" in str(exc_info.value)
2316+
assert f"refreshed={different_uuid}" in str(exc_info.value)

tests/table/test_init.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,3 +1639,21 @@ def model_roundtrips(model: BaseModel) -> bool:
16391639
if model != type(model).model_validate(model_data):
16401640
pytest.fail(f"model {type(model)} did not roundtrip successfully")
16411641
return True
1642+
1643+
1644+
def test_check_uuid_raises_when_mismatch(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
1645+
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
1646+
metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
1647+
new_metadata = TableMetadataV2(**metadata_with_different_uuid)
1648+
1649+
with pytest.raises(ValueError) as exc_info:
1650+
Table._check_uuid(table_v2.metadata, new_metadata)
1651+
1652+
assert "Table UUID does not match" in str(exc_info.value)
1653+
assert different_uuid in str(exc_info.value)
1654+
1655+
1656+
def test_check_uuid_passes_when_match(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
1657+
new_metadata = TableMetadataV2(**example_table_metadata_v2)
1658+
# Should not raise with same uuid
1659+
Table._check_uuid(table_v2.metadata, new_metadata)

0 commit comments

Comments
 (0)