Skip to content

Commit fe0a237

Browse files
authored
Make REST catalog namespace separator configurable (#2826)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> Closes #1183 This adds a configurable namespace separator in the REST Catalog. # Rationale for this change Certain implementations expect a different namespace separator. ## Are these changes tested? Tests included. ## Are there any user-facing changes? <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent ba7676c commit fe0a237

3 files changed

Lines changed: 95 additions & 10 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Any,
2222
Union,
2323
)
24+
from urllib.parse import quote, unquote
2425

2526
from pydantic import ConfigDict, Field, TypeAdapter, field_validator
2627
from requests import HTTPError, Session
@@ -234,7 +235,8 @@ class IdentifierKind(Enum):
234235
VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"
235236
VIEW_ENDPOINTS_SUPPORTED_DEFAULT = False
236237

237-
NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
238+
NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator"
239+
DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
238240

239241

240242
def _retry_hook(retry_state: RetryCallState) -> None:
@@ -330,6 +332,7 @@ class RestCatalog(Catalog):
330332
_session: Session
331333
_auth_manager: AuthManager | None
332334
_supported_endpoints: set[Endpoint]
335+
_namespace_separator: str
333336

334337
def __init__(self, name: str, **properties: str):
335338
"""Rest Catalog.
@@ -596,6 +599,16 @@ def _extract_optional_oauth_params(self) -> dict[str, str]:
596599

597600
return optional_oauth_param
598601

602+
def _encode_namespace_path(self, namespace: Identifier) -> str:
603+
"""
604+
Encode a namespace for use as a path parameter in a URL.
605+
606+
Each part of the namespace is URL-encoded using `urllib.parse.quote`
607+
(ensuring characters like '/' are encoded) and then joined by the
608+
configured namespace separator.
609+
"""
610+
return self._namespace_separator.join(quote(part, safe="") for part in namespace)
611+
599612
def _fetch_config(self) -> None:
600613
params = {}
601614
if warehouse_location := self.properties.get(WAREHOUSE_LOCATION):
@@ -628,6 +641,11 @@ def _fetch_config(self) -> None:
628641
if property_as_bool(self.properties, VIEW_ENDPOINTS_SUPPORTED, VIEW_ENDPOINTS_SUPPORTED_DEFAULT):
629642
self._supported_endpoints.update(VIEW_ENDPOINTS)
630643

644+
separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR)
645+
if not separator_from_properties:
646+
raise ValueError("Namespace separator cannot be an empty string")
647+
self._namespace_separator = unquote(separator_from_properties)
648+
631649
def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identifier:
632650
identifier_tuple = self.identifier_to_tuple(identifier)
633651
if len(identifier_tuple) <= 1:
@@ -638,10 +656,17 @@ def _split_identifier_for_path(
638656
self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE
639657
) -> Properties:
640658
if isinstance(identifier, TableIdentifier):
641-
return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value: identifier.name}
659+
return {
660+
"namespace": self._encode_namespace_path(tuple(identifier.namespace.root)),
661+
kind.value: quote(identifier.name, safe=""),
662+
}
642663
identifier_tuple = self._identifier_to_validated_tuple(identifier)
643664

644-
return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), kind.value: identifier_tuple[-1]}
665+
# Use quote to ensure that '/' aren't treated as path separators.
666+
return {
667+
"namespace": self._encode_namespace_path(identifier_tuple[:-1]),
668+
kind.value: quote(identifier_tuple[-1], safe=""),
669+
}
645670

646671
def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, Identifier | str]:
647672
identifier_tuple = self._identifier_to_validated_tuple(identifier)
@@ -864,7 +889,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) -
864889
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
865890
self._check_endpoint(Capability.V1_LIST_TABLES)
866891
namespace_tuple = self._check_valid_namespace_identifier(namespace)
867-
namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
892+
namespace_concat = self._encode_namespace_path(namespace_tuple)
868893
response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat))
869894
try:
870895
response.raise_for_status()
@@ -950,7 +975,7 @@ def list_views(self, namespace: str | Identifier) -> list[Identifier]:
950975
if Capability.V1_LIST_VIEWS not in self._supported_endpoints:
951976
return []
952977
namespace_tuple = self._check_valid_namespace_identifier(namespace)
953-
namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
978+
namespace_concat = self._encode_namespace_path(namespace_tuple)
954979
response = self._session.get(self.url(Endpoints.list_views, namespace=namespace_concat))
955980
try:
956981
response.raise_for_status()
@@ -1020,7 +1045,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties =
10201045
def drop_namespace(self, namespace: str | Identifier) -> None:
10211046
self._check_endpoint(Capability.V1_DELETE_NAMESPACE)
10221047
namespace_tuple = self._check_valid_namespace_identifier(namespace)
1023-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
1048+
namespace = self._encode_namespace_path(namespace_tuple)
10241049
response = self._session.delete(self.url(Endpoints.drop_namespace, namespace=namespace))
10251050
try:
10261051
response.raise_for_status()
@@ -1033,7 +1058,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
10331058
namespace_tuple = self.identifier_to_tuple(namespace)
10341059
response = self._session.get(
10351060
self.url(
1036-
f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}"
1061+
f"{Endpoints.list_namespaces}?parent={self._encode_namespace_path(namespace_tuple)}"
10371062
if namespace_tuple
10381063
else Endpoints.list_namespaces
10391064
),
@@ -1049,7 +1074,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
10491074
def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
10501075
self._check_endpoint(Capability.V1_LOAD_NAMESPACE)
10511076
namespace_tuple = self._check_valid_namespace_identifier(namespace)
1052-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
1077+
namespace = self._encode_namespace_path(namespace_tuple)
10531078
response = self._session.get(self.url(Endpoints.load_namespace_metadata, namespace=namespace))
10541079
try:
10551080
response.raise_for_status()
@@ -1064,7 +1089,7 @@ def update_namespace_properties(
10641089
) -> PropertiesUpdateSummary:
10651090
self._check_endpoint(Capability.V1_UPDATE_NAMESPACE)
10661091
namespace_tuple = self._check_valid_namespace_identifier(namespace)
1067-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
1092+
namespace = self._encode_namespace_path(namespace_tuple)
10681093
payload = {"removals": list(removals or []), "updates": updates}
10691094
response = self._session.post(self.url(Endpoints.update_namespace_properties, namespace=namespace), json=payload)
10701095
try:
@@ -1081,7 +1106,8 @@ def update_namespace_properties(
10811106
@retry(**_RETRY_ARGS)
10821107
def namespace_exists(self, namespace: str | Identifier) -> bool:
10831108
namespace_tuple = self._check_valid_namespace_identifier(namespace)
1084-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
1109+
namespace = self._encode_namespace_path(namespace_tuple)
1110+
10851111
# fallback in order to work with older rest catalog implementations
10861112
if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints:
10871113
try:

tests/catalog/test_rest.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,6 +1991,31 @@ def test_rest_catalog_with_google_credentials_path(
19911991
assert actual_headers["Authorization"] == expected_auth_header
19921992

19931993

1994+
def test_custom_namespace_separator(rest_mock: Mocker) -> None:
1995+
custom_separator = "-"
1996+
namespace_part1 = "some"
1997+
namespace_part2 = "namespace"
1998+
# The expected URL path segment should use the literal custom_separator
1999+
expected_url_path_segment = f"{namespace_part1}{custom_separator}{namespace_part2}"
2000+
2001+
rest_mock.get(
2002+
f"{TEST_URI}v1/config",
2003+
json={"defaults": {}, "overrides": {}},
2004+
status_code=200,
2005+
)
2006+
rest_mock.get(
2007+
f"{TEST_URI}v1/namespaces/{expected_url_path_segment}",
2008+
json={"namespace": [namespace_part1, namespace_part2], "properties": {"prop": "yes"}},
2009+
status_code=200,
2010+
request_headers=TEST_HEADERS,
2011+
)
2012+
2013+
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{"namespace-separator": custom_separator})
2014+
catalog.load_namespace_properties((namespace_part1, namespace_part2))
2015+
2016+
assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/{expected_url_path_segment}"
2017+
2018+
19942019
@pytest.mark.filterwarnings(
19952020
"ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
19962021
)

tests/integration/test_catalog.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# under the License.
1717

1818
import os
19+
import uuid
1920
from collections.abc import Generator
2021
from pathlib import Path, PosixPath
2122

@@ -601,3 +602,36 @@ def test_register_table_existing(test_catalog: Catalog, table_schema_nested: Sch
601602
# Assert that registering the table again raises TableAlreadyExistsError
602603
with pytest.raises(TableAlreadyExistsError):
603604
test_catalog.register_table(identifier, metadata_location=table.metadata_location)
605+
606+
607+
@pytest.mark.integration
608+
def test_rest_custom_namespace_separator(rest_catalog: RestCatalog, table_schema_simple: Schema) -> None:
609+
"""
610+
Tests that the REST catalog correctly picks up the namespace-separator from the config endpoint.
611+
The REST Catalog is configured with a '.' namespace separator.
612+
"""
613+
assert rest_catalog._namespace_separator == "."
614+
615+
unique_id = uuid.uuid4().hex
616+
parent_namespace = (f"test_parent_{unique_id}",)
617+
child_namespace_part = "child"
618+
full_namespace_tuple = (*parent_namespace, child_namespace_part)
619+
620+
table_name = "my_table"
621+
full_table_identifier_tuple = (*full_namespace_tuple, table_name)
622+
623+
rest_catalog.create_namespace(namespace=parent_namespace)
624+
rest_catalog.create_namespace(namespace=full_namespace_tuple)
625+
626+
namespaces = rest_catalog.list_namespaces(parent_namespace)
627+
assert full_namespace_tuple in namespaces
628+
629+
# Test with a table
630+
table = rest_catalog.create_table(identifier=full_table_identifier_tuple, schema=table_schema_simple)
631+
assert table.name() == full_table_identifier_tuple
632+
633+
tables = rest_catalog.list_tables(full_namespace_tuple)
634+
assert full_table_identifier_tuple in tables
635+
636+
loaded_table = rest_catalog.load_table(identifier=full_table_identifier_tuple)
637+
assert loaded_table.name() == full_table_identifier_tuple

0 commit comments

Comments
 (0)