From 338613f451091f52079235d2cf3112acc144feea Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Fri, 12 Dec 2025 10:39:02 -0800 Subject: [PATCH 1/7] Make REST catalog namespace separator configurable The REST spec currently uses %1F as the UTF-8 encoded namespace separator for multi-part namespaces. --- pyiceberg/catalog/rest/__init__.py | 44 +++++++++++++++++++++++------- tests/catalog/test_rest.py | 27 ++++++++++++++++++ 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index d7ef6ec86f..fa2afe10c5 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -21,6 +21,7 @@ Any, Union, ) +from urllib.parse import quote, unquote from pydantic import ConfigDict, Field, TypeAdapter, field_validator from requests import HTTPError, Session @@ -234,7 +235,8 @@ class IdentifierKind(Enum): VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported" VIEW_ENDPOINTS_SUPPORTED_DEFAULT = False -NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) +NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator" +DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) def _retry_hook(retry_state: RetryCallState) -> None: @@ -330,6 +332,7 @@ class RestCatalog(Catalog): _session: Session _auth_manager: AuthManager | None _supported_endpoints: set[Endpoint] + _namespace_separator: str def __init__(self, name: str, **properties: str): """Rest Catalog. @@ -345,6 +348,8 @@ def __init__(self, name: str, **properties: str): self.uri = properties[URI] self._fetch_config() self._session = self._create_session() + separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) + self._namespace_separator = unquote(separator_from_properties) def _create_session(self) -> Session: """Create a request session with provided catalog configuration.""" @@ -596,6 +601,16 @@ def _extract_optional_oauth_params(self) -> dict[str, str]: return optional_oauth_param + def _encode_namespace_path(self, namespace: Identifier) -> str: + """ + Encode a namespace for use as a path parameter in a URL. + + Each part of the namespace is URL-encoded using `urllib.parse.quote` + (ensuring characters like '/' are encoded) and then joined by the + configured namespace separator. + """ + return self._namespace_separator.join(quote(part, safe="") for part in namespace) + def _fetch_config(self) -> None: params = {} if warehouse_location := self.properties.get(WAREHOUSE_LOCATION): @@ -637,11 +652,19 @@ def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identi def _split_identifier_for_path( self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE ) -> Properties: + from urllib.parse import quote + if isinstance(identifier, TableIdentifier): - return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value: identifier.name} + return { + "namespace": self._encode_namespace_path(tuple(identifier.namespace.root)), + kind.value: quote(identifier.name, safe=""), + } identifier_tuple = self._identifier_to_validated_tuple(identifier) - return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), kind.value: identifier_tuple[-1]} + return { + "namespace": self._encode_namespace_path(identifier_tuple[:-1]), + kind.value: quote(identifier_tuple[-1], safe=""), + } def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, Identifier | str]: identifier_tuple = self._identifier_to_validated_tuple(identifier) @@ -864,7 +887,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) - def list_tables(self, namespace: str | Identifier) -> list[Identifier]: self._check_endpoint(Capability.V1_LIST_TABLES) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace_concat = self._encode_namespace_path(namespace_tuple) response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat)) try: response.raise_for_status() @@ -950,7 +973,7 @@ def list_views(self, namespace: str | Identifier) -> list[Identifier]: if Capability.V1_LIST_VIEWS not in self._supported_endpoints: return [] namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace_concat = self._encode_namespace_path(namespace_tuple) response = self._session.get(self.url(Endpoints.list_views, namespace=namespace_concat)) try: response.raise_for_status() @@ -1020,7 +1043,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties = def drop_namespace(self, namespace: str | Identifier) -> None: self._check_endpoint(Capability.V1_DELETE_NAMESPACE) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) response = self._session.delete(self.url(Endpoints.drop_namespace, namespace=namespace)) try: response.raise_for_status() @@ -1033,7 +1056,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: namespace_tuple = self.identifier_to_tuple(namespace) response = self._session.get( self.url( - f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}" + f"{Endpoints.list_namespaces}?parent={self._namespace_separator.join(namespace_tuple)}" if namespace_tuple else Endpoints.list_namespaces ), @@ -1049,7 +1072,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: def load_namespace_properties(self, namespace: str | Identifier) -> Properties: self._check_endpoint(Capability.V1_LOAD_NAMESPACE) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) response = self._session.get(self.url(Endpoints.load_namespace_metadata, namespace=namespace)) try: response.raise_for_status() @@ -1064,7 +1087,7 @@ def update_namespace_properties( ) -> PropertiesUpdateSummary: self._check_endpoint(Capability.V1_UPDATE_NAMESPACE) namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) payload = {"removals": list(removals or []), "updates": updates} response = self._session.post(self.url(Endpoints.update_namespace_properties, namespace=namespace), json=payload) try: @@ -1081,7 +1104,8 @@ def update_namespace_properties( @retry(**_RETRY_ARGS) def namespace_exists(self, namespace: str | Identifier) -> bool: namespace_tuple = self._check_valid_namespace_identifier(namespace) - namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + namespace = self._encode_namespace_path(namespace_tuple) + # fallback in order to work with older rest catalog implementations if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints: try: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 2e3b0d5080..65b8f256f2 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1990,6 +1990,33 @@ def test_rest_catalog_with_google_credentials_path( actual_headers = history[0].headers assert actual_headers["Authorization"] == expected_auth_header + assert actual_headers["Authorization"] == expected_auth_header + + +def test_custom_namespace_separator(rest_mock: Mocker) -> None: + custom_separator = "-" + namespace_part1 = "some" + namespace_part2 = "namespace" + # The expected URL path segment should use the literal custom_separator + expected_url_path_segment = f"{namespace_part1}{custom_separator}{namespace_part2}" + + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + rest_mock.get( + f"{TEST_URI}v1/namespaces/{expected_url_path_segment}", + json={"namespace": [namespace_part1, namespace_part2], "properties": {"prop": "yes"}}, + status_code=204, + request_headers=TEST_HEADERS, + ) + + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{"namespace-separator": custom_separator}) + catalog.load_namespace_properties((namespace_part1, namespace_part2)) + + assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/{expected_url_path_segment}" + @pytest.mark.filterwarnings( "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" From 4a1d9fdb209c1d5bccbd5e705021836119f5f982 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 15 Dec 2025 10:36:40 -0800 Subject: [PATCH 2/7] PR comments --- pyiceberg/catalog/rest/__init__.py | 14 +++----------- tests/catalog/test_rest.py | 4 +--- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index fa2afe10c5..b520bc9c0a 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -349,6 +349,8 @@ def __init__(self, name: str, **properties: str): self._fetch_config() self._session = self._create_session() separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) + if not separator_from_properties: + raise ValueError("Namespace separator cannot be an empty string") self._namespace_separator = unquote(separator_from_properties) def _create_session(self) -> Session: @@ -652,8 +654,6 @@ def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identi def _split_identifier_for_path( self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE ) -> Properties: - from urllib.parse import quote - if isinstance(identifier, TableIdentifier): return { "namespace": self._encode_namespace_path(tuple(identifier.namespace.root)), @@ -1056,7 +1056,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]: namespace_tuple = self.identifier_to_tuple(namespace) response = self._session.get( self.url( - f"{Endpoints.list_namespaces}?parent={self._namespace_separator.join(namespace_tuple)}" + f"{Endpoints.list_namespaces}?parent={self._encode_namespace_path(namespace_tuple)}" if namespace_tuple else Endpoints.list_namespaces ), @@ -1106,14 +1106,6 @@ def namespace_exists(self, namespace: str | Identifier) -> bool: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace = self._encode_namespace_path(namespace_tuple) - # fallback in order to work with older rest catalog implementations - if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints: - try: - self.load_namespace_properties(namespace_tuple) - return True - except NoSuchNamespaceError: - return False - response = self._session.head(self.url(Endpoints.namespace_exists, namespace=namespace)) if response.status_code == 404: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 65b8f256f2..71341c8b39 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1990,8 +1990,6 @@ def test_rest_catalog_with_google_credentials_path( actual_headers = history[0].headers assert actual_headers["Authorization"] == expected_auth_header - assert actual_headers["Authorization"] == expected_auth_header - def test_custom_namespace_separator(rest_mock: Mocker) -> None: custom_separator = "-" @@ -2008,7 +2006,7 @@ def test_custom_namespace_separator(rest_mock: Mocker) -> None: rest_mock.get( f"{TEST_URI}v1/namespaces/{expected_url_path_segment}", json={"namespace": [namespace_part1, namespace_part2], "properties": {"prop": "yes"}}, - status_code=204, + status_code=200, request_headers=TEST_HEADERS, ) From 3482e870f469adcd712f30ecd30328144c7eec51 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 5 Jan 2026 14:34:42 -0800 Subject: [PATCH 3/7] Test fixes from PR --- pyiceberg/catalog/rest/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index b520bc9c0a..a7d4f3a348 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -348,10 +348,6 @@ def __init__(self, name: str, **properties: str): self.uri = properties[URI] self._fetch_config() self._session = self._create_session() - separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) - if not separator_from_properties: - raise ValueError("Namespace separator cannot be an empty string") - self._namespace_separator = unquote(separator_from_properties) def _create_session(self) -> Session: """Create a request session with provided catalog configuration.""" @@ -645,6 +641,11 @@ def _fetch_config(self) -> None: if property_as_bool(self.properties, VIEW_ENDPOINTS_SUPPORTED, VIEW_ENDPOINTS_SUPPORTED_DEFAULT): self._supported_endpoints.update(VIEW_ENDPOINTS) + separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR) + if not separator_from_properties: + raise ValueError("Namespace separator cannot be an empty string") + self._namespace_separator = unquote(separator_from_properties) + def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identifier: identifier_tuple = self.identifier_to_tuple(identifier) if len(identifier_tuple) <= 1: @@ -661,6 +662,7 @@ def _split_identifier_for_path( } identifier_tuple = self._identifier_to_validated_tuple(identifier) + # Use quote to ensure that '/' aren't treated as path separators. return { "namespace": self._encode_namespace_path(identifier_tuple[:-1]), kind.value: quote(identifier_tuple[-1], safe=""), From 2353bfddb5e8dea6ce48e4c95fc5d266bb8aca1b Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 5 Jan 2026 15:05:41 -0800 Subject: [PATCH 4/7] add test --- tests/integration/test_catalog.py | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 0c77666568..6cb957c379 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -16,6 +16,7 @@ # under the License. import os +import uuid from collections.abc import Generator from pathlib import Path, PosixPath @@ -601,3 +602,36 @@ def test_register_table_existing(test_catalog: Catalog, table_schema_nested: Sch # Assert that registering the table again raises TableAlreadyExistsError with pytest.raises(TableAlreadyExistsError): test_catalog.register_table(identifier, metadata_location=table.metadata_location) + + +@pytest.mark.integration +def test_rest_custom_namespace_separator(rest_catalog: Catalog, table_schema_simple: Schema): + """ + Tests that the REST catalog correctly picks up the namespace-separator from the config endpoint. + The REST Catalog is configured with a '.' namespace separator. + """ + assert rest_catalog._namespace_separator == "." + + unique_id = uuid.uuid4().hex + parent_namespace = (f"test_parent_{unique_id}",) + child_namespace_part = "child" + full_namespace_tuple = (*parent_namespace, child_namespace_part) + + table_name = "my_table" + full_table_identifier_tuple = (*full_namespace_tuple, table_name) + + rest_catalog.create_namespace(namespace=parent_namespace) + rest_catalog.create_namespace(namespace=full_namespace_tuple) + + namespaces = rest_catalog.list_namespaces(parent_namespace) + assert full_namespace_tuple in namespaces + + # Test with a table + table = rest_catalog.create_table(identifier=full_table_identifier_tuple, schema=table_schema_simple) + assert table.identifier == full_table_identifier_tuple + + tables = rest_catalog.list_tables(full_namespace_tuple) + assert full_table_identifier_tuple in tables + + loaded_table = rest_catalog.load_table(identifier=full_table_identifier_tuple) + assert loaded_table.identifier == full_table_identifier_tuple From b043a9d0bc83c3a5ff3701702e82dd19012bc45b Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 8 Jan 2026 12:06:25 -0800 Subject: [PATCH 5/7] test fix --- tests/integration/test_catalog.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 6cb957c379..717f51e605 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -628,10 +628,10 @@ def test_rest_custom_namespace_separator(rest_catalog: Catalog, table_schema_sim # Test with a table table = rest_catalog.create_table(identifier=full_table_identifier_tuple, schema=table_schema_simple) - assert table.identifier == full_table_identifier_tuple + assert table.name() == full_table_identifier_tuple tables = rest_catalog.list_tables(full_namespace_tuple) assert full_table_identifier_tuple in tables loaded_table = rest_catalog.load_table(identifier=full_table_identifier_tuple) - assert loaded_table.identifier == full_table_identifier_tuple + assert loaded_table.name() == full_table_identifier_tuple From 4df5c36dbbf482ca0f2179650ce2fedef8e18635 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 8 Jan 2026 12:12:24 -0800 Subject: [PATCH 6/7] lint fixes --- tests/integration/test_catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 717f51e605..fb7026256c 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -605,7 +605,7 @@ def test_register_table_existing(test_catalog: Catalog, table_schema_nested: Sch @pytest.mark.integration -def test_rest_custom_namespace_separator(rest_catalog: Catalog, table_schema_simple: Schema): +def test_rest_custom_namespace_separator(rest_catalog: RestCatalog, table_schema_simple: Schema) -> None: """ Tests that the REST catalog correctly picks up the namespace-separator from the config endpoint. The REST Catalog is configured with a '.' namespace separator. From 14742c3d2e5432ca588f5b2883a121f312193be4 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Tue, 20 Jan 2026 10:22:41 -0800 Subject: [PATCH 7/7] Address comment --- pyiceberg/catalog/rest/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index a7d4f3a348..e3cc09edc0 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -1108,6 +1108,14 @@ def namespace_exists(self, namespace: str | Identifier) -> bool: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace = self._encode_namespace_path(namespace_tuple) + # fallback in order to work with older rest catalog implementations + if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints: + try: + self.load_namespace_properties(namespace_tuple) + return True + except NoSuchNamespaceError: + return False + response = self._session.head(self.url(Endpoints.namespace_exists, namespace=namespace)) if response.status_code == 404: