-
Notifications
You must be signed in to change notification settings - Fork 198
Enable Legacy Profiles/Config in Unified Mode #1240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b6e7e33
39224c3
9e50715
64bfd75
026c1d3
0b1aa0c
30a73f6
55b09a8
71ba1e1
cd84d86
6127b13
579f2b1
5c63b97
8791ff1
cd485db
5bd02cf
20832ed
1d19a12
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,7 +46,10 @@ def __get__(self, cfg: "Config", owner): | |
| return cfg._inner.get(self.name, None) | ||
|
|
||
| def __set__(self, cfg: "Config", value: any): | ||
| cfg._inner[self.name] = self.transform(value) | ||
| if value is None: | ||
| cfg._inner.pop(self.name, None) | ||
| else: | ||
| cfg._inner[self.name] = self.transform(value) | ||
|
|
||
| def __repr__(self) -> str: | ||
| return f"<ConfigAttribute '{self.name}' {self.transform.__name__}>" | ||
|
|
@@ -264,14 +267,20 @@ def __init__( | |
| self.databricks_environment = kwargs["databricks_environment"] | ||
| del kwargs["databricks_environment"] | ||
| self._clock = clock if clock is not None else RealClock() | ||
|
|
||
| try: | ||
| self._set_inner_config(kwargs) | ||
| self._load_from_env() | ||
| self._known_file_config_loader() | ||
| self._fix_host_if_needed() | ||
| # Resolve the legacy profile based on configuration just before validation. | ||
| self._resolve_legacy_profile() | ||
| self._validate() | ||
| self.init_auth() | ||
| self._init_product(product, product_version) | ||
| # Extract the workspace ID for legacy profiles. This is extracted from an API call. | ||
| if not self.workspace_id and not self.account_id and self.experimental_is_unified_host: | ||
| self.workspace_id = self._fetch_workspace_id() | ||
| except ValueError as e: | ||
| message = self.wrap_debug_info(str(e)) | ||
| raise ValueError(message) from e | ||
|
|
@@ -335,33 +344,25 @@ def _get_azure_environment_name(self) -> str: | |
| @property | ||
| def environment(self) -> DatabricksEnvironment: | ||
| """Returns the environment based on configuration.""" | ||
| if self.databricks_environment: | ||
| return self.databricks_environment | ||
| if not self.host and self.azure_workspace_resource_id: | ||
| azure_env = self._get_azure_environment_name() | ||
| for environment in ALL_ENVS: | ||
| if environment.cloud != Cloud.AZURE: | ||
| continue | ||
| if environment.azure_environment.name != azure_env: | ||
| continue | ||
| if environment.dns_zone.startswith(".dev") or environment.dns_zone.startswith(".staging"): | ||
| continue | ||
| return environment | ||
| return get_environment_for_hostname(self.host) | ||
| if not self.experimental_is_unified_host: | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We now resolve this once during new Config, not lazily. This ensures that we don't read the host after Config is created. |
||
| # Preserve old behavior by default. | ||
| # TODO: Remove this when making the unified mode the default. | ||
| self._resolve_environment() | ||
| return self.databricks_environment | ||
|
|
||
| @property | ||
| def is_azure(self) -> bool: | ||
| if self.azure_workspace_resource_id: | ||
| return True | ||
| return self.environment.cloud == Cloud.AZURE | ||
| return self.environment is not None and self.environment.cloud == Cloud.AZURE | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unified Hosts will return false on all 3 clouds. |
||
|
|
||
| @property | ||
| def is_gcp(self) -> bool: | ||
| return self.environment.cloud == Cloud.GCP | ||
| return self.environment is not None and self.environment.cloud == Cloud.GCP | ||
|
|
||
| @property | ||
| def is_aws(self) -> bool: | ||
| return self.environment.cloud == Cloud.AWS | ||
| return self.environment is not None and self.environment.cloud == Cloud.AWS | ||
|
|
||
| @property | ||
| def host_type(self) -> HostType: | ||
|
|
@@ -384,7 +385,9 @@ def host_type(self) -> HostType: | |
|
|
||
| @property | ||
| def client_type(self) -> ClientType: | ||
| """Determine the type of client configuration. | ||
| """ | ||
| [Deprecated] Deprecated. Use host_type instead. Some hosts can support both account and workspace clients. | ||
| Determine the type of client configuration. | ||
|
|
||
| This is separate from host_type. For example, a unified host can support both | ||
| workspace and account client types. | ||
|
|
@@ -403,25 +406,24 @@ def client_type(self) -> ClientType: | |
| return ClientType.WORKSPACE | ||
|
|
||
| if host_type == HostType.UNIFIED: | ||
| if not self.account_id: | ||
| raise ValueError("Unified host requires account_id to be set") | ||
| if self.workspace_id: | ||
| return ClientType.WORKSPACE | ||
| return ClientType.ACCOUNT | ||
| if self.account_id: | ||
| return ClientType.ACCOUNT | ||
| # Legacy workspace hosts don't have a workspace_id until AFTER the auth is resolved. | ||
| return ClientType.WORKSPACE | ||
|
|
||
| # Default to workspace for backward compatibility | ||
| return ClientType.WORKSPACE | ||
|
|
||
| @property | ||
| def is_account_client(self) -> bool: | ||
| """[Deprecated] Use host_type or client_type instead. | ||
| """[Deprecated] Use host_type instead. | ||
|
|
||
| Determines if this is an account client based on the host URL. | ||
| Determines if this config is compatible with an account client based on the host URL and account_id. | ||
| """ | ||
| if self.experimental_is_unified_host: | ||
| raise ValueError( | ||
| "is_account_client cannot be used with unified hosts; use host_type or client_type instead" | ||
| ) | ||
| return self.account_id | ||
| if not self.host: | ||
| return False | ||
| return self.host.startswith("https://accounts.") or self.host.startswith("https://accounts-dod.") | ||
|
|
@@ -480,7 +482,7 @@ def oidc_endpoints(self) -> Optional[OidcEndpoints]: | |
| # Handle unified hosts | ||
| if self.host_type == HostType.UNIFIED: | ||
| if not self.account_id: | ||
| raise ValueError("Unified host requires account_id to be set for OAuth endpoints") | ||
| return get_workspace_endpoints(self.host) | ||
| return get_unified_endpoints(self.host, self.account_id) | ||
|
|
||
| # Handle traditional account hosts | ||
|
|
@@ -525,12 +527,9 @@ def sql_http_path(self) -> Optional[str]: | |
| return None | ||
| if self.cluster_id and self.warehouse_id: | ||
| raise ValueError("cannot have both cluster_id and warehouse_id") | ||
| headers = self.authenticate() | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Extracted to a separate |
||
| headers["User-Agent"] = f"{self.user_agent} sdk-feature/sql-http-path" | ||
| if self.cluster_id: | ||
| response = requests.get(f"{self.host}/api/2.0/preview/scim/v2/Me", headers=headers) | ||
| # get workspace ID from the response header | ||
| workspace_id = response.headers.get("x-databricks-org-id") | ||
| # Reuse cached workspace_id or fetch it | ||
| workspace_id = self.workspace_id or self._fetch_workspace_id() | ||
| return f"sql/protocolv1/o/{workspace_id}/{self.cluster_id}" | ||
| if self.warehouse_id: | ||
| return f"/sql/1.0/warehouses/{self.warehouse_id}" | ||
|
|
@@ -721,3 +720,44 @@ def copy(self): | |
| def deep_copy(self): | ||
| """Creates a deep copy of the config object.""" | ||
| return copy.deepcopy(self) | ||
|
|
||
| # The code below is used to support legacy hosts. | ||
| def _resolve_environment(self): | ||
| """Resolve the environment based on configuration.""" | ||
| if self.databricks_environment: | ||
| return | ||
| if not self.host and self.azure_workspace_resource_id: | ||
| azure_env = self._get_azure_environment_name() | ||
| for environment in ALL_ENVS: | ||
| if environment.cloud != Cloud.AZURE: | ||
| continue | ||
| if environment.azure_environment.name != azure_env: | ||
| continue | ||
| if environment.dns_zone.startswith(".dev") or environment.dns_zone.startswith(".staging"): | ||
| continue | ||
| self.databricks_environment = environment | ||
| return | ||
| self.databricks_environment = get_environment_for_hostname(self.host) | ||
|
|
||
| def _resolve_legacy_profile(self): | ||
| """Resolve the legacy profile based on configuration.""" | ||
|
|
||
| # This only applies to the unified mode. | ||
| # TODO: Remove this when making the unified mode the default. | ||
| if not self.experimental_is_unified_host: | ||
| return | ||
| # New Profiles always have an account ID. | ||
| if not self.account_id: | ||
| self._resolve_environment() | ||
|
|
||
| if self.host and (self.host.startswith("https://accounts.") or self.host.startswith("https://accounts-dod.")): | ||
| self._resolve_environment() | ||
|
|
||
| def _fetch_workspace_id(self) -> Optional[str]: | ||
| """Fetch the workspace ID from the host.""" | ||
| headers = self.authenticate() | ||
| headers["User-Agent"] = f"{self.user_agent} sdk-feature/sql-http-path" | ||
| response = requests.get(f"{self.host}/api/2.0/preview/scim/v2/Me", headers=headers) | ||
| response.raise_for_status() | ||
| # get workspace ID from the response header | ||
| return response.headers.get("x-databricks-org-id") | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a bug fix. Before, this:
would actually set