-
-
Notifications
You must be signed in to change notification settings - Fork 211
[ENH] V1 → V2 API Migration - core structure #1576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0159f47
58e9175
bdd65ff
52ef379
5dfcbce
2acbe99
af99880
74ab366
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| from openml._api.runtime.core import APIContext | ||
|
|
||
|
|
||
def set_api_version(version: str, *, strict: bool = False) -> None:
    """Select which OpenML API version the package talks to.

    Delegates to the module-level ``api_context`` singleton.

    Parameters
    ----------
    version:
        Identifier of the API version to activate.
    strict:
        Forwarded unchanged to ``APIContext.set_version``; its exact
        semantics are defined there, not in this module — TODO confirm.
    """
    api_context.set_version(version=version, strict=strict)


# Process-wide singleton holding the currently active API version/context.
# Defined after set_api_version, but that is fine: the name is only looked
# up when the function is called.
api_context = APIContext()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from dataclasses import dataclass | ||
| from typing import Literal | ||
|
|
||
| DelayMethod = Literal["human", "robot"] | ||
|
|
||
|
|
||
@dataclass
class APIConfig:
    """Connection details for one OpenML API version."""

    server: str  # server root, e.g. "https://www.openml.org/"
    base_url: str  # endpoint path prefix, resolved against `server`
    key: str  # API key, sent as the `api_key` query parameter
    timeout: int = 10  # seconds
|
|
||
|
|
||
@dataclass
class APISettings:
    """Bundle of per-version API configurations."""

    v1: APIConfig  # legacy API (served under "api/v1/xml/")
    v2: APIConfig  # next-generation API
|
|
||
|
|
||
@dataclass
class ConnectionConfig:
    """Retry behaviour for HTTP connections."""

    retries: int = 3  # how many attempts before giving up
    delay_method: DelayMethod = "human"  # pacing strategy between retries
    delay_time: int = 1  # seconds

    def __post_init__(self) -> None:
        # The Literal annotation is not enforced at runtime, so validate
        # the value explicitly; bail out early on the happy path.
        if self.delay_method in {"human", "robot"}:
            return
        raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}")
|
|
||
|
|
||
@dataclass
class CacheConfig:
    """Filesystem response-cache settings."""

    # NOTE(review): the leading "~" is NOT expanded here; Path() treats it
    # literally, so consumers must call expanduser() on this value.
    dir: str = "~/.openml/cache"
    ttl: int = 60 * 60 * 24 * 7  # one week, in seconds
|
|
||
|
|
||
@dataclass
class Settings:
    """Top-level bundle of all package settings."""

    api: APISettings  # per-version server/key/timeout configuration
    connection: ConnectionConfig  # retry/delay behaviour
    cache: CacheConfig  # response-cache location and TTL
|
|
||
|
|
||
# Module-level default settings instance.
# TODO(review): the v2 server below is a hard-coded local development
# endpoint; it is to be replaced by the remote server when deployed,
# before this lands in main (see PR discussion).
settings = Settings(
    api=APISettings(
        v1=APIConfig(
            server="https://www.openml.org/",
            base_url="api/v1/xml/",
            key="...",  # placeholder API key
        ),
        v2=APIConfig(
            server="http://127.0.0.1:8001/",  # local development server
            base_url="",
            key="...",  # placeholder API key
        ),
    ),
    connection=ConnectionConfig(),
    cache=CacheConfig(),
)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| from openml._api.http.client import HTTPClient | ||
|
|
||
| __all__ = ["HTTPClient"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,151 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from pathlib import Path | ||
| from typing import TYPE_CHECKING, Any | ||
| from urllib.parse import urlencode, urljoin, urlparse | ||
|
|
||
| import requests | ||
| from requests import Response | ||
|
|
||
| from openml.__version__ import __version__ | ||
| from openml._api.config import settings | ||
|
|
||
| if TYPE_CHECKING: | ||
| from openml._api.config import APIConfig | ||
|
|
||
|
|
||
class CacheMixin:
    """Filesystem response-cache helpers shared by HTTP clients.

    Location and time-to-live come from the global ``settings``; the
    read/write methods are still stubs (see TODOs below).
    """

    @property
    def dir(self) -> str:
        """Root cache directory as configured (may contain a leading ``~``)."""
        return settings.cache.dir

    @property
    def ttl(self) -> int:
        """Cache time-to-live in seconds."""
        return settings.cache.ttl

    def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path:
        """Map a request (URL + query params) to a unique cache directory.

        Layout: ``<dir>/<reversed domain>/<url path>/<encoded params>``,
        e.g. ``.../org/openml/www/api/v1/xml/data/1/status=active``.
        """
        parsed_url = urlparse(url)
        netloc_parts = parsed_url.netloc.split(".")[::-1]  # reverse domain
        path_parts = parsed_url.path.strip("/").split("/")

        # Drop the api_key so the secret never appears in filesystem paths,
        # then serialize any remaining params into one stable component.
        filtered_params = {k: v for k, v in params.items() if k != "api_key"}
        params_part = [urlencode(filtered_params)] if filtered_params else []

        # BUG FIX: expand a leading "~" — Path() treats it literally, so the
        # default "~/.openml/cache" previously produced a relative "./~/..."
        # directory instead of one under the user's home.
        base_dir = Path(self.dir).expanduser()
        return base_dir.joinpath(*netloc_parts, *path_parts, *params_part)

    def _get_cache_response(self, cache_dir: Path) -> Response:  # noqa: ARG002
        """Read a cached response. TODO: not implemented — returns an empty Response."""
        return Response()

    def _set_cache_response(self, cache_dir: Path, response: Response) -> None:  # noqa: ARG002
        """Persist a response to the cache. TODO: not implemented — no-op."""
        return None
|
|
||
|
|
||
class HTTPClient(CacheMixin):
    """HTTP transport for one OpenML API version.

    Wraps ``requests`` with the per-version configuration (server, base
    URL, API key, timeout) and the response-cache hooks inherited from
    :class:`CacheMixin` (which are still stubs).
    """

    def __init__(self, config: APIConfig) -> None:
        self.config = config
        # Identify the client library and version to the server on every call.
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    @property
    def server(self) -> str:
        """Server root, e.g. ``https://www.openml.org/``."""
        return self.config.server

    @property
    def base_url(self) -> str:
        """Endpoint path prefix, resolved against :attr:`server`."""
        return self.config.base_url

    @property
    def key(self) -> str:
        """API key appended as the ``api_key`` query parameter."""
        return self.config.key

    @property
    def timeout(self) -> int:
        """Default per-request timeout in seconds."""
        return self.config.timeout

    def request(
        self,
        method: str,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """Send ``method`` to ``path`` and return the ``requests`` response.

        Parameters
        ----------
        method:
            HTTP verb, e.g. ``"GET"``.
        path:
            Endpoint path, resolved against ``server`` + ``base_url``.
        use_cache:
            If true, answer from the response cache instead of the network.
            NOTE: the cache is an unimplemented stub, so this currently
            yields an empty ``Response``; callers keep it disabled.
        use_api_key:
            If true, add the configured key as the ``api_key`` query param.
        **request_kwargs:
            Extra keyword arguments forwarded to ``requests.request``;
            ``params``, ``headers`` and ``timeout`` are merged with the
            client defaults first.
        """
        # Resolve path -> base_url -> server. Standard urljoin semantics
        # apply: a `path` starting with "/" discards `base_url`.
        url = urljoin(self.server, urljoin(self.base_url, path))

        # Copy so the caller's dict is never mutated.
        params = request_kwargs.pop("params", {}).copy()
        if use_api_key:
            params["api_key"] = self.key

        # Client default headers take precedence over caller-supplied ones.
        headers = request_kwargs.pop("headers", {}).copy()
        headers.update(self.headers)

        timeout = request_kwargs.pop("timeout", self.timeout)
        cache_dir = self._get_cache_dir(url, params)

        if use_cache:
            # TODO: handle TTL expiry and fall back to the network on a
            # cache miss. (This replaces a vacuous ``try: ... except
            # Exception: raise`` — unconditionally re-raising is already
            # the default behaviour, so the handler added nothing.)
            return self._get_cache_response(cache_dir)

        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            **request_kwargs,
        )

        # NOTE: currently unreachable when ``use_cache`` is true (we return
        # above); kept for when the cache gains miss/TTL fall-through.
        if use_cache:
            self._set_cache_response(cache_dir, response)

        return response

    def get(
        self,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """GET ``path``; caching is force-disabled until implemented."""
        # TODO: remove override when cache is implemented
        use_cache = False
        return self.request(
            method="GET",
            path=path,
            use_cache=use_cache,
            use_api_key=use_api_key,
            **request_kwargs,
        )

    def post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """POST ``path``; always authenticated, never cached."""
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """DELETE ``path``; always authenticated, never cached."""
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| from openml._api.resources.datasets import DatasetsV1, DatasetsV2 | ||
| from openml._api.resources.tasks import TasksV1, TasksV2 | ||
|
|
||
| __all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from abc import ABC, abstractmethod | ||
| from typing import TYPE_CHECKING | ||
|
|
||
| if TYPE_CHECKING: | ||
| from requests import Response | ||
|
|
||
| from openml._api.http import HTTPClient | ||
| from openml.datasets.dataset import OpenMLDataset | ||
| from openml.tasks.task import OpenMLTask | ||
|
|
||
|
|
||
class ResourceAPI:
    """Base class for resource endpoint groups (datasets, tasks, ...)."""

    def __init__(self, http: HTTPClient):
        # Shared transport; all subclasses issue requests through it.
        self._http = http
|
|
||
|
|
||
class DatasetsAPI(ResourceAPI, ABC):
    """Version-agnostic interface for dataset endpoints."""

    @abstractmethod
    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch a dataset by id.

        ``return_response`` was added for consistency with ``TasksAPI.get``:
        the return annotation already promised the
        ``(OpenMLDataset, Response)`` tuple form, but the signature offered
        no way to request it. Defaulted to ``False`` so existing callers
        are unaffected.
        """
        ...
|
|
||
|
|
||
class TasksAPI(ResourceAPI, ABC):
    """Version-agnostic interface for task endpoints."""

    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch a task by id; with ``return_response=True`` implementations
        also return the raw HTTP ``Response``."""
        ...
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import TYPE_CHECKING | ||
|
|
||
| from openml._api.resources.base import DatasetsAPI | ||
|
|
||
if TYPE_CHECKING:
    # BUG FIX: ``Response`` lives in the ``requests`` HTTP library;
    # ``responses`` is the mocking package and was imported here by
    # mistake (confirmed by the author in review).
    from requests import Response
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In production this would be requests, right? You used responses for the mocking here during development.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this should be `requests` — I'll fix it. |
||
|
|
||
| from openml.datasets.dataset import OpenMLDataset | ||
|
|
||
|
|
||
class DatasetsV1(DatasetsAPI):
    """V1 (XML) implementation of the dataset endpoints."""

    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch a dataset by id. ``return_response`` mirrors
        ``TasksAPI.get`` (the annotation already promised the tuple form);
        it defaults to ``False`` so existing callers are unaffected.

        TODO: not implemented yet.
        """
        raise NotImplementedError
|
|
||
|
|
||
class DatasetsV2(DatasetsAPI):
    """V2 (JSON) implementation of the dataset endpoints — TODO confirm format."""

    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch a dataset by id. ``return_response`` mirrors
        ``TasksAPI.get`` (the annotation already promised the tuple form);
        it defaults to ``False`` so existing callers are unaffected.

        TODO: not implemented yet.
        """
        raise NotImplementedError
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would move the settings to the individual classes. I think this design introduces too much coupling of the classes to this file. You cannot move the classes around, or add a new API version, without making non-extensible changes to this file — because `APISettings` will require a constructor change for each new class it accepts. Instead, a better design is to apply the strategy pattern cleanly to the different API definitions — v1 and v2 — and move the config either to their `__init__` or to a `set_config` (or similar) method.