8 changes: 8 additions & 0 deletions openml/_api/__init__.py
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, *, strict: bool = False) -> None:
    api_context.set_version(version=version, strict=strict)


api_context = APIContext()
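
A minimal usage sketch of the switch above; the accepted version strings depend on APIContext (defined in openml._api.runtime.core, which is not part of this diff), so the values below are assumptions:

from openml import _api

# Assumed version identifiers; APIContext.set_version defines what is actually accepted.
_api.set_api_version("v2")
_api.set_api_version("v1", strict=True)  # strict presumably turns unsupported requests into errors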
62 changes: 62 additions & 0 deletions openml/_api/config.py
@@ -0,0 +1,62 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

DelayMethod = Literal["human", "robot"]


@dataclass
class APIConfig:
    server: str
    base_url: str
    key: str
    timeout: int = 10  # seconds


@dataclass
class APISettings:
    v1: APIConfig
    v2: APIConfig


@dataclass
class ConnectionConfig:
    retries: int = 3
    delay_method: DelayMethod = "human"
    delay_time: int = 1  # seconds

    def __post_init__(self) -> None:
        if self.delay_method not in ("human", "robot"):
            raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}")


@dataclass
class CacheConfig:
    dir: str = "~/.openml/cache"
    ttl: int = 60 * 60 * 24 * 7  # one week


@dataclass
class Settings:
    api: APISettings
    connection: ConnectionConfig
    cache: CacheConfig


settings = Settings(
    api=APISettings(
        v1=APIConfig(
            server="https://www.openml.org/",
            base_url="api/v1/xml/",
            key="...",
        ),
        v2=APIConfig(
            server="http://127.0.0.1:8001/",
            base_url="",
            key="...",
        ),
    ),
    connection=ConnectionConfig(),
    cache=CacheConfig(),
)
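
Since these are plain dataclasses, the module-level settings object can be adjusted in place before any client is built; a small sketch (the server and values below are illustrative, not defaults from this PR):

from openml._api.config import settings

# Point the v1 client at the OpenML test server and allow slower responses.
settings.api.v1.server = "https://test.openml.org/"
settings.api.v1.timeout = 30

# Retry more often and back off like a script rather than a human.
settings.connection.retries = 5
settings.connection.delay_method = "robot"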
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

__all__ = ["HTTPClient"]
151 changes: 151 additions & 0 deletions openml/_api/http/client.py
@@ -0,0 +1,151 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urljoin, urlparse

import requests
from requests import Response

from openml.__version__ import __version__
from openml._api.config import settings

if TYPE_CHECKING:
    from openml._api.config import APIConfig


class CacheMixin:
    @property
    def dir(self) -> str:
        return settings.cache.dir

    @property
    def ttl(self) -> int:
        return settings.cache.ttl

    def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path:
        parsed_url = urlparse(url)
        netloc_parts = parsed_url.netloc.split(".")[::-1]  # reverse domain
        path_parts = parsed_url.path.strip("/").split("/")

        # remove api_key and serialize params if any
        filtered_params = {k: v for k, v in params.items() if k != "api_key"}
        params_part = [urlencode(filtered_params)] if filtered_params else []

        return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part)

    def _get_cache_response(self, cache_dir: Path) -> Response:  # noqa: ARG002
        return Response()

    def _set_cache_response(self, cache_dir: Path, response: Response) -> None:  # noqa: ARG002
        return None


class HTTPClient(CacheMixin):
    def __init__(self, config: APIConfig) -> None:
        self.config = config
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    @property
    def server(self) -> str:
        return self.config.server

    @property
    def base_url(self) -> str:
        return self.config.base_url

    @property
    def key(self) -> str:
        return self.config.key

    @property
    def timeout(self) -> int:
        return self.config.timeout

    def request(
        self,
        method: str,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        url = urljoin(self.server, urljoin(self.base_url, path))

        params = request_kwargs.pop("params", {})
        params = params.copy()
        if use_api_key:
            params["api_key"] = self.key

        headers = request_kwargs.pop("headers", {})
        headers = headers.copy()
        headers.update(self.headers)

        timeout = request_kwargs.pop("timeout", self.timeout)
        cache_dir = self._get_cache_dir(url, params)

        if use_cache:
            try:
                return self._get_cache_response(cache_dir)
            # TODO: handle ttl expired error
            except Exception:
                raise

        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            **request_kwargs,
        )

        if use_cache:
            self._set_cache_response(cache_dir, response)

        return response

    def get(
        self,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        # TODO: remove override when cache is implemented
        use_cache = False
        return self.request(
            method="GET",
            path=path,
            use_cache=use_cache,
            use_api_key=use_api_key,
            **request_kwargs,
        )

    def post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )
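
A short sketch of driving the client directly, based only on the methods above; the endpoint path is illustrative, and note that get() currently forces use_cache=False:

from openml._api.config import settings
from openml._api.http import HTTPClient

client = HTTPClient(settings.api.v1)

# The nested urljoin calls build https://www.openml.org/api/v1/xml/data/61 here.
response = client.get("data/61", use_api_key=True)
response.raise_for_status()
print(response.text[:200])

# post() and delete() always attach the API key and never consult the cache.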
Empty file added openml/_api/http/utils.py
5 changes: 5 additions & 0 deletions openml/_api/resources/__init__.py
@@ -0,0 +1,5 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.studies import StudiesV1, StudiesV2
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = ["DatasetsV1", "DatasetsV2", "StudiesV1", "StudiesV2", "TasksV1", "TasksV2"]
36 changes: 36 additions & 0 deletions openml/_api/resources/base.py
@@ -0,0 +1,36 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from requests import Response

    from openml._api.http import HTTPClient
    from openml.datasets.dataset import OpenMLDataset
    from openml.tasks.task import OpenMLTask


class ResourceAPI:
    def __init__(self, http: HTTPClient):
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    @abstractmethod
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ...


class TasksAPI(ResourceAPI, ABC):
    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]: ...


class StudiesAPI(ResourceAPI, ABC):
    @abstractmethod
    def list(self, **kwargs: Any) -> Any: ...
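
These bases only pin down the per-resource interface; concrete versions receive their HTTPClient through ResourceAPI.__init__. A wiring sketch using the classes exported from openml._api.resources:

from openml._api.config import settings
from openml._api.http import HTTPClient
from openml._api.resources import DatasetsV1, StudiesV1

http_v1 = HTTPClient(settings.api.v1)
datasets = DatasetsV1(http_v1)  # get() is still a stub in this PR
studies = StudiesV1(http_v1)    # list() is implemented further down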
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

if TYPE_CHECKING:
    from requests import Response

    from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        raise NotImplementedError


class DatasetsV2(DatasetsAPI):
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        raise NotImplementedError
39 changes: 39 additions & 0 deletions openml/_api/resources/studies.py
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any

from openml._api.resources.base import StudiesAPI


class StudiesV1(StudiesAPI):
    def list(self, **kwargs: Any) -> Any:
        limit = kwargs.get("limit")
        offset = kwargs.get("offset")
        status = kwargs.get("status")
        main_entity_type = kwargs.get("main_entity_type")
        uploader = kwargs.get("uploader")
        benchmark_suite = kwargs.get("benchmark_suite")

        api_call = "study/list"

        if limit is not None:
            api_call += f"/limit/{limit}"
        if offset is not None:
            api_call += f"/offset/{offset}"
        if status is not None:
            api_call += f"/status/{status}"
        if main_entity_type is not None:
            api_call += f"/main_entity_type/{main_entity_type}"
        if uploader is not None:
            api_call += f"/uploader/{','.join(str(u) for u in uploader)}"
        if benchmark_suite is not None:
            api_call += f"/benchmark_suite/{benchmark_suite}"

        # Make the GET request and return the XML text
        response = self._http.get(api_call)
        return response.text


class StudiesV2(StudiesAPI):
    def list(self, **kwargs: Any) -> Any:
        raise NotImplementedError("V2 API implementation is not yet available")
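
For reference, a call like the one below would build the path study/list/limit/10/status/active and return the raw XML body; the filter values are illustrative:

from openml._api.config import settings
from openml._api.http import HTTPClient
from openml._api.resources import StudiesV1

studies = StudiesV1(HTTPClient(settings.api.v1))

# Filters are appended as path segments in the order they are checked above.
xml_text = studies.list(limit=10, status="active")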