Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions openml/_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


# Process-wide API context singleton, mutated via :func:`set_api_version`.
api_context = APIContext()


def set_api_version(version: str, *, strict: bool = False) -> None:
    """Select which OpenML API version subsequent calls should use.

    Parameters
    ----------
    version : str
        Target API version identifier, forwarded to ``APIContext.set_version``.
    strict : bool
        Forwarded to ``APIContext.set_version``; see that method for semantics.
    """
    api_context.set_version(version=version, strict=strict)
5 changes: 5 additions & 0 deletions openml/_api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

# Default endpoint for the production v1 (XML) API.
API_V1_SERVER = "https://www.openml.org/api/v1/xml"
# NOTE(review): points at a local development server — presumably a placeholder
# until a public v2 endpoint exists; confirm before release.
API_V2_SERVER = "http://127.0.0.1:8001"
# Placeholder API key; a real key is presumably supplied via user configuration.
API_KEY = "..."
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

__all__ = ["HTTPClient"]
39 changes: 39 additions & 0 deletions openml/_api/http/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any, Mapping

import requests
from requests import Response

from openml.__version__ import __version__


class HTTPClient:
    """Minimal HTTP client used by the versioned API layer.

    Every request is sent to ``base_url``/``path`` with an
    ``openml-python/<version>`` user-agent header and a bounded timeout.
    """

    def __init__(self, base_url: str, timeout: float = 10.0) -> None:
        """Initialize the client.

        Parameters
        ----------
        base_url : str
            Server root (without trailing slash) that all request paths are
            joined to.
        timeout : float
            Per-request timeout in seconds. Defaults to 10, the value that
            was previously hard-coded in every method.
        """
        self.base_url = base_url
        self.timeout = timeout
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    def _url(self, path: str) -> str:
        # Single place for URL joining so every verb builds URLs identically.
        return f"{self.base_url}/{path}"

    def get(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a GET request to ``path`` and return the raw response."""
        return requests.get(
            self._url(path), params=params, headers=self.headers, timeout=self.timeout
        )

    def post(
        self,
        path: str,
        data: Mapping[str, Any] | None = None,
        files: Any = None,
    ) -> Response:
        """Send a POST request with form ``data`` and optional ``files``."""
        return requests.post(
            self._url(path), data=data, files=files, headers=self.headers, timeout=self.timeout
        )

    def delete(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a DELETE request to ``path`` and return the raw response."""
        return requests.delete(
            self._url(path), params=params, headers=self.headers, timeout=self.timeout
        )
Empty file added openml/_api/http/utils.py
Empty file.
5 changes: 5 additions & 0 deletions openml/_api/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.evaluations import EvaluationsV1, EvaluationsV2
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2", "EvaluationsV1", "EvaluationsV2"]
37 changes: 37 additions & 0 deletions openml/_api/resources/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from requests import Response

from openml._api.http import HTTPClient
from openml.datasets.dataset import OpenMLDataset
from openml.evaluations.evaluation import OpenMLEvaluation
from openml.tasks.task import OpenMLTask


class ResourceAPI:
    """Base class for resource endpoints; holds the shared HTTP client."""

    def __init__(self, http: HTTPClient) -> None:
        # Shared HTTPClient that subclasses use for all their requests.
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    """Abstract interface for version-specific dataset endpoints."""

    @abstractmethod
    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch a dataset by id.

        Parameters
        ----------
        dataset_id : int
            OpenML dataset identifier.
        return_response : bool
            If True, implementations also return the raw HTTP response.
            This flag mirrors ``TasksAPI.get`` and makes the tuple variant
            of the return annotation reachable; the previous signature
            promised a tuple return but offered no way to request it.
        """
        ...


class TasksAPI(ResourceAPI, ABC):
    """Abstract interface for version-specific task endpoints."""

    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch a task by id; if ``return_response`` is True, also return
        the raw HTTP response."""
        ...


class EvaluationsAPI(ResourceAPI, ABC):
    """Abstract interface for version-specific evaluation endpoints."""

    @abstractmethod
    def list(self, api_call: str) -> list[OpenMLEvaluation]:
        """Return evaluations for the given relative API call path."""
        ...
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

if TYPE_CHECKING:
from responses import Response

from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    """V1 (XML) API implementation for datasets — not yet implemented."""

    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch dataset ``dataset_id`` from the v1 XML endpoint.

        Parameters
        ----------
        dataset_id : int
            OpenML dataset identifier.
        return_response : bool
            If True, also return the raw HTTP response (keyword-only, for
            signature consistency with ``TasksV1.get``).

        Raises
        ------
        NotImplementedError
            Always — the v1 datasets implementation is not yet available.
        """
        raise NotImplementedError("V1 API implementation is not yet available")


class DatasetsV2(DatasetsAPI):
    """V2 (JSON) API implementation for datasets — not yet implemented."""

    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch dataset ``dataset_id`` from the v2 JSON endpoint.

        Parameters
        ----------
        dataset_id : int
            OpenML dataset identifier.
        return_response : bool
            If True, also return the raw HTTP response (keyword-only, for
            signature consistency with ``TasksV2.get``).

        Raises
        ------
        NotImplementedError
            Always — the v2 implementation is not yet available.
        """
        raise NotImplementedError("V2 API implementation is not yet available")
137 changes: 137 additions & 0 deletions openml/_api/resources/evaluations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from __future__ import annotations

import json

import xmltodict

from openml._api.resources.base import EvaluationsAPI
from openml.evaluations.evaluation import OpenMLEvaluation


class EvaluationsV1(EvaluationsAPI):
    """V1 API implementation for evaluations.

    Fetches evaluations from the v1 XML API endpoint.
    """

    def list(self, api_call: str) -> list[OpenMLEvaluation]:
        """Fetch and list evaluations from the OpenML API.

        Makes an API call to retrieve evaluation results, parses the XML response,
        and converts it into OpenMLEvaluation objects.

        Parameters
        ----------
        api_call : str
            The API endpoint path (without base URL) to call for evaluations.
            Example: "evaluation/list/function/predictive_accuracy/limit/10"

        Returns
        -------
        list[OpenMLEvaluation]
            A list of OpenMLEvaluation objects containing the parsed evaluations.

        Raises
        ------
        ValueError
            If the XML response does not contain the expected structure.
        AssertionError
            If the evaluation data is not in list format as expected.
            NOTE(review): ``assert`` is stripped under ``python -O``; raise
            ValueError instead if this check must always run.

        Notes
        -----
        This method performs two API calls:
        1. Fetches evaluation data from the specified endpoint
        2. Fetches user information for all uploaders in the evaluation data

        The user information is used to map uploader IDs to usernames.
        """
        eval_response = self._http.get(api_call)
        xml_content = eval_response.text

        # force_list guarantees "oml:evaluation" parses to a list even when
        # the response contains a single evaluation element.
        evals_dict = xmltodict.parse(xml_content, force_list=("oml:evaluation",))
        # Minimalistic check if the XML is useful
        if "oml:evaluations" not in evals_dict:
            raise ValueError(
                "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
            )

        # NOTE(review): an empty <oml:evaluations/> element would raise
        # KeyError here rather than the documented errors — confirm the
        # server never returns an empty result set.
        assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
            evals_dict["oml:evaluations"]["oml:evaluation"],
        )

        # Distinct uploader ids (XML strings). Set ordering is arbitrary, but
        # the username lookup below is keyed by id, so order does not matter.
        uploader_ids = list(
            {eval_["oml:uploader"] for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]},
        )
        # Second API call: resolve all uploader ids to usernames in one request.
        api_users = "user/list/user_id/" + ",".join(uploader_ids)
        user_response = self._http.get(api_users)
        xml_content_user = user_response.text

        users = xmltodict.parse(xml_content_user, force_list=("oml:user",))
        # Map uploader id (string, as parsed from XML) -> username.
        user_dict = {
            user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]
        }

        evals = []
        for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]:
            run_id = int(eval_["oml:run_id"])
            # "oml:value" is a scalar result; "oml:values" is a JSON-encoded
            # list; "oml:array_data" is passed through verbatim. Each may be
            # absent, in which case None is stored.
            value = float(eval_["oml:value"]) if "oml:value" in eval_ else None
            values = json.loads(eval_["oml:values"]) if eval_.get("oml:values", None) else None
            array_data = eval_.get("oml:array_data")

            evals.append(
                OpenMLEvaluation(
                    run_id=run_id,
                    task_id=int(eval_["oml:task_id"]),
                    setup_id=int(eval_["oml:setup_id"]),
                    flow_id=int(eval_["oml:flow_id"]),
                    flow_name=eval_["oml:flow_name"],
                    data_id=int(eval_["oml:data_id"]),
                    data_name=eval_["oml:data_name"],
                    function=eval_["oml:function"],
                    upload_time=eval_["oml:upload_time"],
                    uploader=int(eval_["oml:uploader"]),
                    uploader_name=user_dict[eval_["oml:uploader"]],
                    value=value,
                    values=values,
                    array_data=array_data,
                )
            )

        return evals


class EvaluationsV2(EvaluationsAPI):
    """V2 API implementation for evaluations.

    Will fetch evaluations from the v2 JSON API endpoint; currently a stub.
    """

    def list(self, api_call: str) -> list[OpenMLEvaluation]:
        """Fetch and list evaluations from the OpenML v2 (JSON) API.

        Parameters
        ----------
        api_call : str
            The API endpoint path (without base URL) to call for evaluations.
            Example: "evaluation/list/function/predictive_accuracy/limit/10"

        Returns
        -------
        list[OpenMLEvaluation]
            A list of OpenMLEvaluation objects containing the parsed evaluations.

        Raises
        ------
        NotImplementedError
            Always — the v2 implementation is not yet available.
        """
        raise NotImplementedError("V2 API implementation is not yet available")
128 changes: 128 additions & 0 deletions openml/_api/resources/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import xmltodict

from openml._api.resources.base import TasksAPI
from openml.tasks.task import (
OpenMLClassificationTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
OpenMLRegressionTask,
OpenMLTask,
TaskType,
)

if TYPE_CHECKING:
from requests import Response


class TasksV1(TasksAPI):
    """V1 (XML) API implementation for tasks."""

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch task ``task_id`` from the v1 XML endpoint.

        Parameters
        ----------
        task_id : int
            OpenML task identifier.
        return_response : bool
            If True, also return the raw HTTP response alongside the task.

        Returns
        -------
        OpenMLTask or tuple[OpenMLTask, Response]
        """
        path = f"task/{task_id}"
        response = self._http.get(path)
        xml_content = response.text
        task = self._create_task_from_xml(xml_content)

        if return_response:
            return task, response

        return task

    def _create_task_from_xml(self, xml: str) -> OpenMLTask:
        """Create a task given a xml string.

        Parameters
        ----------
        xml : string
            Task xml representation.

        Returns
        -------
        OpenMLTask

        Raises
        ------
        NotImplementedError
            If the task type id does not map to a supported task class.
        """
        dic = xmltodict.parse(xml)["oml:task"]
        estimation_parameters = {}
        inputs = {}
        # Due to the unordered structure we obtain, we first have to extract
        # the possible keys of oml:input; dic["oml:input"] is a list of
        # OrderedDicts

        # Check if there is a list of inputs
        if isinstance(dic["oml:input"], list):
            for input_ in dic["oml:input"]:
                name = input_["@name"]
                inputs[name] = input_
        # Single input case
        elif isinstance(dic["oml:input"], dict):
            name = dic["oml:input"]["@name"]
            inputs[name] = dic["oml:input"]

        evaluation_measures = None
        if "evaluation_measures" in inputs:
            evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][
                "oml:evaluation_measure"
            ]

        task_type = TaskType(int(dic["oml:task_type_id"]))
        common_kwargs = {
            # NOTE(review): task_id is kept as the XML string here (not int());
            # presumably OpenMLTask coerces it — confirm against the task class.
            "task_id": dic["oml:task_id"],
            "task_type": dic["oml:task_type"],
            "task_type_id": task_type,
            "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
            "evaluation_measure": evaluation_measures,
        }
        # TODO: add OpenMLClusteringTask?
        # NOTE(review): TaskType.CLUSTERING is already in the cls map below but
        # is excluded here, so clustering tasks get no estimation-procedure
        # kwargs — confirm that is intended.
        if task_type in (
            TaskType.SUPERVISED_CLASSIFICATION,
            TaskType.SUPERVISED_REGRESSION,
            TaskType.LEARNING_CURVE,
        ):
            # Convert some more parameters
            # NOTE(review): assumes "oml:parameter" parsed as a list; a task
            # with a single <oml:parameter> element would yield a dict here and
            # this loop would iterate its keys — confirm server responses.
            for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][
                "oml:parameter"
            ]:
                name = parameter["@name"]
                text = parameter.get("#text", "")
                estimation_parameters[name] = text

            common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][
                "oml:estimation_procedure"
            ]["oml:type"]
            common_kwargs["estimation_procedure_id"] = int(
                inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"]
            )

            common_kwargs["estimation_parameters"] = estimation_parameters
            common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][
                "oml:target_feature"
            ]
            common_kwargs["data_splits_url"] = inputs["estimation_procedure"][
                "oml:estimation_procedure"
            ]["oml:data_splits_url"]

        # Dispatch on task type; unknown types fall through to None.
        cls = {
            TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
            TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
            TaskType.CLUSTERING: OpenMLClusteringTask,
            TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
        }.get(task_type)
        if cls is None:
            raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
        return cls(**common_kwargs)  # type: ignore


class TasksV2(TasksAPI):
    """V2 (JSON) API implementation for tasks — not yet implemented."""

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch task ``task_id`` from the v2 JSON endpoint.

        Parameters
        ----------
        task_id : int
            OpenML task identifier.
        return_response : bool
            If True, also return the raw HTTP response alongside the task.

        Raises
        ------
        NotImplementedError
            Always — the v2 implementation is not yet available.
            (Message added for consistency with ``EvaluationsV2.list``.)
        """
        raise NotImplementedError("V2 API implementation is not yet available")
Empty file added openml/_api/runtime/__init__.py
Empty file.
Loading
Loading