Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions openml/_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


# Process-wide API context singleton, mutated via :func:`set_api_version`.
api_context = APIContext()


def set_api_version(version: str, *, strict: bool = False) -> None:
    """Select which OpenML API version subsequent calls should use.

    Parameters
    ----------
    version : str
        Target API version identifier, forwarded to ``APIContext.set_version``.
    strict : bool
        Forwarded to ``APIContext.set_version``; see that method for semantics.
    """
    api_context.set_version(version=version, strict=strict)
5 changes: 5 additions & 0 deletions openml/_api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

# Default endpoint for the production v1 (XML) API.
API_V1_SERVER = "https://www.openml.org/api/v1/xml"
# NOTE(review): points at a local development server — presumably a placeholder
# until a public v2 endpoint exists; confirm before release.
API_V2_SERVER = "http://127.0.0.1:8001"
# Placeholder API key; a real key is presumably supplied via user configuration.
API_KEY = "..."
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

__all__ = ["HTTPClient"]
39 changes: 39 additions & 0 deletions openml/_api/http/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any, Mapping

import requests
from requests import Response

from openml.__version__ import __version__


class HTTPClient:
    """Minimal HTTP client used by the versioned API layer.

    Every request is sent to ``base_url``/``path`` with an
    ``openml-python/<version>`` user-agent header and a bounded timeout.
    """

    def __init__(self, base_url: str, timeout: float = 10.0) -> None:
        """Initialize the client.

        Parameters
        ----------
        base_url : str
            Server root (without trailing slash) that all request paths are
            joined to.
        timeout : float
            Per-request timeout in seconds. Defaults to 10, the value that
            was previously hard-coded in every method.
        """
        self.base_url = base_url
        self.timeout = timeout
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    def _url(self, path: str) -> str:
        # Single place for URL joining so every verb builds URLs identically.
        return f"{self.base_url}/{path}"

    def get(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a GET request to ``path`` and return the raw response."""
        return requests.get(
            self._url(path), params=params, headers=self.headers, timeout=self.timeout
        )

    def post(
        self,
        path: str,
        data: Mapping[str, Any] | None = None,
        files: Any = None,
    ) -> Response:
        """Send a POST request with form ``data`` and optional ``files``."""
        return requests.post(
            self._url(path), data=data, files=files, headers=self.headers, timeout=self.timeout
        )

    def delete(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a DELETE request to ``path`` and return the raw response."""
        return requests.delete(
            self._url(path), params=params, headers=self.headers, timeout=self.timeout
        )
Empty file added openml/_api/http/utils.py
Empty file.
5 changes: 5 additions & 0 deletions openml/_api/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.evaluations import EvaluationsV1, EvaluationsV2
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2", "EvaluationsV1", "EvaluationsV2"]
37 changes: 37 additions & 0 deletions openml/_api/resources/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from requests import Response

from openml._api.http import HTTPClient
from openml.datasets.dataset import OpenMLDataset
from openml.evaluations.evaluation import OpenMLEvaluation
from openml.tasks.task import OpenMLTask


class ResourceAPI:
    """Base class for resource endpoints; holds the shared HTTP client."""

    def __init__(self, http: HTTPClient) -> None:
        # Shared HTTPClient that subclasses use for all their requests.
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    """Abstract interface for version-specific dataset endpoints."""

    @abstractmethod
    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch a dataset by id.

        Parameters
        ----------
        dataset_id : int
            OpenML dataset identifier.
        return_response : bool
            If True, implementations also return the raw HTTP response.
            This flag mirrors ``TasksAPI.get`` and makes the tuple variant
            of the return annotation reachable; the previous signature
            promised a tuple return but offered no way to request it.
        """
        ...


class TasksAPI(ResourceAPI, ABC):
    """Abstract interface for version-specific task endpoints."""

    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch a task by id; if ``return_response`` is True, also return
        the raw HTTP response."""
        ...


class EvaluationsAPI(ResourceAPI, ABC):
    """Abstract interface for version-specific evaluation endpoints."""

    @abstractmethod
    def list(self, api_call: str) -> list[OpenMLEvaluation]:
        """Return evaluations for the given relative API call path."""
        ...
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

if TYPE_CHECKING:
from responses import Response

from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    """V1 (XML) API implementation for datasets — not yet implemented."""

    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch dataset ``dataset_id`` from the v1 XML endpoint.

        Parameters
        ----------
        dataset_id : int
            OpenML dataset identifier.
        return_response : bool
            If True, also return the raw HTTP response (keyword-only, for
            signature consistency with ``TasksV1.get``).

        Raises
        ------
        NotImplementedError
            Always — the v1 datasets implementation is not yet available.
        """
        raise NotImplementedError("V1 API implementation is not yet available")


class DatasetsV2(DatasetsAPI):
    """V2 (JSON) API implementation for datasets — not yet implemented."""

    def get(
        self,
        dataset_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch dataset ``dataset_id`` from the v2 JSON endpoint.

        Parameters
        ----------
        dataset_id : int
            OpenML dataset identifier.
        return_response : bool
            If True, also return the raw HTTP response (keyword-only, for
            signature consistency with ``TasksV2.get``).

        Raises
        ------
        NotImplementedError
            Always — the v2 implementation is not yet available.
        """
        raise NotImplementedError("V2 API implementation is not yet available")
137 changes: 137 additions & 0 deletions openml/_api/resources/evaluations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from __future__ import annotations

import json

import xmltodict

from openml._api.resources.base import EvaluationsAPI
from openml.evaluations.evaluation import OpenMLEvaluation


class EvaluationsV1(EvaluationsAPI):
    """V1 API implementation for evaluations.

    Fetches evaluations from the v1 XML API endpoint.
    """

    def list(self, api_call: str) -> list[OpenMLEvaluation]:
        """Fetch and list evaluations from the OpenML API.

        Makes an API call to retrieve evaluation results, parses the XML response,
        and converts it into OpenMLEvaluation objects.

        Parameters
        ----------
        api_call : str
            The API endpoint path (without base URL) to call for evaluations.
            Example: "evaluation/list/function/predictive_accuracy/limit/10"

        Returns
        -------
        list[OpenMLEvaluation]
            A list of OpenMLEvaluation objects containing the parsed evaluations.

        Raises
        ------
        ValueError
            If the XML response does not contain the expected structure.
        AssertionError
            If the evaluation data is not in list format as expected.
            NOTE(review): ``assert`` is stripped under ``python -O``; raise
            ValueError instead if this check must always run.

        Notes
        -----
        This method performs two API calls:
        1. Fetches evaluation data from the specified endpoint
        2. Fetches user information for all uploaders in the evaluation data

        The user information is used to map uploader IDs to usernames.
        """
        eval_response = self._http.get(api_call)
        xml_content = eval_response.text

        # force_list guarantees "oml:evaluation" parses to a list even when
        # the response contains a single evaluation element.
        evals_dict = xmltodict.parse(xml_content, force_list=("oml:evaluation",))
        # Minimalistic check if the XML is useful
        if "oml:evaluations" not in evals_dict:
            raise ValueError(
                "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
            )

        # NOTE(review): an empty <oml:evaluations/> element would raise
        # KeyError here rather than the documented errors — confirm the
        # server never returns an empty result set.
        assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
            evals_dict["oml:evaluations"]["oml:evaluation"],
        )

        # Distinct uploader ids (XML strings). Set ordering is arbitrary, but
        # the username lookup below is keyed by id, so order does not matter.
        uploader_ids = list(
            {eval_["oml:uploader"] for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]},
        )
        # Second API call: resolve all uploader ids to usernames in one request.
        api_users = "user/list/user_id/" + ",".join(uploader_ids)
        user_response = self._http.get(api_users)
        xml_content_user = user_response.text

        users = xmltodict.parse(xml_content_user, force_list=("oml:user",))
        # Map uploader id (string, as parsed from XML) -> username.
        user_dict = {
            user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]
        }

        evals = []
        for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]:
            run_id = int(eval_["oml:run_id"])
            # "oml:value" is a scalar result; "oml:values" is a JSON-encoded
            # list; "oml:array_data" is passed through verbatim. Each may be
            # absent, in which case None is stored.
            value = float(eval_["oml:value"]) if "oml:value" in eval_ else None
            values = json.loads(eval_["oml:values"]) if eval_.get("oml:values", None) else None
            array_data = eval_.get("oml:array_data")

            evals.append(
                OpenMLEvaluation(
                    run_id=run_id,
                    task_id=int(eval_["oml:task_id"]),
                    setup_id=int(eval_["oml:setup_id"]),
                    flow_id=int(eval_["oml:flow_id"]),
                    flow_name=eval_["oml:flow_name"],
                    data_id=int(eval_["oml:data_id"]),
                    data_name=eval_["oml:data_name"],
                    function=eval_["oml:function"],
                    upload_time=eval_["oml:upload_time"],
                    uploader=int(eval_["oml:uploader"]),
                    uploader_name=user_dict[eval_["oml:uploader"]],
                    value=value,
                    values=values,
                    array_data=array_data,
                )
            )

        return evals


class EvaluationsV2(EvaluationsAPI):
    """V2 API implementation for evaluations.

    Will fetch evaluations from the v2 JSON API endpoint; currently a stub.
    """

    def list(self, api_call: str) -> list[OpenMLEvaluation]:
        """Fetch and list evaluations from the OpenML v2 (JSON) API.

        Parameters
        ----------
        api_call : str
            The API endpoint path (without base URL) to call for evaluations.
            Example: "evaluation/list/function/predictive_accuracy/limit/10"

        Returns
        -------
        list[OpenMLEvaluation]
            A list of OpenMLEvaluation objects containing the parsed evaluations.

        Raises
        ------
        NotImplementedError
            Always — the v2 implementation is not yet available.
        """
        raise NotImplementedError("V2 API implementation is not yet available")
128 changes: 128 additions & 0 deletions openml/_api/resources/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import xmltodict

from openml._api.resources.base import TasksAPI
from openml.tasks.task import (
OpenMLClassificationTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
OpenMLRegressionTask,
OpenMLTask,
TaskType,
)

if TYPE_CHECKING:
from requests import Response


class TasksV1(TasksAPI):
    """V1 (XML) API implementation for tasks."""

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch task ``task_id`` from the v1 XML endpoint.

        Parameters
        ----------
        task_id : int
            OpenML task identifier.
        return_response : bool
            If True, also return the raw HTTP response alongside the task.

        Returns
        -------
        OpenMLTask or tuple[OpenMLTask, Response]
        """
        path = f"task/{task_id}"
        response = self._http.get(path)
        xml_content = response.text
        task = self._create_task_from_xml(xml_content)

        if return_response:
            return task, response

        return task

    def _create_task_from_xml(self, xml: str) -> OpenMLTask:
        """Create a task given a xml string.

        Parameters
        ----------
        xml : string
            Task xml representation.

        Returns
        -------
        OpenMLTask

        Raises
        ------
        NotImplementedError
            If the task type id does not map to a supported task class.
        """
        dic = xmltodict.parse(xml)["oml:task"]
        estimation_parameters = {}
        inputs = {}
        # Due to the unordered structure we obtain, we first have to extract
        # the possible keys of oml:input; dic["oml:input"] is a list of
        # OrderedDicts

        # Check if there is a list of inputs
        if isinstance(dic["oml:input"], list):
            for input_ in dic["oml:input"]:
                name = input_["@name"]
                inputs[name] = input_
        # Single input case
        elif isinstance(dic["oml:input"], dict):
            name = dic["oml:input"]["@name"]
            inputs[name] = dic["oml:input"]

        evaluation_measures = None
        if "evaluation_measures" in inputs:
            evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][
                "oml:evaluation_measure"
            ]

        task_type = TaskType(int(dic["oml:task_type_id"]))
        common_kwargs = {
            # NOTE(review): task_id is kept as the XML string here (not int());
            # presumably OpenMLTask coerces it — confirm against the task class.
            "task_id": dic["oml:task_id"],
            "task_type": dic["oml:task_type"],
            "task_type_id": task_type,
            "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
            "evaluation_measure": evaluation_measures,
        }
        # TODO: add OpenMLClusteringTask?
        # NOTE(review): TaskType.CLUSTERING is already in the cls map below but
        # is excluded here, so clustering tasks get no estimation-procedure
        # kwargs — confirm that is intended.
        if task_type in (
            TaskType.SUPERVISED_CLASSIFICATION,
            TaskType.SUPERVISED_REGRESSION,
            TaskType.LEARNING_CURVE,
        ):
            # Convert some more parameters
            # NOTE(review): assumes "oml:parameter" parsed as a list; a task
            # with a single <oml:parameter> element would yield a dict here and
            # this loop would iterate its keys — confirm server responses.
            for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][
                "oml:parameter"
            ]:
                name = parameter["@name"]
                text = parameter.get("#text", "")
                estimation_parameters[name] = text

            common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][
                "oml:estimation_procedure"
            ]["oml:type"]
            common_kwargs["estimation_procedure_id"] = int(
                inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"]
            )

            common_kwargs["estimation_parameters"] = estimation_parameters
            common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][
                "oml:target_feature"
            ]
            common_kwargs["data_splits_url"] = inputs["estimation_procedure"][
                "oml:estimation_procedure"
            ]["oml:data_splits_url"]

        # Dispatch on task type; unknown types fall through to None.
        cls = {
            TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
            TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
            TaskType.CLUSTERING: OpenMLClusteringTask,
            TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
        }.get(task_type)
        if cls is None:
            raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
        return cls(**common_kwargs)  # type: ignore


class TasksV2(TasksAPI):
    """V2 (JSON) API implementation for tasks — not yet implemented."""

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch task ``task_id`` from the v2 JSON endpoint.

        Parameters
        ----------
        task_id : int
            OpenML task identifier.
        return_response : bool
            If True, also return the raw HTTP response alongside the task.

        Raises
        ------
        NotImplementedError
            Always — the v2 implementation is not yet available.
            (Message added for consistency with ``EvaluationsV2.list``.)
        """
        raise NotImplementedError("V2 API implementation is not yet available")
Empty file added openml/_api/runtime/__init__.py
Empty file.
Loading
Loading