Merged
1 change: 1 addition & 0 deletions .gitignore
@@ -94,3 +94,4 @@ dmypy.sock

# Tests
.pytest_cache
.venv
1 change: 1 addition & 0 deletions CONTRIBUTING.md
@@ -168,6 +168,7 @@ to create a pull request from your fork.
(If any of the above seems like magic to you, please look up the
[Git documentation](https://git-scm.com/documentation) on the web, or ask a friend or another contributor for help.)


## Pre-commit Details
[Pre-commit](https://pre-commit.com/) is used for various style checking and code formatting.
Before each commit, it will automatically run:
2 changes: 1 addition & 1 deletion examples/30_extended/fetch_runtimes_tutorial.py
@@ -119,7 +119,7 @@ def print_compare_runtimes(measures):
)
for repeat, val1 in measures["predictive_accuracy"].items():
for fold, val2 in val1.items():
print("Repeat #{}-Fold #{}: {:.4f}".format(repeat, fold, val2))
print(f"Repeat #{repeat}-Fold #{fold}: {val2:.4f}")
print()

################################################################################
@@ -242,7 +242,7 @@ def print_compare_runtimes(measures):
# the 2-fold (inner) CV search performed.

# We earlier extracted the number of repeats and folds for this task:
print("# repeats: {}\n# folds: {}".format(n_repeats, n_folds))
print(f"# repeats: {n_repeats}\n# folds: {n_folds}")

# To extract the training runtime of the first repeat, first fold:
print(run4.fold_evaluations["wall_clock_time_millis_training"][0][0])
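A note on this hunk: the two print styles are interchangeable here. Below is a minimal, self-contained sketch with a made-up `measures` dict (not the tutorial's real run data) showing that the old `str.format` call and the new f-string produce identical output.

```python
# Hypothetical stand-in for the tutorial's `measures` mapping: repeat -> fold -> accuracy.
measures = {"predictive_accuracy": {0: {0: 0.9123, 1: 0.8765}}}

for repeat, val1 in measures["predictive_accuracy"].items():
    for fold, val2 in val1.items():
        old = "Repeat #{}-Fold #{}: {:.4f}".format(repeat, fold, val2)
        new = f"Repeat #{repeat}-Fold #{fold}: {val2:.4f}"
        assert old == new  # the `.4f` format spec behaves identically in both styles
        print(new)
```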
2 changes: 1 addition & 1 deletion openml/base.py
@@ -78,7 +78,7 @@ def _apply_repr_template(
self.__class__.__name__[len("OpenML") :],
)
header_text = f"OpenML {name_with_spaces}"
header = "{}\n{}\n".format(header_text, "=" * len(header_text))
header = f"{header_text}\n{'=' * len(header_text)}\n"

_body_fields: list[tuple[str, str | int | list[str]]] = [
(k, "None" if v is None else v) for k, v in body_fields
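A side note on this conversion: f-string braces may contain arbitrary expressions such as `'=' * len(header_text)`, but on Python versions before 3.12 the inner string literal must use quotes different from the enclosing ones. A standalone sketch with a hypothetical header value:

```python
# Hypothetical value of f"OpenML {name_with_spaces}" from the surrounding code.
header_text = "OpenML Dataset"

# The inner literal uses single quotes so it can sit inside the double-quoted f-string.
header = f"{header_text}\n{'=' * len(header_text)}\n"
print(header)
# OpenML Dataset
# ==============
```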
2 changes: 1 addition & 1 deletion openml/datasets/functions.py
@@ -191,7 +191,7 @@ def _list_datasets(
if value is not None:
api_call += f"/{operator}/{value}"
if data_id is not None:
api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}"
return __list_datasets(api_call=api_call)


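The same quoting rule applies to the comma literal passed to `str.join` inside the f-string. A sketch with hypothetical dataset ids and a hypothetical `api_call` prefix (the real prefix is built earlier in the function):

```python
data_id = [2, 31, 61]   # hypothetical ids
api_call = "data/list"  # hypothetical prefix

api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}"
print(api_call)  # data/list/data_id/2,31,61
```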
2 changes: 1 addition & 1 deletion openml/evaluations/evaluation.py
@@ -100,7 +100,7 @@ def _to_dict(self) -> dict:

def __repr__(self) -> str:
header = "OpenML Evaluation"
header = "{}\n{}\n".format(header, "=" * len(header))
header = f"{header}\n{'=' * len(header)}\n"

fields = {
"Upload Date": self.upload_time,
10 changes: 5 additions & 5 deletions openml/evaluations/functions.py
@@ -204,15 +204,15 @@ def _list_evaluations( # noqa: C901
if value is not None:
api_call += f"/{operator}/{value}"
if tasks is not None:
api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
api_call += f"/task/{','.join([str(int(i)) for i in tasks])}"
if setups is not None:
api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
api_call += f"/setup/{','.join([str(int(i)) for i in setups])}"
if flows is not None:
api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
api_call += f"/flow/{','.join([str(int(i)) for i in flows])}"
if runs is not None:
api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
api_call += f"/run/{','.join([str(int(i)) for i in runs])}"
if uploaders is not None:
api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
api_call += f"/uploader/{','.join([str(int(i)) for i in uploaders])}"
if study is not None:
api_call += f"/study/{study}"
if sort_order is not None:
24 changes: 9 additions & 15 deletions openml/extensions/sklearn/extension.py
@@ -223,7 +223,7 @@ def remove_all_in_parentheses(string: str) -> str:
# then the pipeline steps are formatted e.g.:
# step1name=sklearn.submodule.ClassName,step2name...
components = [component.split(".")[-1] for component in pipeline.split(",")]
pipeline = "{}({})".format(pipeline_class, ",".join(components))
pipeline = f"{pipeline_class}({','.join(components)})"
if len(short_name.format(pipeline)) > extra_trim_length:
pipeline = f"{pipeline_class}(...,{components[-1]})"
else:
@@ -482,9 +482,7 @@ def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0912
)
else:
raise TypeError(o)
logger.info(
"-{} flow_to_sklearn END o={}, rval={}".format("-" * recursion_depth, o, rval)
)
logger.info(f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}")
return rval

def model_to_flow(self, model: Any) -> OpenMLFlow:
@@ -574,7 +572,7 @@ def get_version_information(self) -> list[str]:
import sklearn

major, minor, micro, _, _ = sys.version_info
python_version = "Python_{}.".format(".".join([str(major), str(minor), str(micro)]))
python_version = f"Python_{'.'.join([str(major), str(minor), str(micro)])}."
sklearn_version = f"Sklearn_{sklearn.__version__}."
numpy_version = f"NumPy_{numpy.__version__}." # type: ignore
scipy_version = f"SciPy_{scipy.__version__}."
@@ -628,7 +626,7 @@ def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str:
"""

def match_format(s):
return "{}\n{}\n".format(s, len(s) * "-")
return f"{s}\n{len(s) * '-'}\n"

s = inspect.getdoc(model)
if s is None:
@@ -680,7 +678,7 @@ def _extract_sklearn_parameter_docstring(self, model) -> None | str:
"""

def match_format(s):
return "{}\n{}\n".format(s, len(s) * "-")
return f"{s}\n{len(s) * '-'}\n"

s = inspect.getdoc(model)
if s is None:
@@ -689,7 +687,7 @@ def match_format(s):
index1 = s.index(match_format("Parameters"))
except ValueError as e:
# when sklearn docstring has no 'Parameters' section
logger.warning("{} {}".format(match_format("Parameters"), e))
logger.warning(f"{match_format('Parameters')} {e}")
return None

headings = ["Attributes", "Notes", "See also", "Note", "References"]
@@ -1151,7 +1149,7 @@ def _deserialize_model( # noqa: C901
recursion_depth: int,
strict_version: bool = True, # noqa: FBT002, FBT001
) -> Any:
logger.info("-{} deserialize {}".format("-" * recursion_depth, flow.name))
logger.info(f"-{'-' * recursion_depth} deserialize {flow.name}")
model_name = flow.class_name
self._check_dependencies(flow.dependencies, strict_version=strict_version)

@@ -1168,9 +1166,7 @@ def _deserialize_model( # noqa: C901

for name in parameters:
value = parameters.get(name)
logger.info(
"--{} flow_parameter={}, value={}".format("-" * recursion_depth, name, value)
)
logger.info(f"--{'-' * recursion_depth} flow_parameter={name}, value={value}")
rval = self._deserialize_sklearn(
value,
components=components_,
@@ -1186,9 +1182,7 @@ def _deserialize_model( # noqa: C901
if name not in components_:
continue
value = components[name]
logger.info(
"--{} flow_component={}, value={}".format("-" * recursion_depth, name, value)
)
logger.info(f"--{'-' * recursion_depth} flow_component={name}, value={value}")
rval = self._deserialize_sklearn(
value,
recursion_depth=recursion_depth + 1,
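One trade-off worth noting for the `logger.info` conversions in this file (a general `logging` behaviour, not something specific to this PR): an f-string builds the message eagerly even when the log level is disabled, whereas passing %-style arguments defers formatting until a handler actually emits the record. A small sketch with hypothetical values:

```python
import logging

logger = logging.getLogger(__name__)

recursion_depth = 2
flow_name = "sklearn.pipeline.Pipeline"  # hypothetical flow name

# f-string: the message is constructed before logger.info decides whether to emit it.
logger.info(f"-{'-' * recursion_depth} deserialize {flow_name}")

# %-style arguments: formatting only happens if the record is actually handled.
logger.info("-%s deserialize %s", "-" * recursion_depth, flow_name)
```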
10 changes: 5 additions & 5 deletions openml/runs/functions.py
@@ -1154,15 +1154,15 @@ def _list_runs( # noqa: PLR0913, C901
if offset is not None:
api_call += f"/offset/{offset}"
if id is not None:
api_call += "/run/{}".format(",".join([str(int(i)) for i in id]))
api_call += f"/run/{','.join([str(int(i)) for i in id])}"
if task is not None:
api_call += "/task/{}".format(",".join([str(int(i)) for i in task]))
api_call += f"/task/{','.join([str(int(i)) for i in task])}"
if setup is not None:
api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
api_call += f"/setup/{','.join([str(int(i)) for i in setup])}"
if flow is not None:
api_call += "/flow/{}".format(",".join([str(int(i)) for i in flow]))
api_call += f"/flow/{','.join([str(int(i)) for i in flow])}"
if uploader is not None:
api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploader]))
api_call += f"/uploader/{','.join([str(int(i)) for i in uploader])}"
if study is not None:
api_call += "/study/%d" % study
if display_errors:
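The `study` clause in this hunk is an unchanged context line and still uses old-style `%`-formatting. Purely for illustration (the PR does not touch it), the f-string spelling would be:

```python
study = 123            # hypothetical study id
api_call = "run/list"  # hypothetical prefix

api_call += "/study/%d" % study   # as it appears in the diff
# api_call += f"/study/{study}"   # equivalent f-string form
```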
2 changes: 1 addition & 1 deletion openml/setups/functions.py
@@ -207,7 +207,7 @@ def _list_setups(
if offset is not None:
api_call += f"/offset/{offset}"
if setup is not None:
api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
api_call += f"/setup/{','.join([str(int(i)) for i in setup])}"
if flow is not None:
api_call += f"/flow/{flow}"
if tag is not None:
6 changes: 3 additions & 3 deletions openml/setups/setup.py
@@ -45,7 +45,7 @@ def _to_dict(self) -> dict[str, Any]:

def __repr__(self) -> str:
header = "OpenML Setup"
header = "{}\n{}\n".format(header, "=" * len(header))
header = f"{header}\n{'=' * len(header)}\n"

fields = {
"Setup ID": self.setup_id,
@@ -125,7 +125,7 @@ def _to_dict(self) -> dict[str, Any]:

def __repr__(self) -> str:
header = "OpenML Parameter"
header = "{}\n{}\n".format(header, "=" * len(header))
header = f"{header}\n{'=' * len(header)}\n"

fields = {
"ID": self.id,
@@ -137,7 +137,7 @@ def __repr__(self) -> str:
}
# indented prints for parameter attributes
# indention = 2 spaces + 1 | + 2 underscores
indent = "{}|{}".format(" " * 2, "_" * 2)
indent = f"{' ' * 2}|{'_' * 2}"
parameter_data_type = f"{indent}Data Type"
fields[parameter_data_type] = self.data_type
parameter_default = f"{indent}Default"
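For clarity, the rewritten `indent` expression evaluates to the same five-character prefix as before; a tiny sketch:

```python
# Both spellings build "  |__" (2 spaces, a pipe, 2 underscores).
indent = f"{' ' * 2}|{'_' * 2}"
assert indent == "  " + "|" + "__"

parameter_data_type = f"{indent}Data Type"
print(repr(parameter_data_type))  # '  |__Data Type'
```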
2 changes: 1 addition & 1 deletion openml/tasks/functions.py
@@ -521,7 +521,7 @@ def _create_task_from_xml(xml: str) -> OpenMLTask:
TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type)
if cls is None:
raise NotImplementedError("Task type {} not supported.".format(common_kwargs["task_type"]))
raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
return cls(**common_kwargs) # type: ignore


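The dict subscript inside this f-string is another spot where the quoting rule matters: `common_kwargs['task_type']` must use single quotes inside the double-quoted literal on Python < 3.12. A sketch with a hypothetical kwargs dict:

```python
common_kwargs = {"task_type": "Unknown"}  # hypothetical

message = f"Task type {common_kwargs['task_type']} not supported."
print(message)  # Task type Unknown not supported.
```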
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -109,7 +109,7 @@ def delete_remote_files(tracker, flow_names) -> None:

# deleting all collected entities published to test server
# 'run's are deleted first to prevent dependency issue of entities on deletion
logger.info("Entity Types: {}".format(["run", "data", "flow", "task", "study"]))
logger.info(f"Entity Types: {['run', 'data', 'flow', 'task', 'study']}")
for entity_type in ["run", "data", "flow", "task", "study"]:
logger.info(f"Deleting {entity_type}s...")
for _i, entity in enumerate(tracker[entity_type]):