Skip to content
13 changes: 11 additions & 2 deletions openml/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,11 @@ def _get_arff(self, format: str) -> dict: # noqa: A002
file_size = filepath.stat().st_size
if file_size > MB_120:
# Adjacent string literals are joined verbatim, so every fragment except the
# last carries its own trailing space; otherwise sentences run together.
raise NotImplementedError(
    f"File '{filename}' ({file_size / 1e6:.1f} MB) "
    "exceeds the maximum supported size of 120 MB. "
    f"This limitation applies to {bits}-bit systems. "
    "Large dataset handling is currently not fully supported. "
    "Please consider using a smaller dataset."
)

if format.lower() == "arff":
Expand Down Expand Up @@ -780,7 +784,12 @@ def get_data( # noqa: C901
# All the assumptions below for the target are dependant on the number of targets being 1
n_targets = len(target_names)
if n_targets > 1:
# Single raise only: a second raise after this one would be unreachable.
# Each fragment ends with a space because adjacent literals are joined as-is.
raise NotImplementedError(
    "Multi-target prediction is not yet supported. "
    f"Found {n_targets} target columns: {target_names}. "
    "Currently, only single-target datasets are supported. "
    "Please select a single target column."
)

target_name = target_names[0]
x = data.drop(columns=[target_name])
Expand Down
22 changes: 19 additions & 3 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,12 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913
test_x = None
test_y = None
else:
# Report the unsupported task with enough context to identify it; task_id is
# read defensively via getattr so the error itself cannot fail on a task
# object that lacks the attribute.
raise NotImplementedError(
    f"Task type '{task.task_type}' is not supported. "
    "Only OpenMLSupervisedTask and OpenMLClusteringTask are currently implemented. "
    f"Task details: task_id={getattr(task, 'task_id', 'unknown')}, "
    f"task_class={task.__class__.__name__}"
)

config.logger.info(
f"Going to run model {model!s} on "
Expand Down Expand Up @@ -982,7 +987,13 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): # type: ignore
if "predictions" not in files and from_server is True:
task = openml.tasks.get_task(task_id)
if task.task_type_id == TaskType.SUBGROUP_DISCOVERY:
# Adjacent literals are concatenated without separators, so each fragment
# that continues a sentence must end with an explicit space.
raise NotImplementedError(
    "Subgroup discovery tasks are not yet supported. "
    f"Task ID: {task_id}. Please check the OpenML documentation "
    "for supported task types. "
    "Currently supported task types: Classification, Regression, "
    "Clustering, and Learning Curve."
)

# JvR: actually, I am not sure whether this error should be raised.
# a run can consist without predictions. But for now let's keep it
Expand Down Expand Up @@ -1282,7 +1293,12 @@ def format_prediction( # noqa: PLR0913
if isinstance(task, OpenMLRegressionTask):
return [repeat, fold, index, prediction, truth]

# Reached only when none of the isinstance checks above matched the task.
# Fragments end with a space because adjacent literals are joined verbatim.
raise NotImplementedError(
    f"Formatting for {type(task)} is not supported. "
    "Supported task types: OpenMLClassificationTask, OpenMLRegressionTask, "
    "and OpenMLLearningCurveTask. "
    "Please ensure your task is one of these types."
)


def delete_run(run_id: int) -> bool:
Expand Down
7 changes: 6 additions & 1 deletion openml/runs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,12 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
]

else:
# Single raise with the descriptive message; includes the task ID so the
# failing task can be looked up.
raise NotImplementedError(
    f"Task type '{task.task_type}' is not yet supported. "
    "Supported task types: Classification, Regression, Clustering, Learning Curve. "
    f"Task ID: {task.task_id}. "
    "Please check the OpenML documentation for supported task types."
)

return arff_dict

Expand Down
16 changes: 14 additions & 2 deletions openml/study/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,23 @@ def _to_dict(self) -> dict[str, dict]:

def push_tag(self, tag: str) -> None:
    """Add a tag to the study (not supported).

    Parameters
    ----------
    tag : str
        Tag to attach to the study. Unused, because the operation is not
        implemented.

    Raises
    ------
    NotImplementedError
        Always; tagging studies is not available.
    """
    # Adjacent literals are joined verbatim, so each fragment that is followed
    # by more text ends with an explicit space.
    raise NotImplementedError(
        "Tag management for studies is not yet supported. "
        "The OpenML Python SDK does not currently provide functionality "
        "for adding tags to studies. "
        "For updates on this feature, please refer to the GitHub issues at: "
        "https://github.com/openml/openml-python/issues"
    )

def remove_tag(self, tag: str) -> None:
    """Remove a tag from the study (not supported).

    Parameters
    ----------
    tag : str
        Tag to detach from the study. Unused, because the operation is not
        implemented.

    Raises
    ------
    NotImplementedError
        Always; untagging studies is not available.
    """
    # Adjacent literals are joined verbatim, so each fragment that is followed
    # by more text ends with an explicit space.
    raise NotImplementedError(
        "Tag management for studies is not yet supported. "
        "The OpenML Python SDK does not currently provide functionality "
        "for removing tags from studies. "
        "For updates on this feature, please refer to the GitHub issues at: "
        "https://github.com/openml/openml-python/issues"
    )


class OpenMLStudy(BaseStudy):
Expand Down
15 changes: 13 additions & 2 deletions openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,12 @@ def _create_task_from_xml(xml: str) -> OpenMLTask:
TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type)
if cls is None:
# Raised when the task type has no matching task class in the lookup above.
# Fragments end with a space because adjacent literals are joined verbatim.
raise NotImplementedError(
    f"Task type '{common_kwargs['task_type']}' is not supported. "
    "Supported task types: SUPERVISED_CLASSIFICATION, "
    "SUPERVISED_REGRESSION, CLUSTERING, LEARNING_CURVE. "
    "Please check the OpenML documentation for available task types."
)
return cls(**common_kwargs) # type: ignore


Expand Down Expand Up @@ -584,7 +589,13 @@ def create_task(
elif task_type == TaskType.SUPERVISED_REGRESSION:
task_cls = OpenMLRegressionTask # type: ignore
else:
# The supported IDs are rendered from the TaskType enum values rather than
# hard-coded; a ", " separator follows every ID except the last.
raise NotImplementedError(
    f"Task type ID {task_type:d} is not supported. "
    f"Supported task type IDs: {TaskType.SUPERVISED_CLASSIFICATION.value}, "
    f"{TaskType.SUPERVISED_REGRESSION.value}, "
    f"{TaskType.CLUSTERING.value}, {TaskType.LEARNING_CURVE.value}. "
    "Please refer to the TaskType enum for valid task type identifiers."
)

return task_cls(
task_type_id=task_type,
Expand Down
9 changes: 7 additions & 2 deletions openml/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,12 @@ def get_X_and_y(self) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]:
TaskType.SUPERVISED_REGRESSION,
TaskType.LEARNING_CURVE,
):
# task_id is read via getattr with a fallback so building the message cannot
# itself fail. Fragments end with a space (except the last) because adjacent
# literals are joined verbatim.
raise NotImplementedError(
    f"Task type '{self.task_type}' is not implemented for get_X_and_y(). "
    "Supported types: SUPERVISED_CLASSIFICATION, SUPERVISED_REGRESSION, "
    "LEARNING_CURVE. "
    f"Task ID: {getattr(self, 'task_id', 'unknown')}."
)

X, y, _, _ = dataset.get_data(target=self.target_name)
return X, y
Expand Down Expand Up @@ -382,7 +387,7 @@ def __init__( # noqa: PLR0913
self.cost_matrix = cost_matrix

# Any supplied cost matrix is rejected outright: the feature is not implemented.
if cost_matrix is not None:
    raise NotImplementedError("Costmatrix functionality is not yet implemented.")


class OpenMLRegressionTask(OpenMLSupervisedTask):
Expand Down