Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 3 additions & 35 deletions openml/base.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,22 @@
# License: BSD 3-Clause
from __future__ import annotations

import re
import webbrowser
from abc import ABC, abstractmethod
from collections.abc import Iterable, Sequence
from collections.abc import Sequence

import xmltodict

import openml._api_calls
import openml.config
from openml.utils import ReprMixin

from .utils import _get_rest_api_type_alias, _tag_openml_base


class OpenMLBase(ABC):
class OpenMLBase(ReprMixin, ABC):
"""Base object for functionality that is shared across entities."""

def __repr__(self) -> str:
body_fields = self._get_repr_body_fields()
return self._apply_repr_template(body_fields)

@property
@abstractmethod
def id(self) -> int | None:
Expand Down Expand Up @@ -60,34 +56,6 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | N
"""
# Should be implemented in the base class.

def _apply_repr_template(
self,
body_fields: Iterable[tuple[str, str | int | list[str] | None]],
) -> str:
"""Generates the header and formats the body for string representation of the object.

Parameters
----------
body_fields: List[Tuple[str, str]]
A list of (name, value) pairs to display in the body of the __repr__.
"""
# We add spaces between capitals, e.g. ClassificationTask -> Classification Task
name_with_spaces = re.sub(
r"(\w)([A-Z])",
r"\1 \2",
self.__class__.__name__[len("OpenML") :],
)
header_text = f"OpenML {name_with_spaces}"
header = f"{header_text}\n{'=' * len(header_text)}\n"

_body_fields: list[tuple[str, str | int | list[str]]] = [
(k, "None" if v is None else v) for k, v in body_fields
]
longest_field_name_length = max(len(name) for name, _ in _body_fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
return header + body

@abstractmethod
def _to_dict(self) -> dict[str, dict]:
"""Creates a dictionary representation of self.
Expand Down
22 changes: 19 additions & 3 deletions openml/datasets/data_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
if TYPE_CHECKING:
from IPython.lib import pretty

from openml.utils import ReprMixin

class OpenMLDataFeature: # noqa: PLW1641

class OpenMLDataFeature(ReprMixin):
"""
Data Feature (a.k.a. Attribute) object.

Expand Down Expand Up @@ -74,11 +76,25 @@ def __init__( # noqa: PLR0913
self.number_missing_values = number_missing_values
self.ontologies = ontologies

def __repr__(self) -> str:
return f"[{self.index} - {self.name} ({self.data_type})]"
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"Index": self.index,
"Name": self.name,
"Data Type": self.data_type,
}

order = [
"Index",
"Name",
"Data Type",
]
return [(key, fields[key]) for key in order if key in fields]

def __eq__(self, other: Any) -> bool:
return isinstance(other, OpenMLDataFeature) and self.__dict__ == other.__dict__

__hash__ = None # type: ignore

Comment on lines 94 to 98
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not so sure about the custom implementation of __hash__. I know it's a requirement from pre-commit, but we need to make sure we don't just write a bad implementation to satisfy the pre-commit checks.

I think that if it can be set to None, and that silences pre-commit, is the right choice for the code, and no SDK code currently depends on hashing, then we should do it like that.

If we want to implement __hash__, then given the implementation of __eq__, doesn't it make more sense to build the hash from a tuple of tuples, created by looping over all (key, value) pairs of self.__dict__?

Copy link
Contributor Author

@JATAYU000 JATAYU000 Feb 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pairs of self.__dict__

self.__dict__ would return unhashable items, which would raise errors. That's why I picked immutable/hashable fields.

I think if it can be set to None, and that shuts the pre-commit and is right choice in code and no sdk code currently depends on hashing then do it like that:

I have set it to None, and that does silence the pre-commit failure.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fkiraly please have a look at this thread.

Is it fine to have __hash__ = None for a class?

def _repr_pretty_(self, pp: pretty.PrettyPrinter, cycle: bool) -> None: # noqa: ARG002
pp.text(str(self))
40 changes: 13 additions & 27 deletions openml/setups/setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# License: BSD 3-Clause
from __future__ import annotations

from collections.abc import Sequence
from typing import Any

import openml.config
import openml.flows
from openml.utils import ReprMixin


class OpenMLSetup:
class OpenMLSetup(ReprMixin):
"""Setup object (a.k.a. Configuration).

Parameters
Expand Down Expand Up @@ -43,30 +45,21 @@ def _to_dict(self) -> dict[str, Any]:
else None,
}

def __repr__(self) -> str:
header = "OpenML Setup"
header = f"{header}\n{'=' * len(header)}\n"

fields = {
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"Setup ID": self.setup_id,
"Flow ID": self.flow_id,
"Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id),
"# of Parameters": (
len(self.parameters) if self.parameters is not None else float("nan")
),
"# of Parameters": (len(self.parameters) if self.parameters is not None else "nan"),
}

# determines the order in which the information will be printed
order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"]
_fields = [(key, fields[key]) for key in order if key in fields]

longest_field_name_length = max(len(name) for name, _ in _fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
return header + body
return [(key, fields[key]) for key in order if key in fields]


class OpenMLParameter:
class OpenMLParameter(ReprMixin):
"""Parameter object (used in setup).

Parameters
Expand Down Expand Up @@ -123,11 +116,9 @@ def _to_dict(self) -> dict[str, Any]:
"value": self.value,
}

def __repr__(self) -> str:
header = "OpenML Parameter"
header = f"{header}\n{'=' * len(header)}\n"

fields = {
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"ID": self.id,
"Flow ID": self.flow_id,
# "Flow Name": self.flow_name,
Expand Down Expand Up @@ -156,9 +147,4 @@ def __repr__(self) -> str:
parameter_default,
parameter_value,
]
_fields = [(key, fields[key]) for key in order if key in fields]

longest_field_name_length = max(len(name) for name, _ in _fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
return header + body
return [(key, fields[key]) for key in order if key in fields]
23 changes: 22 additions & 1 deletion openml/tasks/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@

import pickle
from collections import OrderedDict
from collections.abc import Sequence
from pathlib import Path
from typing import Any
from typing_extensions import NamedTuple

import arff # type: ignore
import numpy as np

from openml.utils import ReprMixin


class Split(NamedTuple):
"""A single split of a dataset."""
Expand All @@ -18,7 +21,7 @@ class Split(NamedTuple):
test: np.ndarray


class OpenMLSplit: # noqa: PLW1641
class OpenMLSplit(ReprMixin):
"""OpenML Split object.

This class manages train-test splits for a dataset across multiple
Expand Down Expand Up @@ -63,6 +66,22 @@ def __init__(
self.folds = len(self.split[0])
self.samples = len(self.split[0][0])

def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields = {
"Name": self.name,
"Description": (
self.description if len(self.description) <= 80 else self.description[:77] + "..."
),
"Repeats": self.repeats,
"Folds": self.folds,
"Samples": self.samples,
}

order = ["Name", "Description", "Repeats", "Folds", "Samples"]

return [(key, fields[key]) for key in order if key in fields]

def __eq__(self, other: Any) -> bool:
if (
(not isinstance(self, type(other)))
Expand Down Expand Up @@ -90,6 +109,8 @@ def __eq__(self, other: Any) -> bool:
return False
return True

__hash__ = None # type: ignore

@classmethod
def _from_arff_file(cls, filename: Path) -> OpenMLSplit: # noqa: C901, PLR0912
repetitions = None
Expand Down
58 changes: 57 additions & 1 deletion openml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from __future__ import annotations

import contextlib
import re
import shutil
import warnings
from collections.abc import Callable, Mapping, Sized
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
from functools import wraps
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload
Expand Down Expand Up @@ -470,3 +472,57 @@ def update(self, length: int) -> None:
self._progress_bar.update(length)
if self._progress_bar.total <= self._progress_bar.n:
self._progress_bar.close()


class ReprMixin(ABC):
    """Mixin that gives OpenML objects a uniform, human-readable ``__repr__``.

    Subclasses implement ``_get_repr_body_fields`` to supply the
    ``(name, value)`` pairs to display. The mixin renders them below an
    underlined header derived from the class name.
    """

    def __repr__(self) -> str:
        """Return the header followed by one ``name: value`` line per body field."""
        body_fields = self._get_repr_body_fields()
        return self._apply_repr_template(body_fields)

    @abstractmethod
    def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
        """Collect all information to display in the __repr__ body.

        Returns
        -------
        body_fields : Sequence[tuple[str, str | int | list[str] | None]]
            A sequence of (name, value) pairs to display in the body of the __repr__.
            E.g.: [('metric', 'accuracy'), ('dataset', 'iris')]
            Each value is rendered with ``str.format``; a value of ``None`` is
            displayed as the text ``None``.
        """
        # Must be implemented by the concrete subclass.

    def _apply_repr_template(
        self,
        body_fields: Iterable[tuple[str, str | int | list[str] | None]],
    ) -> str:
        """Generate the header and format the body for the string representation.

        Parameters
        ----------
        body_fields : Iterable[tuple[str, str | int | list[str] | None]]
            The (name, value) pairs to display in the body of the __repr__.

        Returns
        -------
        str
            The header text, a line of ``=`` of the same length, and then one
            ``name: value`` line per field, with names padded with ``.`` to the
            length of the longest name. With no fields, only the header
            (including its trailing newline) is returned.
        """
        prefix = "OpenML"
        class_name = self.__class__.__name__
        # Only strip the prefix when it is really there; blindly slicing would
        # eat the first characters of a class that is not named ``OpenML*``.
        if class_name.startswith(prefix):
            class_name = class_name[len(prefix) :]

        # We add spaces between capitals, e.g. ClassificationTask -> Classification Task
        name_with_spaces = re.sub(r"(\w)([A-Z])", r"\1 \2", class_name)
        header_text = f"OpenML {name_with_spaces}"
        header = f"{header_text}\n{'=' * len(header_text)}\n"

        _body_fields: list[tuple[str, str | int | list[str]]] = [
            (k, "None" if v is None else v) for k, v in body_fields
        ]
        # ``default=0`` keeps __repr__ from raising ValueError when a subclass
        # reports no fields at all.
        longest_field_name_length = max((len(name) for name, _ in _body_fields), default=0)
        field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
        body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
        return header + body