Make all `OpenML` classes to inherit `ReprMixin` #1567

geetu040 · 2026-02-06T07:36:23Z

I am not so sure about the custom implementation of __hash__. I know it's a requirement from pre-commit, but we need to make sure we don't just write a bad implementation to satisfy the pre-commit checks.

I think that if it can be set to None, and that silences the pre-commit check, is the right choice for the code, and no SDK code currently depends on hashing, then do it like that:

If we want to implement __hash__, given the implementation of __eq__, doesn't it make more sense to create the hash from a tuple of tuples built by looping over all (key, value) pairs of self.__dict__?

pairs of self.__dict__

self.__dict__ would contain unhashable items, which would raise errors. That's why I picked immutable/hashable fields.

I think that if it can be set to None, and that silences the pre-commit check, is the right choice for the code, and no SDK code currently depends on hashing, then do it like that:

I have set it to None, and it does silence the pre-commit failure.

@fkiraly please have a look at this thread.

Is it fine to have __hash__ = None for a class?

-Original file line number
+Diff line change
@@ -1,26 +1,22 @@
     # License: BSD 3-Clause
     from __future__ import annotations
-    import re
     import webbrowser
     from abc import ABC, abstractmethod
-    from collections.abc import Iterable, Sequence
+    from collections.abc import Sequence
     import xmltodict
     import openml._api_calls
     import openml.config
+    from openml.utils import ReprMixin
     from .utils import _get_rest_api_type_alias, _tag_openml_base
-    class OpenMLBase(ABC):
+    class OpenMLBase(ReprMixin, ABC):
         """Base object for functionality that is shared across entities."""
-        def __repr__(self) -> str:
-            body_fields = self._get_repr_body_fields()
-            return self._apply_repr_template(body_fields)
         @property
         @abstractmethod
         def id(self) -> int | None:
@@ Expand Down Expand Up @@
             """
             # Should be implemented in the base class.
-        def _apply_repr_template(
-            self,
-            body_fields: Iterable[tuple[str, str | int | list[str] | None]],
-        ) -> str:
-            """Generates the header and formats the body for string representation of the object.
-            Parameters
-            ----------
-            body_fields: List[Tuple[str, str]]
-               A list of (name, value) pairs to display in the body of the __repr__.
-            """
-            # We add spaces between capitals, e.g. ClassificationTask -> Classification Task
-            name_with_spaces = re.sub(
-                r"(\w)([A-Z])",
-                r"\1 \2",
-                self.__class__.__name__[len("OpenML") :],
-            )
-            header_text = f"OpenML {name_with_spaces}"
-            header = f"{header_text}\n{'=' * len(header_text)}\n"
-            _body_fields: list[tuple[str, str | int | list[str]]] = [
-                (k, "None" if v is None else v) for k, v in body_fields
-            ]
-            longest_field_name_length = max(len(name) for name, _ in _body_fields)
-            field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
-            body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
-            return header + body
         @abstractmethod
         def _to_dict(self) -> dict[str, dict]:
             """Creates a dictionary representation of self.
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -7,8 +7,10 @@ @@
     if TYPE_CHECKING:
         from IPython.lib import pretty
+    from openml.utils import ReprMixin
-    class OpenMLDataFeature:  # noqa: PLW1641
+    class OpenMLDataFeature(ReprMixin):
         """
         Data Feature (a.k.a. Attribute) object.
@@ Expand Down Expand Up / @@ -74,11 +76,25 @@ def __init__( # noqa: PLR0913 @@
             self.number_missing_values = number_missing_values
             self.ontologies = ontologies
-        def __repr__(self) -> str:
-            return f"[{self.index} - {self.name} ({self.data_type})]"
+        def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+            """Collect all information to display in the __repr__ body."""
+            fields: dict[str, int | str | None] = {
+                "Index": self.index,
+                "Name": self.name,
+                "Data Type": self.data_type,
+            }
+            order = [
+                "Index",
+                "Name",
+                "Data Type",
+            ]
+            return [(key, fields[key]) for key in order if key in fields]
         def __eq__(self, other: Any) -> bool:
             return isinstance(other, OpenMLDataFeature) and self.__dict__ == other.__dict__
+        __hash__ = None  # type: ignore
         def _repr_pretty_(self, pp: pretty.PrettyPrinter, cycle: bool) -> None:  # noqa: ARG002
             pp.text(str(self))

-Original file line number
+Diff line change
@@ -1,13 +1,15 @@
     # License: BSD 3-Clause
     from __future__ import annotations
+    from collections.abc import Sequence
     from typing import Any
     import openml.config
     import openml.flows
+    from openml.utils import ReprMixin
-    class OpenMLSetup:
+    class OpenMLSetup(ReprMixin):
         """Setup object (a.k.a. Configuration).
         Parameters
@@ Expand Down Expand Up / @@ -43,30 +45,21 @@ def _to_dict(self) -> dict[str, Any]: @@
                 else None,
             }
-        def __repr__(self) -> str:
-            header = "OpenML Setup"
-            header = f"{header}\n{'=' * len(header)}\n"
-            fields = {
+        def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+            """Collect all information to display in the __repr__ body."""
+            fields: dict[str, int | str | None] = {
                 "Setup ID": self.setup_id,
                 "Flow ID": self.flow_id,
                 "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id),
-                "# of Parameters": (
-                    len(self.parameters) if self.parameters is not None else float("nan")
-                ),
+                "# of Parameters": (len(self.parameters) if self.parameters is not None else "nan"),
             }
             # determines the order in which the information will be printed
             order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"]
-            _fields = [(key, fields[key]) for key in order if key in fields]
-            longest_field_name_length = max(len(name) for name, _ in _fields)
-            field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
-            body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
-            return header + body
+            return [(key, fields[key]) for key in order if key in fields]
-    class OpenMLParameter:
+    class OpenMLParameter(ReprMixin):
         """Parameter object (used in setup).
         Parameters
@@ Expand Down Expand Up / @@ -123,11 +116,9 @@ def _to_dict(self) -> dict[str, Any]: @@
                 "value": self.value,
             }
-        def __repr__(self) -> str:
-            header = "OpenML Parameter"
-            header = f"{header}\n{'=' * len(header)}\n"
-            fields = {
+        def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+            """Collect all information to display in the __repr__ body."""
+            fields: dict[str, int | str | None] = {
                 "ID": self.id,
                 "Flow ID": self.flow_id,
                 # "Flow Name": self.flow_name,
@@ Expand Down Expand Up / @@ -156,9 +147,4 @@ def __repr__(self) -> str: @@
                 parameter_default,
                 parameter_value,
             ]
-            _fields = [(key, fields[key]) for key in order if key in fields]
-            longest_field_name_length = max(len(name) for name, _ in _fields)
-            field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
-            body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
-            return header + body
+            return [(key, fields[key]) for key in order if key in fields]

-Original file line number
+Diff line change
@@ Expand Up / @@ -3,13 +3,16 @@ @@
     import pickle
     from collections import OrderedDict
+    from collections.abc import Sequence
     from pathlib import Path
     from typing import Any
     from typing_extensions import NamedTuple
     import arff  # type: ignore
     import numpy as np
+    from openml.utils import ReprMixin
     class Split(NamedTuple):
         """A single split of a dataset."""
@@ Expand All / @@ -18,7 +21,7 @@ class Split(NamedTuple): @@
         test: np.ndarray
-    class OpenMLSplit:  # noqa: PLW1641
+    class OpenMLSplit(ReprMixin):
         """OpenML Split object.
         This class manages train-test splits for a dataset across multiple
@@ Expand Down Expand Up / @@ -63,6 +66,22 @@ def __init__( @@
             self.folds = len(self.split[0])
             self.samples = len(self.split[0][0])
+        def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+            """Collect all information to display in the __repr__ body."""
+            fields = {
+                "Name": self.name,
+                "Description": (
+                    self.description if len(self.description) <= 80 else self.description[:77] + "..."
+                ),
+                "Repeats": self.repeats,
+                "Folds": self.folds,
+                "Samples": self.samples,
+            }
+            order = ["Name", "Description", "Repeats", "Folds", "Samples"]
+            return [(key, fields[key]) for key in order if key in fields]
         def __eq__(self, other: Any) -> bool:
             if (
                 (not isinstance(self, type(other)))
@@ Expand Down Expand Up / @@ -90,6 +109,8 @@ def __eq__(self, other: Any) -> bool: @@
                     return False
             return True
+        __hash__ = None  # type: ignore
         @classmethod
         def _from_arff_file(cls, filename: Path) -> OpenMLSplit:  # noqa: C901, PLR0912
             repetitions = None
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -2,9 +2,11 @@ @@
     from __future__ import annotations
     import contextlib
+    import re
     import shutil
     import warnings
-    from collections.abc import Callable, Mapping, Sized
+    from abc import ABC, abstractmethod
+    from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
     from functools import wraps
     from pathlib import Path
     from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload
@@ Expand Down Expand Up / @@ -470,3 +472,57 @@ def update(self, length: int) -> None: @@
             self._progress_bar.update(length)
             if self._progress_bar.total <= self._progress_bar.n:
                 self._progress_bar.close()
+    class ReprMixin(ABC):
+        """A mixin class that provides a customizable string representation for OpenML objects.
+        This mixin standardizes the __repr__ output format across OpenML classes.
+        Classes inheriting from this mixin should implement the
+        _get_repr_body_fields method to specify which fields to display.
+        """
+        def __repr__(self) -> str:
+            body_fields = self._get_repr_body_fields()
+            return self._apply_repr_template(body_fields)
+        @abstractmethod
+        def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
+            """Collect all information to display in the __repr__ body.
+            Returns
+            -------
+            body_fields : List[Tuple[str, Union[str, int, List[str]]]]
+                A list of (name, value) pairs to display in the body of the __repr__.
+                E.g.: [('metric', 'accuracy'), ('dataset', 'iris')]
+                If value is a List of str, then each item of the list will appear in a separate row.
+            """
+            # Should be implemented in the base class.
+        def _apply_repr_template(
+            self,
+            body_fields: Iterable[tuple[str, str | int | list[str] | None]],
+        ) -> str:
+            """Generates the header and formats the body for string representation of the object.
+            Parameters
+            ----------
+            body_fields: List[Tuple[str, str]]
+               A list of (name, value) pairs to display in the body of the __repr__.
+            """
+            # We add spaces between capitals, e.g. ClassificationTask -> Classification Task
+            name_with_spaces = re.sub(
+                r"(\w)([A-Z])",
+                r"\1 \2",
+                self.__class__.__name__[len("OpenML") :],
+            )
+            header_text = f"OpenML {name_with_spaces}"
+            header = f"{header_text}\n{'=' * len(header_text)}\n"
+            _body_fields: list[tuple[str, str | int | list[str]]] = [
+                (k, "None" if v is None else v) for k, v in body_fields
+            ]
+            longest_field_name_length = max(len(name) for name, _ in _body_fields)
+            field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
+            body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
+            return header + body

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Make all `OpenML` classes to inherit `ReprMixin` #1567

Diff view

Diff view

There are no files selected for viewing

geetu040 Feb 6, 2026

Uh oh!

JATAYU000 Feb 6, 2026 •

edited

Loading

Uh oh!

geetu040 Feb 6, 2026

Uh oh!

Uh oh!

Make all OpenML classes to inherit ReprMixin #1567

Are you sure you want to change the base?

Make all OpenML classes to inherit ReprMixin #1567

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

geetu040 Feb 6, 2026

Choose a reason for hiding this comment

Uh oh!

JATAYU000 Feb 6, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

geetu040 Feb 6, 2026

Choose a reason for hiding this comment

Uh oh!

Make all `OpenML` classes to inherit `ReprMixin` #1567

Make all `OpenML` classes to inherit `ReprMixin` #1567

JATAYU000 Feb 6, 2026 •

edited

Loading