From 4a059e09a4c66397cf266f71c4248543e2e972e5 Mon Sep 17 00:00:00 2001
From: Lars Reimann <mail@larsreimann.com>
Date: Fri, 3 May 2024 21:13:39 +0200
Subject: [PATCH] feat: replace other values than NaN with imputer (#707)

Closes #643

### Summary of Changes

* Add an optional `value_to_replace` argument to `Imputer` that configures
which value gets replaced. It can be an int, a float, or a string.

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
---
 docs/tutorials/data_processing.ipynb          |  72 ++---
 .../data/tabular/transformation/_imputer.py   | 249 ++++++++++------
 src/safeds/data/tabular/typing/__init__.py    |   3 -
 .../data/tabular/typing/_imputer_strategy.py  |  55 ----
 .../tabular/transformation/test_imputer.py    | 268 +++++++++++-------
 5 files changed, 359 insertions(+), 288 deletions(-)
 delete mode 100644 src/safeds/data/tabular/typing/_imputer_strategy.py

diff --git a/docs/tutorials/data_processing.ipynb b/docs/tutorials/data_processing.ipynb
index 1e2f87661..c08fc31db 100644
--- a/docs/tutorials/data_processing.ipynb
+++ b/docs/tutorials/data_processing.ipynb
@@ -32,7 +32,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.containers import Table\n",
     "\n",
@@ -40,7 +39,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -54,7 +54,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice = titanic.slice_rows(end=10)\n",
     "\n",
@@ -62,7 +61,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -76,13 +76,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.get_row(0)"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -96,13 +96,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.get_column(\"name\")"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -116,7 +116,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "Table.from_rows([\n",
     "    titanic_slice.get_row(0),\n",
@@ -125,7 +124,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -139,7 +139,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "Table.from_columns([\n",
     "    titanic_slice.get_column(\"name\"),\n",
@@ -148,7 +147,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -162,7 +162,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.remove_columns([\n",
     "    \"id\",\n",
@@ -175,7 +174,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -189,13 +189,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.keep_only_columns([\"name\", \"survived\"])"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -211,13 +211,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.sort_columns()"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -231,7 +231,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.sort_columns(\n",
     "    lambda column1, column2:\n",
@@ -240,7 +239,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -254,7 +254,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic.filter_rows(\n",
     "    lambda row:\n",
@@ -263,7 +262,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -278,7 +278,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.transformation import Imputer\n",
     "\n",
@@ -287,7 +286,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -301,7 +301,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.transformation import LabelEncoder\n",
     "\n",
@@ -310,7 +309,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -324,7 +324,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.transformation import OneHotEncoder\n",
     "\n",
@@ -333,7 +332,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -347,7 +347,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.transformation import RangeScaler\n",
     "\n",
@@ -356,7 +355,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -370,7 +370,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "from safeds.data.tabular.transformation import StandardScaler\n",
     "\n",
@@ -379,7 +378,8 @@
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -394,13 +394,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.transform_column(\"sex\", lambda row: 1 if row.get_value(\"sex\") == \"female\" else 0)\n"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   },
   {
    "cell_type": "markdown",
@@ -414,13 +414,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
    "source": [
     "titanic_slice.transform_column(\"parents_children\", lambda row: \"No\" if row.get_value(\"parents_children\") == 0 else \"Yes\")\n"
    ],
    "metadata": {
     "collapsed": false
-   }
+   },
+   "outputs": []
   }
  ],
  "metadata": {
diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py
index 9602712e8..f8bd25439 100644
--- a/src/safeds/data/tabular/transformation/_imputer.py
+++ b/src/safeds/data/tabular/transformation/_imputer.py
@@ -2,11 +2,14 @@
 
 import sys
 import warnings
+from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any
 
+import pandas as pd
+
+from safeds._utils import _structural_hash
 from safeds.data.tabular.containers import Table
 from safeds.data.tabular.transformation._table_transformer import TableTransformer
-from safeds.data.tabular.typing import ImputerStrategy
 from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError
 
 if TYPE_CHECKING:
@@ -15,12 +18,14 @@
 
 class Imputer(TableTransformer):
     """
-    Replace missing values with the given strategy.
+    Replace missing values using the given strategy.
 
     Parameters
     ----------
     strategy:
-        The strategy used to impute missing values. Use the classes nested inside `Imputer.Strategy` to specify it.
+        How to replace missing values.
+    value_to_replace:
+        The value that should be replaced. If None (the default), `pd.NA` is used.
 
     Examples
     --------
@@ -37,105 +42,80 @@ class Imputer(TableTransformer):
     >>> transformed_table = transformer.fit_and_transform(table)
     """
 
-    class Strategy:
-        class Constant(ImputerStrategy):
+    class Strategy(ABC):
+        """Various strategies to replace missing values. Use the static methods to create instances of this class."""
+
+        @abstractmethod
+        def __eq__(self, other: object) -> bool:
+            pass  # pragma: no cover
+
+        @abstractmethod
+        def __hash__(self) -> int:
+            pass  # pragma: no cover
+
+        @abstractmethod
+        def _apply(self, imputer: sk_SimpleImputer) -> None:
             """
-            An imputation strategy for imputing missing data with given constant values.
+            Set the imputer strategy of the given imputer.
 
             Parameters
             ----------
-            value:
-                The given value to impute missing values.
+            imputer:
+                The imputer to augment.
             """
 
-            def __eq__(self, other: object) -> bool:
-                if not isinstance(other, Imputer.Strategy.Constant):
-                    return NotImplemented
-                if self is other:
-                    return True
-                return self._value == other._value
-
-            __hash__ = ImputerStrategy.__hash__
-
-            def __init__(self, value: Any):
-                self._value = value
-
-            def __sizeof__(self) -> int:
-                """
-                Return the complete size of this object.
-
-                Returns
-                -------
-                size:
-                    Size of this object in bytes.
-                """
-                return sys.getsizeof(self._value)
-
-            def __str__(self) -> str:
-                return f"Constant({self._value})"
-
-            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
-                imputer.strategy = "constant"
-                imputer.fill_value = self._value
-
-        class Mean(ImputerStrategy):
-            """An imputation strategy for imputing missing data with mean values."""
-
-            def __eq__(self, other: object) -> bool:
-                if not isinstance(other, Imputer.Strategy.Mean):
-                    return NotImplemented
-                return True
-
-            __hash__ = ImputerStrategy.__hash__
-
-            def __str__(self) -> str:
-                return "Mean"
-
-            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
-                imputer.strategy = "mean"
-
-        class Median(ImputerStrategy):
-            """An imputation strategy for imputing missing data with median values."""
-
-            def __eq__(self, other: object) -> bool:
-                if not isinstance(other, Imputer.Strategy.Median):
-                    return NotImplemented
-                return True
-
-            __hash__ = ImputerStrategy.__hash__
-
-            def __str__(self) -> str:
-                return "Median"
-
-            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
-                imputer.strategy = "median"
-
-        class Mode(ImputerStrategy):
+        @staticmethod
+        def Constant(value: Any) -> Imputer.Strategy:  # noqa: N802
             """
-            An imputation strategy for imputing missing data with mode values.
+            Replace missing values with the given constant value.
 
-            The lowest value will be used if there are multiple values with the same highest count.
+            Parameters
+            ----------
+            value:
+                The value to replace missing values.
             """
+            return _Constant(value)  # pragma: no cover
 
-            def __eq__(self, other: object) -> bool:
-                if not isinstance(other, Imputer.Strategy.Mode):
-                    return NotImplemented
-                return True
+        @staticmethod
+        def Mean() -> Imputer.Strategy:  # noqa: N802
+            """Replace missing values with the mean of each column."""
+            return _Mean()  # pragma: no cover
 
-            __hash__ = ImputerStrategy.__hash__
+        @staticmethod
+        def Median() -> Imputer.Strategy:  # noqa: N802
+            """Replace missing values with the median of each column."""
+            return _Median()  # pragma: no cover
 
-            def __str__(self) -> str:
-                return "Mode"
+        @staticmethod
+        def Mode() -> Imputer.Strategy:  # noqa: N802
+            """Replace missing values with the mode of each column. Ties are resolved by using the lowest value."""
+            return _Mode()  # pragma: no cover
 
-            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
-                imputer.strategy = "most_frequent"
+    def __init__(self, strategy: Imputer.Strategy, *, value_to_replace: float | str | None = None):
+        if value_to_replace is None:
+            value_to_replace = pd.NA
 
-    def __init__(self, strategy: ImputerStrategy):
         self._strategy = strategy
+        self._value_to_replace = value_to_replace
 
         self._wrapped_transformer: sk_SimpleImputer | None = None
         self._column_names: list[str] | None = None
 
+    @property
+    def strategy(self) -> Imputer.Strategy:
+        """The strategy used to replace missing values."""
+        return self._strategy
+
+    @property
+    def value_to_replace(self) -> Any:
+        """The value that should be replaced."""
+        return self._value_to_replace
+
+    @property
+    def is_fitted(self) -> bool:
+        """Whether the transformer is fitted."""
+        return self._wrapped_transformer is not None
+
     # noinspection PyProtectedMember
     def fit(self, table: Table, column_names: list[str] | None) -> Imputer:
         """
@@ -176,7 +156,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> Imputer:
         if table.number_of_rows == 0:
             raise ValueError("The Imputer cannot be fitted because the table contains 0 rows")
 
-        if (isinstance(self._strategy, Imputer.Strategy.Mean | Imputer.Strategy.Median)) and table.keep_only_columns(
+        if (isinstance(self._strategy, _Mean | _Median)) and table.keep_only_columns(
             column_names,
         ).remove_columns_with_non_numerical_values().number_of_columns < len(
             column_names,
@@ -194,7 +174,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> Imputer:
                 ),
             )
 
-        if isinstance(self._strategy, Imputer.Strategy.Mode):
+        if isinstance(self._strategy, _Mode):
             multiple_most_frequent = {}
             for name in column_names:
                 if len(table.get_column(name).mode()) > 1:
@@ -209,7 +189,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> Imputer:
                 )
 
         wrapped_transformer = sk_SimpleImputer()
-        self._strategy._augment_imputer(wrapped_transformer)
+        self._strategy._apply(wrapped_transformer)
+        wrapped_transformer.missing_values = self._value_to_replace
         wrapped_transformer.fit(table._data[column_names])
 
         result = Imputer(self._strategy)
@@ -265,11 +246,6 @@ def transform(self, table: Table) -> Table:
         )
         return Table._from_pandas_dataframe(data, table.schema)
 
-    @property
-    def is_fitted(self) -> bool:
-        """Whether the transformer is fitted."""
-        return self._wrapped_transformer is not None
-
     def get_names_of_added_columns(self) -> list[str]:
         """
         Get the names of all new columns that have been added by the Imputer.
@@ -288,7 +264,6 @@ def get_names_of_added_columns(self) -> list[str]:
             raise TransformerNotFittedError
         return []
 
-    # (Must implement abstract method, cannot instantiate class otherwise.)
     def get_names_of_changed_columns(self) -> list[str]:
         """
          Get the names of all columns that may have been changed by the Imputer.
@@ -324,3 +299,93 @@ def get_names_of_removed_columns(self) -> list[str]:
         if not self.is_fitted:
             raise TransformerNotFittedError
         return []
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Imputation strategies
+# ----------------------------------------------------------------------------------------------------------------------
+
+
+class _Constant(Imputer.Strategy):
+    def __init__(self, value: Any):
+        self._value = value
+
+    @property
+    def value(self) -> Any:
+        return self._value
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, _Constant):
+            return NotImplemented
+        if self is other:
+            return True
+        return self._value == other._value
+
+    def __hash__(self) -> int:
+        return _structural_hash(str(self))
+
+    def __sizeof__(self) -> int:
+        return sys.getsizeof(self._value)
+
+    def __str__(self) -> str:
+        return f"Constant({self._value})"
+
+    def _apply(self, imputer: sk_SimpleImputer) -> None:
+        imputer.strategy = "constant"
+        imputer.fill_value = self._value
+
+
+class _Mean(Imputer.Strategy):
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, _Mean):
+            return NotImplemented
+        return True
+
+    def __hash__(self) -> int:
+        return _structural_hash(str(self))
+
+    def __str__(self) -> str:
+        return "Mean"
+
+    def _apply(self, imputer: sk_SimpleImputer) -> None:
+        imputer.strategy = "mean"
+
+
+class _Median(Imputer.Strategy):
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, _Median):
+            return NotImplemented
+        return True
+
+    def __hash__(self) -> int:
+        return _structural_hash(str(self))
+
+    def __str__(self) -> str:
+        return "Median"
+
+    def _apply(self, imputer: sk_SimpleImputer) -> None:
+        imputer.strategy = "median"
+
+
+class _Mode(Imputer.Strategy):
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, _Mode):
+            return NotImplemented
+        return True
+
+    def __hash__(self) -> int:
+        return _structural_hash(str(self))
+
+    def __str__(self) -> str:
+        return "Mode"
+
+    def _apply(self, imputer: sk_SimpleImputer) -> None:
+        imputer.strategy = "most_frequent"
+
+
+# Override the methods with classes, so they can be used in `isinstance` calls. Unlike methods, classes define a type.
+# This is needed for the DSL, where imputer strategies are variants of an enum.
+Imputer.Strategy.Constant = _Constant  # type: ignore[method-assign]
+Imputer.Strategy.Mean = _Mean  # type: ignore[method-assign]
+Imputer.Strategy.Median = _Median  # type: ignore[method-assign]
+Imputer.Strategy.Mode = _Mode  # type: ignore[method-assign]
diff --git a/src/safeds/data/tabular/typing/__init__.py b/src/safeds/data/tabular/typing/__init__.py
index 5b6db59a2..ab6a79399 100644
--- a/src/safeds/data/tabular/typing/__init__.py
+++ b/src/safeds/data/tabular/typing/__init__.py
@@ -6,7 +6,6 @@
 
 if TYPE_CHECKING:
     from ._column_type import Anything, Boolean, ColumnType, Integer, Nothing, RealNumber, String
-    from ._imputer_strategy import ImputerStrategy
     from ._schema import Schema
 
 apipkg.initpkg(
@@ -15,7 +14,6 @@
         "Anything": "._column_type:Anything",
         "Boolean": "._column_type:Boolean",
         "ColumnType": "._column_type:ColumnType",
-        "ImputerStrategy": "._imputer_strategy:ImputerStrategy",
         "Integer": "._column_type:Integer",
         "Nothing": "._column_type:Nothing",
         "RealNumber": "._column_type:RealNumber",
@@ -28,7 +26,6 @@
     "Anything",
     "Boolean",
     "ColumnType",
-    "ImputerStrategy",
     "Integer",
     "Nothing",
     "RealNumber",
diff --git a/src/safeds/data/tabular/typing/_imputer_strategy.py b/src/safeds/data/tabular/typing/_imputer_strategy.py
deleted file mode 100644
index f35cf9d25..000000000
--- a/src/safeds/data/tabular/typing/_imputer_strategy.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
-
-from safeds._utils import _structural_hash
-
-if TYPE_CHECKING:
-    from sklearn.impute import SimpleImputer as sk_SimpleImputer
-
-
-class ImputerStrategy(ABC):
-    """
-    The abstract base class of the different imputation strategies supported by the `Imputer`.
-
-    This class is only needed for type annotations. Use the subclasses nested inside `Imputer.Strategy` instead.
-    """
-
-    @abstractmethod
-    def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
-        """
-        Set the imputer strategy of the given imputer.
-
-        Parameters
-        ----------
-        imputer:
-            The imputer to augment.
-        """
-
-    @abstractmethod
-    def __eq__(self, other: object) -> bool:
-        """
-        Compare two imputer strategies.
-
-        Parameters
-        ----------
-        other:
-            other object to compare to
-
-        Returns
-        -------
-        equals:
-            Whether the two imputer strategies are equal
-        """
-
-    def __hash__(self) -> int:
-        """
-        Return a deterministic hash value for this imputer strategy.
-
-        Returns
-        -------
-        hash:
-            The hash value.
-        """
-        return _structural_hash(self.__class__.__qualname__)
diff --git a/tests/safeds/data/tabular/transformation/test_imputer.py b/tests/safeds/data/tabular/transformation/test_imputer.py
index 1d986192b..55699318d 100644
--- a/tests/safeds/data/tabular/transformation/test_imputer.py
+++ b/tests/safeds/data/tabular/transformation/test_imputer.py
@@ -4,11 +4,11 @@
 import pytest
 from safeds.data.tabular.containers import Table
 from safeds.data.tabular.transformation import Imputer
-from safeds.data.tabular.typing import ImputerStrategy
+from safeds.data.tabular.transformation._imputer import _Mode
 from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError
 
 
-def strategies() -> list[ImputerStrategy]:
+def strategies() -> list[Imputer.Strategy]:
     """
     Return the list of imputer strategies to test.
 
@@ -17,13 +17,107 @@ def strategies() -> list[ImputerStrategy]:
 
     Returns
     -------
-    strategies : list[ImputerStrategy]
+    strategies : list[Imputer.Strategy]
         The list of classifiers to test.
     """
     return [Imputer.Strategy.Constant(2), Imputer.Strategy.Mean(), Imputer.Strategy.Median(), Imputer.Strategy.Mode()]
 
 
-class TestStrategy:
+class TestStrategyClass:
+    def test_should_be_able_to_get_value_of_constant_strategy(self) -> None:
+        assert Imputer.Strategy.Constant(1).value == 1  # type: ignore[attr-defined]
+
+    @pytest.mark.parametrize(
+        ("strategy", "type_", "expected"),
+        [
+            (Imputer.Strategy.Constant(0), Imputer.Strategy.Constant, True),
+            (Imputer.Strategy.Mean(), Imputer.Strategy.Mean, True),
+            (Imputer.Strategy.Median(), Imputer.Strategy.Median, True),
+            (Imputer.Strategy.Mode(), Imputer.Strategy.Mode, True),
+            (Imputer.Strategy.Mode(), Imputer.Strategy.Mean, False),
+        ],
+    )
+    def test_should_be_able_to_use_strategy_in_isinstance(
+        self,
+        strategy: Imputer.Strategy,
+        type_: type,
+        expected: bool,
+    ) -> None:
+        assert isinstance(strategy, type_) == expected
+
+    class TestEq:
+        @pytest.mark.parametrize(
+            ("strategy1", "strategy2"),
+            ([(x, y) for x in strategies() for y in strategies() if x.__class__ == y.__class__]),
+            ids=lambda x: x.__class__.__name__,
+        )
+        def test_equal_strategy(
+            self,
+            strategy1: Imputer.Strategy,
+            strategy2: Imputer.Strategy,
+        ) -> None:
+            assert strategy1 == strategy2
+
+        @pytest.mark.parametrize(
+            "strategy",
+            ([x for x in strategies() if x.__class__]),
+            ids=lambda x: x.__class__.__name__,
+        )
+        def test_equal_identity_strategy(
+            self,
+            strategy: Imputer.Strategy,
+        ) -> None:
+            assert strategy == strategy  # noqa: PLR0124
+
+        @pytest.mark.parametrize(
+            ("strategy1", "strategy2"),
+            ([(x, y) for x in strategies() for y in strategies() if x.__class__ != y.__class__]),
+            ids=lambda x: x.__class__.__name__,
+        )
+        def test_unequal_strategy(
+            self,
+            strategy1: Imputer.Strategy,
+            strategy2: Imputer.Strategy,
+        ) -> None:
+            assert strategy1 != strategy2
+
+    class TestHash:
+        @pytest.mark.parametrize(
+            ("strategy1", "strategy2"),
+            ([(x, y) for x in strategies() for y in strategies() if x.__class__ == y.__class__]),
+            ids=lambda x: x.__class__.__name__,
+        )
+        def test_should_return_same_hash_for_equal_strategy(
+            self,
+            strategy1: Imputer.Strategy,
+            strategy2: Imputer.Strategy,
+        ) -> None:
+            assert hash(strategy1) == hash(strategy2)
+
+        @pytest.mark.parametrize(
+            ("strategy1", "strategy2"),
+            ([(x, y) for x in strategies() for y in strategies() if x.__class__ != y.__class__]),
+            ids=lambda x: x.__class__.__name__,
+        )
+        def test_should_return_different_hash_for_unequal_strategy(
+            self,
+            strategy1: Imputer.Strategy,
+            strategy2: Imputer.Strategy,
+        ) -> None:
+            assert hash(strategy1) != hash(strategy2)
+
+    class TestSizeof:
+        @pytest.mark.parametrize(
+            "strategy",
+            ([Imputer.Strategy.Constant(1)]),
+            ids=lambda x: x.__class__.__name__,
+        )
+        def test_sizeof_strategy(
+            self,
+            strategy: Imputer.Strategy,
+        ) -> None:
+            assert sys.getsizeof(strategy) > sys.getsizeof(object())
+
     class TestStr:
         @pytest.mark.parametrize(
             ("strategy", "expected"),
@@ -33,15 +127,34 @@ class TestStr:
                 (Imputer.Strategy.Median(), "Median"),
                 (Imputer.Strategy.Mode(), "Mode"),
             ],
-            ids=["Constant", "Mean", "Median", "Mode"],
+            ids=lambda x: x.__class__.__name__,
         )
-        def test_should_return_correct_string_representation(self, strategy: ImputerStrategy, expected: str) -> None:
+        def test_should_return_correct_string_representation(self, strategy: Imputer.Strategy, expected: str) -> None:
             assert str(strategy) == expected
 
 
+class TestStrategyProperty:
+    @pytest.mark.parametrize(
+        "strategy",
+        strategies(),
+        ids=lambda x: x.__class__.__name__,
+    )
+    def test_should_return_correct_strategy(self, strategy: Imputer.Strategy) -> None:
+        assert Imputer(strategy).strategy == strategy
+
+
+class TestValueToReplaceProperty:
+    @pytest.mark.parametrize(
+        "value_to_replace",
+        [0],
+    )
+    def test_should_return_correct_value_to_replace(self, value_to_replace: float | str | None) -> None:
+        assert Imputer(Imputer.Strategy.Mode(), value_to_replace=value_to_replace).value_to_replace == value_to_replace
+
+
 class TestFit:
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_raise_if_column_not_found(self, strategy: ImputerStrategy) -> None:
+    def test_should_raise_if_column_not_found(self, strategy: Imputer.Strategy) -> None:
         table = Table(
             {
                 "a": [1, 3, None],
@@ -52,7 +165,7 @@ def test_should_raise_if_column_not_found(self, strategy: ImputerStrategy) -> No
             Imputer(strategy).fit(table, ["b", "c"])
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_raise_if_table_contains_no_rows(self, strategy: ImputerStrategy) -> None:
+    def test_should_raise_if_table_contains_no_rows(self, strategy: Imputer.Strategy) -> None:
         with pytest.raises(ValueError, match=r"The Imputer cannot be fitted because the table contains 0 rows"):
             Imputer(strategy).fit(Table({"col1": []}), ["col1"])
 
@@ -68,7 +181,7 @@ def test_should_raise_if_table_contains_non_numerical_data(
         self,
         table: Table,
         col_names: list[str],
-        strategy: ImputerStrategy,
+        strategy: Imputer.Strategy,
     ) -> None:
         with pytest.raises(
             NonNumericColumnError,
@@ -100,7 +213,7 @@ def test_should_warn_if_multiple_mode_values(self, table: Table, most_frequent:
             Imputer(Imputer.Strategy.Mode()).fit(table, None)
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_not_change_original_transformer(self, strategy: ImputerStrategy) -> None:
+    def test_should_not_change_original_transformer(self, strategy: Imputer.Strategy) -> None:
         table = Table(
             {
                 "a": [1, 3, 3, None],
@@ -116,7 +229,7 @@ def test_should_not_change_original_transformer(self, strategy: ImputerStrategy)
 
 class TestTransform:
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_raise_if_column_not_found(self, strategy: ImputerStrategy) -> None:
+    def test_should_raise_if_column_not_found(self, strategy: Imputer.Strategy) -> None:
         table_to_fit = Table(
             {
                 "a": [1, 3, 3, None],
@@ -124,7 +237,7 @@ def test_should_raise_if_column_not_found(self, strategy: ImputerStrategy) -> No
             },
         )
 
-        if isinstance(strategy, Imputer.Strategy.Mode):
+        if isinstance(strategy, _Mode):
             with warnings.catch_warnings():
                 warnings.filterwarnings(
                     action="ignore",
@@ -145,12 +258,12 @@ def test_should_raise_if_column_not_found(self, strategy: ImputerStrategy) -> No
             transformer.transform(table_to_transform)
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_raise_if_table_contains_no_rows(self, strategy: ImputerStrategy) -> None:
+    def test_should_raise_if_table_contains_no_rows(self, strategy: Imputer.Strategy) -> None:
         with pytest.raises(ValueError, match=r"The Imputer cannot transform the table because it contains 0 rows"):
             Imputer(strategy).fit(Table({"col1": [1, 2, 2]}), ["col1"]).transform(Table({"col1": []}))
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_raise_if_not_fitted(self, strategy: ImputerStrategy) -> None:
+    def test_should_raise_if_not_fitted(self, strategy: Imputer.Strategy) -> None:
         table = Table(
             {
                 "a": [1, 3, None],
@@ -165,12 +278,12 @@ def test_should_raise_if_not_fitted(self, strategy: ImputerStrategy) -> None:
 
 class TestIsFitted:
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_return_false_before_fitting(self, strategy: ImputerStrategy) -> None:
+    def test_should_return_false_before_fitting(self, strategy: Imputer.Strategy) -> None:
         transformer = Imputer(strategy)
         assert not transformer.is_fitted
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_return_true_after_fitting(self, strategy: ImputerStrategy) -> None:
+    def test_should_return_true_after_fitting(self, strategy: Imputer.Strategy) -> None:
         table = Table(
             {
                 "a": [1, 3, 3, None],
@@ -184,7 +297,7 @@ def test_should_return_true_after_fitting(self, strategy: ImputerStrategy) -> No
 
 class TestFitAndTransform:
     @pytest.mark.parametrize(
-        ("table", "column_names", "strategy", "expected"),
+        ("table", "column_names", "strategy", "value_to_replace", "expected"),
         [
             (
                 Table(
@@ -194,6 +307,7 @@ class TestFitAndTransform:
                 ),
                 None,
                 Imputer.Strategy.Constant(0.0),
+                None,
                 Table(
                     {
                         "a": [1.0, 3.0, 0.0],
@@ -208,6 +322,7 @@ class TestFitAndTransform:
                 ),
                 None,
                 Imputer.Strategy.Mean(),
+                None,
                 Table(
                     {
                         "a": [1.0, 3.0, 2.0],
@@ -222,6 +337,7 @@ class TestFitAndTransform:
                 ),
                 None,
                 Imputer.Strategy.Median(),
+                None,
                 Table(
                     {
                         "a": [1.0, 3.0, 1.0, 1.0],
@@ -236,6 +352,7 @@ class TestFitAndTransform:
                 ),
                 None,
                 Imputer.Strategy.Mode(),
+                None,
                 Table(
                     {
                         "a": [1.0, 3.0, 3.0, 3.0],
@@ -251,6 +368,7 @@ class TestFitAndTransform:
                 ),
                 ["a"],
                 Imputer.Strategy.Constant(0.0),
+                None,
                 Table(
                     {
                         "a": [1.0, 3.0, 0.0],
@@ -266,8 +384,24 @@ class TestFitAndTransform:
                 ),
                 ["a"],
                 Imputer.Strategy.Mode(),
+                None,
                 Table({"a": [1.0, 1.0, 2.0, 2.0, 1.0]}),
             ),
+            (
+                Table(
+                    {
+                        "a": [0.0, 1.0, 2.0],
+                    },
+                ),
+                None,
+                Imputer.Strategy.Constant(1.0),
+                0.0,
+                Table(
+                    {
+                        "a": [1.0, 1.0, 2.0],
+                    },
+                ),
+            ),
         ],
         ids=[
             "constant strategy",
@@ -276,28 +410,35 @@ class TestFitAndTransform:
             "mode strategy",
             "constant strategy multiple columns",
             "mode strategy multiple most frequent values",
+            "other value to replace",
         ],
     )
     def test_should_return_transformed_table(
         self,
         table: Table,
         column_names: list[str] | None,
-        strategy: ImputerStrategy,
+        strategy: Imputer.Strategy,
+        value_to_replace: float | str | None,
         expected: Table,
     ) -> None:
-        if isinstance(strategy, Imputer.Strategy.Mode):
+        if isinstance(strategy, _Mode):
             with warnings.catch_warnings():
                 warnings.filterwarnings(
                     action="ignore",
                     message=r"There are multiple most frequent values in a column given to the Imputer\..*",
                     category=UserWarning,
                 )
-                assert Imputer(strategy).fit_and_transform(table, column_names) == expected
+                assert (
+                    Imputer(strategy, value_to_replace=value_to_replace).fit_and_transform(table, column_names)
+                    == expected
+                )
         else:
-            assert Imputer(strategy).fit_and_transform(table, column_names) == expected
+            assert (
+                Imputer(strategy, value_to_replace=value_to_replace).fit_and_transform(table, column_names) == expected
+            )
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_should_not_change_original_table(self, strategy: ImputerStrategy) -> None:
+    def test_should_not_change_original_table(self, strategy: Imputer.Strategy) -> None:
         table = Table(
             {
                 "a": [1, None, None],
@@ -315,7 +456,7 @@ def test_should_not_change_original_table(self, strategy: ImputerStrategy) -> No
         assert table == expected
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_get_names_of_added_columns(self, strategy: ImputerStrategy) -> None:
+    def test_get_names_of_added_columns(self, strategy: Imputer.Strategy) -> None:
         transformer = Imputer(strategy=strategy)
         with pytest.raises(TransformerNotFittedError):
             transformer.get_names_of_added_columns()
@@ -330,7 +471,7 @@ def test_get_names_of_added_columns(self, strategy: ImputerStrategy) -> None:
         assert transformer.get_names_of_added_columns() == []
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_get_names_of_changed_columns(self, strategy: ImputerStrategy) -> None:
+    def test_get_names_of_changed_columns(self, strategy: Imputer.Strategy) -> None:
         transformer = Imputer(strategy=strategy)
         with pytest.raises(TransformerNotFittedError):
             transformer.get_names_of_changed_columns()
@@ -344,7 +485,7 @@ def test_get_names_of_changed_columns(self, strategy: ImputerStrategy) -> None:
         assert transformer.get_names_of_changed_columns() == ["a", "b"]
 
     @pytest.mark.parametrize("strategy", strategies(), ids=lambda x: x.__class__.__name__)
-    def test_get_names_of_removed_columns(self, strategy: ImputerStrategy) -> None:
+    def test_get_names_of_removed_columns(self, strategy: Imputer.Strategy) -> None:
         transformer = Imputer(strategy=strategy)
         with pytest.raises(TransformerNotFittedError):
             transformer.get_names_of_removed_columns()
@@ -357,80 +498,3 @@ def test_get_names_of_removed_columns(self, strategy: ImputerStrategy) -> None:
         )
         transformer = transformer.fit(table, None)
         assert transformer.get_names_of_removed_columns() == []
-
-
-class TestHash:
-    @pytest.mark.parametrize(
-        ("strategy1", "strategy2"),
-        ([(x, y) for x in strategies() for y in strategies() if x.__class__ == y.__class__]),
-        ids=lambda x: x.__class__.__name__,
-    )
-    def test_should_return_same_hash_for_equal_strategy(
-        self,
-        strategy1: ImputerStrategy,
-        strategy2: ImputerStrategy,
-    ) -> None:
-        assert hash(strategy1) == hash(strategy2)
-
-    @pytest.mark.parametrize(
-        ("strategy1", "strategy2"),
-        ([(x, y) for x in strategies() for y in strategies() if x.__class__ != y.__class__]),
-        ids=lambda x: x.__class__.__name__,
-    )
-    def test_should_return_different_hash_for_unequal_strategy(
-        self,
-        strategy1: ImputerStrategy,
-        strategy2: ImputerStrategy,
-    ) -> None:
-        assert hash(strategy1) != hash(strategy2)
-
-
-class TestEq:
-
-    @pytest.mark.parametrize(
-        ("strategy1", "strategy2"),
-        ([(x, y) for x in strategies() for y in strategies() if x.__class__ == y.__class__]),
-        ids=lambda x: x.__class__.__name__,
-    )
-    def test_equal_strategy(
-        self,
-        strategy1: ImputerStrategy,
-        strategy2: ImputerStrategy,
-    ) -> None:
-        assert strategy1 == strategy2
-
-    @pytest.mark.parametrize(
-        "strategy",
-        ([x for x in strategies() if x.__class__]),
-        ids=lambda x: x.__class__.__name__,
-    )
-    def test_equal_identity_strategy(
-        self,
-        strategy: ImputerStrategy,
-    ) -> None:
-        assert strategy == strategy  # noqa: PLR0124
-
-    @pytest.mark.parametrize(
-        ("strategy1", "strategy2"),
-        ([(x, y) for x in strategies() for y in strategies() if x.__class__ != y.__class__]),
-        ids=lambda x: x.__class__.__name__,
-    )
-    def test_unequal_strategy(
-        self,
-        strategy1: ImputerStrategy,
-        strategy2: ImputerStrategy,
-    ) -> None:
-        assert strategy1 != strategy2
-
-
-class TestSizeof:
-    @pytest.mark.parametrize(
-        "strategy",
-        ([Imputer.Strategy.Constant(1)]),
-        ids=lambda x: x.__class__.__name__,
-    )
-    def test_sizeof_strategy(
-        self,
-        strategy: ImputerStrategy,
-    ) -> None:
-        assert sys.getsizeof(strategy) > sys.getsizeof(object())