From ab6b1b2169a3741b12c8d281efa6b4d7d5ce5f9c Mon Sep 17 00:00:00 2001
From: Henry Webel
Date: Tue, 22 Oct 2024 20:15:34 +0200
Subject: [PATCH] :construction: fix ci runs (mamba and numpy related) (#81)

* :construction: switch mamba installation
  - see if snakemake envs are somehow cached
* :bug: specify python version, move ls
* :construction: deactivate some workflows, run relative ls command
* try not to cache
* :construction: test using venv created by codespace with python 3.12
  - might be that I need to create it (not sure what changed in runner configurations)
* try to use full snakemake installation
* :construction: use miniconda for pypi installation test
* try miniconda again
  - snakemake environment has its own mamba installation
  - auto-activate environment "test"
* install build dependencies, fix ubuntu first
* :bug: try to pin mamba below 2.0
  - see https://github.com/snakemake/snakemake/issues/3108
* test should be activated by default
* :construction: conda env not activated...
* :bug: pip does not install into the environment
* :construction: experiment
* :bug: shell was not initiated
* :bug: test installing njab separately
* :bug: order matters!
* try again with new order, add umap-learn explicitly
* :bug: do not re-install njab
* restrict scipy (trapz missing in lifelines)
  - latest scipy is not supported by lifelines
* :bug: exclude numpy 2.0 for now
* numpy, try two
* swap numpy and njab, adapt other packages to what they were before
* add back umap-learn, relax constraints
* :construction: in the package requirements, single packages cannot be specified
  to just ignore their dependencies
* :heavy_minus_sign: remove scipy dependency
  - leave it to njab to install dependencies in a second step
* :arrow_up: remove support for python 3.8 (end-of-life)
* :art: setuptools_scm uses tags to determine the version, add tags
* :bug: tags are not fetched without the entire history (a local check is
  sketched at the end of this message)
  - see https://github.com/actions/checkout/issues/1471
* :art: clean up workflow file
* :sparkles: add njab after update to requirements
  - enable more workflows again (using the mamba-constrained snakemake environment)
* :fire: remove comments, :rewind: add back tests
* :bug: make order explicit (by feature frequency, or by bin and bin count)
* :bug: fix order of example more explicitly
* :bug: actually test latest version of pimms, remove comments
* :bug: runs natively in colab without issues
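A quick local check of the tag-based versioning — a minimal sketch, not part
of this patch; it assumes a shallow CI-style clone and that setuptools_scm is
installed:

    # fetch tags plus the full history, as fetch-tags / fetch-depth: 0 do in CI
    git fetch --tags --unshallow
    # print the version setuptools_scm derives from the latest tag
    python -m setuptools_scm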
---
 .github/workflows/ci.yaml                |  34 ++---
 .github/workflows/ci_workflow.yaml       |   7 +-
 .github/workflows/test_pkg_on_colab.yaml |   5 +-
 .github/workflows/workflow_website.yaml  |   4 +-
 .readthedocs.yaml                        |   4 +-
 environment.yml                          |   2 +-
 pimmslearn/imputation.py                 | 162 +----------------------
 pimmslearn/pandas/__init__.py            |   3 +-
 project/workflow/envs/pimms.yaml         |   2 +-
 pyproject.toml                           |  15 +--
 snakemake_env.yml                        |   2 +-
 tests/pandas/test_calc_errors.py         | 105 +++++++--------
 tests/test_imputation.py                 |  41 +-----
 13 files changed, 96 insertions(+), 290 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 7e16f5b60..7cb8b26a3 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,23 +21,19 @@ jobs:
           "macos-13",
           # "windows-latest" # rrcovNA cannot be build from source on windows-server
         ]
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
       - name: Checkout
         uses: actions/checkout@v4
       - name: Set up Miniconda
-        # ! change action https://github.com/mamba-org/setup-micromamba
        uses: conda-incubator/setup-miniconda@v3
         with:
-          miniforge-variant: Mambaforge
-          # miniforge-version: latest
-          use-mamba: true
-          channel-priority: disabled
           python-version: ${{ matrix.python-version }}
+          channel-priority: strict
           environment-file: snakemake_env.yml
           activate-environment: snakemake
           auto-activate-base: true
-          # auto-update-conda: true
+          auto-update-conda: true
       - name: inspect-conda-environment
         run: |
           conda info
           conda list
           conda env export --from-history --no-builds > environment.yml
           conda env export --no-builds
           conda env export --no-builds > environment_w_versions.yml
-      # - name: test-r-kernel-imports
-      #   run: |
-      #     Rscript -e "library(stringi)"
-      #     Rscript -e "library(stringr)"
-      #     Rscript -e "library(reshape2)"
       - name: Dry-Run demo workflow (integration test)
         run: |
           cd project
@@ -75,8 +66,8 @@ jobs:
           name: ${{ matrix.os }}-${{ matrix.python-version }}-example-workflow-results
           path: |
             project/runs/example/
-            environment.yml
-            environment_w_versions.yml
+            snakemake_env
+            project/.snakemake/conda/

  run-unit-local-pip-installation:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest", "macos-latest", "windows-latest"]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-tags: true
+          fetch-depth: 0
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: install pimms
-        run: python -m pip install .
-
+        run: pip install .
+
       - name: Install pytest
-        run: python -m pip install pytest pytest-cov
+        run: pip install pytest pytest-cov
       - name: Run pytest
         run: pytest .
       - name: Install papermill
-        run: python -m pip install papermill ipykernel
+        run: pip install papermill ipykernel
       - name: View papermill help message for notebooks (as scripts)
         run: |
@@ -141,4 +135,4 @@ jobs:
       - uses: pypa/gh-action-pypi-publish@release/v1
         with:
           user: __token__
-          password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.github/workflows/ci_workflow.yaml b/.github/workflows/ci_workflow.yaml
index 20a64f2db..7475ccdc4 100644
--- a/.github/workflows/ci_workflow.yaml
+++ b/.github/workflows/ci_workflow.yaml
@@ -1,4 +1,4 @@
-name: run workflow with conda envs
+name: run workflow (v1) with conda envs
 on:
   push:
     branches: [main, dev]
@@ -31,13 +31,12 @@ jobs:
         # ! change action https://github.com/mamba-org/setup-micromamba
         uses: conda-incubator/setup-miniconda@v3
         with:
-          miniforge-variant: Mambaforge
-          use-mamba: true
-          channel-priority: disabled
+          channel-priority: strict
           python-version: ${{ matrix.python-version }}
           environment-file: snakemake_env.yml
           activate-environment: snakemake
           auto-activate-base: true
+          auto-update-conda: true
       - name: inspect-conda-environment
         run: |
           conda info
diff --git a/.github/workflows/test_pkg_on_colab.yaml b/.github/workflows/test_pkg_on_colab.yaml
index 9fae4d14a..6109c0810 100644
--- a/.github/workflows/test_pkg_on_colab.yaml
+++ b/.github/workflows/test_pkg_on_colab.yaml
@@ -20,11 +20,12 @@ jobs:
       - name: Install pimms-learn (from branch) and papermill
         if: github.event_name == 'pull_request'
         run: |
-          python3 -m pip install pimms-learn papermill
+          pip install .
+          pip install papermill
       - name: Install pimms-learn (from PyPI) and papermill
         if: github.event_name == 'schedule'
         run: |
-          python3 -m pip install pimms-learn papermill
+          pip install pimms-learn papermill
       - name: Run tutorial
         run: |
           cd project
diff --git a/.github/workflows/workflow_website.yaml b/.github/workflows/workflow_website.yaml
index 1d794bdd7..aa9fcaac2 100644
--- a/.github/workflows/workflow_website.yaml
+++ b/.github/workflows/workflow_website.yaml
@@ -1,4 +1,4 @@
-name: Build workflow website on public Alzheimer dataset (for protein groups)
+name: Build workflow (v2) website on public Alzheimer dataset (for protein groups)
 on:
   pull_request:
     branches: [main, dev]
@@ -73,4 +73,4 @@ jobs:
         uses: peaceiris/actions-gh-pages@v4
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: project/runs/alzheimer_study/_build/
\ No newline at end of file
+          publish_dir: project/runs/alzheimer_study/_build/
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 6e817d6be..3199f225a 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -9,7 +9,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.8"
+    python: "3.10"
     # You can also specify other tool versions:
     # nodejs: "19"
     # rust: "1.64"
@@ -32,4 +32,4 @@ python:
     - method: pip
       path: .
       extra_requirements:
-        - docs
\ No newline at end of file
+        - docs
diff --git a/environment.yml b/environment.yml
index aad91462b..7aa4526ed 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,7 +9,7 @@ channels:
   - plotly
   # - defaults
 dependencies:
-  - python>=3.8,<=3.12
+  - python>=3.9,<=3.12
   - numpy
   - pandas>=1
   - scipy>=1.6
diff --git a/pimmslearn/imputation.py b/pimmslearn/imputation.py
index 4dd553ae9..36b065516 100644
--- a/pimmslearn/imputation.py
+++ b/pimmslearn/imputation.py
@@ -5,12 +5,11 @@
 """
 
-from typing import Tuple, Dict
-from sklearn.neighbors import NearestNeighbors
-import scipy
+import logging
+from typing import Dict, Tuple
+
 import numpy as np
 import pandas as pd
-import logging
 
 logger = logging.getLogger(__name__)
 
@@ -18,152 +17,6 @@
 RANDOMSEED = 123
 
 
-def impute_missing(protein_values, mean=None, std=None):
-    """
-    Imputation is based on the mean and standard deviation
-    from the protein_values.
-    If mean and standard deviation (std) are given,
-    missing values are imputed and protein_values are returned imputed.
-    If no mean and std are given, the mean and std are computed from
-    the non-missing protein_values.
-
-    Parameters
-    ----------
-    protein_values: Iterable
-    mean: float
-    std: float
-
-    Returns
-    ------
-    protein_values: pandas.Series
-    """
-    raise NotImplementedError('Will be the main function combining features')
-    # clip by zero?
-
-
-def _select_data(data: pd.DataFrame, threshold: float):
-    """Select (protein-) columns for imputation.
-
-    Based on the threshold representing the minimum proportion of available
-    data per protein, the columns of a `pandas.DataFrame` are selected.
-
-    Parameters
-    ----------
-    data: pandas.DataFrame
-    threshold: float
-        Threshold of percentage of non-missing values to select a column/feature.
-    """
-    columns_to_impute = data.notnull().mean() >= threshold
-    return columns_to_impute
-
-
-def _sparse_coo_array(data: pd.DataFrame):
-    """Return a sparse scipy matrix from dense `pandas.DataFrame` with many
-    missing values.
-    """
-    indices = np.nonzero(~np.isnan(data.to_numpy()))
-    data_selected_sparse = data.to_numpy()
-    data_selected_sparse = scipy.sparse.coo_matrix(
-        (data_selected_sparse[indices], indices),
-        shape=data_selected_sparse.shape)
-    return data_selected_sparse
-
-
-def _get_weighted_mean(distances, data):
-    """Compute weighted mean ignoring
-    identical entries"""
-    mask = distances > 0.0
-    weights = distances[mask] / distances[mask].sum()
-    weighted_sum = data.loc[mask].mul(weights, axis=0)
-    mean_imputed = weighted_sum.sum() / sum(mask)
-    return mean_imputed
-
-
-# define imputation methods
-# could be done in PCA transformed space
-def imputation_KNN(data, alone=True, threshold=0.5):
-    """
-
-
-    Parameters
-    ----------
-    data: pandas.DataFrame
-    alone: bool  # is not used
-    threshold: float
-        Threshold of missing data by column in interval (0, 1)
-    """
-    mask_selected = _select_data(data=data, threshold=threshold)
-    data_selected = data.loc[:, mask_selected].copy()
-    data_selected_sparse = _sparse_coo_array(data_selected)
-    # impute
-    knn_fitted = NearestNeighbors(n_neighbors=3, algorithm='brute').fit(
-        data_selected_sparse)
-    fit_distances, fit_neighbors = knn_fitted.kneighbors(data_selected_sparse)
-    for i, (distances, ids) in enumerate(zip(fit_distances, fit_neighbors)):
-        mean_imputed = _get_weighted_mean(distances, data_selected.loc[ids])
-        if all(distances == 0.0):
-            logger.warning(f"Did not find any neighbor for int-id: {i}")
-        else:
-            assert i == ids[distances == 0.0], (
-                "None or more then one identical data points "
-                "for ids: {}".format(ids[distances == 0.0])
-            )
-        mask = data_selected.iloc[i].isna()
-        data_selected.loc[i, mask] = mean_imputed.loc[mask]  # SettingWithCopyError
-
-    data.update(data_selected)
-    return data
-
-
-def imputation_normal_distribution(log_intensities: pd.Series,
-                                   mean_shift=1.8,
-                                   std_shrinkage=0.3,
-                                   copy=True):
-    """Impute missing log-transformed intensity values of a single feature.
-    Samples one value for imputation for all samples.
-
-    Parameters
-    ----------
-    log_intensities: pd.Series
-        Series of normally distributed values of a single feature (for all samples/runs).
-        Here usually log-transformed intensities.
-    mean_shift: integer, float
-        Shift the mean of the log_intensities by factors of their standard
-        deviation to the negative.
-    std_shrinkage: float
-        Value greater than zero by which to shrink (or inflate) the
-        standard deviation of the log_intensities.
-    """
-    np.random.seed(RANDOMSEED)
-    if not isinstance(log_intensities, pd.Series):
-        try:
-            log_intensities.Series(log_intensities)
-            logger.warning("Series created of Iterable.")
-        except BaseException:
-            raise ValueError(
-                "Plese provided data which is a pandas.Series or an Iterable")
-    if mean_shift < 0:
-        raise ValueError(
-            "Please specify a positive float as the std.-dev. is non-negative.")
-    if std_shrinkage <= 0:
-        raise ValueError(
-            "Please specify a positive float as shrinkage factor for std.-dev.")
-    if std_shrinkage >= 1:
-        logger.warning("Standard Deviation will increase for imputed values.")
-
-    mean = log_intensities.mean()
-    std = log_intensities.std()
-
-    mean_shifted = mean - (std * mean_shift)
-    std_shrinked = std * std_shrinkage
-
-    if copy:
-        log_intensities = log_intensities.copy(deep=True)
-
-    return log_intensities.where(log_intensities.notna(),
-                                 np.random.normal(mean_shifted, std_shrinked))
-
-
 def impute_shifted_normal(df_wide: pd.DataFrame,
                           mean_shift: float = 1.8,
                           std_shrinkage: float = 0.3,
@@ -224,15 +77,6 @@ def impute_shifted_normal(df_wide: pd.DataFrame,
     return imputed_shifted_normal
 
 
-def imputation_mixed_norm_KNN(data):
-    # impute columns with less than 50% missing values with KNN
-    data = imputation_KNN(data, alone=False)  # ToDo: Alone is not used.
-    # impute remaining columns based on the distribution of the protein
-    data = imputation_normal_distribution(
-        data, mean_shift=1.8, std_shrinkage=0.3)
-    return data
-
-
 def compute_moments_shift(observed: pd.Series, imputed: pd.Series,
                           names: Tuple[str, str] = ('observed', 'imputed')) -> Dict[str, float]:
     """Summary of overall shift of mean and std. dev. of predictions for a imputation method."""
diff --git a/pimmslearn/pandas/__init__.py b/pimmslearn/pandas/__init__.py
index 4be42b68d..fa69cd7af 100644
--- a/pimmslearn/pandas/__init__.py
+++ b/pimmslearn/pandas/__init__.py
@@ -7,7 +7,8 @@
 import omegaconf
 import pandas as pd
 
-from pimmslearn.pandas.calc_errors import calc_errors_per_feat, get_absolute_error
+from pimmslearn.pandas.calc_errors import (calc_errors_per_feat,
+                                           get_absolute_error)
 
 __all__ = [
     'calc_errors_per_feat',
diff --git a/project/workflow/envs/pimms.yaml b/project/workflow/envs/pimms.yaml
index 9d1c927f3..a2ab6f0a6 100644
--- a/project/workflow/envs/pimms.yaml
+++ b/project/workflow/envs/pimms.yaml
@@ -9,7 +9,7 @@ channels:
   - plotly
   # - defaults
 dependencies:
-  - python>=3.8,<=3.12
+  - python>=3.9,<=3.12
   - numpy
   - pandas>=1
   - scipy>=1.6
diff --git a/pyproject.toml b/pyproject.toml
index 571d9cb63..73bcb27f8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ name = "pimms-learn"
 # See the section below: [tools.setuptools.dynamic]
 dynamic = ["version"]
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 # These are keywords
 classifiers = [
     "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
@@ -16,20 +16,17 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Bio-Informatics",
 ]
 dependencies = [
-    "njab>=0.0.8",
-    "numpy",
-    "matplotlib",
     "pandas",
-    "plotly",
+    "numpy",
     "torch",
+    "fastai",
     "scikit-learn>=1.0",
-    "scipy",
     "seaborn",
-    "fastai",
+    "matplotlib",
+    "plotly",
     # not used in library, but workflow
     "omegaconf",
-    "tqdm",
-    "mrmr-selection",
     "pingouin",
+    "njab>=0.1"
 ]

 [project.scripts]
diff --git a/snakemake_env.yml b/snakemake_env.yml
index 7713b7b18..fc0ab023f 100644
--- a/snakemake_env.yml
+++ b/snakemake_env.yml
@@ -5,4 +5,4 @@ channels:
   - defaults
 dependencies:
   - snakemake-minimal
-  - mamba
+  - mamba<2.0
diff --git a/tests/pandas/test_calc_errors.py b/tests/pandas/test_calc_errors.py
index 63b47adad..8af9ffd6f 100644
--- a/tests/pandas/test_calc_errors.py
+++ b/tests/pandas/test_calc_errors.py
@@ -24,11 +24,9 @@ def example_data():
                         columns=['observed'] + ['model_' + str(i + 1) for i in range(4)])
     data.columns.name = 'model'
     data.index.name = 'feat'
-    data['freq_feat'] = [4, 5, 5, 4, 6, 7, 7, 9, 8, 6]
+    data['freq_feat'] = [4, 4, 5, 5, 5, 6, 7, 9, 8, 6]
     return data
 
-# %%
-
 
 def test_get_absolute_error(example_data):
     expected = {'feat': {0: 'feat_0',
@@ -89,58 +87,57 @@ def test_get_absolute_error(example_data):
 def test_calc_errors_per_feat(example_data):
     expected = {'feat': {0: 'feat_0',
-                         1: 'feat_1',
-                         2: 'feat_0',
+                         1: 'feat_0',
+                         2: 'feat_1',
                          3: 'feat_1',
                          4: 'feat_1',
-                         5: 'feat_6',
-                         6: 'feat_2',
+                         5: 'feat_2',
+                         6: 'feat_6',
                          7: 'feat_3',
                          8: 'feat_5',
                          9: 'feat_4'},
             'model_1': {0: 1.0836015099999994,
-                        1: 0.38399649333333247,
-                        2: 1.0836015099999994,
+                        1: 1.0836015099999994,
+                        2: 0.38399649333333247,
                         3: 0.38399649333333247,
                         4: 0.38399649333333247,
-                        5: 0.3581477100000008,
-                        6: 1.0785032900000004,
+                        5: 1.0785032900000004,
+                        6: 0.3581477100000008,
                         7: 0.5197284500000023,
                         8: 0.35989225000000147,
                         9: 0.25562937999999846},
             'model_2': {0: 0.6558889949999998,
-                        1: 0.30025493000000125,
-                        2: 0.6558889949999998,
+                        1: 0.6558889949999998,
+                        2: 0.30025493000000125,
                         3: 0.30025493000000125,
                         4: 0.30025493000000125,
-                        5: 0.10481768000000002,
-                        6: 0.6079609700000006,
+                        5: 0.6079609700000006,
+                        6: 0.10481768000000002,
                         7: 0.48225405000000166,
                         8: 0.3109490500000014,
                         9: 0.24097977999999998},
             'model_3': {0: 1.8424256349999997,
-                        1: 0.3030794033333339,
-                        2: 1.8424256349999997,
+                        1: 1.8424256349999997,
+                        2: 0.3030794033333339,
                         3: 0.3030794033333339,
                         4: 0.3030794033333339,
-                        5: 0.025569629999999677,
-                        6: 1.3011469200000008,
+                        5: 1.3011469200000008,
+                        6: 0.025569629999999677,
                         7: 0.6282909300000021,
                         8: 0.749302710000002,
                         9: 0.04352294999999984},
             'model_4': {0: 1.3207320749999987,
-                        1: 0.6042852166666677,
-                        2: 1.3207320749999987,
+                        1: 1.3207320749999987,
+                        2: 0.6042852166666677,
                         3: 0.6042852166666677,
                         4: 0.6042852166666677,
-                        5: 0.1415143900000011,
-                        6: 1.2042582899999985,
+                        5: 1.2042582899999985,
+                        6: 0.1415143900000011,
                         7: 0.8281038200000026,
                         8: 0.5444545000000005,
                         9: 0.06842009000000004},
-            'freq_feat': {0: 4, 1: 4, 2: 5, 3: 5, 4: 6, 5: 6, 6: 7, 7: 7, 8: 8, 9: 9},
-            'n_obs': {0: 2, 1: 3, 2: 2, 3: 3, 4: 3, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}}
-
+            'freq_feat': {0: 4, 1: 4, 2: 5, 3: 5, 4: 5, 5: 6, 6: 6, 7: 7, 8: 8, 9: 9},
+            'n_obs': {0: 2, 1: 2, 2: 3, 3: 3, 4: 3, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}}
     actual = calc_errors.calc_errors_per_feat(
         pred=example_data.drop('freq_feat', axis=1),
         freq_feat=example_data['freq_feat']).reset_index().to_dict()
     assert actual == expected
@@ -151,56 +148,56 @@ def test_calc_errors_per_feat(example_data):
 def test_calc_errors_per_bin(example_data):
     expected = {'feat': {0: 'feat_0',
                          1: 'feat_0',
                          2: 'feat_1',
-                         3: 'feat_2',
-                         4: 'feat_3',
+                         3: 'feat_5',
+                         4: 'feat_2',
                          5: 'feat_4',
-                         6: 'feat_5',
+                         6: 'feat_3',
                          7: 'feat_1',
-                         8: 'feat_1',
-                         9: 'feat_6'},
+                         8: 'feat_6',
+                         9: 'feat_1'},
             'model_1': {0: 1.7588900899999977,
                         1: 0.408312930000001,
                         2: 0.03497017999999841,
-                        3: 1.0785032900000004,
-                        4: 0.5197284500000023,
+                        3: 0.35989225000000147,
+                        4: 1.0785032900000004,
                         5: 0.25562937999999846,
-                        6: 0.35989225000000147,
+                        6: 0.5197284500000023,
                         7: 0.31798253999999915,
-                        8: 0.7990367599999999,
-                        9: 0.3581477100000008},
+                        8: 0.3581477100000008,
+                        9: 0.7990367599999999},
             'model_2': {0: 0.9619296899999981,
                         1: 0.34984830000000144,
                         2: 0.04799503999999999,
-                        3: 0.6079609700000006,
-                        4: 0.48225405000000166,
+                        3: 0.3109490500000014,
+                        4: 0.6079609700000006,
                         5: 0.24097977999999998,
-                        6: 0.3109490500000014,
+                        6: 0.48225405000000166,
                         7: 0.055784630000001556,
-                        8: 0.7969851200000022,
-                        9: 0.10481768000000002},
+                        8: 0.10481768000000002,
+                        9: 0.7969851200000022},
             'model_3': {0: 2.9334374200000006,
                         1: 0.7514138499999987,
                         2: 0.023260270000001526,
-                        3: 1.3011469200000008,
-                        4: 0.6282909300000021,
+                        3: 0.749302710000002,
+                        4: 1.3011469200000008,
                         5: 0.04352294999999984,
-                        6: 0.749302710000002,
+                        6: 0.6282909300000021,
                         7: 0.18840471000000036,
-                        8: 0.6975732299999997,
-                        9: 0.025569629999999677},
+                        8: 0.025569629999999677,
+                        9: 0.6975732299999997},
             'model_4': {0: 2.1805211699999987,
                         1: 0.46094297999999867,
                         2: 0.1140570700000012,
-                        3: 1.2042582899999985,
-                        4: 0.8281038200000026,
+                        3: 0.5444545000000005,
+                        4: 1.2042582899999985,
                         5: 0.06842009000000004,
-                        6: 0.5444545000000005,
+                        6: 0.8281038200000026,
                         7: 0.7145071600000001,
-                        8: 0.9842914200000017,
-                        9: 0.1415143900000011},
+                        8: 0.1415143900000011,
+                        9: 0.9842914200000017},
-            'bin': {0: 25, 1: 30, 2: 31, 3: 26, 4: 29, 5: 29, 6: 26, 7: 28, 8: 28, 9: 28},
+            'bin': {0: 25, 1: 30, 2: 31, 3: 26, 4: 26, 5: 29, 6: 29, 7: 28, 8: 28, 9: 28},
             'n_obs': {0: 1, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2, 6: 2, 7: 3, 8: 3, 9: 3}}
-    actual = calc_errors.calc_errors_per_bin(
-        example_data.drop('freq_feat', axis=1)).reset_index().to_dict()
-
+    actual = calc_errors.calc_errors_per_bin(
+        example_data.drop('freq_feat', axis=1)
+    ).sort_values(["n_obs", "bin", "model_1"]).reset_index().to_dict()
     assert actual == expected
diff --git a/tests/test_imputation.py b/tests/test_imputation.py
index 0c98f77bd..61cd3068f 100644
--- a/tests/test_imputation.py
+++ b/tests/test_imputation.py
@@ -1,9 +1,3 @@
-from pathlib import Path
-import numpy as np
-import pandas as pd
-import pytest
-
-from pimmslearn.imputation import imputation_KNN, imputation_normal_distribution, impute_shifted_normal
 """
 # Test Data set was created from a sample by shuffling:
 
 import numpy
 import pandas
 
 fpath = 'tests/test_data.csv'
 data = pandas.read_csv(fpath, index_col='id')
 data.apply(numpy.random.shuffle, axis=1)
 data.to_csv('test_data.csv')
 """
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from pimmslearn.imputation import impute_shifted_normal
 
 
 @pytest.fixture
@@ -29,34 +30,6 @@ def example_data():
     example_data_path = Path(__file__).resolve().parent / 'test_data.csv'
     return pd.read_csv(example_data_path, index_col='id')
 
-# def test_impute_missing():
-#     pass
-
-
-def test_imputation_KNN(example_data):
-    threshold = 0.55
-    data = example_data.copy()
-    data_transformed = imputation_KNN(data, threshold=threshold)
-    columns_to_impute = data.notnull().mean() >= threshold
-    columns_to_impute = columns_to_impute[columns_to_impute].index
-    assert all(data_transformed.loc[:, columns_to_impute].isna().sum() < 15)
-    n_not_to_impute = data.loc[:,
-                               data.notnull().mean() < threshold].isna().sum()
-    assert all(data_transformed.loc[:, n_not_to_impute.index].isna().sum()
-               == n_not_to_impute)
-
-
-def test_imputation_normal_dist():
-    log_intensities = pd.Series([26.0, np.nan, 24.0, 25.0, np.nan])
-    imputed = imputation_normal_distribution(log_intensities)
-    imputed = round(imputed, ndigits=5)
-    assert imputed.equals(
-        pd.Series([26.0, 22.87431, 24.0, 25.0, 22.87431])
-    )
-
-# def test_imputation_mixed_norm_KNN():
-#     pass
-
 
 @pytest.mark.parametrize('axis', [0, 1])
 def test_impute_shifted_normal(example_data, axis):