From aba7b7829767f118953c8ee8bb6ae7612332def6 Mon Sep 17 00:00:00 2001
From: bees4ever <beesforever@gmx.de>
Date: Thu, 1 Mar 2018 12:09:29 +0100
Subject: [PATCH 1/2] Add a version beta version of the NSGA-II Algorithm based
 on Kalyanmoy Deb, Associate Member, IEEE, Amrit Pratap, Sameer Agarwal, and
 T. Meyarivan

---
 spotpy/algorithms/__init__.py |   3 +-
 spotpy/algorithms/nsgaii.py   | 299 ++++++++++++++++++++++++++++++++++
 2 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 spotpy/algorithms/nsgaii.py
diff --git a/spotpy/algorithms/__init__.py b/spotpy/algorithms/__init__.py
index f59834a3..99442c82 100644
--- a/spotpy/algorithms/__init__.py
+++ b/spotpy/algorithms/__init__.py
@@ -30,4 +30,5 @@
 from .abc import abc         # Artificial Bee Colony
 from .fscabc import fscabc   # Fitness Scaling Artificial Bee Colony
 from .dream import dream     # DiffeRential Evolution Adaptive Metropolis
-from .list import list       # Samples from  given spotpy database
\ No newline at end of file
+from .list import list       # Samples from  given spotpy database
+from .nsgaii import NSGAII   # A Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II
diff --git a/spotpy/algorithms/nsgaii.py b/spotpy/algorithms/nsgaii.py
new file mode 100644
index 00000000..0d29aea9
--- /dev/null
+++ b/spotpy/algorithms/nsgaii.py
@@ -0,0 +1,299 @@
+'''
+Copyright (c) 2018 by Benjamin Manns
+This file is part of Statistical Parameter Optimization Tool for Python(SPOTPY).
+:author: Benjamin Manns
+
+This file contains the NSGA-II Algorithm implemented for SPOTPY based on:
+- K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, "A fast and elitist multiobjective genetic algorithm:
+NSGA-II," in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, Apr 2002.
+doi: 10.1109/4235.996017
+URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=996017&isnumber=21497
+- http://www.cs.colostate.edu/~genitor/MiscPubs/tutorial.pdf (Mutation and Crossover Algorithm)
+- http://www.tik.ee.ethz.ch/file/6c0e384dceb283cd4301339a895b72b8/TIK-Report11.pdf (Tournament Selection)
+'''
+
+import numpy as np
+import pprint
+import copy
+
+import matplotlib.pyplot as pl
+from mpl_toolkits.mplot3d import Axes3D
+
+from spotpy.algorithms import _algorithm
+
+
+class ParaPop:
+    def __init__(self, params, m_vals=[], sim=None):
+        self.params = params
+        self.m_vals = m_vals
+        self.sim = sim
+
+    def __str__(self):
+        return "<ParaPop> with content " + str(self.m_vals)
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class NSGAII(_algorithm):
+    def __init__(self, *args, **kwargs):
+        super(NSGAII, self).__init__(*args, **kwargs)
+        # self.all_objectives = [self.setup.__getattribute__(m) for m in dir(self.setup) if "objectivefunc" in m]
+
+        self.param_len = len(self.get_parameters())
+        self.generation = 0
+        self.length_of_objective_func = 2 # TODO make it better, do we need this
+
+        self.objfun_maxmin_list = None
+        self.objfun_normalize_list= []
+
+    def fast_non_dominated_sort(self, P):
+        S = {}
+        n = {}
+        F = {}
+        rank = {}
+        F[1] = {}
+
+        if self.objfun_maxmin_list is None:
+            # In the first iteration create this list to have it prepared for later use
+            self.objfun_maxmin_list = []
+            for _ in self.objectivefunction(self.setup.evaluation(), self.setup.simulation(P[0])):
+                self.objfun_maxmin_list.append([])
+
+        param_generator = ((p, list(P[p])) for p in P)
+        calculated_sims = list(self.repeat(param_generator))
+        for p, par_p, sim_p in calculated_sims:
+
+            S[p] = {}
+            S_p_index = 0
+            n[p] = 0
+            for q, par_q, sim_q in calculated_sims:
+
+                # check whether parameter set p or q is dominating so we test all objective functions here
+                # https://cims.nyu.edu/~gn387/glp/lec1.pdf / Definition / Dominance Relation
+
+                # m_diffs = np.array([])
+                m_vals_p = np.array([])
+                m_vals_q = np.array([])
+
+                for i, m in enumerate(self.objectivefunction(self.setup.evaluation(), sim_q)):
+                    m_vals_q = np.append(m_vals_q, m)
+                    self.objfun_maxmin_list[i].append(m)
+
+                for i, m in enumerate(self.objectivefunction(self.setup.evaluation(), sim_p)):
+                    m_vals_p = np.append(m_vals_p, m)
+                    self.objfun_maxmin_list[i].append(m)
+
+                m_diffs = m_vals_q - m_vals_p
+
+                # TODO ist Minimieren oder Maximieren richtig?
+
+                pp_q = ParaPop(np.array(P[q]), list(m_vals_q.tolist()), sim_q)
+
+                pp_p = ParaPop(np.array(P[q]), list(m_vals_p.tolist()), sim_p)
+
+                # Allow here also more then 2
+                # if p dominates q
+                if (m_diffs >= 0).all and (m_diffs > 0).any():
+
+                    S[p][S_p_index] = pp_q
+                    S_p_index += 1
+
+                # elif q dominates p:
+                elif (m_diffs <= 0).all() and (m_diffs < 0).any():
+                    # else:
+                    n[p] += 1
+
+            if n[p] == 0:
+                rank[p] = 1
+                F[1][p] = pp_p
+
+        i = 1
+        while len(F[i]) > 0:
+            Q = {}
+            # q_ind is just a useless indices which may has to change somehow, it is only for the dict we use
+            q_ind = 0
+            for p in F[i]:
+                for q in S[p]:
+                    n[q] -= 1
+                    if n[q] == 0:
+                        rank[q] = i + 1
+                        Q[q_ind] = S[p][q]
+                        q_ind += 1
+
+            i += 1
+            F[i] = Q
+
+        return F
+
+    def crowding_distance_assignement(self, I):
+        l = len(I)
+
+        if l > 2:
+
+            I = list(I.values())
+
+            ##################
+            # I = sort(I,m)  #
+            ##################
+
+            sorting_m = []
+
+            for i in I:
+                sorting_m.append(i.m_vals)
+
+            sorting_m = np.array(sorting_m)
+
+            new_order = np.argsort(np.sqrt(np.sum(sorting_m ** 2, 1)))
+
+            I = np.array(I)
+            I = I[new_order]
+
+
+            distance_I = list(np.repeat(0, l))
+            distance_I[0] = distance_I[l - 1] = np.inf
+
+            for_distance = []
+
+            for i in I:
+                for_distance.append(i.m_vals)
+
+            which_obj_fn = 0
+            for_distance = np.array(for_distance)
+            for k in for_distance.T:
+                tmp_dist = k[2:l] - k[0:l - 2]
+
+                distance_I[1:l - 1] += tmp_dist / self.objfun_normalize_list[which_obj_fn]
+
+                which_obj_fn += 1
+
+            return distance_I
+
+        else:
+            return [np.inf]
+
+    def sample(self, generations=2, paramsamp=20):
+        self.repetitions = int(generations)
+        self.status.repetitions = self.repetitions*paramsamp*2
+        R_0 = {}
+
+        for i in range(paramsamp * 2):
+            R_0[i] = list(self.setup.parameters()['random'])
+
+        while self.generation < self.repetitions:
+
+            F = self.fast_non_dominated_sort(R_0)
+
+            print("GENERATION: " + str(self.generation) + " of " + str(generations))
+            layer = 0
+            fig = pl.figure()
+
+            if self.length_of_objective_func == 2:
+
+                for i in F:
+                    if layer == 0:
+                        l_color = "b"
+                    else:
+                        l_color = "#"
+                        for _ in range(6):
+                            l_color += ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"][
+                                np.random.randint(16)]
+                    for j in F[i]:
+                        pl.plot(F[i][j].m_vals[0], F[i][j].m_vals[1], color=l_color, marker='o')
+                    layer += 1
+
+                pl.show()
+
+            elif self.length_of_objective_func == 3:
+
+                ax = fig.add_subplot(111, projection='3d')
+
+                for i in F:
+                    for j in F[i]:
+                        ax.scatter(F[i][j].m_vals[0], F[i][j].m_vals[1],
+                                   F[i][j].m_vals[2])  # , lay_col[layer] + 'o')
+                    layer += 1
+
+                # ax.set_xlabel(m1_name)
+                # ax.set_ylabel(m2_name)
+                # ax.set_zlabel(m3_name)
+
+                pl.show()
+
+            # Now sort again
+            complete_sort_all_p = []
+
+            # post-proccesing min-max values of each objective function:
+            # reset the normalize list
+            self.objfun_normalize_list = []
+            for i in self.objfun_maxmin_list:
+                # fill the normalize list
+                self.objfun_normalize_list.append(abs(max(i)-min(i)))
+
+            # reset the objfun_maxmin_list
+            self.objfun_maxmin_list = None
+
+            cChain = 0
+            for k in F:
+
+                # Save fronts we have now before sorting and mutation
+                for o in F[k]:
+
+                    self.postprocessing(self.generation, F[k][o].params, F[k][o].sim, cChain)
+
+                F_I_distance = self.crowding_distance_assignement(F[k])
+
+                # print(F_I_distance)
+
+                # sort within the list and then add to general list
+                # the partial order <_n is defined as follows:
+                # i <_n j if(i_rank < j_rank) or (i_rank = j_rank
+                # and i_distance > j_distance )
+                i_distance_order = np.argsort(F_I_distance)
+
+                F_ar = np.array(list(F[k].values()))
+                if len(F[k]) > 0:
+                    for a in F_ar[i_distance_order]:
+                        complete_sort_all_p.append(a)
+
+                    # TODO Why is algorithm to take all values again, I think only the first which are the best
+                cChain +=1
+
+            N = paramsamp
+            complete_sort_all_p = np.array(complete_sort_all_p)
+            P_new = complete_sort_all_p[0:N]
+            Q_new = {}
+            M = len(P_new)
+            if M < N:
+                P_new = np.append(P_new, complete_sort_all_p[0:N - M])
+                if N > len(P_new):
+                    exit("We still have to few parameters after selecting parent elements")
+
+            list_index = 0
+            while list_index < N:
+                # select pairs... with tournament, because this is a sorted list we just use the
+                # first occurence of a pick "out of M (= length of P_new)
+                tmp_parm_1 = P_new[np.random.randint(0, M, 1)[0]].params
+                tmp_parm_2 = P_new[np.random.randint(0, M, 1)[0]].params
+
+                # select cross over point
+                xover_point = np.random.randint(0, self.param_len - 1, 1)[0]
+
+                # cross over
+                A = np.append(tmp_parm_2[0:xover_point], tmp_parm_1[xover_point:])
+
+                # mutation
+                sign = [0, 1, -1][np.random.randint(0, 3, 1)[0]]
+                Q_new[list_index] = A + sign * (A / 4)  # was 100 before
+
+                list_index += 1
+
+            self.generation += 1
+
+            # merge P and Q
+            for i in P_new.tolist():
+                Q_new[list_index] = i.params
+                list_index += 1
+            R_0 = Q_new
+
+        self.final_call()

From 566e23f3eb47802092ccf97a8057926eb5ebc8cd Mon Sep 17 00:00:00 2001
From: bees4ever <beesforever@gmx.de>
Date: Fri, 2 Mar 2018 09:03:13 +0100
Subject: [PATCH 2/2] Add a test for NSGA-II and fix the fail of describe test

---
 .travis.yml                        |   2 +-
 spotpy/algorithms/nsgaii.py        | 117 +++++++++++++++++++----------
 spotpy/examples/tutorial_nsgaii.py |  92 +++++++++++++++++++++++
 3 files changed, 171 insertions(+), 40 deletions(-)
 create mode 100644 spotpy/examples/tutorial_nsgaii.py

diff --git a/.travis.yml b/.travis.yml
index e0d6b969..77e229ce 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,7 +22,7 @@ install:
 
 script:
   - pip uninstall spotpy -y
-  - py.test  spotpy/unittests/test_* spotpy/examples/tutorial_rosenbrock.py  --cov spotpy --cov-report term-missing -v
+  - py.test  spotpy/unittests/test_* spotpy/examples/tutorial_rosenbrock.py  spotpy/examples/tutorial_nsgaii.py --cov spotpy --cov-report term-missing -v
 
 after_success:
   - coveralls
diff --git a/spotpy/algorithms/nsgaii.py b/spotpy/algorithms/nsgaii.py
index 0d29aea9..12b8fc4d 100644
--- a/spotpy/algorithms/nsgaii.py
+++ b/spotpy/algorithms/nsgaii.py
@@ -13,11 +13,6 @@
 '''
 
 import numpy as np
-import pprint
-import copy
-
-import matplotlib.pyplot as pl
-from mpl_toolkits.mplot3d import Axes3D
 
 from spotpy.algorithms import _algorithm
 
@@ -36,13 +31,51 @@ def __repr__(self):
 
 
 class NSGAII(_algorithm):
+    """
+        Implements the "Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II
+        by Kalyanmoy Deb, Associate Member, IEEE, Amrit Pratap, Sameer Agarwal, and T. Meyarivan
+
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Input
+        ----------
+        spot_setup: class
+            model: function
+                Should be callable with a parameter combination of the parameter-function
+                and return an list of simulation results (as long as evaluation list)
+            parameter: function
+                When called, it should return a random parameter combination. Which can
+                be e.g. uniform or Gaussian
+            objectivefunction: function
+                Should return the objectivefunction for a given list of a model simulation and
+                observation.
+            evaluation: function
+                Should return the true values as return by the model.
+
+        dbname: str
+            * Name of the database where parameter, objectivefunction value and simulation results will be saved.
+
+        dbformat: str
+            * ram: fast suited for short sampling time. no file will be created and results are saved in an array.
+            * csv: A csv file will be created, which you can import afterwards.
+
+        parallel: str
+            * seq: Sequentiel sampling (default): Normal iterations on one core of your cpu.
+            * mpi: Message Passing Interface: Parallel computing on cluster pcs (recommended for unix os).
+
+        save_sim: boolean
+            * True:  Simulation results will be saved
+            * False: Simulation results will not be saved
+        """
+
         super(NSGAII, self).__init__(*args, **kwargs)
         # self.all_objectives = [self.setup.__getattribute__(m) for m in dir(self.setup) if "objectivefunc" in m]
 
         self.param_len = len(self.get_parameters())
         self.generation = 0
-        self.length_of_objective_func = 2 # TODO make it better, do we need this
+        self.length_of_objective_func = 0
 
         self.objfun_maxmin_list = None
         self.objfun_normalize_list= []
@@ -55,9 +88,11 @@ def fast_non_dominated_sort(self, P):
         F[1] = {}
 
         if self.objfun_maxmin_list is None:
+            self.length_of_objective_func = 0
             # In the first iteration create this list to have it prepared for later use
             self.objfun_maxmin_list = []
             for _ in self.objectivefunction(self.setup.evaluation(), self.setup.simulation(P[0])):
+                self.length_of_objective_func += 1
                 self.objfun_maxmin_list.append([])
 
         param_generator = ((p, list(P[p])) for p in P)
@@ -185,40 +220,44 @@ def sample(self, generations=2, paramsamp=20):
             F = self.fast_non_dominated_sort(R_0)
 
             print("GENERATION: " + str(self.generation) + " of " + str(generations))
-            layer = 0
-            fig = pl.figure()
-
-            if self.length_of_objective_func == 2:
-
-                for i in F:
-                    if layer == 0:
-                        l_color = "b"
-                    else:
-                        l_color = "#"
-                        for _ in range(6):
-                            l_color += ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"][
-                                np.random.randint(16)]
-                    for j in F[i]:
-                        pl.plot(F[i][j].m_vals[0], F[i][j].m_vals[1], color=l_color, marker='o')
-                    layer += 1
-
-                pl.show()
-
-            elif self.length_of_objective_func == 3:
-
-                ax = fig.add_subplot(111, projection='3d')
-
-                for i in F:
-                    for j in F[i]:
-                        ax.scatter(F[i][j].m_vals[0], F[i][j].m_vals[1],
-                                   F[i][j].m_vals[2])  # , lay_col[layer] + 'o')
-                    layer += 1
-
-                # ax.set_xlabel(m1_name)
-                # ax.set_ylabel(m2_name)
-                # ax.set_zlabel(m3_name)
 
-                pl.show()
+            # Debuggin Issue
+            # import matplotlib.pyplot as pl
+            # from mpl_toolkits.mplot3d import Axes3D
+            # layer = 0
+            # fig = pl.figure()
+            #
+            # if self.length_of_objective_func == 2:
+            #
+            #     for i in F:
+            #         if layer == 0:
+            #             l_color = "b"
+            #         else:
+            #             l_color = "#"
+            #             for _ in range(6):
+            #                 l_color += ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"][
+            #                     np.random.randint(16)]
+            #         for j in F[i]:
+            #             pl.plot(F[i][j].m_vals[0], F[i][j].m_vals[1], color=l_color, marker='o')
+            #         layer += 1
+            #
+            #     pl.show()
+            #
+            # elif self.length_of_objective_func == 3:
+            #
+            #     ax = fig.add_subplot(111, projection='3d')
+            #
+            #     for i in F:
+            #         for j in F[i]:
+            #             ax.scatter(F[i][j].m_vals[0], F[i][j].m_vals[1],
+            #                        F[i][j].m_vals[2])  # , lay_col[layer] + 'o')
+            #         layer += 1
+            #
+            #     # ax.set_xlabel(m1_name)
+            #     # ax.set_ylabel(m2_name)
+            #     # ax.set_zlabel(m3_name)
+            #
+            #     pl.show()
 
             # Now sort again
             complete_sort_all_p = []
diff --git a/spotpy/examples/tutorial_nsgaii.py b/spotpy/examples/tutorial_nsgaii.py
new file mode 100644
index 00000000..e3cb0f86
--- /dev/null
+++ b/spotpy/examples/tutorial_nsgaii.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+
+from __future__ import division, print_function
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+try:
+    import spotpy
+except ImportError:
+    import sys
+
+    sys.path.append(".")
+    import spotpy
+
+import spotpy.algorithms
+from spotpy.examples.hymod_python.hymod import hymod
+import os
+import unittest
+
+
+class spot_setup(object):
+    def __init__(self):
+        # Transform [mm/day] into [l s-1], where 1.783 is the catchment area
+        self.Factor = 1.783 * 1000 * 1000 / (60 * 60 * 24)
+        # Load Observation data from file
+        self.PET, self.Precip = [], []
+        self.date, self.trueObs = [], []
+        self.owd = os.path.dirname(os.path.realpath(__file__))
+        self.hymod_path = self.owd + os.sep + 'hymod_python'
+        climatefile = open(self.hymod_path + os.sep + 'hymod_input.csv', 'r')
+        headerline = climatefile.readline()[:-1]
+
+        if ';' in headerline:
+            self.delimiter = ';'
+        else:
+            self.delimiter = ','
+        self.header = headerline.split(self.delimiter)
+        for line in climatefile:
+            values = line.strip().split(self.delimiter)
+            self.date.append(str(values[0]))
+            self.Precip.append(float(values[1]))
+            self.PET.append(float(values[2]))
+            self.trueObs.append(float(values[3]))
+
+        climatefile.close()
+        self.params = [spotpy.parameter.Uniform('cmax', low=1.0, high=500, optguess=412.33),
+                       spotpy.parameter.Uniform('bexp', low=0.1, high=2.0, optguess=0.1725),
+                       spotpy.parameter.Uniform('alpha', low=0.1, high=0.99, optguess=0.8127),
+                       spotpy.parameter.Uniform('Ks', low=0.0, high=0.10, optguess=0.0404),
+                       spotpy.parameter.Uniform('Kq', low=0.1, high=0.99, optguess=0.5592),
+                       spotpy.parameter.Uniform('fake1', low=0.1, high=10, optguess=0.5592),
+                       spotpy.parameter.Uniform('fake2', low=0.1, high=10, optguess=0.5592)]
+
+    def parameters(self):
+        return spotpy.parameter.generate(self.params)
+
+    def simulation(self, x):
+        data = hymod(self.Precip, self.PET, x[0], x[1], x[2], x[3], x[4])
+        sim = []
+        for val in data:
+            sim.append(val * self.Factor)
+        return sim[366:]
+
+    def evaluation(self):
+        return self.trueObs[366:]
+
+    def objectivefunction(self, simulation, evaluation, params=None):
+        # like = spotpy.likelihoods.gaussianLikelihoodMeasErrorOut(evaluation,simulation)
+        # return -like
+        # like1 = spotpy.signatures.getSkewness(evaluation, simulation)
+        like1 = spotpy.objectivefunctions.agreementindex(evaluation, simulation)
+        like2 = spotpy.objectivefunctions.rmse(evaluation, simulation)
+        like3 = spotpy.signatures.getCoeffVariation(evaluation, simulation)
+        return [like1, like2, like3]
+
+
+
+class Test_NSGAII(unittest.TestCase):
+    def setUp(self):
+        self.sp_setup = spot_setup()
+        self.sampler = spotpy.algorithms.NSGAII(self.sp_setup, dbname='NSGA2', dbformat="csv")
+
+        self.sampler.sample(generations=5, paramsamp=40)
+
+    def test_sampler_output(self):
+        self.assertGreaterEqual(400, len(self.sampler.getdata()))
+        self.assertLessEqual(300, len(self.sampler.getdata()))
+