From aba7b7829767f118953c8ee8bb6ae7612332def6 Mon Sep 17 00:00:00 2001 From: bees4ever Date: Thu, 1 Mar 2018 12:09:29 +0100 Subject: [PATCH 1/2] Add a version beta version of the NSGA-II Algorithm based on Kalyanmoy Deb, Associate Member, IEEE, Amrit Pratap, Sameer Agarwal, and T. Meyarivan --- spotpy/algorithms/__init__.py | 3 +- spotpy/algorithms/nsgaii.py | 299 ++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+), 1 deletion(-) create mode 100644 spotpy/algorithms/nsgaii.py diff --git a/spotpy/algorithms/__init__.py b/spotpy/algorithms/__init__.py index f59834a3..99442c82 100644 --- a/spotpy/algorithms/__init__.py +++ b/spotpy/algorithms/__init__.py @@ -30,4 +30,5 @@ from .abc import abc # Artificial Bee Colony from .fscabc import fscabc # Fitness Scaling Artificial Bee Colony from .dream import dream # DiffeRential Evolution Adaptive Metropolis -from .list import list # Samples from given spotpy database \ No newline at end of file +from .list import list # Samples from given spotpy database +from .nsgaii import NSGAII # A Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II diff --git a/spotpy/algorithms/nsgaii.py b/spotpy/algorithms/nsgaii.py new file mode 100644 index 00000000..0d29aea9 --- /dev/null +++ b/spotpy/algorithms/nsgaii.py @@ -0,0 +1,299 @@ +''' +Copyright (c) 2018 by Benjamin Manns +This file is part of Statistical Parameter Optimization Tool for Python(SPOTPY). +:author: Benjamin Manns + +This file contains the NSGA-II Algorithm implemented for SPOTPY based on: +- K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, "A fast and elitist multiobjective genetic algorithm: +NSGA-II," in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, Apr 2002. +doi: 10.1109/4235.996017 +URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=996017&isnumber=21497 +- http://www.cs.colostate.edu/~genitor/MiscPubs/tutorial.pdf (Mutation and Crossover Algorithm) +- http://www.tik.ee.ethz.ch/file/6c0e384dceb283cd4301339a895b72b8/TIK-Report11.pdf (Tournament Selection) +''' + +import numpy as np +import pprint +import copy + +import matplotlib.pyplot as pl +from mpl_toolkits.mplot3d import Axes3D + +from spotpy.algorithms import _algorithm + + +class ParaPop: + def __init__(self, params, m_vals=[], sim=None): + self.params = params + self.m_vals = m_vals + self.sim = sim + + def __str__(self): + return " with content " + str(self.m_vals) + + def __repr__(self): + return self.__str__() + + +class NSGAII(_algorithm): + def __init__(self, *args, **kwargs): + super(NSGAII, self).__init__(*args, **kwargs) + # self.all_objectives = [self.setup.__getattribute__(m) for m in dir(self.setup) if "objectivefunc" in m] + + self.param_len = len(self.get_parameters()) + self.generation = 0 + self.length_of_objective_func = 2 # TODO make it better, do we need this + + self.objfun_maxmin_list = None + self.objfun_normalize_list= [] + + def fast_non_dominated_sort(self, P): + S = {} + n = {} + F = {} + rank = {} + F[1] = {} + + if self.objfun_maxmin_list is None: + # In the first iteration create this list to have it prepared for later use + self.objfun_maxmin_list = [] + for _ in self.objectivefunction(self.setup.evaluation(), self.setup.simulation(P[0])): + self.objfun_maxmin_list.append([]) + + param_generator = ((p, list(P[p])) for p in P) + calculated_sims = list(self.repeat(param_generator)) + for p, par_p, sim_p in calculated_sims: + + S[p] = {} + S_p_index = 0 + n[p] = 0 + for q, par_q, sim_q in calculated_sims: + + # check whether parameter set p or q is dominating so we test all objective functions here + # https://cims.nyu.edu/~gn387/glp/lec1.pdf / Definition / Dominance Relation + + # m_diffs = np.array([]) + m_vals_p = np.array([]) + m_vals_q = np.array([]) + + for i, m in enumerate(self.objectivefunction(self.setup.evaluation(), sim_q)): + m_vals_q = np.append(m_vals_q, m) + self.objfun_maxmin_list[i].append(m) + + for i, m in enumerate(self.objectivefunction(self.setup.evaluation(), sim_p)): + m_vals_p = np.append(m_vals_p, m) + self.objfun_maxmin_list[i].append(m) + + m_diffs = m_vals_q - m_vals_p + + # TODO ist Minimieren oder Maximieren richtig? + + pp_q = ParaPop(np.array(P[q]), list(m_vals_q.tolist()), sim_q) + + pp_p = ParaPop(np.array(P[q]), list(m_vals_p.tolist()), sim_p) + + # Allow here also more then 2 + # if p dominates q + if (m_diffs >= 0).all and (m_diffs > 0).any(): + + S[p][S_p_index] = pp_q + S_p_index += 1 + + # elif q dominates p: + elif (m_diffs <= 0).all() and (m_diffs < 0).any(): + # else: + n[p] += 1 + + if n[p] == 0: + rank[p] = 1 + F[1][p] = pp_p + + i = 1 + while len(F[i]) > 0: + Q = {} + # q_ind is just a useless indices which may has to change somehow, it is only for the dict we use + q_ind = 0 + for p in F[i]: + for q in S[p]: + n[q] -= 1 + if n[q] == 0: + rank[q] = i + 1 + Q[q_ind] = S[p][q] + q_ind += 1 + + i += 1 + F[i] = Q + + return F + + def crowding_distance_assignement(self, I): + l = len(I) + + if l > 2: + + I = list(I.values()) + + ################## + # I = sort(I,m) # + ################## + + sorting_m = [] + + for i in I: + sorting_m.append(i.m_vals) + + sorting_m = np.array(sorting_m) + + new_order = np.argsort(np.sqrt(np.sum(sorting_m ** 2, 1))) + + I = np.array(I) + I = I[new_order] + + + distance_I = list(np.repeat(0, l)) + distance_I[0] = distance_I[l - 1] = np.inf + + for_distance = [] + + for i in I: + for_distance.append(i.m_vals) + + which_obj_fn = 0 + for_distance = np.array(for_distance) + for k in for_distance.T: + tmp_dist = k[2:l] - k[0:l - 2] + + distance_I[1:l - 1] += tmp_dist / self.objfun_normalize_list[which_obj_fn] + + which_obj_fn += 1 + + return distance_I + + else: + return [np.inf] + + def sample(self, generations=2, paramsamp=20): + self.repetitions = int(generations) + self.status.repetitions = self.repetitions*paramsamp*2 + R_0 = {} + + for i in range(paramsamp * 2): + R_0[i] = list(self.setup.parameters()['random']) + + while self.generation < self.repetitions: + + F = self.fast_non_dominated_sort(R_0) + + print("GENERATION: " + str(self.generation) + " of " + str(generations)) + layer = 0 + fig = pl.figure() + + if self.length_of_objective_func == 2: + + for i in F: + if layer == 0: + l_color = "b" + else: + l_color = "#" + for _ in range(6): + l_color += ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"][ + np.random.randint(16)] + for j in F[i]: + pl.plot(F[i][j].m_vals[0], F[i][j].m_vals[1], color=l_color, marker='o') + layer += 1 + + pl.show() + + elif self.length_of_objective_func == 3: + + ax = fig.add_subplot(111, projection='3d') + + for i in F: + for j in F[i]: + ax.scatter(F[i][j].m_vals[0], F[i][j].m_vals[1], + F[i][j].m_vals[2]) # , lay_col[layer] + 'o') + layer += 1 + + # ax.set_xlabel(m1_name) + # ax.set_ylabel(m2_name) + # ax.set_zlabel(m3_name) + + pl.show() + + # Now sort again + complete_sort_all_p = [] + + # post-proccesing min-max values of each objective function: + # reset the normalize list + self.objfun_normalize_list = [] + for i in self.objfun_maxmin_list: + # fill the normalize list + self.objfun_normalize_list.append(abs(max(i)-min(i))) + + # reset the objfun_maxmin_list + self.objfun_maxmin_list = None + + cChain = 0 + for k in F: + + # Save fronts we have now before sorting and mutation + for o in F[k]: + + self.postprocessing(self.generation, F[k][o].params, F[k][o].sim, cChain) + + F_I_distance = self.crowding_distance_assignement(F[k]) + + # print(F_I_distance) + + # sort within the list and then add to general list + # the partial order <_n is defined as follows: + # i <_n j if(i_rank < j_rank) or (i_rank = j_rank + # and i_distance > j_distance ) + i_distance_order = np.argsort(F_I_distance) + + F_ar = np.array(list(F[k].values())) + if len(F[k]) > 0: + for a in F_ar[i_distance_order]: + complete_sort_all_p.append(a) + + # TODO Why is algorithm to take all values again, I think only the first which are the best + cChain +=1 + + N = paramsamp + complete_sort_all_p = np.array(complete_sort_all_p) + P_new = complete_sort_all_p[0:N] + Q_new = {} + M = len(P_new) + if M < N: + P_new = np.append(P_new, complete_sort_all_p[0:N - M]) + if N > len(P_new): + exit("We still have to few parameters after selecting parent elements") + + list_index = 0 + while list_index < N: + # select pairs... with tournament, because this is a sorted list we just use the + # first occurence of a pick "out of M (= length of P_new) + tmp_parm_1 = P_new[np.random.randint(0, M, 1)[0]].params + tmp_parm_2 = P_new[np.random.randint(0, M, 1)[0]].params + + # select cross over point + xover_point = np.random.randint(0, self.param_len - 1, 1)[0] + + # cross over + A = np.append(tmp_parm_2[0:xover_point], tmp_parm_1[xover_point:]) + + # mutation + sign = [0, 1, -1][np.random.randint(0, 3, 1)[0]] + Q_new[list_index] = A + sign * (A / 4) # was 100 before + + list_index += 1 + + self.generation += 1 + + # merge P and Q + for i in P_new.tolist(): + Q_new[list_index] = i.params + list_index += 1 + R_0 = Q_new + + self.final_call() From 566e23f3eb47802092ccf97a8057926eb5ebc8cd Mon Sep 17 00:00:00 2001 From: bees4ever Date: Fri, 2 Mar 2018 09:03:13 +0100 Subject: [PATCH 2/2] Add a test for NSGA-II and fix the fail of describe test --- .travis.yml | 2 +- spotpy/algorithms/nsgaii.py | 117 +++++++++++++++++++---------- spotpy/examples/tutorial_nsgaii.py | 92 +++++++++++++++++++++++ 3 files changed, 171 insertions(+), 40 deletions(-) create mode 100644 spotpy/examples/tutorial_nsgaii.py diff --git a/.travis.yml b/.travis.yml index e0d6b969..77e229ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ install: script: - pip uninstall spotpy -y - - py.test spotpy/unittests/test_* spotpy/examples/tutorial_rosenbrock.py --cov spotpy --cov-report term-missing -v + - py.test spotpy/unittests/test_* spotpy/examples/tutorial_rosenbrock.py spotpy/examples/tutorial_nsgaii.py --cov spotpy --cov-report term-missing -v after_success: - coveralls diff --git a/spotpy/algorithms/nsgaii.py b/spotpy/algorithms/nsgaii.py index 0d29aea9..12b8fc4d 100644 --- a/spotpy/algorithms/nsgaii.py +++ b/spotpy/algorithms/nsgaii.py @@ -13,11 +13,6 @@ ''' import numpy as np -import pprint -import copy - -import matplotlib.pyplot as pl -from mpl_toolkits.mplot3d import Axes3D from spotpy.algorithms import _algorithm @@ -36,13 +31,51 @@ def __repr__(self): class NSGAII(_algorithm): + """ + Implements the "Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II + by Kalyanmoy Deb, Associate Member, IEEE, Amrit Pratap, Sameer Agarwal, and T. Meyarivan + + """ + def __init__(self, *args, **kwargs): + """ + Input + ---------- + spot_setup: class + model: function + Should be callable with a parameter combination of the parameter-function + and return an list of simulation results (as long as evaluation list) + parameter: function + When called, it should return a random parameter combination. Which can + be e.g. uniform or Gaussian + objectivefunction: function + Should return the objectivefunction for a given list of a model simulation and + observation. + evaluation: function + Should return the true values as return by the model. + + dbname: str + * Name of the database where parameter, objectivefunction value and simulation results will be saved. + + dbformat: str + * ram: fast suited for short sampling time. no file will be created and results are saved in an array. + * csv: A csv file will be created, which you can import afterwards. + + parallel: str + * seq: Sequentiel sampling (default): Normal iterations on one core of your cpu. + * mpi: Message Passing Interface: Parallel computing on cluster pcs (recommended for unix os). + + save_sim: boolean + * True: Simulation results will be saved + * False: Simulation results will not be saved + """ + super(NSGAII, self).__init__(*args, **kwargs) # self.all_objectives = [self.setup.__getattribute__(m) for m in dir(self.setup) if "objectivefunc" in m] self.param_len = len(self.get_parameters()) self.generation = 0 - self.length_of_objective_func = 2 # TODO make it better, do we need this + self.length_of_objective_func = 0 self.objfun_maxmin_list = None self.objfun_normalize_list= [] @@ -55,9 +88,11 @@ def fast_non_dominated_sort(self, P): F[1] = {} if self.objfun_maxmin_list is None: + self.length_of_objective_func = 0 # In the first iteration create this list to have it prepared for later use self.objfun_maxmin_list = [] for _ in self.objectivefunction(self.setup.evaluation(), self.setup.simulation(P[0])): + self.length_of_objective_func += 1 self.objfun_maxmin_list.append([]) param_generator = ((p, list(P[p])) for p in P) @@ -185,40 +220,44 @@ def sample(self, generations=2, paramsamp=20): F = self.fast_non_dominated_sort(R_0) print("GENERATION: " + str(self.generation) + " of " + str(generations)) - layer = 0 - fig = pl.figure() - - if self.length_of_objective_func == 2: - - for i in F: - if layer == 0: - l_color = "b" - else: - l_color = "#" - for _ in range(6): - l_color += ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"][ - np.random.randint(16)] - for j in F[i]: - pl.plot(F[i][j].m_vals[0], F[i][j].m_vals[1], color=l_color, marker='o') - layer += 1 - - pl.show() - - elif self.length_of_objective_func == 3: - - ax = fig.add_subplot(111, projection='3d') - - for i in F: - for j in F[i]: - ax.scatter(F[i][j].m_vals[0], F[i][j].m_vals[1], - F[i][j].m_vals[2]) # , lay_col[layer] + 'o') - layer += 1 - - # ax.set_xlabel(m1_name) - # ax.set_ylabel(m2_name) - # ax.set_zlabel(m3_name) - pl.show() + # Debuggin Issue + # import matplotlib.pyplot as pl + # from mpl_toolkits.mplot3d import Axes3D + # layer = 0 + # fig = pl.figure() + # + # if self.length_of_objective_func == 2: + # + # for i in F: + # if layer == 0: + # l_color = "b" + # else: + # l_color = "#" + # for _ in range(6): + # l_color += ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"][ + # np.random.randint(16)] + # for j in F[i]: + # pl.plot(F[i][j].m_vals[0], F[i][j].m_vals[1], color=l_color, marker='o') + # layer += 1 + # + # pl.show() + # + # elif self.length_of_objective_func == 3: + # + # ax = fig.add_subplot(111, projection='3d') + # + # for i in F: + # for j in F[i]: + # ax.scatter(F[i][j].m_vals[0], F[i][j].m_vals[1], + # F[i][j].m_vals[2]) # , lay_col[layer] + 'o') + # layer += 1 + # + # # ax.set_xlabel(m1_name) + # # ax.set_ylabel(m2_name) + # # ax.set_zlabel(m3_name) + # + # pl.show() # Now sort again complete_sort_all_p = [] diff --git a/spotpy/examples/tutorial_nsgaii.py b/spotpy/examples/tutorial_nsgaii.py new file mode 100644 index 00000000..e3cb0f86 --- /dev/null +++ b/spotpy/examples/tutorial_nsgaii.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# coding: utf-8 + + +from __future__ import division, print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +try: + import spotpy +except ImportError: + import sys + + sys.path.append(".") + import spotpy + +import spotpy.algorithms +from spotpy.examples.hymod_python.hymod import hymod +import os +import unittest + + +class spot_setup(object): + def __init__(self): + # Transform [mm/day] into [l s-1], where 1.783 is the catchment area + self.Factor = 1.783 * 1000 * 1000 / (60 * 60 * 24) + # Load Observation data from file + self.PET, self.Precip = [], [] + self.date, self.trueObs = [], [] + self.owd = os.path.dirname(os.path.realpath(__file__)) + self.hymod_path = self.owd + os.sep + 'hymod_python' + climatefile = open(self.hymod_path + os.sep + 'hymod_input.csv', 'r') + headerline = climatefile.readline()[:-1] + + if ';' in headerline: + self.delimiter = ';' + else: + self.delimiter = ',' + self.header = headerline.split(self.delimiter) + for line in climatefile: + values = line.strip().split(self.delimiter) + self.date.append(str(values[0])) + self.Precip.append(float(values[1])) + self.PET.append(float(values[2])) + self.trueObs.append(float(values[3])) + + climatefile.close() + self.params = [spotpy.parameter.Uniform('cmax', low=1.0, high=500, optguess=412.33), + spotpy.parameter.Uniform('bexp', low=0.1, high=2.0, optguess=0.1725), + spotpy.parameter.Uniform('alpha', low=0.1, high=0.99, optguess=0.8127), + spotpy.parameter.Uniform('Ks', low=0.0, high=0.10, optguess=0.0404), + spotpy.parameter.Uniform('Kq', low=0.1, high=0.99, optguess=0.5592), + spotpy.parameter.Uniform('fake1', low=0.1, high=10, optguess=0.5592), + spotpy.parameter.Uniform('fake2', low=0.1, high=10, optguess=0.5592)] + + def parameters(self): + return spotpy.parameter.generate(self.params) + + def simulation(self, x): + data = hymod(self.Precip, self.PET, x[0], x[1], x[2], x[3], x[4]) + sim = [] + for val in data: + sim.append(val * self.Factor) + return sim[366:] + + def evaluation(self): + return self.trueObs[366:] + + def objectivefunction(self, simulation, evaluation, params=None): + # like = spotpy.likelihoods.gaussianLikelihoodMeasErrorOut(evaluation,simulation) + # return -like + # like1 = spotpy.signatures.getSkewness(evaluation, simulation) + like1 = spotpy.objectivefunctions.agreementindex(evaluation, simulation) + like2 = spotpy.objectivefunctions.rmse(evaluation, simulation) + like3 = spotpy.signatures.getCoeffVariation(evaluation, simulation) + return [like1, like2, like3] + + + +class Test_NSGAII(unittest.TestCase): + def setUp(self): + self.sp_setup = spot_setup() + self.sampler = spotpy.algorithms.NSGAII(self.sp_setup, dbname='NSGA2', dbformat="csv") + + self.sampler.sample(generations=5, paramsamp=40) + + def test_sampler_output(self): + self.assertGreaterEqual(400, len(self.sampler.getdata())) + self.assertLessEqual(300, len(self.sampler.getdata())) +