From d23d42570d040b1b83e4441758f23018c057eef5 Mon Sep 17 00:00:00 2001 From: tnigon Date: Fri, 28 Sep 2018 09:43:55 -0500 Subject: [PATCH] using real names for db col headings instead of simulation1, etc. --- spotpy/algorithms/_algorithm.py | 54 ++++++++++++++++++--------------- spotpy/database.py | 48 ++++++++++++++++++----------- 2 files changed, 60 insertions(+), 42 deletions(-) diff --git a/spotpy/algorithms/_algorithm.py b/spotpy/algorithms/_algorithm.py index 302ea0e0..48d20977 100644 --- a/spotpy/algorithms/_algorithm.py +++ b/spotpy/algorithms/_algorithm.py @@ -30,7 +30,7 @@ class _RunStatistic(object): """ - this class checks for each run if the objectivefunction got better and holds the + this class checks for each run if the objectivefunction got better and holds the best parameter set. Every _algorithm has an object of this class as status. Usage: @@ -46,7 +46,7 @@ def __init__(self): self.bestrep = 0 self.starttime = time.time() self.last_print = time.time() - + self.repetitions = None def __call__(self, rep, objectivefunction, params): @@ -73,12 +73,12 @@ def print_status(self): if acttime - self.last_print >= 2: avg_time_per_run = (acttime - self.starttime) / (self.rep + 1) timestr = time.strftime("%H:%M:%S", time.gmtime(round(avg_time_per_run * (self.repetitions - (self.rep + 1))))) - + text = '%i of %i (best like=%g) est. time remaining: %s' % (self.rep, self.repetitions, self.objectivefunction, timestr) print(text) self.last_print = time.time() - + def __repr__(self): return 'Best objectivefunction: %g' % self.objectivefunction @@ -90,24 +90,24 @@ class _algorithm(object): Input ---------- spot_setup: class - model: function - Should be callable with a parameter combination of the parameter-function + model: function + Should be callable with a parameter combination of the parameter-function and return an list of simulation results (as long as evaluation list) parameter: function - When called, it should return a random parameter combination. Which can + When called, it should return a random parameter combination. Which can be e.g. uniform or Gaussian - objectivefunction: function - Should return the objectivefunction for a given list of a model simulation and + objectivefunction: function + Should return the objectivefunction for a given list of a model simulation and observation. evaluation: function Should return the true values as return by the model. dbname: str - Name of the database where parameter, objectivefunction value and simulation + Name of the database where parameter, objectivefunction value and simulation results will be saved. dbformat: str ram: fast suited for short sampling time. no file will be created and results are saved in an array. - csv: A csv file will be created, which you can import afterwards. + csv: A csv file will be created, which you can import afterwards. parallel: str seq: Sequentiel sampling (default): Normal iterations on one core of your cpu. mpc: Multi processing: Iterations on all available cores on your (single) pc @@ -121,8 +121,8 @@ class _algorithm(object): alt_objfun: str or None, default: 'rmse' alternative objectivefunction to be used for algorithm * None: the objfun defined in spot_setup.objectivefunction is used - * any str: if str is found in spotpy.objectivefunctions, - this objectivefunction is used, else falls back to None + * any str: if str is found in spotpy.objectivefunctions, + this objectivefunction is used, else falls back to None e.g.: 'log_p', 'rmse', 'bias', 'kge' etc. sim_timeout: float, int or None, default: None the defined model given in the spot_setup class can be controlled to break after 'sim_timeout' seconds if @@ -130,6 +130,8 @@ class _algorithm(object): If the model run has been broken simlply '[nan]' will be returned. random_state: int or None, default: None the algorithms uses the number in random_state as seed for numpy. This way stochastic processes can be reproduced. + simnames: list or None, default: None + a list of strings denoting the names of outputs being simulated by the model. """ _unaccepted_parameter_types = (parameter.List, ) @@ -137,7 +139,8 @@ class _algorithm(object): def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True, dbappend=False, parallel='seq', save_sim=True, alt_objfun=None, breakpoint=None, backup_every_rep=100, save_threshold=-np.inf, - db_precision=np.float16, sim_timeout=None, random_state=None): + db_precision=np.float16, sim_timeout=None, random_state=None, + simnames=None): # Initialize the user defined setup class self.setup = spot_setup # Philipp: Changed from Tobi's version, now we are using both new class defined parameters @@ -155,7 +158,7 @@ def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True, for i, val in enumerate(self.all_params): if self.all_params[i] not in self.constant_positions: self.non_constant_positions.append(i) - else: + else: self.non_constant_positions = np.arange(0,len(self.all_params)) print(self.non_constant_positions) self.parameter = self.get_parameters @@ -171,6 +174,7 @@ def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True, objectivefunctions, alt_objfun or '', None) or self.setup.objectivefunction self.evaluation = self.setup.evaluation() self.save_sim = save_sim + self.simnames = simnames self.dbname = dbname or 'customDb' self.dbformat = dbformat or 'ram' self.db_precision = db_precision @@ -181,11 +185,11 @@ def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True, # 'dbappend' used to append to the existing data base, after restart self.dbinit = dbinit self.dbappend = dbappend - + # Set the random state if random_state is None: #ToDo: Have to discuss if these 3 lines are neccessary. random_state = np.random.randint(low=0, high=2**30) - np.random.seed(random_state) + np.random.seed(random_state) # If value is not None a timeout will set so that the simulation will break after sim_timeout seconds without return a value self.sim_timeout = sim_timeout @@ -275,8 +279,8 @@ def _init_database(self, like, randompar, simulations): self.datawriter = database.get_datawriter(self.dbformat, self.dbname, self.parnames, like, randompar, simulations, save_sim=self.save_sim, dbappend=self.dbappend, - dbinit=self.dbinit, db_precision=self.db_precision, - setup=self.setup) + dbinit=self.dbinit, simnames=self.simnames, + db_precision=self.db_precision, setup=self.setup) self.dbinit = False @@ -323,17 +327,17 @@ def postprocessing(self, rep, params, simulation, chains=1, save=True, negativli # before they actually save the run in a database (e.g. sce-ua) if save is True: if negativlike is True: - self.save(-like, params, simulations=simulation, chains=chains) + self.save(-like, params, simulations=simulation, chains=chains) self.status(rep, -like, params) else: self.save(like, params, simulations=simulation, chains=chains) self.status(rep, like, params) if type(like)==type([]): return like[0] - else: + else: return like - - + + def getfitness(self, simulation, params): """ Calls the user defined spot_setup objectivefunction @@ -343,9 +347,9 @@ def getfitness(self, simulation, params): return self.objectivefunction(evaluation=self.evaluation, simulation=simulation, params = (params,self.parnames)) except TypeError: # Happens if the user does not allow to pass parameter in the spot_setup.objectivefunction - #print('Not using parameters in fitness function') + #print('Not using parameters in fitness function') return self.objectivefunction(evaluation=self.evaluation, simulation=simulation) - + def simulate(self, id_params_tuple): """This is a simple wrapper of the model, returning the result together with the run id and the parameters. This is needed, because some parallel things diff --git a/spotpy/database.py b/spotpy/database.py index da174c95..5cd2eca4 100644 --- a/spotpy/database.py +++ b/spotpy/database.py @@ -30,7 +30,8 @@ class database(object): """ def __init__(self, dbname, parnames, like, randompar, simulations=None, - chains=1, save_sim=True, db_precision=np.float16, **kwargs): + chains=1, save_sim=True, simnames=None, + db_precision=np.float16, **kwargs): # Just needed for the first line in the database self.chains = chains self.dbname = dbname @@ -39,13 +40,14 @@ def __init__(self, dbname, parnames, like, randompar, simulations=None, self.simulations = simulations self.save_sim = save_sim self.db_precision = db_precision + self.simnames = simnames if not save_sim: simulations = None self.dim_dict = {} self.singular_data_lens = [self._check_dims(name, obj) for name, obj in [( 'like', like), ('par', randompar), ('simulation', simulations)]] - self._make_header(simulations,parnames) - + self._make_header(simulations,parnames,simnames) + self.last_flush = time.time() def _check_dims(self, name, obj): @@ -86,7 +88,7 @@ def _iterable_to_list(self, obj): def _array_to_list(self, obj): #print('array') - values = [] + values = [] for val in obj: values.append(val) return values @@ -94,7 +96,7 @@ def _array_to_list(self, obj): def _nestediterable_to_list(self, obj): #print('nested') - values = [] + values = [] for nestedlist in obj: #print(len(nestedlist)) for val in nestedlist: @@ -103,22 +105,34 @@ def _nestediterable_to_list(self, obj): return values #return np.array(obj).flatten().tolist() - def _make_header(self, simulations,parnames): + def _make_header(self, simulations, parnames, simnames=None): + ''' + Builds header for the database + should be a list of names of outputs being simulated by the + model + ''' + if simnames is not None and not isinstance(simnames, list): + simnames = None # Be sure simnames is either None or list self.header = [] self.header.extend(['like' + '_'.join(map(str, x)) for x in product(*self._tuple_2_xrange(self.singular_data_lens[0]))]) self.header.extend(['par{0}'.format(x) for x in parnames]) #print(self.singular_data_lens[2]) - #print(type(self.singular_data_lens[2])) + #print(type(self.singular_data_lens[2])) if self.save_sim: for i in range(len(simulations)): if type(simulations[0]) == type([]) or type(simulations[0]) == type(np.array([])): for j in range(len(simulations[i])): - self.header.extend(['simulation' + str(i+1)+'_'+str(j+1)]) + if simnames is None: + self.header.extend(['sim' + str(i+1)+'_'+str(j+1)]) + else: + self.header.extend(['sim' + str(simnames[i])+'_'+str(j+1)]) else: - self.header.extend(['simulation' + '_'+str(i)]) + if simnames is None: + self.header.extend(['sim' + '_'+str(i+1)]) + else: + self.header.extend(['sim' + str(simnames[i])]) #for x in product(*self._tuple_2_xrange(self.singular_data_lens[2]))]) - self.header.append('chain') def _tuple_2_xrange(self, t): @@ -146,7 +160,7 @@ def save(self, objectivefunction, parameterlist, simulations=None, def finalize(self): """ - Is called in a last step of every algorithm. + Is called in a last step of every algorithm. Forms the List of values into a strutured numpy array in order to have the same structure as a csv database. """ @@ -154,11 +168,11 @@ def finalize(self): 'formats': [np.float] * len(self.header)} i = 0 Y = np.zeros(len(self.ram), dtype=dt) - + for line in self.ram: Y[i] = line i+=1 - + self.data = Y def getdata(self): @@ -205,7 +219,7 @@ def save(self, objectivefunction, parameterlist, simulations=None, chains=1): coll = map(self.db_precision, coll) self.db.write( ','.join(map(str, coll)) + '\n') - + acttime = time.time() # Force writing to disc at least every two seconds if acttime - self.last_flush >= 2: @@ -253,7 +267,7 @@ def __init__(self, *args, **kwargs): # init base class super(sql, self).__init__(*args, **kwargs) # Create a open file, which needs to be closed after the sampling - try: + try: os.remove(self.dbname + '.db') except: pass @@ -297,12 +311,12 @@ def getdata(self): # Workaround for python2 headers = [(unicode(row[1]).encode("ascii"), unicode("