diff --git a/README.md b/README.md index 57ff420..386849d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Inverse modelling pyrolization kinetics with ensemble learning methods - scripts -These scripts can be used to reproduce the study described in the article "Inverse modelling pyrolization kinetics with ensemble learning methods"[[1]]. The scripts here are intended to be used with default configuration, if not noted otherwise. To apply this model to your own data, you can adopt the scripts shown in ./test_model/. +These scripts can be used to reproduce the study described in the article "Inverse modelling pyrolization kinetics with ensemble learning methods" [[1]]. The scripts here are intended to be used with default configuration, if not noted otherwise. To apply this model to your own data, you can adopt the scripts shown in ./test_model/. ## ./generate_db/ @@ -106,7 +106,7 @@ Corresponding $T$ is 20...550 °C with $\Delta T=2K$. Then, $t$ is $\frac{T}{\be ### `sm1.pickle` -The file holds a single python element of class `sklearn.ensemble.ExtraTreesClassifier`[[4]]. It is pre trained to estimate the number of components with single reactions represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. +The file holds a single python element of class `sklearn.ensemble.ExtraTreesClassifier` [[4]]. It is pre trained to estimate the number of components with single reactions represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. Using `sm1.predict(X)` expects 1064 features as input with following properties: @@ -125,7 +125,7 @@ Output is a single integer of 1,2 or 3 as the number of components in the materi ### `sm3_1r.pickle` -The file holds a single python element of class `sklearn.ensemble.ExtraTreesRegressor`[[6]]. It is pre trained to estimate the reaction kinetic parameters of materials consisting of one component represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. +The file holds a single python element of class `sklearn.ensemble.ExtraTreesRegressor` [[6]]. It is pre trained to estimate the reaction kinetic parameters of materials consisting of one component represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. Using `sm3_1r.predict(X)` expects 1067 features as input with following properties: @@ -146,7 +146,7 @@ Output is $log(A_1)$ and $E_1$. ### `sm3_2r.pickle` -The file holds a single python element of class `sklearn.ensemble.ExtraTreesRegressor`[[6]]. It is pre trained to estimate the reaction kinetic parameters of materials consisting of two components represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. +The file holds a single python element of class `sklearn.ensemble.ExtraTreesRegressor` [[6]]. It is pre trained to estimate the reaction kinetic parameters of materials consisting of two components represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. Using `sm3_2r.predict(X)` expects 1067 features as input with following properties: @@ -166,7 +166,7 @@ Output is $log(A_1)$, $log(A_2)$, $E_1$ and $E_2$. ### `sm3_3r.pickle` -The file holds a single python element of class `sklearn.ensemble.ExtraTreesRegressor`[[6]]. It is pre trained to estimate the reaction kinetic parameters of materials consisting of three components represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. 
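For orientation, a minimal sketch (not part of the diff) of how the pre-trained estimators described in the README hunks above can be used. The feature layout is taken from the README: 4 heating rates × 266 mass loss rate samples (T = 20…550 °C, ΔT = 2 K) give the 1064 inputs of `sm1`, and the 3-component regressor `sm3_3r` additionally takes the three estimated mass fractions (1067 inputs). File paths and the placeholder input values are assumptions for illustration only.

```python
import pickle
import numpy as np

# Load the classifier (number of components) and the 3-component regressor.
with open("sm1.pickle", "rb") as fh:       # path is an assumption
    sm1 = pickle.load(fh)
with open("sm3_3r.pickle", "rb") as fh:    # path is an assumption
    sm3_3r = pickle.load(fh)

# Placeholder mass loss rate data: 4 heating rates x 266 temperature points.
mlr_curves = np.zeros(4 * 266)
n_components = int(sm1.predict([mlr_curves])[0])   # returns 1, 2 or 3

# Hypothetical component mass fractions appended as the last 3 features.
fractions = np.array([0.3, 0.3, 0.4])
kinetics = sm3_3r.predict([np.concatenate((mlr_curves, fractions))])[0]
# kinetics holds log(A_i) and E_i for the three components.
```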
+The file holds a single python element of class `sklearn.ensemble.ExtraTreesRegressor` [[6]]. It is pre trained to estimate the reaction kinetic parameters of materials consisting of three components represented by input mass loss rate data. It can be loaded via pickle[[5]] into Python. Using `sm3_3r.predict(X)` expects 1067 features as input with following properties: @@ -205,5 +205,4 @@ This is the columns description of the output from `calculate.py`. The file has [3]: https://doi.org/10.5281/zenodo.6346476 [4]: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html [5]: https://docs.python.org/3/library/pickle.html -[6]: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html#sklearn.ensemble.ExtraTreesRegressor -[5]: aa \ No newline at end of file +[6]: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html#sklearn.ensemble.ExtraTreesRegressor \ No newline at end of file diff --git a/generate_db/generate_3c.py b/generate_db/generate_3c.py index ebc8533..5dec66a 100644 --- a/generate_db/generate_3c.py +++ b/generate_db/generate_3c.py @@ -53,7 +53,7 @@ def generate(dataset,nr): temp3,masslossrate3,massloss3=calculate_reaction(each[3],each[4],each[2],HR=30., dt=4,T0=Tstart, T1=Tend) each=each1.copy() temp4,masslossrate4,massloss4=calculate_reaction(each[3],each[4],each[2],HR=40., dt=3,T0=Tstart, T1=Tend) - feat.append((temp1,temp2,temp3,temp4,masslossrate1,masslossrate2,masslossrate3,masslossrate4,massloss1,massloss2,massloss3,massloss4)) + feat.append((temp1,temp2,temp3,temp4,masslossrate1,masslossrate2,masslossrate3,masslossrate4,massloss1,massloss2,massloss3,massloss4)) scale=len(temp1) feat=np.array(feat) print('{} done'.format(int(nr))) diff --git a/test_model/calculate.py b/test_model/calculate.py new file mode 100644 index 0000000..e40024d --- /dev/null +++ b/test_model/calculate.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python +# coding: utf-8 + +import time +import math +import numpy as np +import scipy +from scipy.signal import argrelextrema +from scipy.signal import peak_widths +from multiprocessing import Process +import multiprocessing +import pickle + +import warnings +warnings.filterwarnings('ignore') + +def _1gaussian(x_arr, peak1, cen1, sigma1): + x_arr=np.array(x_arr) + return peak1 * (1 / (sigma1 * (np.sqrt(2 * np.pi)))) * (np.exp((-1.0 / 2.0) * (((x_arr - cen1) / sigma1) ** 2))) + +def _2gaussian(x_arr, peak1, cen1, sigma1, peak2, cen2, sigma2): + return _1gaussian(x_arr, peak1, cen1, sigma1)+_1gaussian(x_arr, peak2, cen2, sigma2) + +def _3gaussian(x_arr, peak1, cen1, sigma1, peak2, cen2, sigma2, peak3, cen3, sigma3): + return _1gaussian(x_arr, peak1, cen1, sigma1)+_1gaussian(x_arr, peak2, cen2, sigma2)+_1gaussian(x_arr, peak3, cen3, sigma3) + +def _2components(frt, t): + x_array = list(range(20, 551, 2)) + y_array_2gauss = frt[t * 266:266 + t * 266] + try: + if len(argrelextrema(frt[t * 266:266 + t * 266], np.greater)[0]) == 2: + a1, a2 = argrelextrema(frt[t * 266:266 + t * 266], np.greater)[0] + b1, b2 = peak_widths(frt[t * 266:266 + t * 266], rel_height=.9999, peaks=[a1, a2])[0] + try: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 4 + peak2, cen2, sigma2 = frt[t * 266 + a2], x_array[a2], b2 / 4 + popt_2gauss, pcov_2gauss = scipy.optimize.curve_fit(_2gaussian, x_array, y_array_2gauss, + p0=[peak1, cen1, sigma1, peak2, cen2, sigma2], + maxfev=500000, bounds=(0,np.inf)) + except: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 2 + 
peak2, cen2, sigma2 = frt[t * 266 + a2], x_array[a2], b2 / 2 + popt_2gauss, pcov_2gauss = scipy.optimize.curve_fit(_2gaussian, x_array, y_array_2gauss, + p0=[peak1, cen1, sigma1, peak2, cen2, sigma2], + maxfev=500000, bounds=(0,np.inf)) + pars_1 = popt_2gauss[0:3] + pars_2 = popt_2gauss[3:6] + gauss_peak_1 = _1gaussian(x_array, *pars_1) + gauss_peak_2 = _1gaussian(x_array, *pars_2) + area1 = np.sqrt((np.trapz(gauss_peak_1, x_array)) ** 2) + area2 = np.sqrt((np.trapz(gauss_peak_2, x_array)) ** 2) + else: + a1 = argrelextrema(frt[t * 266:266 + t * 266], np.greater)[0][0] + b1 = peak_widths(frt[t * 266:266 + t * 266], rel_height=.9999, peaks=[a1])[0][0] + 1 + try: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 4 + popt_1gauss, pcov_1gauss = scipy.optimize.curve_fit(_1gaussian, x_array, y_array_2gauss, + p0=[peak1, cen1, sigma1], maxfev=500000, bounds=(0,np.inf)) + except: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 2 + popt_1gauss, pcov_1gauss = scipy.optimize.curve_fit(_1gaussian, x_array, y_array_2gauss, + p0=[peak1, cen1, sigma1], maxfev=500000, bounds=(0,np.inf)) + gauss_peak_1 = _1gaussian(x_array, *popt_1gauss) + area1 = np.sqrt(np.trapz(gauss_peak_1, x_array) ** 2) + area2 = np.sqrt((np.trapz(y_array_2gauss, x_array) - area1) ** 2) + except: + return [0.5, 0.5] + return sorted([area1 / (area1 + area2), area2 / (area1 + area2)]) + + +def _3components(frt, t): + x_array = list(range(20, 551, 2)) + y_array_3gauss = frt[t * 266:266 + t * 266] + try: + if len(argrelextrema(frt[t * 266:266 + t * 266], np.greater)[0]) == 3: + a1, a2, a3 = argrelextrema(frt[t * 266:266 + t * 266], np.greater)[0] + b1, b2, b3 = peak_widths(frt[t * 266:266 + t * 266], rel_height=.9999, peaks=[a1, a2, a3])[0] + try: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 4 + peak2, cen2, sigma2 = frt[t * 266 + a2], x_array[a2], b2 / 4 + peak3, cen3, sigma3 = frt[t * 266 + a3], x_array[a3], b3 / 4 + popt_3gauss, pcov_3gauss = scipy.optimize.curve_fit(_3gaussian, x_array, y_array_3gauss, + p0=[peak1, cen1, sigma1, peak2, cen2, sigma2, peak3, + cen3, sigma3], maxfev=500000, bounds=(0,np.inf)) + except: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 2 + peak2, cen2, sigma2 = frt[t * 266 + a2], x_array[a2], b2 / 2 + peak3, cen3, sigma3 = frt[t * 266 + a3], x_array[a3], b3 / 2 + popt_3gauss, pcov_3gauss = scipy.optimize.curve_fit(_3gaussian, x_array, y_array_3gauss, + p0=[peak1, cen1, sigma1, peak2, cen2, sigma2, peak3, + cen3, sigma3], maxfev=500000, bounds=(0,np.inf)) + pars_1 = popt_3gauss[0:3] + pars_2 = popt_3gauss[3:6] + pars_3 = popt_3gauss[6:9] + gauss_peak_1 = _1gaussian(x_array, *pars_1) + gauss_peak_2 = _1gaussian(x_array, *pars_2) + gauss_peak_3 = _1gaussian(x_array, *pars_3) + area1 = np.sqrt((np.trapz(gauss_peak_1, x_array)) ** 2) + area2 = np.sqrt((np.trapz(gauss_peak_2, x_array)) ** 2) + area3 = np.sqrt((np.trapz(gauss_peak_3, x_array)) ** 2) + else: + a1, a2 = argrelextrema(frt[t * 266:266 + t * 266], np.greater)[0] + b1, b2 = peak_widths(frt[t * 266:266 + t * 266], rel_height=.9999, peaks=[a1, a2])[0] + try: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 4, + peak2, cen2, sigma2 = frt[t * 266 + a2], x_array[a2], b2 / 4 + popt_2gauss, pcov_2gauss = scipy.optimize.curve_fit(_2gaussian, x_array, y_array_3gauss, + p0=[peak1, cen1, sigma1, peak2, cen2, sigma2], + maxfev=500000, bounds=(0,np.inf)) + except: + peak1, cen1, sigma1 = frt[t * 266 + a1], x_array[a1], b1 / 2, + peak2, cen2, sigma2 = frt[t * 266 + a2], x_array[a2], b2 / 2 + 
popt_2gauss, pcov_2gauss = scipy.optimize.curve_fit(_2gaussian, x_array, y_array_3gauss, + p0=[peak1, cen1, sigma1, peak2, cen2, sigma2], + maxfev=500000, bounds=(0,np.inf)) + pars_1 = popt_2gauss[0:3] + pars_2 = popt_2gauss[3:6] + gauss_peak_1 = _1gaussian(x_array, *pars_1) + gauss_peak_2 = _1gaussian(x_array, *pars_2) + area1 = np.sqrt((np.trapz(gauss_peak_1, x_array)) ** 2) + area2 = np.sqrt((np.trapz(gauss_peak_2, x_array)) ** 2) + area3 = np.sqrt((np.trapz(y_array_3gauss, x_array) - area1 - area2) ** 2) + except: + return [0.35, 0.35, 0.3] + return sorted([area1 / (area1 + area2 + area3), area2 / (area1 + area2 + area3), area3 / (area1 + area2 + area3)]) + + +def predictnumbers(ftz): + n = moda.predict([ftz]) + try: + if n == 1: + reslt = np.concatenate((modc1.predict([np.append(ftz, 0)])[0], [1, 0, 0])) + reslt = np.insert(reslt, [1,1,2,2], 0) + elif n == 2: + ex = np.zeros((4, 2)) + for jt, teach in enumerate(ex): + ex[jt] = np.sort(_2components(ftz, jt)) + reslt = np.concatenate( + (modc2.predict([np.concatenate((ftz, np.nanmean(ex, axis=0)))])[0], np.nanmean(ex, axis=0))) + reslt = np.insert(reslt, [2,4,6], 0) + elif n == 3: + ex = np.zeros((4, 3)) + for jt, teach in enumerate(ex): + ex[jt] = np.sort(_3components(ftz, jt)) + reslt = np.concatenate( + (modc3.predict([np.concatenate((ftz, np.nanmean(ex, axis=0)))])[0], np.nanmean(ex, axis=0))) + except: + reslt = np.zeros(9) + return reslt + + +def multipredict(datazet, nr): + datazet = np.array(datazet) + for ik, each in enumerate(datazet): + return_list[len(datazet) * nr + ik] = predictnumbers(each) + + +cores = 100 +samplesize = 150000 # NUMBER OF SAMPLES +samplesperset = 266 + +firstmodel1 = "../build_model/sm1.pickle" +thirdmodel1 = "../build_model/sm3_1r.pickle" +thirdmodel2 = "../build_model/sm3_2r.pickle" +thirdmodel3 = "../build_model/sm3_3r.pickle" + +print('Loading dataset') +labels = [] +features = [] +x_array = list(range(20, 551, 2)) +for i in [125, ]: + feat = "../generate_db/features6400k_5_10_30_40_1r_2ks_" + str(i) + ".csv" + lab = "../generate_db/labels6400k_5_10_30_40_1r_2ks_" + str(i) + ".csv" + + labels_temp = np.genfromtxt(lab, delimiter=',', dtype=np.float64)[:, 9:] + features_temp = np.genfromtxt(feat, delimiter=',', dtype=np.float64) + labels_temp[:, 0] = np.log(labels_temp[:, 0]) + features.append(features_temp) + labels.append(labels_temp) + +for i in [125, ]: + feat = "../generate_db/features6400k_5_10_30_40_2r_2ks_" + str(i) + ".csv" + lab = "../generate_db/labels6400k_5_10_30_40_2r_2ks_" + str(i) + ".csv" + + labels_temp = np.genfromtxt(lab, delimiter=',', dtype=np.float64)[:, 9:] + features_temp = np.genfromtxt(feat, delimiter=',', dtype=np.float64) + labels_temp[:, 0] = np.log(labels_temp[:, 0]) + labels_temp[:, 1] = np.log(labels_temp[:, 1]) + features.append(features_temp) + labels.append(labels_temp) + +for i in [125, ]: + feat = "../generate_db/features6400k_5_10_30_40_3r_2ks_" + str(i) + ".csv" + lab = "../generate_db/labels6400k_5_10_30_40_3r_2ks_" + str(i) + ".csv" + + labels_temp = np.genfromtxt(lab, delimiter=',', dtype=np.float64)[:, 9:] + features_temp = np.genfromtxt(feat, delimiter=',', dtype=np.float64) + labels_temp[:, 0] = np.log(labels_temp[:, 0]) + labels_temp[:, 1] = np.log(labels_temp[:, 1]) + labels_temp[:, 2] = np.log(labels_temp[:, 2]) + features.append(features_temp) + labels.append(labels_temp) + +features = np.array(features).reshape(samplesize, -1) +labels = np.array(labels).reshape(samplesize, -1) + +print('Dataset loaded') + +print('Loading models') +with 
open(thirdmodel2, 'rb') as file: + modc2 = pickle.load(file) +with open(thirdmodel3, 'rb') as file: + modc3 = pickle.load(file) +with open(firstmodel1, 'rb') as file: + moda = pickle.load(file) +with open(thirdmodel1, 'rb') as file: + modc1 = pickle.load(file) + +print('Models loaded') + +k = samplesize +eval_predictions = features[:k] + +starttime = time.time() +if __name__ == '__main__': + processes = [] + manager = multiprocessing.Manager() + return_list = manager.list([0] * k) + for n in range(cores): + p = Process(target=multipredict, + args=(eval_predictions[int((n * (k / cores))):int(((k / cores) + n * (k / cores)))], n,)) + processes.append(p) + for p in processes: + p.start() + for p in processes: + p.join() +endtime = time.time() +duration = endtime - starttime +print("Calculation Duration: {}s".format(duration)) +print(" ") +prediction = np.array(return_list) + +np.savetxt("prediction.csv", prediction, delimiter=",") + +print('Prediction finished') diff --git a/test_model/evaluate.py b/test_model/evaluate.py index fd57165..e717385 100644 --- a/test_model/evaluate.py +++ b/test_model/evaluate.py @@ -1,143 +1,112 @@ #!/usr/bin/env python # coding: utf-8 -import time -import math -import numpy as np -import scipy -from scipy.signal import argrelextrema -from scipy.signal import peak_widths -from multiprocessing import Process -from sklearn.ensemble import ExtraTreesRegressor -from sklearn.ensemble import ExtraTreesClassifier -import multiprocessing -import pickle -import os import time import math import numpy as np import matplotlib.pyplot as plt -from matplotlib import colors import pandas as pd -import scipy -import statistics -import csv -from random import randrange, uniform -from multiprocessing import Process -import multiprocessing -from scipy import interpolate -from scipy.spatial import ConvexHull -from matplotlib.image import NonUniformImage -from matplotlib.colors import LogNorm -from dot2tex import dot2tex -from sklearn.metrics import accuracy_score -from sklearn.metrics import precision_score -from sklearn.metrics import recall_score -from sklearn.metrics import confusion_matrix -from sklearn.datasets import make_classification -from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay -from sklearn.model_selection import train_test_split -from sklearn.svm import SVC -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.tree import export_graphviz -from subprocess import call -from IPython.display import Image -from scipy.signal import argrelextrema -from scipy.signal import peak_widths -from scipy.signal import find_peaks -from scipy.signal import find_peaks_cwt -from random import randrange, uniform -from scipy import interpolate -from scipy.spatial import ConvexHull from multiprocessing import Process import multiprocessing from sklearn.metrics import mean_squared_error from sklearn.metrics import r2_score -from sklearn.model_selection import KFold -from matplotlib.patches import Rectangle import itertools -def convert_tga_to_rk(rr,rt,hr,Y=1): - #[rr]=/s,[rt]=°C,[hr]=K/min, [E]=J/mol, [A]=1/s - rt=rt+273 - hr=hr/60 - E=(np.exp(1)*rr*8.3145*(rt**2))/hr - A=(np.exp(1)*rr*np.exp((E/(8.3145*rt)),dtype=np.float128)) - return(A,E) - -def reactionrate(A,E,y,T,n=1): - r=A*y**n*math.exp(-E/8.3145/T) - return(r) - -def calculate_reaction(A,E,Ys,T0=20,T1=550,HR=5.,dt=5): - Y=Ys.copy() - T0,T1=T0+273,T1+273 - x=np.zeros(((int((T1-T0)*(60./(HR*dt))+1),3))) - x[:,0]=np.linspace(T0,T1,int((T1-T0)*(60./(HR*dt))+1)) + +def convert_tga_to_rk(rr, rt, hr, Y=1): + 
# [rr]=/s,[rt]=°C,[hr]=K/min, [E]=J/mol, [A]=1/s + rt = rt + 273 + hr = hr / 60 + E = (np.exp(1) * rr * 8.3145 * (rt ** 2)) / hr + A = (np.exp(1) * rr * np.exp((E / (8.3145 * rt)), dtype=np.float128)) + return (A, E) + + +def reactionrate(A, E, y, T, n=1): + r = A * y ** n * math.exp(-E / 8.3145 / T) + return (r) + + +def calculate_reaction(A, E, Ys, T0=20, T1=550, HR=5., dt=5): + Y = Ys.copy() + T0, T1 = T0 + 273, T1 + 273 + x = np.zeros(((int((T1 - T0) * (60. / (HR * dt)) + 1), 3))) + x[:, 0] = np.linspace(T0, T1, int((T1 - T0) * (60. / (HR * dt)) + 1)) for i in x: - k=0 - for c,t in enumerate(Y): + k = 0 + for c, t in enumerate(Y): if Y[c] == 0: pass else: - r=reactionrate(A[c],E[c],Y[c],i[0]) - Y[c]=Y[c]-(r*dt) - k=k+r - if Y[c]<0: - r,Y[c]=0,0 - i[2]=sum(Y) - i[1]=k - x[:,0]=x[:,0]-273 - return(x.T) - - -def generate(datazet,nr): - datazet=np.array(datazet) - for ik,each in enumerate(datazet): - temp1,masslossrate1,massloss1=calculate_reaction(np.exp(each[0:reactions]),each[3:6],each[6:9],HR=5.,dt=24) - temp2,masslossrate2,massloss2=calculate_reaction(np.exp(each[0:reactions]),each[3:6],each[6:9],HR=10.,dt=12) - temp3,masslossrate3,massloss3=calculate_reaction(np.exp(each[0:reactions]),each[3:6],each[6:9],HR=30., dt=4) - temp4,masslossrate4,massloss4=calculate_reaction(np.exp(each[0:reactions]),each[3:6],each[6:9],HR=40., dt=3) - return_list[len(datazet)*nr+ik]=temp1,temp2,temp3,temp4,masslossrate1,masslossrate2,masslossrate3,masslossrate4,massloss1,massloss2,massloss3,massloss4 - - - -def generate_2(datazet,labels,nr): - datazet=np.array(datazet) - for ik,each in enumerate(datazet): - exampleset1=[] + r = reactionrate(A[c], E[c], Y[c], i[0]) + Y[c] = Y[c] - (r * dt) + k = k + r + if Y[c] < 0: + r, Y[c] = 0, 0 + i[2] = sum(Y) + i[1] = k + x[:, 0] = x[:, 0] - 273 + return (x.T) + + +def generate(datazet, nr): + datazet = np.array(datazet) + for ik, each in enumerate(datazet): + temp1, masslossrate1, massloss1 = calculate_reaction(np.exp(each[0:reactions]), each[3:6], each[6:9], HR=5., + dt=24) + temp2, masslossrate2, massloss2 = calculate_reaction(np.exp(each[0:reactions]), each[3:6], each[6:9], HR=10., + dt=12) + temp3, masslossrate3, massloss3 = calculate_reaction(np.exp(each[0:reactions]), each[3:6], each[6:9], HR=30., + dt=4) + temp4, masslossrate4, massloss4 = calculate_reaction(np.exp(each[0:reactions]), each[3:6], each[6:9], HR=40., + dt=3) + return_list[ + len(datazet) * nr + ik] = temp1, temp2, temp3, temp4, masslossrate1, masslossrate2, masslossrate3, masslossrate4, massloss1, massloss2, massloss3, massloss4 + + +def generate_2(datazet, labels, nr): + datazet = np.array(datazet) + for ik, each in enumerate(datazet): + exampleset1 = [] exampleset1.append(labels[ik]) - exampleset1=np.array(exampleset1) - exampleset1[:,0:3]=np.exp(exampleset1[:,0:3]) - feat1=generate_3(exampleset1,3) - exampleset3=[] + exampleset1 = np.array(exampleset1) + exampleset1[:, 0:3] = np.exp(exampleset1[:, 0:3]) + feat1 = generate_3(exampleset1, 3) + exampleset3 = [] for eacht in lista: exampleset3.append(each[eacht]) - exampleset3=np.array(exampleset3) - exampleset3[:,0:3][exampleset3[:,0:3]>0]=np.exp(exampleset3[:,0:3][exampleset3[:,0:3]>0]) - feat3=generate_3(exampleset3,3) - minim2=[] + exampleset3 = np.array(exampleset3) + exampleset3[:, 0:3][exampleset3[:, 0:3] > 0] = np.exp(exampleset3[:, 0:3][exampleset3[:, 0:3] > 0]) + feat3 = generate_3(exampleset3, 3) + minim2 = [] for eachs in feat3: - 
minim2.append(np.sqrt(mean_squared_error(np.array(feat1[0,4:8]).reshape(1,-1),np.array(eachs[4:8]).reshape(1,-1)))/(np.max(np.array(feat1[0,4:8]).reshape(1,-1))-np.min(np.array(feat1[0,4:8]).reshape(1,-1)))) - return_list[len(datazet)*nr+ik]=np.min(minim2) - exampleset3[:,0:3][exampleset3[:,0:3]>0]=np.log(exampleset3[:,0:3][exampleset3[:,0:3]>0]) - return_prediction[len(datazet)*nr+ik]=exampleset3[np.argsort(minim2)[0]] - - -def generate_3(dataset,nr): - dataset=np.array(dataset) - feat=[] - for ik,each in enumerate(np.copy(dataset)): - temp1,masslossrate1,massloss1=calculate_reaction(each[0:3],each[3:6],each[6:9],HR=5.,dt=24) - temp2,masslossrate2,massloss2=calculate_reaction(each[0:3],each[3:6],each[6:9],HR=10.,dt=12) - temp3,masslossrate3,massloss3=calculate_reaction(each[0:3],each[3:6],each[6:9],HR=30., dt=4) - temp4,masslossrate4,massloss4=calculate_reaction(each[0:3],each[3:6],each[6:9],HR=40., dt=3) - feat.append((temp1,temp2,temp3,temp4,masslossrate1,masslossrate2,masslossrate3,masslossrate4,massloss1,massloss2,massloss3,massloss4)) - scale=len(temp1) - feat=np.array(feat) + minim2.append(np.sqrt( + mean_squared_error(np.array(feat1[0, 4:8]).reshape(1, -1), np.array(eachs[4:8]).reshape(1, -1))) / ( + np.max(np.array(feat1[0, 4:8]).reshape(1, -1)) - np.min( + np.array(feat1[0, 4:8]).reshape(1, -1)))) + return_list[len(datazet) * nr + ik] = np.min(minim2) + exampleset3[:, 0:3][exampleset3[:, 0:3] > 0] = np.log(exampleset3[:, 0:3][exampleset3[:, 0:3] > 0]) + return_prediction[len(datazet) * nr + ik] = exampleset3[np.argsort(minim2)[0]] + + +def generate_3(dataset, nr): + dataset = np.array(dataset) + feat = [] + for ik, each in enumerate(np.copy(dataset)): + temp1, masslossrate1, massloss1 = calculate_reaction(each[0:3], each[3:6], each[6:9], HR=5., dt=24) + temp2, masslossrate2, massloss2 = calculate_reaction(each[0:3], each[3:6], each[6:9], HR=10., dt=12) + temp3, masslossrate3, massloss3 = calculate_reaction(each[0:3], each[3:6], each[6:9], HR=30., dt=4) + temp4, masslossrate4, massloss4 = calculate_reaction(each[0:3], each[3:6], each[6:9], HR=40., dt=3) + feat.append((temp1, temp2, temp3, temp4, masslossrate1, masslossrate2, masslossrate3, masslossrate4, massloss1, + massloss2, massloss3, massloss4)) + scale = len(temp1) + feat = np.array(feat) return feat +samplesize = 150000 # NUMBER OF SAMPLES +samplesperset = 266 + print('Loading Dataset....') labels = [] features = [] @@ -178,202 +147,200 @@ def generate_3(dataset,nr): features = np.array(features).reshape(samplesize, -1) labels = np.array(labels).reshape(samplesize, -1) -predictions=np.genfromtxt('prediction.csv', delimiter=',', dtype=np.float64) +predictions = np.genfromtxt('prediction.csv', delimiter=',', dtype=np.float64) print('Dataset loaded....') -sort_prs=np.fliplr(np.argsort(labels[:,6:9])) -sort_prs=np.concatenate((sort_prs,sort_prs+3,sort_prs+6),axis=1) -sort_pre=np.fliplr(np.argsort(predictions[:,6:9])) -sort_pre=np.concatenate((sort_pre,sort_pre+3,sort_pre+6),axis=1) -complete_prediction=np.take_along_axis(predictions, sort_pre,axis=1) -complete_labels=np.take_along_axis(labels, sort_prs, axis=1) +sort_prs = np.fliplr(np.argsort(labels[:, 6:9])) +sort_prs = np.concatenate((sort_prs, sort_prs + 3, sort_prs + 6), axis=1) +sort_pre = np.fliplr(np.argsort(predictions[:, 6:9])) +sort_pre = np.concatenate((sort_pre, sort_pre + 3, sort_pre + 6), axis=1) +complete_prediction = np.take_along_axis(predictions, sort_pre, axis=1) +complete_labels = np.take_along_axis(labels, sort_prs, axis=1) #################### # 
Evaluation Table # #################### -n1=[] -n2=[] -n3=[] -alln=[] +n1 = [] +n2 = [] +n3 = [] +alln = [] -n1.append(round(r2_score(complete_labels[0:50000,0],complete_prediction[0:50000,0]),2)) -n2.append(r2_score(complete_labels[50000:100000,0:2],complete_prediction[50000:100000,0:2])) -n3.append(r2_score(complete_labels[100000:150000,0:3],complete_prediction[100000:150000,0:3])) +n1.append(round(r2_score(complete_labels[0:50000, 0], complete_prediction[0:50000, 0]), 2)) +n2.append(r2_score(complete_labels[50000:100000, 0:2], complete_prediction[50000:100000, 0:2])) +n3.append(r2_score(complete_labels[100000:150000, 0:3], complete_prediction[100000:150000, 0:3])) -n1.append(round(r2_score(complete_labels[0:50000,3],complete_prediction[0:50000,3]),2)) -n2.append(r2_score(complete_labels[50000:100000,3:5],complete_prediction[50000:100000,3:5])) -n3.append(r2_score(complete_labels[100000:150000,3:6],complete_prediction[100000:150000,3:6])) +n1.append(round(r2_score(complete_labels[0:50000, 3], complete_prediction[0:50000, 3]), 2)) +n2.append(r2_score(complete_labels[50000:100000, 3:5], complete_prediction[50000:100000, 3:5])) +n3.append(r2_score(complete_labels[100000:150000, 3:6], complete_prediction[100000:150000, 3:6])) n1.append('-') -n2.append(r2_score((complete_labels[50000:100000,6:8]),(complete_prediction[50000:100000,6:8]))) -n3.append(r2_score((complete_labels[100000:150000,6:9]),(complete_prediction[100000:150000,6:9]))) - -n1.append(round(r2_score(complete_labels[0:50000,[0,3]],complete_prediction[0:50000,[0,3]]),2)) -n2.append(r2_score(complete_labels[50000:100000,[0,1,3,4,6,7]],complete_prediction[50000:100000,[0,1,3,4,6,7]])) -n3.append(r2_score(complete_labels[100000:150000],complete_prediction[100000:150000])) - -alln.append(r2_score(complete_labels[0:150000,0:3],complete_prediction[0:150000,0:3])) -alln.append(r2_score(complete_labels[0:150000,3:6],complete_prediction[0:150000,3:6])) -alln.append(r2_score(complete_labels[0:150000,6:9],complete_prediction[0:150000,6:9])) -alln.append(r2_score(complete_labels[0:150000],complete_prediction[0:150000])) - -complete_labels[(complete_labels[0:150000]>0) & (complete_prediction[0:150000] > 0)].shape +n2.append(r2_score((complete_labels[50000:100000, 6:8]), (complete_prediction[50000:100000, 6:8]))) +n3.append(r2_score((complete_labels[100000:150000, 6:9]), (complete_prediction[100000:150000, 6:9]))) + +n1.append(round(r2_score(complete_labels[0:50000, [0, 3]], complete_prediction[0:50000, [0, 3]]), 2)) +n2.append( + r2_score(complete_labels[50000:100000, [0, 1, 3, 4, 6, 7]], complete_prediction[50000:100000, [0, 1, 3, 4, 6, 7]])) +n3.append(r2_score(complete_labels[100000:150000], complete_prediction[100000:150000])) + +alln.append(r2_score(complete_labels[0:150000, 0:3], complete_prediction[0:150000, 0:3])) +alln.append(r2_score(complete_labels[0:150000, 3:6], complete_prediction[0:150000, 3:6])) +alln.append(r2_score(complete_labels[0:150000, 6:9], complete_prediction[0:150000, 6:9])) +alln.append(r2_score(complete_labels[0:150000], complete_prediction[0:150000])) + +data = {'$n=1$': n1, + '$n=2$': n2, + '$n=3$': n3, + 'all $n$': alln} + +df = pd.DataFrame(data, index=["$A_i$", "$E_i$", "$Y_i$", "total"]) +df.index.name = 'Parameter' +print((df.round(2)).to_latex(escape=False)) -data={'$n=1$':n1, - '$n=2$':n2, - '$n=3$':n3, - 'all $n$':alln} +k = len(complete_prediction) +reactions = 3 +corez = 100 -df=pd.DataFrame(data,index=["$A_i$","$E_i$","$Y_i$","total"]) -df.index.name='Parameter' 
-print((df.round(2)).to_latex(escape=False)) -textfile = open("tab:evaluation.tex", "w") -a = textfile.write((df.round(2)).to_latex(escape=False)) -textfile.close() - -k=len(complete_prediction) -reactions=3 -corez=100 - -starttime=time.time() +starttime = time.time() if __name__ == '__main__': processes = [] - manager=multiprocessing.Manager() - return_list=manager.list([0]*k) + manager = multiprocessing.Manager() + return_list = manager.list([0] * k) for n in range(corez): - p=Process(target=generate, args=(complete_prediction[int((n*(k/corez))):int(((k/corez)+n*(k/corez)))],n,)) + p = Process(target=generate, + args=(complete_prediction[int((n * (k / corez))):int(((k / corez) + n * (k / corez)))], n,)) processes.append(p) for p in processes: p.start() for p in processes: p.join() -endtime=time.time() -duration=endtime-starttime -pred_feat=np.array(return_list) -prediction_features=(np.copy(pred_feat[:,4:8])).reshape(k,266*4) -dataset=[] - -for it,each2 in enumerate(np.copy(features)): - dataset.append(np.sqrt(mean_squared_error(features[it],prediction_features[it]))/(np.max(features[it])-np.min(features[it]))) -complete_histogram=np.array(dataset) - -r2_histogram=[] -k=50000 -reactions=2 -corez=200 -r2_labels=complete_labels[50000:50000+k] -r2_prediction=complete_prediction[50000:50000+k] - -a1=list(itertools.permutations([0,1])) -a2=list(itertools.permutations([3,4])) -a3=list(itertools.permutations([6,7])) - -listb=np.array(list(itertools.product(a1,a2,a3))).reshape(8,-1) -lista=np.zeros((8,9)).astype(int) - -lista[:,0:2]=listb[:,0:2] -lista[:,2]=2 -lista[:,3:5]=listb[:,2:4] -lista[:,5]=5 -lista[:,6:8]=listb[:,4:6] -lista[:,8]=8 -print(lista) - -starttime=time.time() +endtime = time.time() +duration = endtime - starttime +pred_feat = np.array(return_list) +prediction_features = (np.copy(pred_feat[:, 4:8])).reshape(k, 266 * 4) +dataset = [] + +for it, each2 in enumerate(np.copy(features)): + dataset.append(np.sqrt(mean_squared_error(features[it], prediction_features[it])) / ( + np.max(features[it]) - np.min(features[it]))) +complete_histogram = np.array(dataset) + +r2_histogram = [] +k = 50000 +reactions = 2 +corez = 200 +r2_labels = complete_labels[50000:50000 + k] +r2_prediction = complete_prediction[50000:50000 + k] + +a1 = list(itertools.permutations([0, 1])) +a2 = list(itertools.permutations([3, 4])) +a3 = list(itertools.permutations([6, 7])) + +listb = np.array(list(itertools.product(a1, a2, a3))).reshape(8, -1) +lista = np.zeros((8, 9)).astype(int) + +lista[:, 0:2] = listb[:, 0:2] +lista[:, 2] = 2 +lista[:, 3:5] = listb[:, 2:4] +lista[:, 5] = 5 +lista[:, 6:8] = listb[:, 4:6] +lista[:, 8] = 8 + +starttime = time.time() if __name__ == '__main__': processes = [] - manager=multiprocessing.Manager() - return_list=manager.list([0]*k) - return_prediction=manager.list([0]*k) + manager = multiprocessing.Manager() + return_list = manager.list([0] * k) + return_prediction = manager.list([0] * k) for n in range(corez): - p=Process(target=generate_2, args=(r2_prediction[int((n*(k/corez))):int(((k/corez)+n*(k/corez)))],r2_labels[int((n*(k/corez))):int(((k/corez)+n*(k/corez)))],n,)) + p = Process(target=generate_2, args=(r2_prediction[int((n * (k / corez))):int(((k / corez) + n * (k / corez)))], + r2_labels[int((n * (k / corez))):int(((k / corez) + n * (k / corez)))], + n,)) processes.append(p) for p in processes: p.start() for p in processes: p.join() -endtime=time.time() -duration=endtime-starttime -print(duration) +endtime = time.time() +duration = endtime - starttime 
-r2_histogram=np.array(return_list) -r2_results=np.array(return_prediction) +r2_histogram = np.array(return_list) +r2_results = np.array(return_prediction) -a1=list(itertools.permutations([0,1,2])) -a2=list(itertools.permutations([3,4,5])) -a3=list(itertools.permutations([6,7,8])) +a1 = list(itertools.permutations([0, 1, 2])) +a2 = list(itertools.permutations([3, 4, 5])) +a3 = list(itertools.permutations([6, 7, 8])) -lista=np.array(list(itertools.product(a1,a2,a3))).reshape(216,-1) -print(lista) +lista = np.array(list(itertools.product(a1, a2, a3))).reshape(216, -1) -r3_histogram=[] -k=50000 -reactions=3 -corez=200 +r3_histogram = [] +k = 50000 +reactions = 3 +corez = 200 -r3_labels=complete_labels[100000:100000+k] -r3_prediction=complete_prediction[100000:100000+k] +r3_labels = complete_labels[100000:100000 + k] +r3_prediction = complete_prediction[100000:100000 + k] -starttime=time.time() +starttime = time.time() if __name__ == '__main__': processes = [] - manager=multiprocessing.Manager() - return_list=manager.list([0]*k) - return_prediction=manager.list([0]*k) + manager = multiprocessing.Manager() + return_list = manager.list([0] * k) + return_prediction = manager.list([0] * k) for n in range(corez): - p=Process(target=generate_2, args=(r3_prediction[int((n*(k/corez))):int(((k/corez)+n*(k/corez)))],r3_labels[int((n*(k/corez))):int(((k/corez)+n*(k/corez)))],n,)) + p = Process(target=generate_2, args=(r3_prediction[int((n * (k / corez))):int(((k / corez) + n * (k / corez)))], + r3_labels[int((n * (k / corez))):int(((k / corez) + n * (k / corez)))], + n,)) processes.append(p) for p in processes: p.start() for p in processes: p.join() -endtime=time.time() -duration=endtime-starttime -print(duration) +endtime = time.time() +duration = endtime - starttime -r3_histogram=np.array(return_list) -r3_results=np.array(return_prediction) +r3_histogram = np.array(return_list) +r3_results = np.array(return_prediction) -complete_histogram2=np.concatenate((complete_histogram[0:50000],r2_histogram,r3_histogram)) +complete_histogram2 = np.concatenate((complete_histogram[0:50000], r2_histogram, r3_histogram)) -#print(cfh) scaling = 0.88 fontsize = 13 plt.rcParams.update({'font.size': fontsize}) plt.rcParams.update({'font.family': 'serif'}) -fig = plt.figure(figsize=(7*scaling, - 5*scaling)) +fig = plt.figure(figsize=(7 * scaling, + 5 * scaling)) ax0 = plt.subplot() -weights = np.ones_like(complete_histogram2[0:50000])/float(3*len(complete_histogram2[0:50000])) -weights = (weights,weights,weights) +weights = np.ones_like(complete_histogram2[0:50000]) / float(3 * len(complete_histogram2[0:50000])) +weights = (weights, weights, weights) ax0.grid() -n, bins, patches=ax0.hist((complete_histogram[0:50000],complete_histogram[50000:100000],complete_histogram[100000:150000]),bins=100, - range=(0,0.25), weights=weights, color=("tab:green","tab:orange","tab:red"),stacked=True,histtype="bar", - label=("$n=1$","$n=2$","$n=3$")) - +n, bins, patches = ax0.hist( + (complete_histogram2[0:50000], complete_histogram2[50000:100000], complete_histogram2[100000:150000]), bins=100, + range=(0, 0.25), weights=weights, color=("tab:green", "tab:orange", "tab:red"), stacked=True, histtype="bar", + label=("$n=1$", "$n=2$", "$n=3$")) -ax1=ax0.twinx() -weights = np.ones_like(complete_histogram2)/float(len(complete_histogram2)) -n, bins, patches=ax1.hist(complete_histogram2,bins=1000, range=(0,0.251), weights=weights,cumulative=True,color="tab:blue",histtype='step') +ax1 = ax0.twinx() +weights = np.ones_like(complete_histogram2) / 
float(len(complete_histogram2)) +n, bins, patches = ax1.hist(complete_histogram2, bins=1000, range=(0, 0.251), weights=weights, cumulative=True, + color="tab:blue", histtype='step') ax0.set_xlabel("Normalized RMSE") -#plt.title("Histogram - Predicting Reaction Kinetics") +# plt.title("Histogram - Predicting Reaction Kinetics") ax0.set_ylabel("Relative Frequency") ax1.set_ylabel("Cumulative relative frequency") ax0.set_xlim([0, 0.25]) ax0.set_ylim([0, 0.5]) ax1.set_ylim([0, 1.1]) -ax0.set_yscale('symlog',linthreshy=0.01) +ax0.set_yscale('symlog', linthreshy=0.01) -#fig.legend() +# fig.legend() fig.patch.set_facecolor('white') plt.tight_layout() plt.savefig("cfh.png", dpi=320);
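
The histogram written to `cfh.png` by `evaluate.py` above is built from a range-normalized RMSE between the mass loss rate curves reconstructed from the predicted kinetics and the reference curves. The following standalone helper (an illustrative sketch, not part of the diff; the function name is made up here) mirrors that expression.

```python
import numpy as np
from sklearn.metrics import mean_squared_error

def normalized_rmse(reference, predicted):
    """RMSE between two curves, divided by the value range of the reference."""
    rmse = np.sqrt(mean_squared_error(reference, predicted))
    return rmse / (np.max(reference) - np.min(reference))
```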