forked from forrestbao/mflux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclp.py
152 lines (124 loc) · 6.19 KB
/
clp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Copyleft 2016 Forrest Sheng Bao
# Released under GNU Affero General Public License Version 3.0
# See http://www.gnu.org/licenses/agpl-3.0.en.html
#
# For updated version, check our Git repository at
# https://bitbucket.org/forrestbao/mflux
# or our website
# http://mflux.org
#
# Cite this work: Wu et al., Rapid prediction of bacterial heterotrophic
# fluxomics using machine learning and constraint programming,
# PLoS Computational Biology, 2016, DOI: 10.1371/journal.pcbi.1004838
#
# This file contains functions for constraint programming in MFlux
def process_species_db(File):
"""Load the species database which is Suppliment Information I
Format
========
Fields separated by tab
Species Spcies name Oxygen condition Substrate uptake rate upper bound (mmol/gDW*mol ) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 Growth rate upper bound (h-1) Reference
1 Escherichia coli 1,3,2 20 Y Y Y Y Y Y Y Y Y Y Y Y Y N 1.2 1
2 Corynebacterium glutamicum 1,3,2 40 Y Y Y Y Y Y Y Y N Y N Y Y N 1 2
Returns
========
DB: A list of tuples. Each tuple is (species, Oxygen, rate, Carbon1, Carbon 2, ..., Carbon 14, Growth_rate_upper)
Oxygen itself is a string, e.g., "1,2,3"
"""
Carbon_sub = {"Y":True, "N":False}
DB = []
with open(File, "r") as F:
F.readline() # skip the header
for Line in F:
Field = Line.split("\t")
# print Field
[Species, Substrate_rate] = list(map(int, [Field[0], Field[3]]))
Oxygen = Field[2] #map(int, Field[2].split(","))
Carbon_src = [ Carbon_sub.get(x, False) for x in Field[4:4+13+1] ]
Growth_rate_upper = Field[4+14]
DB.append(tuple([Species, Oxygen, Substrate_rate]+ Carbon_src + [Growth_rate_upper]))
return DB
def species_db_to_constraints(DB, Debug=False):
"""Turn the species DB into a CSP problem (constraints only, no variable ranges)
Parameters
=============
DB: list of tuples
Each tuple is (species, Oxygen, rate, Carbon1, Carbon 2, ..., Carbon 14)
Oxygen itself is a tuple, e.g., (1,3)
Returns
=========
problem: an instance of python-constraint
containing only constraints but no variable domains
Notes
========
the problem has a solution if any of the rules set in species database is VIOLATED.
In other words, if the problem has solution, then the input does NOT make sense.
"""
import constraint # python-constraint module
problem = constraint.Problem() # initialize the CSP problem
# create variables
# problem.addVariable("Species", range(1,41+1))
# problem.addVariable("Substrate_rate", range(0, 100+1))
# problem.addVariable("Oxygen", [1,2,3])
# for i in xrange(1, 14+1):
# problem.addVariable("Carbon"+str(i), [True, False]) # create one variable for each carbon source
# This part should be from user input
# add constraints, where each entry in DB is a constraint.
# create the lambda functions
All_vars= ["Species", "Substrate_rate", "Oxygen"] + ["Carbon"+str(i) for i in range(1, 14+1)] + ["Growth_rate_upper"]
for Entry in DB:
Oxygen_values = Entry[1] # as string
Foo = "lambda "
Foo += ", ".join(All_vars) # done with listing all variables
Foo += ": "
Logic_exp = ["Substrate_rate<=" + str(Entry[2]), "Species==" + str(Entry[0]), "Growth_rate_upper<=" + str(Entry[4+13])]
for i in range(3, 3+14): # carbon sources
if not Entry[i]: # only use false ones to create the constraint
Logic_exp.append( ( All_vars[i] + "==" + str(Entry[i]) ) )
Logic_exp.append( ( "Oxygen in [" + Oxygen_values + "]" ) )
Logic_exp = " and ".join(Logic_exp)
Logic_exp = "not (" + Logic_exp + ")" # De Morgan's Law
if Debug:
print(Logic_exp)
problem.addConstraint(eval(Foo + Logic_exp), tuple(All_vars))
return problem # just return one solution, if no solution, return is NoneType
def input_ok(problem, Vector):
"""Turn user inputs into domains of variables for the CSP problem and then solve.
Parameters
============
problem: a python-constraint instance with constraints built
Vector: the feature vector, float numbers, [Species, Reactor, Nutrient, Oxygen, Method, MFA, Energy, Growth_rate, Substrate_uptake_rate] + ratio of 14 carbon sources in the order: "glucose", "fructose", "galactose", "gluconate", "glutamate", "citrate", "xylose", "succinate", "malate", "lactate", "pyruvate", "glycerol", "acetate", "NaHCO3"
Notes
========
In current formulation, the problem has a solution if any of the rules set in species database is VIOLATED.
In other words, if the problem has solution, then the input does NOT make sense.
Example
=========
>>> import clp
>>> DB = clp.process_species_db("SI_1_species_db.csv")
>>> P = clp.species_db_to_constraints(DB)
>>> Vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.72, 10.47, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0]
>>> print clp.input_ok(P, Vector)
True
>>> P.reset() # another test, violating the carbon source it takes
>>> P = clp.species_db_to_constraints(DB)
>>> Vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.72, 17, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.0]
>>> print clp.input_ok(P, Vector)
False
>>> P.reset() # another test, violating growth rate upper boundary
>>> P = clp.species_db_to_constraints(DB)
>>> Vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.72, 10.47, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0]
>>> print clp.input_ok(P, Vector)
False
"""
problem.addVariable("Species", [Vector[0]])
problem.addVariable("Substrate_rate", [Vector[8]])
problem.addVariable("Oxygen", [Vector[3]])
problem.addVariable("Growth_rate_upper", [Vector[7]])
for i in range(1, 14+1):
problem.addVariable("Carbon"+str(i), [True if Vector[i+8]>0 else False]) # create one variable for each carbon source
Solutions = problem.getSolution()
if Solutions == None:# no a single solution, pass test
return True
else:
return False