-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstability_score.py
127 lines (97 loc) · 5.29 KB
/
stability_score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import dml
import prov.model
import datetime
import uuid
class stability_score(dml.Algorithm):
    """Compute a per-census-tract 'housing stability' score for Boston.

    The score of a tract is the mean of its min-max normalized eviction
    count and its min-max normalized crime count. The number of businesses
    is copied through to the output unchanged; it does not affect the score.

    Code adapted from github users jdbrawn, jliang24, slarbi, and tpotye.
    """

    contributor = 'agoncharova_lmckone'
    reads = ['agoncharova_lmckone.boston_tract_counts']
    writes = ['agoncharova_lmckone.stability_score']

    # Maximum number of tracts scored when execute() runs in trial mode.
    _TRIAL_LIMIT = 100

    @staticmethod
    def _value_range(values):
        """Return ``(minimum, maximum - minimum)`` of *values*.

        An empty sequence yields ``(0.0, 0.0)`` so callers never have to
        special-case an empty input collection.
        """
        if not values:
            return 0.0, 0.0
        low = min(values)
        return low, max(values) - low

    @staticmethod
    def _normalize(value, low, span):
        """Min-max scale *value* given the minimum *low* and the range *span*.

        Returns 0.0 when *span* is zero (every tract has the same value)
        instead of dividing by zero.
        """
        if span == 0:
            return 0.0
        return (value - low) / span

    @staticmethod
    def execute(trial=False):
        """Score every tract and store the results in MongoDB.

        Reads ``agoncharova_lmckone.boston_tract_counts``, computes the
        eviction, crime and combined stability scores for each tract, and
        replaces the ``stability_score`` collection with the results.

        Args:
            trial: When true, at most ``_TRIAL_LIMIT`` tracts are scored.
                The normalization bounds are still taken from all tracts.

        Returns:
            A dict with the ``"start"`` and ``"end"`` datetimes of the run.
        """
        startTime = datetime.datetime.now()

        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('agoncharova_lmckone', 'agoncharova_lmckone')

        tract_counts = repo['agoncharova_lmckone.boston_tract_counts']
        # Materialize once: the entries are needed both for the bounds and
        # for the scoring pass.
        entries = list(tract_counts.find())

        # Convert to float up front so comparison and arithmetic always use
        # the same numeric type.
        evictions = [float(e['properties']['evictions']) for e in entries]
        crimes = [float(e['properties']['crimes']) for e in entries]

        # Evictions and crimes each get their own bounds.
        min_evictions, eviction_span = stability_score._value_range(evictions)
        min_crimes, crime_span = stability_score._value_range(crimes)

        if trial:
            entries = entries[:stability_score._TRIAL_LIMIT]

        score = []
        for entry, n_evictions, n_crimes in zip(entries, evictions, crimes):
            evictionScore = stability_score._normalize(
                n_evictions, min_evictions, eviction_span)
            crimeScore = stability_score._normalize(
                n_crimes, min_crimes, crime_span)
            # TODO: consider weighting the components differently, e.g. using
            # the coefficients from Desmond's paper.
            stabilityScore = (evictionScore + crimeScore) / 2.0
            score.append({
                'Tract': entry['properties']['GEOID'],
                'stability': stabilityScore,
                'evictionScore': evictionScore,
                'crimeScore': crimeScore,
                'businesses': entry['properties']['businesses']
            })

        repo.dropCollection('stability_score')
        repo.createCollection('stability_score')
        # insert_many rejects an empty document list, so skip it in that case.
        if score:
            repo['agoncharova_lmckone.stability_score'].insert_many(score)

        print("trial mode: " + str(trial))
        print("inserted " + str(len(score)) + " stability score data points")

        repo.logout()
        endTime = datetime.datetime.now()
        return {"start": startTime, "end": endTime}

    # NOTE(review): the default ``doc`` is created once at class-definition
    # time and is therefore shared by every call that omits it. It is kept
    # as-is to preserve the existing interface; pass a fresh ProvDocument
    # when independent documents are required.
    @staticmethod
    def provenance(doc=prov.model.ProvDocument(), startTime=None, endTime=None):
        """Record the provenance of one run of this script in *doc*.

        Args:
            doc: The provenance document to extend.
            startTime: Start time of the run being described.
            endTime: End time of the run being described.

        Returns:
            The same *doc*, with this script's agent, input entity, activity
            and output entity added.
        """
        # Set up the database connection. No collection is read or written
        # in this method; the session is only opened and closed.
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate('agoncharova_lmckone', 'agoncharova_lmckone')

        # The scripts are in <folder>#<filename> format.
        doc.add_namespace('alg', 'http://datamechanics.io/algorithm/')
        # The data sets are in <user>#<collection> format.
        doc.add_namespace('dat', 'http://datamechanics.io/data/')
        # 'Extension', 'DataResource', 'DataSet', 'Retrieval', 'Query', or 'Computation'.
        doc.add_namespace('ont', 'http://datamechanics.io/ontology#')
        # The event log.
        doc.add_namespace('log', 'http://datamechanics.io/log/')

        this_script = doc.agent(
            'alg:agoncharova_lmckone#stability_score',
            {prov.model.PROV_TYPE: prov.model.PROV['SoftwareAgent'], 'ont:Extension': 'py'})

        # Input data set consumed by execute().
        resource_stability = doc.entity(
            'dat:agoncharova_lmckone#boston_tract_counts',
            {'prov:label': 'Stability Analysis by Census Tract',
             prov.model.PROV_TYPE: 'ont:DataSet'})

        get_stability_score = doc.activity('log:uuid' + str(uuid.uuid4()), startTime, endTime)
        doc.wasAssociatedWith(get_stability_score, this_script)
        doc.usage(get_stability_score, resource_stability, startTime, None,
                  {prov.model.PROV_TYPE: 'ont:Computation'})

        # Output data set produced by execute(). Named so it does not shadow
        # the enclosing class.
        stability_entity = doc.entity(
            'dat:agoncharova_lmckone#stability_score',
            {prov.model.PROV_LABEL: 'Stability Score', prov.model.PROV_TYPE: 'ont:DataSet'})
        doc.wasAttributedTo(stability_entity, this_script)
        doc.wasGeneratedBy(stability_entity, get_stability_score, endTime)
        doc.wasDerivedFrom(stability_entity, resource_stability,
                           get_stability_score, get_stability_score, get_stability_score)

        repo.logout()
        return doc
# stability_score.execute()
# stability_score.provenance()