-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreducer_week_by_edit_age.py
52 lines (41 loc) · 1.37 KB
/
reducer_week_by_edit_age.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python
import sys
from sets import Set
import math
'''
@author : susan_biancani
outputs tab-delimited lines: week, mean of the first_week values seen for that week, standard deviation of those values
'''
def stDev(values, mean):
    """Return the population standard deviation of ``values`` around ``mean``.

    Args:
        values: Sized iterable of numbers.
        mean: The value to measure deviations from (the caller supplies it).

    Returns:
        The square root of the mean squared deviation, as a float.
        An empty ``values`` yields 0.0 instead of dividing by zero.
    """
    n = len(values)
    if n == 0:
        return 0.0
    # float(n) forces true division even on Python 2 with all-integer input.
    return math.sqrt(sum((x - mean) ** 2 for x in values) / float(n))
# Column index of each named field in a tab-separated input row.
# Schema reference:
# https://github.com/whym/RevDiffSearch/blob/master/README.rst
get = dict(curr_week=0, page=1, first_week=2)
def processAndOutput(week, firstWeeks):
    """Print one tab-separated summary row for ``week``.

    The row contains the week, the mean of ``firstWeeks`` and their
    standard deviation as computed by ``stDev``.

    Args:
        week: Week identifier. It is echoed via ``str`` and, when
            ``firstWeeks`` is empty, converted with ``float`` to stand in
            for the mean.
        firstWeeks: List of integer first-week values collected for ``week``.
    """
    if not firstWeeks:
        # No values collected: report the week itself with zero spread.
        avgWeek = float(week)
        stDevWeeks = 0.0
    else:
        avgWeek = float(sum(firstWeeks)) / len(firstWeeks)
        stDevWeeks = stDev(firstWeeks, avgWeek)
    output = [str(week), str(avgWeek), str(stDevWeeks)]
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('\t'.join(output))
# Reducer driver: read tab-separated rows from stdin, group consecutive rows
# by their current-week column, and emit one summary row per group.
# Rows for the same week must be adjacent in the input; a new group starts
# whenever the week value changes.
# NOTE(review): earlier revisions read both columns at index + 1; confirm the
# input layout matches the indexes in `get`.
firstWeeks = []
lastWeek = None  # None means no row has been read yet
for line in sys.stdin:
    line = line.strip('\n').split('\t')
    week = line[get['curr_week']]
    if lastWeek != week:
        if lastWeek is not None:
            # The week changed: flush the finished group and start a new one.
            processAndOutput(lastWeek, firstWeeks)
            firstWeeks = []
        lastWeek = week
    # Collect this row's first-week value for the current group.
    firstWeeks.append(int(line[get['first_week']]))
# Flush the final group; empty input produces no output at all.
if lastWeek is not None:
    processAndOutput(lastWeek, firstWeeks)