-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyse.rb
289 lines (247 loc) · 8.26 KB
/
analyse.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
#!/usr/bin/env ruby
require 'json'
require 'sqlite3'
require 'time'
require 'yaml'
# Look for the configuration file in the current directory. If it is not
# there, explain how to create it and stop before anything else runs.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
unless File.exist?('RooProbeMail.conf')
  puts "The configuration file is missing."
  puts "Copy the file 'RooProbeMail.conf.sample' to 'RooProbeMail.conf'"
  puts "and remember to update it for your username/password"
  puts "then please try again"
  exit
end
# Load the YAML configuration and pick out the two paths the script needs:
# the SQLite database to read and the results file to write.
rooprobemail_conf = YAML.load_file('RooProbeMail.conf')
@sqlitedb, @resultfile = rooprobemail_conf["config"].values_at("sqlitedb", "results")
# Look for the database file named in the configuration. If it is missing,
# show the path that was tried, explain how to create it, and stop.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
unless File.exist?(@sqlitedb)
  puts @sqlitedb
  puts "The database file is missing."
  puts "You probably need to run 'get_envelopes_into_db.rb' first."
  exit
end
# Defaults you can configure: how many rows each report returns unless the
# user overrides it, and the handle on the configured SQLite database.
limit = 20
db = SQLite3::Database.new(rooprobemail_conf["config"]["sqlitedb"])
# Look for an existing results file. If it exists, ask whether it should be
# deleted, kept under a timestamped backup name, or left alone (which ends
# the script without regenerating anything).
if File.file?(@resultfile)
  puts "The results file already exists. What would you like to do?"
  puts "[D] Delete the old results file "
  puts "[R] Rename and keep or "
  puts "[Q] Quit the script and do nothing (default)"
  # gets returns nil when standard input is closed; to_s turns that into an
  # empty answer so the default (quit) branch is taken instead of crashing.
  choice = gets.to_s.strip.downcase
  case choice
  when "d"
    File.delete(@resultfile)
    puts 'Old results file deleted'
  when "r"
    # Time#iso8601 comes from the 'time' standard library on older Rubies.
    File.rename(@resultfile, "#{@resultfile}.#{Time.now.iso8601}.bak")
    puts 'Old results file renamed'
  else
    puts "Script exited without touching the existing results file"
    exit
  end
end
# Create the new results file ready to append results
@keepoutput = File.open(@resultfile, "w")

# The default can be overridden during each run by the user.
print "How many results to return [default=#{limit}]?"
STDOUT.flush
# gets returns nil when standard input is closed; treat that as "no answer".
user_input = gets.to_s.strip
unless user_input.empty?
  begin
    # Integer() rejects anything that is not a whole number, so a typo keeps
    # the default instead of silently becoming 0 (which String#to_i would do).
    limit = Integer(user_input, 10)
  rescue ArgumentError
    puts "'#{user_input}' is not a whole number, keeping the default"
  end
end
puts "you said #{limit} results"
puts "\n"
STDOUT.flush
# A class for the queries and result display.
#
# Each public report method runs one SQL query against the +emails+ table
# through the database handle given to the constructor, formats the rows as
# comma separated lines under a short heading, prints the finished report to
# standard output and returns the same text so the caller can store it.
class Queries
  # Rule printed under (or above) the column titles of every report.
  SEPARATOR = "---------------------"

  # db    - object responding to execute(sql) { |row| ... }, where each row
  #         can be indexed by column position (e.g. a SQLite3::Database).
  # limit - maximum number of rows per report. It is coerced with Integer()
  #         because it is interpolated into the SQL text; anything that is
  #         not a whole number raises ArgumentError/TypeError here.
  def initialize(db, limit)
    @db = db
    @limit = Integer(limit)
  end

  # Total size of the whole mailbox in bytes and MB.
  def MailboxTotalSize
    sql = "SELECT sum(mail_size) as totalbytes, sum(mail_size)/1024/1024 as MB
           FROM emails"
    rows = fetch(sql)
    publish(["Total size of mailbox", "bytes, MB", SEPARATOR],
            rows, [0, 1], megabyte_footer(rows, 0))
  end

  # The largest individual emails.
  def LargestEmails
    sql = "SELECT mail_date, mail_size/1024/1024 as MB, mail_from_mailbox||'@'||mail_from_host as mail_from, mail_subject, mail_size
           FROM emails
           ORDER BY mail_size DESC LIMIT #{@limit}"
    rows = fetch(sql)
    # Column 4 (raw bytes) is selected only for the total; it is not printed.
    publish(["largest emails", SEPARATOR, "Date, MB, sender, subject"],
            rows, [0, 1, 2, 3], megabyte_footer(rows, 4))
  end

  # The largest emails dated more than 12 months ago.
  def LargestOlderThan12MonthsEmails
    sql = "SELECT mail_size as totalbytes, mail_size/1024/1024 as MB, mail_from_mailbox||'@'||mail_from_host as mail_from, mail_date, mail_subject
           FROM emails
           WHERE mail_date < date('now','-12 months')
           ORDER BY mail_size DESC LIMIT #{@limit}"
    rows = fetch(sql)
    publish(["largest emails more than 12 months old", SEPARATOR, "bytes, MB, sender, date, subject"],
            rows, [0, 1, 2, 3, 4], megabyte_footer(rows, 0))
  end

  # Emails whose stored date equals 1875-05-20.
  # NOTE(review): presumably a placeholder written by the import script for
  # unparseable dates - confirm against get_envelopes_into_db.rb.
  def EmailsWithBadDates
    sql = "SELECT mail_from_mailbox||'@'||mail_from_host as mail_from, mail_date, mail_subject
           FROM emails
           WHERE mail_date = date('1875-05-20')
           ORDER BY mail_date DESC LIMIT #{@limit}"
    rows = fetch(sql)
    publish(["EmailsWithBadDates", SEPARATOR, "sender, date, subject"],
            rows, [0, 1, 2], "Found #{rows.length} emails")
  end

  # Senders ranked by the combined size of their emails.
  def SenderLargestByTotalSize
    sql = "SELECT sum(mail_size) as totalbytes, sum(mail_size)/1024/1024 as MB, mail_from_mailbox||'@'||mail_from_host as mail_from
           FROM emails
           GROUP BY mail_from ORDER BY sum(mail_size) DESC LIMIT #{@limit}"
    rows = fetch(sql)
    publish(["largest sender by total mail size", SEPARATOR, "bytes, MB, sender"],
            rows, [0, 1, 2], megabyte_footer(rows, 0))
  end

  # Sender domains ranked by the combined size of their emails.
  def SenderLargestDomain
    sql = "SELECT sum(mail_size) as totalbytes, sum(mail_size)/1024/1024 as MB, '@'||mail_from_host as mail_from_domain
           FROM emails
           GROUP BY mail_from_domain ORDER BY sum(mail_size) DESC LIMIT #{@limit}"
    rows = fetch(sql)
    publish(["largest sender by domain", SEPARATOR, "bytes, MB, sender domain"],
            rows, [0, 1, 2], megabyte_footer(rows, 0))
  end

  # Senders ranked by number of emails.
  def SenderMostProlific
    # The fourth column (raw bytes) is used only for the total. Adding up the
    # per-sender MB column would lose up to 1 MB per row to integer division.
    sql = "SELECT count(mail_from_mailbox||'@'||mail_from_host) as mail_count, sum(mail_size)/1024/1024 as MB, mail_from_mailbox||'@'||mail_from_host as mail_from, sum(mail_size) as totalbytes
           FROM emails
           GROUP BY mail_from ORDER BY count(mail_from_mailbox||'@'||mail_from_host) DESC LIMIT #{@limit}"
    rows = fetch(sql)
    publish(["most prolific senders", "count, size MB, sender", SEPARATOR],
            rows, [0, 1, 2], megabyte_footer(rows, 3))
  end

  # Groups of emails sharing the first 12 subject characters, by email count.
  def SimilarTitles_count
    sql = "SELECT substr(mail_subject,1,12), count(substr(mail_subject,1,12)) as counted, sum(mail_size)/1024/1024 as MB
           FROM emails
           GROUP BY substr(mail_subject,1,12) ORDER BY counted DESC LIMIT #{@limit}"
    publish(similar_titles_header("Similar titles - highest number of emails"),
            fetch(sql), [1, 0, 2])
  end

  # Groups of emails sharing the first 12 subject characters, by total size.
  def SimilarTitles_size
    sql = "SELECT substr(mail_subject,1,12), count(substr(mail_subject,1,12)) as counted, sum(mail_size)/1024/1024 as sizedmb
           FROM emails
           GROUP BY substr(mail_subject,1,12) ORDER BY sizedmb DESC LIMIT #{@limit}"
    publish(similar_titles_header("Similar titles - largest total size"),
            fetch(sql), [1, 0, 2])
  end

  private

  # Runs +sql+ and returns every yielded row in an array.
  def fetch(sql)
    rows = []
    @db.execute(sql) { |row| rows << row }
    rows
  end

  # Builds the "Total N MB" line from the byte values in +column+.
  # NULL values (nil) count as 0 so an empty table cannot raise.
  def megabyte_footer(rows, column)
    bytes = rows.inject(0) { |sum, row| sum + (row[column] || 0) }
    "Total #{bytes / 1024 / 1024} MB"
  end

  # Heading lines shared by the two "similar titles" reports.
  def similar_titles_header(title)
    [title,
     "Emails where the subjects begin with the same 12 characters",
     "count, subject begins, size(MB)",
     SEPARATOR]
  end

  # Assembles heading, one line per row (only the listed +columns+, in that
  # order), optional footer and a trailing blank line; prints and returns it.
  def publish(header, rows, columns, footer = nil)
    lines = header.dup
    rows.each do |row|
      lines << columns.map { |index| row[index].to_s }.join(", ")
    end
    lines << footer if footer
    report = lines.map { |line| "#{line}\n" }.join + "\n"
    puts report
    report
  end
end
#db.close
# How do I close the DB? I can't seem to create the connection
# outside the Queries class and use it, and I don't know how
# to close it once the query class has finished.
#emails with the same title (and fewer than X senders)
#emails with similar starting titles
# Show the results.
# Each report method prints to the screen and returns the same text, which is
# appended to the results file here. The ensure clause closes the results
# file and the database connection even if one of the reports raises.
begin
  q = Queries.new(db, limit)
  %i[
    SenderMostProlific
    MailboxTotalSize
    SenderLargestByTotalSize
    SenderLargestDomain
    LargestEmails
    LargestOlderThan12MonthsEmails
    EmailsWithBadDates
    SimilarTitles_count
    SimilarTitles_size
  ].each do |report|
    @keepoutput << q.public_send(report)
  end
ensure
  # Close the results file first so everything written so far is flushed,
  # then release the database connection.
  @keepoutput.close
  db.close
end
puts ""
puts "Analysis Complete"