-
Notifications
You must be signed in to change notification settings - Fork 0
/
sites.rb
71 lines (61 loc) · 1.72 KB
/
sites.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
require 'net/http'
require 'uri'
require 'json'
module Sites
# Scrapes company names from a paginated web listing and counts how many
# times each company appears.
#
# A site is described by a format string for the page URL, a hash of regexp
# fragments that locate a company name in the page markup, and the page
# numbers to visit. Public method names keep their original camelCase
# spelling so existing callers continue to work.
class Site
  # Charset assumed when the Content-Type header does not declare one.
  DEFAULT_CHARSET = 'UTF-8'.freeze

  # Pulls the (optionally quoted) charset parameter out of a Content-Type value.
  CHARSET_PATTERN = /charset\s*=\s*["']?([^\s;"']+)/i

  # @param url_template [String] format string with one slot for the page number
  # @param tokens [Hash{String=>String}] regexp fragments stored under the
  #   keys 'id', 'before' and 'after'
  # @param range [Enumerable] page numbers to fetch
  def initialize(url_template, tokens, range)
    @url_template = url_template
    @tokens = tokens
    @range = range
  end

  # Downloads one page and returns the company names found on it.
  #
  # @param url_text [String] absolute URL of the page
  # @return [Array<String>] names in page order, stripped of surrounding
  #   whitespace and transcoded to UTF-8
  def getCompaniesFromPage(url_text)
    response = fetch_page(url_text)
    charset = charset_of(response)
    response.body.to_s.scan(company_pattern).map do |captures|
      # Matching happens on the body as received; the captured text is then
      # tagged with the page charset so that it can be transcoded.
      captures.first.strip.force_encoding(charset).encode('UTF-8')
    end
  end

  # Visits every page of the configured range, in order.
  #
  # @return [Array<String>] all company names, duplicates included
  def getAllCompanies
    @range.flat_map { |page| getCompaniesFromPage(format(@url_template, page)) }
  end

  # Counts occurrences of each company over all pages.
  #
  # @return [Hash{String=>Integer}] name => count; unknown names read as 0
  def getStatsHash
    getAllCompanies.each_with_object(Hash.new(0)) do |company, stats|
      stats[company] += 1
    end
  end

  # @return [String] the result of #getStatsHash serialized as JSON
  def getStatsJSON
    getStatsHash.to_json
  end

  private

  # Issues a GET request for +url_text+ and returns the response object.
  def fetch_page(url_text)
    url = URI.parse(url_text)
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = true if url.scheme == 'https'
    # Net::HTTP#get returns only the response (the body is read through
    # #body) and takes the path plus query string, not the full URL.
    http.get(url.request_uri)
  end

  # Returns the charset named in the response's Content-Type header, or
  # DEFAULT_CHARSET when the header is absent or carries no charset.
  def charset_of(response)
    response['content-type'].to_s[CHARSET_PATTERN, 1] || DEFAULT_CHARSET
  end

  # Builds the regexp whose first capture group is the company name.
  #
  # The x flag makes the regexp engine ignore unescaped whitespace inside the
  # token fragments, so a fragment such as ', ' effectively matches ','.
  def company_pattern
    /#{@tokens['id']}.*?#{@tokens['before']}(.*?)#{@tokens['after']}/x
  end
end
# Preconfigured scrapers. Each one pairs a page-URL template with the regexp
# fragments ('id', 'before', 'after') used to find a company name, plus the
# page numbers to walk.

# nsk.erabota.ru listing, pages 1 through 17.
# NOTE(review): Site compiles these fragments with the regexp x flag, which
# ignores unescaped whitespace, so the trailing space in 'after' is presumably
# not matched -- confirm this is intended.
Erabota = Site.new(
  "http://nsk.erabota.ru/job/it/?page=%d",
  {
    'id' => 'src="\/img\/icons\/icon-link\.gif"',
    'before' => 'a>',
    'after' => ', '
  },
  1..17
)

# rabota.ngs.ru vacancy listing, pages 1 through 35.
Ngs = Site.new(
  "http://rabota.ngs.ru/vacancy?page=%d&search_key=n8gohq&limit=25&order_by[]=orderby_date&order_dir[]=desc",
  {
    'id' => 'company-name.*?_blank',
    'before' => '>',
    'after' => '<\/a'
  },
  1..35
)

# novosibirsk.hh.ru search results, pages 0 through 24.
Hh = Site.new(
  "http://novosibirsk.hh.ru/applicant/searchvacancyresult.xml?orderBy=2&itemsOnPage=20&areaId=4&professionalAreaId=1&compensationCurrencyCode=RUR&searchPeriod=30&page=%d",
  {
    'id' => 'class="searchresult__placetime">',
    'before' => '>',
    'after' => '<'
  },
  0..24
)
end