-
Notifications
You must be signed in to change notification settings - Fork 1
/
concordance.js
102 lines (77 loc) · 2.24 KB
/
concordance.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// task
// Reetesh Kumar
// http://reet.herokuapp.com
// https://github.com/krreet/node_concordance
// module.exports is how Node.js makes the definitions below available to other JS files via require()
module.exports = {
// An object that does classification with us of words
Concordance: class {
constructor(stopWords) {
this.dict = {};
this.keys = [];
this.stopWords = stopWords;
}
// Splitting up the text
split(text) {
// Split into array of tokens
return text.split(/\W+/);
}
// A function to validate a toke
validate(token) {
return /\w{2,}/.test(token) && (!(/\d+/.test(token)) && !(this.stopWords.indexOf(token) > -1));
}
// Process new text
process(data) {
//data here is json
// console.log(data.xlData);
// var json = JSON.stringify(data.xlData);
// //use fs to write the file to disk
// var fs = require('fs');
// fs.writeFileSync('myjsonfile.json', json, 'utf8');
var tokens = this.split(data);
//console.log(tokens.length);
// For every token
for (var i = 0; i < tokens.length; i++) {
// Lowercase everything to ignore case
var token = tokens[i].toLowerCase();
if (this.validate(token)) {
// Increase the count for the token
this.increment(token);
}
}
}
// An array of keys
getKeys() {
return this.keys;
}
// Get the count for a word
getCount(word) {
return this.dict[word];
}
// Increment the count for a word
increment(word) {
// Is this a new word?
if (!this.dict[word]) {
this.dict[word] = 1;
this.keys.push(word);
// Otherwise just increment its count
} else {
this.dict[word]++;
}
}
// Sort array of keys by counts
sortByCount() {
// For this function to work for sorting, I have
// to store a reference to this so the context is not lost!
var concordance = this;
// A fancy way to sort each element
// Compare the counts
function sorter(a, b) {
var diff = concordance.getCount(b) - concordance.getCount(a);
return diff;
}
// Sort using the function above!
this.keys.sort(sorter);
}
}
};