-
Notifications
You must be signed in to change notification settings - Fork 2
/
search.js
326 lines (299 loc) · 11.4 KB
/
search.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
try {
require('whatwg-fetch');
} catch (e) {
// likely this isn't a browser environment
}
const FilterNode = require('./FilterNode');
const SearchState = require('./searchState');
/**
 * Client for the `/api/search-wrapper` endpoint plus helpers that turn raw
 * search hits and aggregation buckets into filter structures.
 *
 * Relies on the module-level `FilterNode` and `SearchState` imports and on a
 * global `fetch` implementation.
 */
class Search {
  /**
   * @param {string} apiHost - Prefix placed in front of `/api/search-wrapper`.
   * @param {*} searchIndexText - Stored as-is on the instance; not read by this class.
   * @param {*} searchIndexSheet - Stored as-is on the instance; not read by this class.
   */
  constructor(apiHost, searchIndexText, searchIndexSheet) {
    this.apiHost = apiHost;
    this.searchIndexText = searchIndexText;
    this.searchIndexSheet = searchIndexSheet;
    // Maps a serialized request body to the JSON response received for it.
    this._cache = {};
  }

  /**
   * Read from, or write to, the response cache.
   *
   * @param {string} key - Cache key (execute_query uses the serialized request body).
   * @param {*} [result] - When not `undefined`, stored under `key` before reading.
   * @returns {*} The value currently cached under `key`, or `undefined`.
   */
  cache(key, result) {
    if (result !== undefined) {
      this._cache[key] = result;
    }
    return this._cache[key];
  }

  /**
   * Run a search, serving it from the cache when the identical request was
   * already answered. To replace sjs.search.post in search.js.
   *
   * `args` can contain:
   *   query: query string
   *   size: size of result set
   *   start: from what result to start
   *   type: "sheet" or "text"
   *   applied_filters: filter query by these filters
   *   appliedFilterAggTypes: array of same len as applied_filters giving aggType for each filter
   *   aggregationsToUpdate: forwarded to the server as `aggs`
   *   field: field to query in elastic_search
   *   sort_type: See SearchState.metadataByType for possible sort types
   *   exact: if query is exact
   * (`success` / `error` callbacks, if supplied, are not used here; consume the
   * returned promise instead.)
   *
   * @param {Object} args - Search parameters, see above.
   * @returns {Promise<Object>} Resolves with the parsed JSON response. Rejects
   *   with the string 'NO_QUERY' when `args.query` is falsy, or with an Error
   *   (carrying the response as `error.response`) on a non-2xx status.
   */
  execute_query(args) {
    if (!args.query) {
      // Callers receive this exact string as the rejection reason.
      return Promise.reject('NO_QUERY');
    }
    // The serialized body doubles as the cache key.
    const req = JSON.stringify(this.get_query_object(args));
    const cachedResult = this.cache(req);
    if (cachedResult) {
      return Promise.resolve(cachedResult);
    }
    return fetch(`${this.apiHost}/api/search-wrapper`, {
      method: 'POST',
      body: req,
      headers: {
        'Accept': 'application/json',
        'Content-Type': 'application/json; charset=utf-8',
      },
    }).then(response => {
      if (response.status >= 200 && response.status < 300) {
        return response;
      }
      // Report the numeric status and status text; concatenating the Response
      // object itself would only yield "[object Response]".
      const error = new Error(`${response.status} ${response.statusText}`);
      error.response = response;
      throw error;
    }).then(response => response.json())
      .then(json => {
        this.cache(req, json);
        return json;
      });
  }

  /**
   * Build the request body sent to the search endpoint.
   *
   * Sort settings are looked up in `SearchState.metadataByType[type].sortTypeArray`
   * by `sort_type`; an unknown `type` or `sort_type` raises a TypeError.
   *
   * @param {Object} params - Same fields as documented on execute_query.
   * @returns {Object} Plain object ready for JSON serialization.
   */
  get_query_object({
    query,
    applied_filters,
    appliedFilterAggTypes,
    aggregationsToUpdate,
    size,
    start,
    type,
    field,
    sort_type,
    exact
  }) {
    const { sortTypeArray } = SearchState.metadataByType[type];
    const { sort_method, fieldArray, score_missing, direction } = sortTypeArray.find(x => x.type === sort_type);
    return {
      type,
      query,
      field,
      source_proj: true,
      slop: exact ? 0 : 10,
      start,
      size,
      // A missing or empty filter list is always sent as an empty array.
      filters: applied_filters && applied_filters.length ? applied_filters : [],
      filter_fields: appliedFilterAggTypes,
      aggs: aggregationsToUpdate,
      sort_method,
      sort_fields: fieldArray,
      sort_reverse: direction === "desc",
      sort_score_missing: score_missing,
    };
  }

  /**
   * Collapse text hits so each `_source.ref` appears once.
   *
   * Hits whose `_id` was already seen are dropped (the `hits` array can contain
   * duplicates). Remaining hits are grouped by ref in first-seen order; within
   * a group the hit with the lowest `_source.version_priority` is returned and
   * the others are attached to it as `duplicates`.
   *
   * @param {Object[]} hits - Raw hits, each with `_id` and `_source`.
   * @returns {Object[]} One hit per ref.
   */
  process_text_hits(hits) {
    // Set/Map rather than plain objects so ids or refs can never collide with
    // names inherited from Object.prototype.
    const seenIds = new Set();
    const groupsByRef = new Map(); // ref -> hits sharing that ref, in arrival order
    for (const hit of hits) {
      if (seenIds.has(hit._id)) { continue; }
      seenIds.add(hit._id);
      const ref = hit._source.ref;
      const group = groupsByRef.get(ref);
      if (group) {
        group.push(hit);
      } else {
        groupsByRef.set(ref, [hit]);
      }
    }
    return Array.from(groupsByRef.values(), group => {
      if (group.length === 1) { return group[0]; }
      group.sort((a, b) => a._source.version_priority - b._source.version_priority);
      const primary = group[0];
      primary.duplicates = group.slice(1);
      return primary;
    });
  }

  /**
   * Build the filter tree for text results.
   *
   * Already-applied filters are seeded with a zero count first, so they stay
   * present in the tree even when no bucket mentions them; buckets added
   * afterwards overwrite the seed for the same key.
   *
   * @param {Object[]} aggregation_buckets - Items with `key` ('/'-separated path) and `doc_count`.
   * @param {string[]} appliedFilters - '/'-separated keys of filters already applied.
   * @param {Object} Sefaria - Provides `search_toc` (and `toc`) to order the tree.
   * @returns {{availableFilters: Array, registry: Object}}
   */
  buildFilterTree(aggregation_buckets, appliedFilters, Sefaria) {
    const rawTree = {};
    appliedFilters.forEach(
      fkey => this._addAvailableFilter(rawTree, fkey, {"docCount": 0})
    );
    aggregation_buckets.forEach(
      f => this._addAvailableFilter(rawTree, f["key"], {"docCount": f["doc_count"]})
    );
    this._aggregate(rawTree);
    return this._build(rawTree, Sefaria);
  }

  /**
   * Insert `data` into `rawTree` at the position named by `key`, creating
   * intermediate objects as needed.
   * Based on http://stackoverflow.com/a/11433067/213042
   *
   * @param {Object} rawTree - Nested object, mutated in place.
   * @param {string} key - '/'-separated list of path segments.
   * @param {Object} data - Arbitrary object stored at the final segment.
   */
  _addAvailableFilter(rawTree, key, data) {
    const keys = key.split("/");
    // When a value was passed, hold back the final segment: it receives `data`
    // instead of an empty container object.
    const lastName = arguments.length === 3 ? keys.pop() : false;
    let base = rawTree;
    for (let i = 0; i < keys.length; i++) {
      base = base[keys[i]] = base[keys[i]] || {};
    }
    // An empty final segment (e.g. a key ending in '/') stores nothing.
    if (lastName) {
      base[lastName] = data;
    }
  }

  /**
   * Fill in `docCount` bottom-up: every object in the tree that lacks a
   * `docCount`, or has one equal to 0, gets the sum of its children's counts.
   * The root object itself receives a `docCount` as well.
   * Nod to http://stackoverflow.com/a/17546800/213042
   *
   * @param {Object} rawTree - Tree produced by _addAvailableFilter, mutated in place.
   */
  _aggregate(rawTree) {
    const walker = branch => {
      if (branch === null || typeof branch !== "object") { return; }
      // Children first, so their counts are final before they are summed.
      Object.keys(branch).forEach(childKey => walker(branch[childKey]));
      if (!("docCount" in branch) || branch["docCount"] === 0) {
        branch["docCount"] = Object.keys(branch).reduce((total, childKey) => {
          const child = branch[childKey];
          if (child !== null && typeof child === "object" && "docCount" in child) {
            return total + child.docCount;
          }
          return total;
        }, 0);
      }
    };
    walker(rawTree);
  }

  /**
   * Turn the aggregated raw tree into FilterNode objects, following the order
   * and titles of `Sefaria.search_toc`. TOC entries with no matching entry in
   * `rawTree` are left out.
   * Nod to http://stackoverflow.com/a/17546800/213042
   *
   * @param {Object} rawTree - Tree after _aggregate has run.
   * @param {Object} Sefaria - Provides `search_toc` (and `toc` for the commentary branch).
   * @returns {{availableFilters: Array, registry: Object}} Top-level nodes, and
   *   a map from `aggKey` to node covering every node that was built.
   */
  _build(rawTree, Sefaria) {
    const path = []; // TOC segments from the root down to the node being visited
    const filters = [];
    const registry = {};
    let commentaryNode = new FilterNode();

    // Builds the node for one TOC branch (recursing into its contents) and
    // returns it, or false when rawTree holds nothing at the branch's path.
    const walk = branch => {
      let node = new FilterNode();
      node["docCount"] = 0;
      if ("category" in branch) { // Category node
        path.push(branch["category"]); // Place this category at the *end* of the path
        node = Object.assign(node, {
          "title": path[path.length - 1],
          "aggKey": path.join("/"),
          "heTitle": branch["heCategory"]
        });
        for (let k = 0; k < branch["contents"].length; k++) {
          const child = walk(branch["contents"][k]);
          if (child) { node.append(child); }
        }
      } else if ("title" in branch) { // Text node
        path.push(branch["title"]);
        node = Object.assign(node, {
          "title": path[path.length - 1],
          "aggKey": path.join("/"),
          "heTitle": branch["heTitle"]
        });
      }

      // Follow the current path through rawTree; stop as soon as a segment is
      // missing, which means there are no results for this TOC node.
      let rawNode = rawTree;
      for (let i = 0; i < path.length && rawNode !== undefined && rawNode !== null; i++) {
        rawNode = rawNode[path[i]];
      }
      path.pop();
      if (rawNode === undefined || rawNode === null) {
        return false;
      }
      node["docCount"] += rawNode.docCount;
      registry[node.aggKey] = node;
      return node;
    };

    for (let j = 0; j < Sefaria.search_toc.length; j++) {
      const topNode = walk(Sefaria.search_toc[j]);
      if (topNode) { filters.push(topNode); }
      // Remove after commentary refactor ?
      // If there is commentary on this node, add it as a sibling.
      // NOTE(review): nothing in this class appends children to commentaryNode,
      // so this branch looks unreachable from here - confirm before relying on it.
      if (commentaryNode.hasChildren()) {
        const toc_branch = Sefaria.toc[j];
        const cat = toc_branch["category"];
        // Append commentary node to result filters, add a fresh one for the next round
        let docCount = 0;
        if (rawTree.Commentary && rawTree.Commentary[cat]) { docCount += rawTree.Commentary[cat].docCount; }
        if (rawTree.Commentary2 && rawTree.Commentary2[cat]) { docCount += rawTree.Commentary2[cat].docCount; }
        commentaryNode = Object.assign(commentaryNode, {
          "title": cat + " Commentary",
          "aggKey": "Commentary/" + cat,
          "heTitle": "מפרשי" + " " + toc_branch["heCategory"],
          "docCount": docCount
        });
        registry[commentaryNode.aggKey] = commentaryNode;
        filters.push(commentaryNode);
        commentaryNode = new FilterNode();
      }
    }
    return { availableFilters: filters, registry };
  }

  /**
   * Mark the registry nodes named by `appliedFilters` as selected.
   *
   * @param {Object} registry - Map from aggKey to node, as returned by _build.
   * @param {string[]} appliedFilters - aggKeys to select.
   * @returns {string[]} The aggKeys that have no node in `registry`.
   */
  applyFilters(registry, appliedFilters) {
    const orphans = []; // todo: confirm behavior
    appliedFilters.forEach(aggKey => {
      const node = registry[aggKey];
      if (node) {
        node.setSelected(true);
      } else {
        orphans.push(aggKey);
      }
    });
    return orphans;
  }

  /**
   * Collect the applied filters reported by each top-level filter, together
   * with a parallel array of aggregation types.
   *
   * @param {Array} availableFilters - Nodes exposing `getAppliedFilters()` and `aggType`.
   * @returns {{appliedFilters: Array, appliedFilterAggTypes: Array}} Arrays of equal length.
   */
  getAppliedSearchFilters(availableFilters) {
    let appliedFilters = [];
    let appliedFilterAggTypes = [];
    for (const filter of availableFilters) {
      const applied = filter.getAppliedFilters();
      // Assume all child filters have the same type as their parent.
      const appliedTypes = applied.map(() => filter.aggType);
      appliedFilters = appliedFilters.concat(applied);
      appliedFilterAggTypes = appliedFilterAggTypes.concat(appliedTypes);
    }
    return {
      appliedFilters,
      appliedFilterAggTypes,
    };
  }

  /**
   * Build the text filter tree and select the filters that are already applied.
   * `appliedFilterAggTypes` and `aggType` are accepted so the signature matches
   * buildAndApplySheetFilters; they are not used here.
   *
   * @param {Object[]} aggregation_buckets - See buildFilterTree.
   * @param {string[]} appliedFilters - aggKeys already applied.
   * @param {Array} appliedFilterAggTypes - Unused.
   * @param {string} aggType - Unused.
   * @param {Object} Sefaria - See buildFilterTree.
   * @returns {{availableFilters: Array, registry: Object, orphans: string[]}}
   */
  buildAndApplyTextFilters(aggregation_buckets, appliedFilters, appliedFilterAggTypes, aggType, Sefaria) {
    const { availableFilters, registry } = this.buildFilterTree(aggregation_buckets, appliedFilters, Sefaria);
    const orphans = this.applyFilters(registry, appliedFilters);
    return { availableFilters, registry, orphans };
  }

  /**
   * Build a flat list of sheet filters, one FilterNode per bucket.
   *
   * A Hebrew bucket key becomes the Hebrew title with an empty English title.
   * Otherwise the key is the English title, and the Hebrew title comes from
   * `Sefaria.terms` unless `aggType` is 'group' or the term is unknown.
   * A node is selected when its aggKey is in `appliedFilters` and the aggType
   * at the same index of `appliedFilterAggTypes` equals `aggType`.
   *
   * @param {Object[]} aggregation_buckets - Items with `key` and `doc_count`.
   * @param {string[]} appliedFilters - aggKeys already applied.
   * @param {string[]} appliedFilterAggTypes - aggType for each applied filter.
   * @param {string} aggType - Aggregation type of these buckets.
   * @param {Object} Sefaria - Provides `hebrew.isHebrew` and `terms`.
   * @returns {{availableFilters: Array, registry: Object, orphans: Array}}
   *   `registry` and `orphans` are always empty for sheets.
   */
  buildAndApplySheetFilters(aggregation_buckets, appliedFilters, appliedFilterAggTypes, aggType, Sefaria) {
    const availableFilters = aggregation_buckets.map(bucket => {
      const isHeb = Sefaria.hebrew.isHebrew(bucket.key);
      let enTitle = bucket.key;
      let heTitle = '';
      if (isHeb) {
        enTitle = '';
        heTitle = bucket.key;
      } else if (aggType !== 'group' && Sefaria.terms[bucket.key]) {
        heTitle = Sefaria.terms[bucket.key].he;
      }
      const aggKey = enTitle || heTitle;
      const filterInd = appliedFilters.indexOf(aggKey);
      const isSelected = filterInd !== -1 && appliedFilterAggTypes[filterInd] === aggType;
      return new FilterNode({
        title: enTitle,
        heTitle,
        docCount: bucket.doc_count,
        aggKey,
        aggType,
        selected: isSelected ? 1 : 0
      });
    });
    return { availableFilters, registry: {}, orphans: [] };
  }
}
// Expose the Search class as this module's CommonJS export.
module.exports = Search;