From 540d25e97117c73b1ce9e50e54423e5d8140a710 Mon Sep 17 00:00:00 2001 From: Thomas Wilkerling Date: Mon, 28 Jun 2021 16:23:58 +0200 Subject: [PATCH] support custom boost improve whitespace normalization --- README.md | 64 +++--- bench/test/flexsearch-0.7.0/index.html | 16 +- dist/flexsearch.bundle.js | 28 +-- dist/flexsearch.compact.js | 22 +-- dist/flexsearch.debug.js | 257 ++++++++++++------------- dist/flexsearch.es5.js | 40 ++-- dist/flexsearch.light.js | 20 +- dist/module/index.js | 2 +- dist/module/lang.js | 2 +- dist/module/lang/arabic/default.js | 2 +- dist/module/lang/cjk/default.js | 2 +- dist/module/lang/cyrillic/default.js | 2 +- dist/module/lang/latin/default.js | 2 +- dist/module/lang/latin/simple.js | 2 +- dist/module/type.js | 2 +- package.json | 2 +- src/index.js | 10 +- src/lang.js | 1 + src/lang/arabic/default.js | 4 +- src/lang/cjk/default.js | 4 +- src/lang/cyrillic/default.js | 4 +- src/lang/latin/default.js | 7 +- src/lang/latin/simple.js | 7 +- src/type.js | 2 +- 24 files changed, 263 insertions(+), 241 deletions(-) diff --git a/README.md b/README.md index 07e3ebb..69165d6 100644 --- a/README.md +++ b/README.md @@ -65,31 +65,31 @@ Plugins (extern projects): flexsearch.bundle.js - Download - https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.1/dist/flexsearch.bundle.js + Download + https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.2/dist/flexsearch.bundle.js flexsearch.light.js - Download - https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.1/dist/flexsearch.light.js + Download + https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.2/dist/flexsearch.light.js flexsearch.compact.js - Download - https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.1/dist/flexsearch.compact.js + Download + https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.2/dist/flexsearch.compact.js flexsearch.es5.js * - Download - https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.1/dist/flexsearch.es5.js + Download + 
https://rawcdn.githack.com/nextapps-de/flexsearch/0.7.2/dist/flexsearch.es5.js ES6 Modules - Download + Download The /dist/module/ folder of this Github repository @@ -104,7 +104,7 @@ npm install flexsearch #### Get Latest Nightly (Do not use for production!) -Just exchange the version number from the URLs above with "master", e.g.: "/flexsearch/__0.7.1__/dist/" into "/flexsearch/__master__/dist". +Just exchange the version number from the URLs above with "master", e.g.: "/flexsearch/__0.7.2__/dist/" into "/flexsearch/__master__/dist". ### Compare Web-Bundles @@ -269,12 +269,12 @@ Operation per seconds, higher is better, except the test "Memory" on which lower 1 FlexSearch - 23 - 7039844 - 1429457 - 113091 - 1467937 - 2895284 + 17 + 7084129 + 1586856 + 511585 + 2017142 + 3202006 @@ -436,7 +436,7 @@ const worker = new WorkerIndex(options); Or via CDN: ```html - + ``` AMD: @@ -445,7 +445,7 @@ AMD: var FlexSearch = require("./flexsearch.js"); ``` -Load one of the builds from the folder dist within your html as a script and use as follows: +Load one of the builds from the folder dist within your html as a script and use as follows: ```js var index = new FlexSearch.Index(options); @@ -576,7 +576,7 @@ FlexSearch is highly customizable. Make use of the right options can really impr Default - preset




+ preset "memory"
"performance"
@@ -584,29 +584,28 @@ FlexSearch is highly customizable. Make use of the right options can really impr "score"
"default" - + The configuration profile as a shortcut or as a base for your custom settings.
"default" - tokenize




+ tokenize "strict"
"forward"
"reverse"
- "full"
- function() + "full" - + The indexing mode (tokenizer).

Choose one of the built-ins or pass a custom tokenizer function.
"strict" - cache


+ cache Boolean
Number @@ -625,7 +624,7 @@ FlexSearch is highly customizable. Make use of the right options can really impr - context

+ context Boolean
Context Options @@ -635,13 +634,22 @@ FlexSearch is highly customizable. Make use of the right options can really impr - optimize
+ optimize Boolean When enabled it uses a memory-optimized stack flow for the index. true + + + boost + + function(arr, str, int) => float + + A custom boost function that is applied when adding contents to the index. The function has this signature: Function(words[], term, index) => Float. It receives 3 parameters: an array of all words, the current term, and the index at which the current term is placed in the word array. You can apply your own calculation, e.g. based on the occurrences of a term, and return the resulting factor (<1 means relevance is lowered, >1 means relevance is increased).

Note: this feature is currently supported only by the tokenizer "strict". + null + Language-specific Options and Encoding: @@ -680,7 +688,7 @@ FlexSearch is highly customizable. Make use of the right options can really impr "balance"
"advanced"
"extra"
- function(str):[words] + function(str) => [words] The encoding type.

Choose one of the built-ins or pass a custom encoding function. "default" diff --git a/bench/test/flexsearch-0.7.0/index.html b/bench/test/flexsearch-0.7.0/index.html index fb056f3..e17f25d 100644 --- a/bench/test/flexsearch-0.7.0/index.html +++ b/bench/test/flexsearch-0.7.0/index.html @@ -15,7 +15,7 @@

Indexed Text: "Gulliver's Travels" (Swift Jonathan 1726)

//import Document from "../../../src/document.js"; import { suite } from "../../bench.js"; - let lib, split = /[^a-z]+/; + let lib, split = / /; suite["flexsearch-0.7.0"] = { @@ -26,15 +26,14 @@

Indexed Text: "Gulliver's Travels" (Swift Jonathan 1726)

encode: str => str.toLowerCase().split(split), tokenize: "strict", resolution: 1, - minlength: 1, + minlength: 3, optimize: false, fastupdate: false, context: { depth: 1, bidirectional: false, resolution: 1 - }, - doc: "body" + } }); }, add: function(data){ @@ -48,16 +47,19 @@

Indexed Text: "Gulliver's Travels" (Swift Jonathan 1726)

return lib.search(query, 9999); }, - update: function(id){ + update: function(data){ for(let i = 0, len = data.length; i < len; i++){ lib.update(i, data[i]); } }, - remove: function(id){ + remove: function(data){ - lib.remove(id); + for(let i = 0, len = data.length; i < len; i++){ + + lib.remove(i, data[i]); + } } }; diff --git a/dist/flexsearch.bundle.js b/dist/flexsearch.bundle.js index 29ddc36..dd0c083 100644 --- a/dist/flexsearch.bundle.js +++ b/dist/flexsearch.bundle.js @@ -1,33 +1,33 @@ /**! - * FlexSearch.js v0.7.11 (Bundle) + * FlexSearch.js v0.7.2 (Bundle) * Copyright 2018-2021 Nextapps GmbH * Author: Thomas Wilkerling * Licence: Apache-2.0 * https://github.com/nextapps-de/flexsearch */ -(function _f(self){'use strict';try{if(module)self=module}catch(e){}self._factory=_f;var t;function u(a){return"undefined"!==typeof a?a:!0}function aa(a){const b=Array(a);for(let c=0;c=this.B&&(w||!n[l])){var f=L(q,d,r),g="";switch(this.F){case "full":if(3f;h--)if(h-f>=this.B){var k=L(q,d,r,e,f);g=l.substring(f,h);M(this,n,g,k,a,c)}break}case "reverse":if(2=this.B&&M(this,n, -g,L(q,d,r,e,h),a,c);g=""}case "forward":if(1=this.B&&M(this,n,g,f,a,c);break}default:if(M(this,n,l,f,a,c),w&&1=this.B&&!e[l]){e[l]=1;const p=this.l&&l>f;M(this,m,p?f:l,L(g+(d/2>g?0:1),d,r,h-1,k-1),a,c,p?l:f)}}}}this.m||(this.register[a]=1)}}return this};function L(a,b,c,d,e){return c&&1=this.B&&(w||!n[l])){var f=L(q,d,r),g="";switch(this.G){case "full":if(3f;h--)if(h-f>=this.B){var k=L(q,d,r,e,f);g=l.substring(f,h);M(this,n,g,k,a,c)}break}case "reverse":if(2=this.B&&M(this,n, +g,L(q,d,r,e,h),a,c);g=""}case "forward":if(1=this.B&&M(this,n,g,f,a,c);break}default:if(this.C&&(f=Math.min(f/this.C(b,l,r)|0,q-1)),M(this,n,l,f,a,c),w&&1=this.B&&!e[l]){e[l]=1;const p=this.l&&l>f;M(this,m,p?f:l,L(g+(d/2>g?0:1),d,r,h-1,k-1),a,c,p?l:f)}}}}this.m||(this.register[a]=1)}}return this}; +function L(a,b,c,d,e){return c&&1=this.B&&!c[q])if(this.s||f||this.map[q])k[w++]=q,c[q]=1;else return d;a=k;e=a.length}if(!e)return 
d;b||(b=100);h=this.depth&&1=d)))break;if(n){if(f)return ta(k,d,0);b[b.length]=k;return}}return!c&&k}function ta(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} +function sa(a,b,c,d,e,f,g,h){let k=[],m=h?a.h:a.map;a.s||(m=ua(m,g,h,a.l));if(m){let n=0;const w=Math.min(m.length,h?a.A:a.D);for(let q=0,r=0,l,p;q=d)))break;if(n){if(f)return ta(k,d,0);b[b.length]=k;return}}return!c&&k}function ta(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} function ua(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return a}t.contain=function(a){return!!this.register[a]};t.update=function(a,b){return this.remove(a).add(a,b)}; -t.remove=function(a,b){const c=this.register[a];if(c){if(this.m)for(let d=0,e;db||c)e=e.slice(c,c+b);d&&(e=za.call(this,e));return{tag:a,result:e}}}function za(a){const b=Array(a.length);for(let c=0,d;c=this.m&&(u||!n[l])){var f=P(q,e,r),g="";switch(this.C){case "full":if(3f;h--)if(h-f>=this.m){var k=P(q,e,r,d,f);g=l.substring(f,h);Q(this,n,g,k,a,c)}break}case "reverse":if(2=this.m&&Q(this,n, -g,P(q,e,r,d,h),a,c);g=""}case "forward":if(1=this.m&&Q(this,n,g,f,a,c);break}default:if(Q(this,n,l,f,a,c),u&&1=this.m&&!d[l]){d[l]=1;const p=this.l&&l>f;Q(this,m,p?f:l,P(g+(e/2>g?0:1),e,r,h-1,k-1),a,c,p?l:f)}}}}this.D||(this.register[a]=1)}}return this};function P(a,b,c,e,d){return c&&1=this.m&&Q(this,n,g,f,a,c);break}default:if(this.G&&(f=Math.min(f/this.G(b,l,r)|0,q-1)),Q(this,n,l,f,a,c),u&&1=this.m&&!d[l]){d[l]=1;const p=this.l&&l>f;Q(this,m,p?f:l,P(g+(e/2>g?0:1),e,r,h-1,k-1),a,c,p?l:f)}}}}this.D||(this.register[a]=1)}}return this}; +function P(a,b,c,e,d){return c&&1=this.m&&!c[q])if(this.s||f||this.h[q])k[u++]=q,c[q]=1;else return e;a=k;d=a.length}if(!d)return e;b||(b=100);h=this.depth&&1=e)))break;if(n){if(f)return ma(k,e,0);b[b.length]=k;return}}return!c&&k}function ma(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} function 
na(a,b,c,e){c?(e=e&&b>c,a=(a=a[e?b:c])&&a[e?c:b]):a=a[b];return a}t.contain=function(a){return!!this.register[a]};t.update=function(a,b){return this.remove(a).add(a,b)};t.remove=function(a,b){const c=this.register[a];if(c){if(this.D)for(let e=0,d;eb||c)d=d.slice(c,c+b);e&&(d=qa.call(this,d));return{tag:a,result:d}}}function qa(a){const b=Array(a.length);for(let c=0,e;c= this.minlength && (u || !n[l])) { - var f = M(q, d, p), g = ""; + if (l && e >= this.minlength && (u || !p[l])) { + var f = M(q, d, n), g = ""; switch(this.tokenize) { case "full": if (3 < e) { for (f = 0; f < e; f++) { for (var h = e; h > f; h--) { if (h - f >= this.minlength) { - var k = M(q, d, p, e, f); + var k = M(q, d, n, e, f); g = l.substring(f, h); - this.push_index(n, g, k, a, c); + this.push_index(p, g, k, a, c); } } } @@ -307,23 +307,23 @@ L.prototype.add = function(a, b, c, d) { case "reverse": if (2 < e) { for (h = e - 1; 0 < h; h--) { - g = l[h] + g, g.length >= this.minlength && (k = M(q, d, p, e, h), this.push_index(n, g, k, a, c)); + g = l[h] + g, g.length >= this.minlength && (k = M(q, d, n, e, h), this.push_index(p, g, k, a, c)); } g = ""; } case "forward": if (1 < e) { for (h = 0; h < e; h++) { - g += l[h], g.length >= this.minlength && this.push_index(n, g, f, a, c); + g += l[h], g.length >= this.minlength && this.push_index(p, g, f, a, c); } break; } default: - if (this.push_index(n, l, f, a, c), u && 1 < d && p < d - 1) { - for (e = w(), g = this.resolution_ctx, f = l, h = Math.min(u + 1, d - p), e[f] = 1, k = 1; k < h; k++) { - if ((l = b[this.rtl ? d - 1 - p - k : p + k]) && l.length >= this.minlength && !e[l]) { + if (this.boost && (f = Math.min(f / this.boost(b, l, n) | 0, q - 1)), this.push_index(p, l, f, a, c), u && 1 < d && n < d - 1) { + for (e = w(), g = this.resolution_ctx, f = l, h = Math.min(u + 1, d - n), e[f] = 1, k = 1; k < h; k++) { + if ((l = b[this.rtl ? 
d - 1 - n - k : n + k]) && l.length >= this.minlength && !e[l]) { e[l] = 1; - const t = M(g + (d / 2 > g ? 0 : 1), d, p, h - 1, k - 1), y = this.bidirectional && l > f; + const t = M(g + (d / 2 > g ? 0 : 1), d, n, h - 1, k - 1), y = this.bidirectional && l > f; this.push_index(m, y ? f : l, t, a, c, y ? l : f); } } @@ -358,8 +358,8 @@ L.prototype.search = function(a, b, c) { if (a && (a = this.encode(a), e = a.length, 1 < e)) { c = w(); var k = []; - for (let n = 0, u = 0, q; n < e; n++) { - if ((q = a[n]) && q.length >= this.minlength && !c[q]) { + for (let p = 0, u = 0, q; p < e; p++) { + if ((q = a[p]) && q.length >= this.minlength && !c[q]) { if (this.optimize || f || this.map[q]) { k[u++] = q, c[q] = 1; } else { @@ -378,11 +378,11 @@ L.prototype.search = function(a, b, c) { c = 0; let m; h ? (m = a[0], c = 1) : 1 < e && a.sort(aa); - for (let n, u; c < e; c++) { + for (let p, u; c < e; c++) { u = a[c]; - h ? (n = this.add_result(d, f, b, g, 2 === e, u, m), f && !1 === n && d.length || (m = u)) : n = this.add_result(d, f, b, g, 1 === e, u); - if (n) { - return n; + h ? (p = this.add_result(d, f, b, g, 2 === e, u, m), f && !1 === p && d.length || (m = u)) : p = this.add_result(d, f, b, g, 1 === e, u); + if (p) { + return p; } if (f && c === e - 1) { k = d.length; @@ -395,28 +395,28 @@ L.prototype.search = function(a, b, c) { return d; } if (1 === k) { - return pa(d[0], b, g); + return qa(d[0], b, g); } } } - return ja(d, b, g, f); + return ka(d, b, g, f); }; L.prototype.add_result = function(a, b, c, d, e, f, g) { let h = [], k = g ? this.ctx : this.map; - this.optimize || (k = qa(k, f, g, this.bidirectional)); + this.optimize || (k = ra(k, f, g, this.bidirectional)); if (k) { let m = 0; - const n = Math.min(k.length, g ? this.resolution_ctx : this.resolution); - for (let u = 0, q = 0, p, l; u < n; u++) { - if (p = k[u]) { - if (this.optimize && (p = qa(p, f, g, this.bidirectional)), d && p && e && (l = p.length, l <= d ? 
(d -= l, p = null) : (p = p.slice(d), d = 0)), p && (h[m++] = p, e && (q += p.length, q >= c))) { + const p = Math.min(k.length, g ? this.resolution_ctx : this.resolution); + for (let u = 0, q = 0, n, l; u < p; u++) { + if (n = k[u]) { + if (this.optimize && (n = ra(n, f, g, this.bidirectional)), d && n && e && (l = n.length, l <= d ? (d -= l, n = null) : (n = n.slice(d), d = 0)), n && (h[m++] = n, e && (q += n.length, q >= c))) { break; } } } if (m) { if (e) { - return pa(h, c, 0); + return qa(h, c, 0); } a[a.length] = h; return; @@ -424,11 +424,11 @@ L.prototype.add_result = function(a, b, c, d, e, f, g) { } return !b && h; }; -function pa(a, b, c) { +function qa(a, b, c) { a = 1 === a.length ? a[0] : [].concat.apply([], a); return c || a.length > b ? a.slice(c, c + b) : a; } -function qa(a, b, c, d) { +function ra(a, b, c, d) { c ? (d = d && b > c, a = (a = a[d ? b : c]) && a[d ? c : b]) : a = a[b]; return a; } @@ -473,7 +473,7 @@ function N(a, b, c, d, e) { } return f; } -L.prototype.searchCache = la; +L.prototype.searchCache = ma; L.prototype.export = function(a, b, c, d, e) { let f, g; switch(e || (e = 0)) { @@ -503,7 +503,7 @@ L.prototype.export = function(a, b, c, d, e) { default: return; } - oa(a, b || this, c ? c + "." + f : f, d, e, g); + pa(a, b || this, c ? c + "." + f : f, d, e, g); return !0; }; L.prototype.import = function(a, b) { @@ -524,8 +524,8 @@ L.prototype.import = function(a, b) { } } }; -ia(L.prototype); -function ra(a) { +ja(L.prototype); +function sa(a) { a = a.data; var b = self._index; const c = a.args; @@ -543,7 +543,7 @@ function ra(a) { a = a.id, b = b[d].apply(b, c), postMessage("search" === d ? {id:a, msg:b} : {id:a}); } } -;let sa = 0; +;let ta = 0; function O(a) { if (!(this instanceof O)) { return new O(a); @@ -552,7 +552,7 @@ function O(a) { a ? 
D(b = a.encode) && (a.encode = b.toString()) : a = {}; (b = (self || window)._factory) && (b = b.toString()); const c = self.exports, d = this; - this.worker = ta(b, c, a.worker); + this.worker = ua(b, c, a.worker); this.resolver = w(); if (this.worker) { if (c) { @@ -583,17 +583,17 @@ function Q(a) { D(d) && (e = d, c.splice(c.length - 1, 1)); d = new Promise(function(f) { setTimeout(function() { - b.resolver[++sa] = f; - b.worker.postMessage({task:a, id:sa, args:c}); + b.resolver[++ta] = f; + b.worker.postMessage({task:a, id:ta, args:c}); }); }); return e ? (d.then(e), this) : d; }; } -function ta(a, b, c) { +function ua(a, b, c) { let d; try { - d = b ? eval('new (require("worker_threads")["Worker"])("../dist/node/node.js")') : a ? new Worker(URL.createObjectURL(new Blob(["onmessage=" + ra.toString()], {type:"text/javascript"}))) : new Worker(x(c) ? c : "worker/worker.js", {type:"module"}); + d = b ? eval('new (require("worker_threads")["Worker"])("../dist/node/node.js")') : a ? new Worker(URL.createObjectURL(new Blob(["onmessage=" + sa.toString()], {type:"text/javascript"}))) : new Worker(x(c) ? c : "worker/worker.js", {type:"module"}); } catch (e) { } return d; @@ -748,7 +748,7 @@ R.prototype.remove = function(a) { }; R.prototype.search = function(a, b, c, d) { c || (!b && z(a) ? 
(c = a, a = c.query) : z(b) && (c = b, b = 0)); - let e = [], f = [], g, h, k, m, n, u, q = 0; + let e = [], f = [], g, h, k, m, p, u, q = 0; if (c) { if (c.constructor === Array) { k = c, c = null; @@ -756,12 +756,12 @@ R.prototype.search = function(a, b, c, d) { k = (g = c.pluck) || c.index || c.field; m = c.tag; h = this.store && c.enrich; - n = "and" === c.bool; + p = "and" === c.bool; b = c.limit || 100; u = c.offset || 0; if (m && (x(m) && (m = [m]), !a)) { for (let l = 0, t; l < m.length; l++) { - if (t = ua.call(this, m[l], b, u, h)) { + if (t = va.call(this, m[l], b, u, h)) { e[e.length] = t, q++; } } @@ -771,40 +771,40 @@ R.prototype.search = function(a, b, c, d) { } } k || (k = this.field); - n = n && (1 < k.length || m && 1 < m.length); - const p = !d && (this.worker || this.async) && []; + p = p && (1 < k.length || m && 1 < m.length); + const n = !d && (this.worker || this.async) && []; for (let l = 0, t, y, C; l < k.length; l++) { let B; y = k[l]; x(y) || (B = y, y = y.field); - if (p) { - p[l] = this.index[y].searchAsync(a, b, B || c); + if (n) { + n[l] = this.index[y].searchAsync(a, b, B || c); } else { C = (t = d ? d[l] : this.index[y].search(a, b, B || c)) && t.length; if (m && C) { const A = []; let H = 0; - n && (A[0] = [t]); - for (let W = 0, ma, P; W < m.length; W++) { - if (ma = m[W], C = (P = this.tagindex[ma]) && P.length) { - H++, A[A.length] = n ? [P] : P; + p && (A[0] = [t]); + for (let W = 0, oa, P; W < m.length; W++) { + if (oa = m[W], C = (P = this.tagindex[oa]) && P.length) { + H++, A[A.length] = p ? [P] : P; } } - H && (t = n ? ja(A, b || 100, u || 0) : ka(t, A), C = t.length); + H && (t = p ? 
ka(A, b || 100, u || 0) : la(t, A), C = t.length); } if (C) { f[q] = y, e[q++] = t; } else { - if (n) { + if (p) { return []; } } } } - if (p) { + if (n) { const l = this; return new Promise(function(t) { - Promise.all(p).then(function(y) { + Promise.all(n).then(function(y) { t(l.search(a, b, c, y)); }); }); @@ -817,7 +817,7 @@ R.prototype.search = function(a, b, c, d) { } for (let l = 0, t; l < f.length; l++) { t = e[l]; - t.length && h && (t = va.call(this, t)); + t.length && h && (t = wa.call(this, t)); if (g) { return t; } @@ -825,17 +825,17 @@ R.prototype.search = function(a, b, c, d) { } return e; }; -function ua(a, b, c, d) { +function va(a, b, c, d) { let e = this.tagindex[a], f = e && e.length - c; if (f && 0 < f) { if (f > b || c) { e = e.slice(c, c + b); } - d && (e = va.call(this, e)); + d && (e = wa.call(this, e)); return {tag:a, result:e}; } } -function va(a) { +function wa(a) { const b = Array(a.length); for (let c = 0, d; c < a.length; c++) { d = a[c], b[c] = {id:d, doc:this.store[d]}; @@ -852,7 +852,7 @@ R.prototype.set = function(a, b) { this.store[a] = b; return this; }; -R.prototype.searchCache = la; +R.prototype.searchCache = ma; R.prototype.export = function(a, b, c, d, e) { e || (e = 0); d || (d = 0); @@ -876,7 +876,7 @@ R.prototype.export = function(a, b, c, d, e) { default: return; } - oa(a, this, c, d, e, f); + pa(a, this, c, d, e, f); } }; R.prototype.import = function(a, b) { @@ -903,19 +903,18 @@ R.prototype.import = function(a, b) { } } }; -ia(R.prototype); -var xa = {encode:wa, rtl:!1, tokenize:""}; -const ya = /[\W_]+/, za = E("[\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5]"), Aa = E("[\u00e8\u00e9\u00ea\u00eb]"), Ba = E("[\u00ec\u00ed\u00ee\u00ef]"), Ca = E("[\u00f2\u00f3\u00f4\u00f5\u00f6\u0151]"), Da = E("[\u00f9\u00fa\u00fb\u00fc\u0171]"), Ea = E("[\u00fd\u0177\u00ff]"), Fa = E("\u00f1"), Ga = E("[\u00e7c]"), Ha = E("\u00df"), Ia = E(" & "), Ja = [za, "a", Aa, "e", Ba, "i", Ca, "o", Da, "u", Ea, "y", Fa, "n", Ga, "k", Ha, "s", Ia, " and 
"]; -function wa(a) { - a = "" + a; - var b = this.pipeline, c = a; - c.normalize && (c = c.normalize("NFD").replace(ba, "")); - return b.call(this, c.toLowerCase(), !a.normalize && Ja, ya, !1); +ja(R.prototype); +var ya = {encode:xa, rtl:!1, tokenize:""}; +const za = G("[\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5]"), Aa = G("[\u00e8\u00e9\u00ea\u00eb]"), Ba = G("[\u00ec\u00ed\u00ee\u00ef]"), Ca = G("[\u00f2\u00f3\u00f4\u00f5\u00f6\u0151]"), Da = G("[\u00f9\u00fa\u00fb\u00fc\u0171]"), Ea = G("[\u00fd\u0177\u00ff]"), Fa = G("\u00f1"), Ga = G("[\u00e7c]"), Ha = G("\u00df"), Ia = G(" & "), Ja = [za, "a", Aa, "e", Ba, "i", Ca, "o", Da, "u", Ea, "y", Fa, "n", Ga, "k", Ha, "s", Ia, " and "]; +function xa(a) { + var b = a = "" + a; + b.normalize && (b = b.normalize("NFD").replace(da, "")); + return ba.call(this, b.toLowerCase(), !a.normalize && Ja, ca, !1); } ;var La = {encode:Ka, rtl:!1, tokenize:"strict"}; const Ma = /[^a-z0-9]+/, Na = {b:"p", v:"f", w:"f", z:"s", x:"s", "\u00df":"s", d:"t", n:"m", c:"k", g:"k", j:"k", q:"k", i:"e", y:"e", u:"o"}; function Ka(a) { - a = wa.call(this, a).join(" "); + a = xa.call(this, a).join(" "); const b = []; if (a) { const c = a.split(Ma), d = c.length; @@ -925,8 +924,8 @@ function Ka(a) { let h = Na[f] || f, k = h; for (let m = 1; m < a.length; m++) { f = a[m]; - const n = Na[f] || f; - n && n !== k && (h += n, k = n); + const p = Na[f] || f; + p && p !== k && (h += p, k = p); } b[g++] = h; } @@ -935,19 +934,19 @@ function Ka(a) { return b; } ;var Pa = {encode:Oa, rtl:!1, tokenize:""}; -const Qa = E("ae"), Ra = E("oe"), Sa = E("sh"), Ta = E("th"), Ua = E("ph"), Va = E("pf"), Wa = [Qa, "a", Ra, "o", Sa, "s", Ta, "t", Ua, "f", Va, "f", E("(?![aeo])h(?![aeo])"), "", E("(?!^[aeo])h(?!^[aeo])"), ""]; +const Qa = G("ae"), Ra = G("oe"), Sa = G("sh"), Ta = G("th"), Ua = G("ph"), Va = G("pf"), Wa = [Qa, "a", Ra, "o", Sa, "s", Ta, "t", Ua, "f", Va, "f", G("(?![aeo])h(?![aeo])"), "", G("(?!^[aeo])h(?!^[aeo])"), ""]; function Oa(a, b) { - a && (a = 
Ka.call(this, a).join(" "), 2 < a.length && (a = F(a, Wa)), b || (1 < a.length && (a = G(a)), a && (a = a.split(" ")))); + a && (a = Ka.call(this, a).join(" "), 2 < a.length && (a = E(a, Wa)), b || (1 < a.length && (a = F(a)), a && (a = a.split(" ")))); return a; } ;var Ya = {encode:Xa, rtl:!1, tokenize:""}; -const Za = E("(?!\\b)[aeo]"); +const Za = G("(?!\\b)[aeo]"); function Xa(a) { - a && (a = Oa.call(this, a, !0), 1 < a.length && (a = a.replace(Za, "")), 1 < a.length && (a = G(a)), a && (a = a.split(" "))); + a && (a = Oa.call(this, a, !0), 1 < a.length && (a = a.replace(Za, "")), 1 < a.length && (a = F(a)), a && (a = a.split(" "))); return a; } -;I["latin:default"] = ea; -I["latin:simple"] = xa; +;I["latin:default"] = ha; +I["latin:simple"] = ya; I["latin:balance"] = La; I["latin:advanced"] = Pa; I["latin:extra"] = Ya; @@ -956,7 +955,7 @@ let Y; const Z = {Index:L, Document:R, Worker:O, registerCharset:function(a, b) { I[a] = b; }, registerLanguage:function(a, b) { - ha[a] = b; + ia[a] = b; }}; (Y = X.define) && Y.amd ? Y([], function() { return Z; diff --git a/dist/flexsearch.es5.js b/dist/flexsearch.es5.js index 6a30c2c..e0d1163 100644 --- a/dist/flexsearch.es5.js +++ b/dist/flexsearch.es5.js @@ -1,5 +1,5 @@ /**! 
- * FlexSearch.js v0.7.11 (ES5) + * FlexSearch.js v0.7.2 (ES5) * Copyright 2018-2021 Nextapps GmbH * Author: Thomas Wilkerling * Licence: Apache-2.0 @@ -10,35 +10,35 @@ function ba(a){a=["object"==typeof globalThis&&globalThis,a,"object"==typeof win x("Symbol",function(a){function b(g){if(this instanceof b)throw new TypeError("Symbol is not a constructor");return new c(d+(g||"")+"_"+f++,g)}function c(g,e){this.h=g;v(this,"description",{configurable:!0,writable:!0,value:e})}if(a)return a;c.prototype.toString=function(){return this.h};var d="jscomp_symbol_"+(1E9*Math.random()>>>0)+"_",f=0;return b}); x("Symbol.iterator",function(a){if(a)return a;a=Symbol("Symbol.iterator");for(var b="Array Int8Array Uint8Array Uint8ClampedArray Int16Array Uint16Array Int32Array Uint32Array Float32Array Float64Array".split(" "),c=0;c=this.C&&(e||!g[l])){var n=P(h,d,k),m="";switch(this.H){case "full":if(3n;q--)if(q-n>=this.C){var r=P(h,d,k,p,n);m=l.substring(n,q);Q(this,g,m,r,a,c)}break}case "reverse":if(2=this.C&&Q(this,g,m,P(h,d,k, -p,q),a,c);m=""}case "forward":if(1=this.C&&Q(this,g,m,n,a,c);break}default:if(Q(this,g,l,n,a,c),e&&1=this.C&&!p[l]){p[l]=1;var u=this.l&&l>n;Q(this,f,u?n:l,P(m+(d/2>m?0:1),d,k,q-1,r-1),a,c,u?l:n)}}}}this.o||(this.register[a]=1)}}return this};function P(a,b,c,d,f){return c&&1=this.C&&Q(this,g,m,n,a,c);break}default:if(this.o&&(n=Math.min(n/this.o(b,l,k)|0,h-1)),Q(this,g,l,n,a,c),e&&1=this.C&&!p[l]){p[l]=1;var u=this.l&&l>n;Q(this,f,u?n:l,P(m+(d/2>m?0:1),d,k,q-1,r-1),a,c,u?l:n)}}}}this.s||(this.register[a]=1)}}return this}; +function P(a,b,c,d,f){return c&&1=this.C&&!c[n])if(this.B||e||this.map[n])k[p++]=n,c[n]=1;else return d;a=k;h=a.length}}if(!h)return d;b||(b=100);g=this.depth&&1=d)))break;if(p){if(g)return ya(k,d,0);b[b.length]=k;return}}return!c&&k}function ya(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} function za(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return 
a}t.contain=function(a){return!!this.register[a]};t.update=function(a,b){return this.remove(a).add(a,b)}; -t.remove=function(a,b){var c=this.register[a];if(c){if(this.o)for(var d=0,f;db||c)f=f.slice(c,c+b);d&&(f=Ea.call(this,f));return{tag:a,result:f}}}function Ea(a){for(var b=Array(a.length),c=0,d;c=this.h&&(r||!m[l])){var h=J(p,d,q),f="";switch(this.C){case "full":if(3h;g--)if(g-h>=this.h){var k=J(p,d,q,e,h);f=l.substring(h,g);K(this,m,f,k,a,b)}break}case "reverse":if(2=this.h&&K(this, -m,f,J(p,d,q,e,g),a,b);f=""}case "forward":if(1=this.h&&K(this,m,f,h,a,b);break}default:if(K(this,m,l,h,a,b),r&&1=this.h&&!e[l]){e[l]=1;const u=this.l&&l>h;K(this,n,u?h:l,J(f+(d/2>f?0:1),d,q,g-1,k-1),a,b,u?l:h)}}}}this.s||(this.register[a]=1)}}return this};function J(a,c,b,d,e){return b&&1=this.h&&!b[p])if(this.g||h||this.i[p])k[r++]=p,b[p]=1;else return d;a=k;e=a.length}if(!e)return d;c||(c=100);g=this.j&&1=d)))break;if(m){if(h)return O(k,d,0);c[c.length]=k;return}}return!b&&k}function O(a,c,b){a=1===a.length?a[0]:[].concat.apply([],a);return b||a.length>c?a.slice(b,b+c):a} +(function(self){'use strict';function t(a){return"undefined"!==typeof a?a:!0}function v(a){const c=Array(a);for(let b=0;b=this.h&&(r||!m[k])){var h=J(p,d,q),f="";switch(this.C){case "full":if(3h;g--)if(g-h>=this.h){var l=J(p,d,q,e,h);f=k.substring(h,g);K(this,m,f,l,a,b)}break}case "reverse":if(2=this.h&&K(this, +m,f,J(p,d,q,e,g),a,b);f=""}case "forward":if(1=this.h&&K(this,m,f,h,a,b);break}default:if(this.D&&(h=Math.min(h/this.D(c,k,q)|0,p-1)),K(this,m,k,h,a,b),r&&1=this.h&&!e[k]){e[k]=1;const u=this.l&&k>h;K(this,n,u?h:k,J(f+(d/2>f?0:1),d,q,g-1,l-1),a,b,u?k:h)}}}}this.s||(this.register[a]=1)}}return this}; +function J(a,c,b,d,e){return b&&1=this.h&&!b[p])if(this.g||h||this.i[p])l[r++]=p,b[p]=1;else return d;a=l;e=a.length}if(!e)return d;c||(c=100);g=this.j&&1=d)))break;if(m){if(h)return O(l,d,0);c[c.length]=l;return}}return!b&&l}function O(a,c,b){a=1===a.length?a[0]:[].concat.apply([],a);return 
b||a.length>c?a.slice(b,b+c):a} function P(a,c,b,d){b?(d=d&&c>b,a=(a=a[d?c:b])&&a[d?b:c]):a=a[c];return a}I.prototype.contain=function(a){return!!this.register[a]};I.prototype.update=function(a,c){return this.remove(a).add(a,c)};I.prototype.remove=function(a,c){const b=this.register[a];if(b){if(this.s)for(let d=0,e;d=this.minlength&&(g||!f[i])){const l=get_score(h,e,j);let m="";switch(this.tokenize){case"full":if(3b;d--)if(d-b>=this.minlength){const g=get_score(h,e,j,k,b);m=i.substring(b,d),this.push_index(f,m,g,a,c)}break}case"reverse":if(2=this.minlength){const d=get_score(h,e,j,k,b);this.push_index(f,m,d,a,c)}m=""}case"forward":if(1=this.minlength&&this.push_index(f,m,l,a,c);break}default:if(this.push_index(f,i,l,a,c),g&&1=this.minlength&&!f[i]){f[i]=1;const b=get_score(h+(e/2>h?0:1),e,j,l-1,g-1),m=this.bidirectional&&i>k;this.push_index(d,m?k:i,b,a,c,m?i:k)}}}}}this.fastupdate||(this.register[a]=1)}}return this};function get_score(a,b,c,d,e){return c&&1=this.minlength&&!b[e]){if(!this.optimize&&!f&&!this.map[e])return g;c[i++]=e,b[e]=1}a=c,d=a.length}if(!d)return g;b||(b=100);let i,j=this.depth&&1=c)))));l++);if(b)return e?single_result(h,c,0):void(a[a.length]=h)}return!b&&h};function single_result(a,b,c){return a=1===a.length?a[0]:concat(a),c||a.length>b?a.slice(c,c+b):a}function get_array(a,b,c,d){if(c){const e=d&&b>c;a=a[e?b:c],a=a&&a[e?c:b]}else a=a[b];return a}Index.prototype.contain=function(a){return!!this.register[a]},Index.prototype.update=function(a,b){return this.remove(a).add(a,b)},Index.prototype.remove=function(a,b){const c=this.register[a];if(c){if(this.fastupdate)for(let b,d=0;d=this.minlength&&(g||!f[i])){let l=get_score(h,e,j),m="";switch(this.tokenize){case"full":if(3b;d--)if(d-b>=this.minlength){const g=get_score(h,e,j,k,b);m=i.substring(b,d),this.push_index(f,m,g,a,c)}break}case"reverse":if(2=this.minlength){const 
d=get_score(h,e,j,k,b);this.push_index(f,m,d,a,c)}m=""}case"forward":if(1=this.minlength&&this.push_index(f,m,l,a,c);break}default:if(this.boost&&(l=Math.min(0|l/this.boost(b,i,j),h-1)),this.push_index(f,i,l,a,c),g&&1=this.minlength&&!f[i]){f[i]=1;const b=get_score(h+(e/2>h?0:1),e,j,l-1,g-1),m=this.bidirectional&&i>k;this.push_index(d,m?k:i,b,a,c,m?i:k)}}}}}this.fastupdate||(this.register[a]=1)}}return this};function get_score(a,b,c,d,e){return c&&1=this.minlength&&!b[e]){if(!this.optimize&&!f&&!this.map[e])return g;c[i++]=e,b[e]=1}a=c,d=a.length}if(!d)return g;b||(b=100);let i,j=this.depth&&1=c)))));l++);if(b)return e?single_result(h,c,0):void(a[a.length]=h)}return!b&&h};function single_result(a,b,c){return a=1===a.length?a[0]:concat(a),c||a.length>b?a.slice(c,c+b):a}function get_array(a,b,c,d){if(c){const e=d&&b>c;a=a[e?b:c],a=a&&a[e?c:b]}else a=a[b];return a}Index.prototype.contain=function(a){return!!this.register[a]},Index.prototype.update=function(a,b){return this.remove(a).add(a,b)},Index.prototype.remove=function(a,b){const c=this.register[a];if(c){if(this.fastupdate)for(let b,d=0;d= this.minlength) && (depth || !dupes[term])){ - const score = get_score(resolution, length, i); + let score = get_score(resolution, length, i); let token = ""; switch(this.tokenize){ @@ -224,6 +225,11 @@ Index.prototype.add = function(id, content, _append, _skip_update){ default: // case "strict": + if(this.boost){ + + score = Math.min((score / this.boost(content, term, i)) | 0, resolution - 1); + } + this.push_index(dupes, term, score, id, _append); // context is just supported by tokenizer "strict" diff --git a/src/lang.js b/src/lang.js index 7b404ba..92749d9 100644 --- a/src/lang.js +++ b/src/lang.js @@ -45,6 +45,7 @@ export function pipeline(str, normalize, split, _collapse){ return str; } +export const regex_whitespace = /[\p{Z}\p{S}\p{P}\p{C}]+/u; const regex_normalize = /[\u0300-\u036f]/g; export function normalize(str){ diff --git a/src/lang/arabic/default.js 
b/src/lang/arabic/default.js index 5915a16..621647b 100644 --- a/src/lang/arabic/default.js +++ b/src/lang/arabic/default.js @@ -1,4 +1,5 @@ import { IndexInterface } from "../../type.js"; +import { pipeline } from "../../lang.js"; export const rtl = true; export const tokenize = ""; @@ -15,8 +16,9 @@ const regex = /[\x00-\x7F]+/g; export function encode(str){ - return this.pipeline( + return pipeline.call( + this, /* string: */ str.replace(regex, " "), /* normalize: */ false, /* split: */ " ", diff --git a/src/lang/cjk/default.js b/src/lang/cjk/default.js index 9a3827b..5c54d89 100644 --- a/src/lang/cjk/default.js +++ b/src/lang/cjk/default.js @@ -1,4 +1,5 @@ import { IndexInterface } from "../../type.js"; +import { pipeline } from "../../lang.js"; export const rtl = false; export const tokenize = "strict"; @@ -16,8 +17,9 @@ const regex = /[\x00-\x7F]+/g; export function encode(str){ - return this.pipeline( + return pipeline.call( + this, /* string: */ str.replace(regex, ""), /* normalize: */ false, /* split: */ "", diff --git a/src/lang/cyrillic/default.js b/src/lang/cyrillic/default.js index 3adabbe..f6d1193 100644 --- a/src/lang/cyrillic/default.js +++ b/src/lang/cyrillic/default.js @@ -1,4 +1,5 @@ import { IndexInterface } from "../../type.js"; +import { pipeline } from "../../lang.js"; export const rtl = false; export const tokenize = ""; @@ -15,8 +16,9 @@ const regex = /[\x00-\x7F]+/g; export function encode(str){ - return this.pipeline( + return pipeline.call( + this, /* string: */ str.replace(regex, " "), /* normalize: */ false, /* split: */ " ", diff --git a/src/lang/latin/default.js b/src/lang/latin/default.js index cd007ae..5094999 100644 --- a/src/lang/latin/default.js +++ b/src/lang/latin/default.js @@ -1,5 +1,5 @@ import { IndexInterface } from "../../type.js"; -import { normalize } from "../../lang.js"; +import { pipeline, normalize, regex_whitespace } from "../../lang.js"; export const rtl = false; export const tokenize = ""; @@ -9,16 +9,15 @@ 
export default { tokenize: tokenize } -const regex_whitespace = /[\W_]+/; - /** * @this IndexInterface */ export function encode(str){ - return this.pipeline( + return pipeline.call( + this, /* string: */ str.toLowerCase(), /* normalize: */ false, /* split: */ regex_whitespace, diff --git a/src/lang/latin/simple.js b/src/lang/latin/simple.js index 72daf17..38de37a 100644 --- a/src/lang/latin/simple.js +++ b/src/lang/latin/simple.js @@ -1,5 +1,5 @@ import { IndexInterface } from "../../type.js"; -import { normalize, regex } from "../../lang.js"; +import { pipeline, normalize, regex_whitespace, regex } from "../../lang.js"; export const rtl = false; export const tokenize = ""; @@ -11,7 +11,7 @@ export default { // Charset Normalization -const regex_whitespace = /[\W_]+/, +const //regex_whitespace = /\W+/, //regex_strip = regex("[^a-z0-9 ]"), regex_a = regex("[àáâãäå]"), regex_e = regex("[èéêë]"), @@ -49,8 +49,9 @@ export function encode(str){ str = "" + str; - return this.pipeline( + return pipeline.call( + this, /* string: */ normalize(str).toLowerCase(), /* normalize: */ !str.normalize && pairs, /* split: */ regex_whitespace, diff --git a/src/type.js b/src/type.js index 82ccde6..f6acd1b 100644 --- a/src/type.js +++ b/src/type.js @@ -18,7 +18,7 @@ export function IndexInterface(){ * @returns {string|Array} */ -IndexInterface.prototype.pipeline; +//IndexInterface.prototype.pipeline; /** * @param {!number|string} id