1/** 2 * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.3.8 3 * Copyright (C) 2019 Oliver Nightingale 4 * @license MIT 5 */ 6 7;(function(){ 8 9/** 10 * A convenience function for configuring and constructing 11 * a new lunr Index. 12 * 13 * A lunr.Builder instance is created and the pipeline setup 14 * with a trimmer, stop word filter and stemmer. 15 * 16 * This builder object is yielded to the configuration function 17 * that is passed as a parameter, allowing the list of fields 18 * and other builder parameters to be customised. 19 * 20 * All documents _must_ be added within the passed config function. 21 * 22 * @example 23 * var idx = lunr(function () { 24 * this.field('title') 25 * this.field('body') 26 * this.ref('id') 27 * 28 * documents.forEach(function (doc) { 29 * this.add(doc) 30 * }, this) 31 * }) 32 * 33 * @see {@link lunr.Builder} 34 * @see {@link lunr.Pipeline} 35 * @see {@link lunr.trimmer} 36 * @see {@link lunr.stopWordFilter} 37 * @see {@link lunr.stemmer} 38 * @namespace {function} lunr 39 */ 40var lunr = function (config) { 41 var builder = new lunr.Builder 42 43 builder.pipeline.add( 44 lunr.trimmer, 45 lunr.stopWordFilter, 46 lunr.stemmer 47 ) 48 49 builder.searchPipeline.add( 50 lunr.stemmer 51 ) 52 53 config.call(builder, builder) 54 return builder.build() 55} 56 57lunr.version = "2.3.8" 58/*! 59 * lunr.utils 60 * Copyright (C) 2019 Oliver Nightingale 61 */ 62 63/** 64 * A namespace containing utils for the rest of the lunr library 65 * @namespace lunr.utils 66 */ 67lunr.utils = {} 68 69/** 70 * Print a warning message to the console. 71 * 72 * @param {String} message The message to be printed. 
73 * @memberOf lunr.utils 74 * @function 75 */ 76lunr.utils.warn = (function (global) { 77 /* eslint-disable no-console */ 78 return function (message) { 79 if (global.console && console.warn) { 80 console.warn(message) 81 } 82 } 83 /* eslint-enable no-console */ 84})(this) 85 86/** 87 * Convert an object to a string. 88 * 89 * In the case of `null` and `undefined` the function returns 90 * the empty string, in all other cases the result of calling 91 * `toString` on the passed object is returned. 92 * 93 * @param {Any} obj The object to convert to a string. 94 * @return {String} string representation of the passed object. 95 * @memberOf lunr.utils 96 */ 97lunr.utils.asString = function (obj) { 98 if (obj === void 0 || obj === null) { 99 return "" 100 } else { 101 return obj.toString() 102 } 103} 104 105/** 106 * Clones an object. 107 * 108 * Will create a copy of an existing object such that any mutations 109 * on the copy cannot affect the original. 110 * 111 * Only shallow objects are supported, passing a nested object to this 112 * function will cause a TypeError. 113 * 114 * Objects with primitives, and arrays of primitives are supported. 115 * 116 * @param {Object} obj The object to clone. 117 * @return {Object} a clone of the passed object. 118 * @throws {TypeError} when a nested object is passed. 
lunr.utils.clone = function (obj) {
  if (obj === null || obj === undefined) {
    return obj
  }

  var clone = Object.create(null),
      keys = Object.keys(obj)

  for (var i = 0; i < keys.length; i++) {
    var key = keys[i],
        val = obj[key]

    if (Array.isArray(val)) {
      clone[key] = val.slice()
      continue
    }

    if (typeof val === 'string' ||
        typeof val === 'number' ||
        typeof val === 'boolean') {
      clone[key] = val
      continue
    }

    throw new TypeError("clone is not deep and does not support nested objects")
  }

  return clone
}

/**
 * A FieldRef identifies a single field within a single document,
 * pairing the document reference with the field name. The string
 * form is "fieldName/docRef" and is lazily memoised.
 *
 * @constructor
 * @param {String} docRef - The document reference.
 * @param {String} fieldName - The name of the field.
 * @param {String} [stringValue] - Optional pre-computed string form.
 */
lunr.FieldRef = function (docRef, fieldName, stringValue) {
  this.docRef = docRef
  this.fieldName = fieldName
  this._stringValue = stringValue
}

lunr.FieldRef.joiner = "/"

/**
 * Parses a "fieldName/docRef" string into a lunr.FieldRef.
 *
 * @param {String} s - The serialised field ref.
 * @returns {lunr.FieldRef}
 * @throws when the string contains no joiner character.
 */
lunr.FieldRef.fromString = function (s) {
  var n = s.indexOf(lunr.FieldRef.joiner)

  if (n === -1) {
    throw "malformed field ref string"
  }

  var fieldRef = s.slice(0, n),
      docRef = s.slice(n + 1)

  return new lunr.FieldRef (docRef, fieldRef, s)
}

lunr.FieldRef.prototype.toString = function () {
  // Lazily build and cache the string representation.
  if (this._stringValue == undefined) {
    this._stringValue = this.fieldName + lunr.FieldRef.joiner + this.docRef
  }

  return this._stringValue
}
/*!
 * lunr.Set
 * Copyright (C) 2019 Oliver Nightingale
 */

/**
 * A lunr set.
 *
 * @constructor
 * @param {Array} [elements] - Elements to initialise the set with.
 */
lunr.Set = function (elements) {
  this.elements = Object.create(null)

  if (elements) {
    this.length = elements.length

    for (var i = 0; i < this.length; i++) {
      this.elements[elements[i]] = true
    }
  } else {
    this.length = 0
  }
}

/**
 * A complete set that contains all elements.
 *
 * @static
 * @readonly
 * @type {lunr.Set}
 */
lunr.Set.complete = {
  intersect: function (other) {
    return other
  },

  union: function (other) {
    return other
  },

  contains: function () {
    return true
  }
}

/**
 * An empty set that contains no elements.
 *
 * @static
 * @readonly
 * @type {lunr.Set}
 */
lunr.Set.empty = {
  intersect: function () {
    return this
  },

  union: function (other) {
    return other
  },

  contains: function () {
    return false
  }
}

/**
 * Returns true if this set contains the specified object.
 *
 * @param {object} object - Object whose presence in this set is to be tested.
 * @returns {boolean} - True if this set contains the specified object.
 */
lunr.Set.prototype.contains = function (object) {
  return !!this.elements[object]
}

/**
 * Returns a new set containing only the elements that are present in both
 * this set and the specified set.
 *
 * @param {lunr.Set} other - set to intersect with this set.
 * @returns {lunr.Set} a new set that is the intersection of this and the specified set.
 */
lunr.Set.prototype.intersect = function (other) {
  var a, b, elements, intersection = []

  if (other === lunr.Set.complete) {
    return this
  }

  if (other === lunr.Set.empty) {
    return other
  }

  // Iterate over the smaller of the two sets for fewer lookups.
  if (this.length < other.length) {
    a = this
    b = other
  } else {
    a = other
    b = this
  }

  elements = Object.keys(a.elements)

  for (var i = 0; i < elements.length; i++) {
    var element = elements[i]
    if (element in b.elements) {
      intersection.push(element)
    }
  }

  return new lunr.Set (intersection)
}

/**
 * Returns a new set combining the elements of this and the specified set.
 *
 * @param {lunr.Set} other - set to union with this set.
 * @return {lunr.Set} a new set that is the union of this and the specified set.
 */
lunr.Set.prototype.union = function (other) {
  if (other === lunr.Set.complete) {
    return lunr.Set.complete
  }

  if (other === lunr.Set.empty) {
    return this
  }

  return new lunr.Set(Object.keys(this.elements).concat(Object.keys(other.elements)))
}
/**
 * A function to calculate the inverse document frequency for
 * a posting. This is shared between the builder and the index
 *
 * @private
 * @param {object} posting - The posting for a given term
 * @param {number} documentCount - The total number of documents.
 */
lunr.idf = function (posting, documentCount) {
  var documentsWithTerm = 0

  for (var fieldName in posting) {
    if (fieldName == '_index') continue // Ignore the term index, it's not a field
    documentsWithTerm += Object.keys(posting[fieldName]).length
  }

  var x = (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5)

  return Math.log(1 + Math.abs(x))
}

/**
 * A token wraps a string representation of a token
 * as it is passed through the text processing pipeline.
 *
 * @constructor
 * @param {string} [str=''] - The string token being wrapped.
 * @param {object} [metadata={}] - Metadata associated with this token.
 */
lunr.Token = function (str, metadata) {
  this.str = str || ""
  this.metadata = metadata || {}
}

/**
 * Returns the token string that is being wrapped by this object.
 *
 * @returns {string}
 */
lunr.Token.prototype.toString = function () {
  return this.str
}

/**
 * A token update function is used when updating or optionally
 * when cloning a token.
 *
 * @callback lunr.Token~updateFunction
 * @param {string} str - The string representation of the token.
 * @param {Object} metadata - All metadata associated with this token.
 */
/**
 * Applies the given function to the wrapped string token.
 *
 * @example
 * token.update(function (str, metadata) {
 *   return str.toUpperCase()
 * })
 *
 * @param {lunr.Token~updateFunction} fn - A function to apply to the token string.
 * @returns {lunr.Token}
 */
lunr.Token.prototype.update = function (fn) {
  this.str = fn(this.str, this.metadata)
  return this
}

/**
 * Creates a clone of this token. Optionally a function can be
 * applied to the cloned token.
 *
 * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token.
 * @returns {lunr.Token}
 */
lunr.Token.prototype.clone = function (fn) {
  fn = fn || function (s) { return s }
  return new lunr.Token (fn(this.str, this.metadata), this.metadata)
}
/*!
 * lunr.tokenizer
 * Copyright (C) 2019 Oliver Nightingale
 */

/**
 * A function for splitting a string into tokens ready to be inserted into
 * the search index. Uses `lunr.tokenizer.separator` to split strings, change
 * the value of this property to change how strings are split into tokens.
 *
 * This tokenizer will convert its parameter to a string by calling `toString` and
 * then will split this string on the character in `lunr.tokenizer.separator`.
 * Arrays will have their elements converted to strings and wrapped in a lunr.Token.
 *
 * Optional metadata can be passed to the tokenizer, this metadata will be cloned and
 * added as metadata to every token that is created from the object to be tokenized.
 *
 * @static
 * @param {?(string|object|object[])} obj - The object to convert into tokens
 * @param {?object} metadata - Optional metadata to associate with every token
 * @returns {lunr.Token[]}
 * @see {@link lunr.Pipeline}
 */
lunr.tokenizer = function (obj, metadata) {
  if (obj == null || obj == undefined) {
    return []
  }

  if (Array.isArray(obj)) {
    return obj.map(function (t) {
      return new lunr.Token(
        lunr.utils.asString(t).toLowerCase(),
        lunr.utils.clone(metadata)
      )
    })
  }

  var str = obj.toString().toLowerCase(),
      len = str.length,
      tokens = []

  for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) {
    var char = str.charAt(sliceEnd),
        sliceLength = sliceEnd - sliceStart

    if ((char.match(lunr.tokenizer.separator) || sliceEnd == len)) {

      if (sliceLength > 0) {
        var tokenMetadata = lunr.utils.clone(metadata) || {}
        // Record where in the source string this token came from,
        // and its ordinal position among the emitted tokens.
        tokenMetadata["position"] = [sliceStart, sliceLength]
        tokenMetadata["index"] = tokens.length

        tokens.push(
          new lunr.Token (
            str.slice(sliceStart, sliceEnd),
            tokenMetadata
          )
        )
      }

      sliceStart = sliceEnd + 1
    }

  }

  return tokens
}

/**
 * The separator used to split a string into tokens. Override this property to change the behaviour of
 * `lunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/
/*!
 * lunr.Pipeline
 * Copyright (C) 2019 Oliver Nightingale
 */

/**
 * lunr.Pipelines maintain an ordered list of functions to be applied to all
 * tokens in documents entering the search index and queries being run against
 * the index.
 *
 * An instance of lunr.Index created with the lunr shortcut will contain a
 * pipeline with a stop word filter and an English language stemmer. Extra
 * functions can be added before or after either of these functions or these
 * default functions can be removed.
 *
 * When run the pipeline will call each function in turn, passing a token, the
 * index of that token in the original list of all tokens and finally a list of
 * all the original tokens.
 *
 * The output of functions in the pipeline will be passed to the next function
 * in the pipeline. To exclude a token from entering the index the function
 * should return undefined, the rest of the pipeline will not be called with
 * this token.
 *
 * For serialisation of pipelines to work, all functions used in an instance of
 * a pipeline should be registered with lunr.Pipeline. Registered functions can
 * then be loaded. If trying to load a serialised pipeline that uses functions
 * that are not registered an error will be thrown.
 *
 * If not planning on serialising the pipeline then registering pipeline functions
 * is not necessary.
 *
 * @constructor
 */
lunr.Pipeline = function () {
  this._stack = []
}

// Registry of serialisable pipeline functions, keyed by label.
// Object.create(null) avoids prototype-pollution style collisions.
lunr.Pipeline.registeredFunctions = Object.create(null)

/**
 * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
 * string as well as all known metadata. A pipeline function can mutate the token string
 * or mutate (or add) metadata for a given token.
 *
 * A pipeline function can indicate that the passed token should be discarded by returning
 * null, undefined or an empty string. This token will not be passed to any downstream pipeline
 * functions and will not be added to the index.
 *
 * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
 * to any downstream pipeline functions and all returned tokens will be added to the index.
 *
 * Any number of pipeline functions may be chained together using a lunr.Pipeline.
 *
 * @interface lunr.PipelineFunction
 * @param {lunr.Token} token - A token from the document being processed.
 * @param {number} i - The index of this token in the complete list of tokens for this document/field.
 * @param {lunr.Token[]} tokens - All tokens for this document/field.
 * @returns {(?lunr.Token|lunr.Token[])}
 */
/**
 * Register a function with the pipeline.
 *
 * Functions that are used in the pipeline should be registered if the pipeline
 * needs to be serialised, or a serialised pipeline needs to be loaded.
 *
 * Registering a function does not add it to a pipeline, functions must still be
 * added to instances of the pipeline for them to be used when running a pipeline.
 *
 * @param {lunr.PipelineFunction} fn - The function to register.
 * @param {String} label - The label to register this function with
 */
lunr.Pipeline.registerFunction = function (fn, label) {
  if (label in this.registeredFunctions) {
    lunr.utils.warn('Overwriting existing registered function: ' + label)
  }

  // The label is stored on the function itself so that serialisation
  // (toJSON) can recover it later.
  fn.label = label
  lunr.Pipeline.registeredFunctions[fn.label] = fn
}

/**
 * Warns if the function is not registered as a Pipeline function.
 *
 * @param {lunr.PipelineFunction} fn - The function to check for.
 * @private
 */
lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  var isRegistered = fn.label && (fn.label in this.registeredFunctions)

  if (!isRegistered) {
    lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
  }
}

/**
 * Loads a previously serialised pipeline.
 *
 * All functions to be loaded must already be registered with lunr.Pipeline.
 * If any function from the serialised data has not been registered then an
 * error will be thrown.
 *
 * @param {Object} serialised - The serialised pipeline to load.
 * @returns {lunr.Pipeline}
 */
lunr.Pipeline.load = function (serialised) {
  var pipeline = new lunr.Pipeline

  serialised.forEach(function (fnName) {
    var fn = lunr.Pipeline.registeredFunctions[fnName]

    if (fn) {
      pipeline.add(fn)
    } else {
      throw new Error('Cannot load unregistered function: ' + fnName)
    }
  })

  return pipeline
}

/**
 * Adds new functions to the end of the pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
 */
lunr.Pipeline.prototype.add = function () {
  var fns = Array.prototype.slice.call(arguments)

  fns.forEach(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    this._stack.push(fn)
  }, this)
}

/**
 * Adds a single function after a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 */
lunr.Pipeline.prototype.after = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  pos = pos + 1
  this._stack.splice(pos, 0, newFn)
}

/**
 * Adds a single function before a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 */
lunr.Pipeline.prototype.before = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  this._stack.splice(pos, 0, newFn)
}

/**
 * Removes a function from the pipeline.
 *
 * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
 */
lunr.Pipeline.prototype.remove = function (fn) {
  var pos = this._stack.indexOf(fn)
  if (pos == -1) {
    return
  }

  this._stack.splice(pos, 1)
}

/**
 * Runs the current list of functions that make up the pipeline against the
 * passed tokens.
 *
 * @param {Array} tokens The tokens to run through the pipeline.
 * @returns {Array}
 */
lunr.Pipeline.prototype.run = function (tokens) {
  var stackLength = this._stack.length

  for (var i = 0; i < stackLength; i++) {
    var fn = this._stack[i]
    var memo = []

    for (var j = 0; j < tokens.length; j++) {
      var result = fn(tokens[j], j, tokens)

      // null, undefined and '' drop the token from further processing.
      if (result === null || result === void 0 || result === '') continue

      if (Array.isArray(result)) {
        // A function may expand one token into several.
        for (var k = 0; k < result.length; k++) {
          memo.push(result[k])
        }
      } else {
        memo.push(result)
      }
    }

    tokens = memo
  }

  return tokens
}

/**
 * Convenience method for passing a string through a pipeline and getting
 * strings out. This method takes care of wrapping the passed string in a
 * token and mapping the resulting tokens back to strings.
 *
 * @param {string} str - The string to pass through the pipeline.
 * @param {?object} metadata - Optional metadata to associate with the token
 * passed to the pipeline.
 * @returns {string[]}
 */
lunr.Pipeline.prototype.runString = function (str, metadata) {
  var token = new lunr.Token (str, metadata)

  return this.run([token]).map(function (t) {
    return t.toString()
  })
}

/**
 * Resets the pipeline by removing any existing processors.
 *
 */
lunr.Pipeline.prototype.reset = function () {
  this._stack = []
}

/**
 * Returns a representation of the pipeline ready for serialisation.
 *
 * Logs a warning if the function has not been registered.
 *
 * @returns {Array}
 */
lunr.Pipeline.prototype.toJSON = function () {
  return this._stack.map(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)

    return fn.label
  })
}
/*!
 * lunr.Vector
 * Copyright (C) 2019 Oliver Nightingale
 */

/**
 * A vector is used to construct the vector space of documents and queries. These
 * vectors support operations to determine the similarity between two documents or
 * a document and a query.
 *
 * Normally no parameters are required for initializing a vector, but in the case of
 * loading a previously dumped vector the raw elements can be provided to the constructor.
 *
 * For performance reasons vectors are implemented with a flat array, where an elements
 * index is immediately followed by its value. E.g. [index, value, index, value]. This
 * allows the underlying array to be as sparse as possible and still offer decent
 * performance when being used for vector calculations.
 *
 * @constructor
 * @param {Number[]} [elements] - The flat list of element index and element value pairs.
 */
lunr.Vector = function (elements) {
  this._magnitude = 0
  this.elements = elements || []
}


/**
 * Calculates the position within the vector to insert a given index.
 *
 * This is used internally by insert and upsert. If there are duplicate indexes then
 * the position is returned as if the value for that index were to be updated, but it
 * is the callers responsibility to check whether there is a duplicate at that index
 *
 * @param {Number} index - The index at which the element should be inserted.
 * @returns {Number}
 */
lunr.Vector.prototype.positionForIndex = function (index) {
  // For an empty vector the tuple can be inserted at the beginning
  if (this.elements.length == 0) {
    return 0
  }

  // Binary search over the (index, value) pairs; positions are measured
  // in pairs, hence the * 2 when touching the flat array.
  var start = 0,
      end = this.elements.length / 2,
      sliceLength = end - start,
      pivotPoint = Math.floor(sliceLength / 2),
      pivotIndex = this.elements[pivotPoint * 2]

  while (sliceLength > 1) {
    if (pivotIndex < index) {
      start = pivotPoint
    }

    if (pivotIndex > index) {
      end = pivotPoint
    }

    if (pivotIndex == index) {
      break
    }

    sliceLength = end - start
    pivotPoint = start + Math.floor(sliceLength / 2)
    pivotIndex = this.elements[pivotPoint * 2]
  }

  if (pivotIndex == index) {
    return pivotPoint * 2
  }

  if (pivotIndex > index) {
    return pivotPoint * 2
  }

  if (pivotIndex < index) {
    return (pivotPoint + 1) * 2
  }
}

/**
 * Inserts an element at an index within the vector.
 *
 * Does not allow duplicates, will throw an error if there is already an entry
 * for this index.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 */
lunr.Vector.prototype.insert = function (insertIdx, val) {
  this.upsert(insertIdx, val, function () {
    throw "duplicate index"
  })
}

/**
 * Inserts or updates an existing index within the vector.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 * @param {function} fn - A function that is called for updates, the existing value and the
 * requested value are passed as arguments
 */
lunr.Vector.prototype.upsert = function (insertIdx, val, fn) {
  // Any change invalidates the cached magnitude.
  this._magnitude = 0
  var position = this.positionForIndex(insertIdx)

  if (this.elements[position] == insertIdx) {
    this.elements[position + 1] = fn(this.elements[position + 1], val)
  } else {
    this.elements.splice(position, 0, insertIdx, val)
  }
}

/**
 * Calculates the magnitude of this vector.
 *
 * @returns {Number}
 */
lunr.Vector.prototype.magnitude = function () {
  // Cached after first computation; reset by upsert.
  if (this._magnitude) return this._magnitude

  var sumOfSquares = 0,
      elementsLength = this.elements.length

  // Values live at the odd positions of the flat [index, value, ...] array.
  for (var i = 1; i < elementsLength; i += 2) {
    var val = this.elements[i]
    sumOfSquares += val * val
  }

  return this._magnitude = Math.sqrt(sumOfSquares)
}

/**
 * Calculates the dot product of this vector and another vector.
 *
 * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
 * @returns {Number}
 */
lunr.Vector.prototype.dot = function (otherVector) {
  var dotProduct = 0,
      a = this.elements, b = otherVector.elements,
      aLen = a.length, bLen = b.length,
      aVal = 0, bVal = 0,
      i = 0, j = 0

  // Merge-style walk over the two sorted index lists.
  while (i < aLen && j < bLen) {
    aVal = a[i], bVal = b[j]
    if (aVal < bVal) {
      i += 2
    } else if (aVal > bVal) {
      j += 2
    } else if (aVal == bVal) {
      dotProduct += a[i + 1] * b[j + 1]
      i += 2
      j += 2
    }
  }

  return dotProduct
}

/**
 * Calculates the similarity between this vector and another vector.
 *
 * @param {lunr.Vector} otherVector - The other vector to calculate the
 * similarity with.
 * @returns {Number}
 */
lunr.Vector.prototype.similarity = function (otherVector) {
  // || 0 guards against division by a zero magnitude producing NaN.
  return this.dot(otherVector) / this.magnitude() || 0
}

/**
 * Converts the vector to an array of the elements within the vector.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toArray = function () {
  var output = new Array (this.elements.length / 2)

  for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) {
    output[j] = this.elements[i]
  }

  return output
}

/**
 * A JSON serializable representation of the vector.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toJSON = function () {
  return this.elements
}
/* eslint-disable */
/*!
 * lunr.stemmer
 * Copyright (C) 2019 Oliver Nightingale
 * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
 */

/**
 * lunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token - The string to stem
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 * @function
 */
lunr.stemmer = (function(){
  var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    },

    step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    },

    c = "[^aeiou]",          // consonant
    v = "[aeiouy]",          // vowel
    C = c + "[^aeiouy]*",    // consonant sequence
    V = v + "[aeiou]*",      // vowel sequence

    mgr0 = "^(" + C + ")?" + V + C,                   // [C]VC... is m>0
    meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1
    mgr1 = "^(" + C + ")?" + V + C + V + C,           // [C]VCVC... is m>1
    s_v = "^(" + C + ")?" + v;                        // vowel in stem

  var re_mgr0 = new RegExp(mgr0);
  var re_mgr1 = new RegExp(mgr1);
  var re_meq1 = new RegExp(meq1);
  var re_s_v = new RegExp(s_v);

  var re_1a = /^(.+?)(ss|i)es$/;
  var re2_1a = /^(.+?)([^s])s$/;
  var re_1b = /^(.+?)eed$/;
  var re2_1b = /^(.+?)(ed|ing)$/;
  var re_1b_2 = /.$/;
  var re2_1b_2 = /(at|bl|iz)$/;
  var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
  var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var re_1c = /^(.+?[^aeiou])y$/;
  var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;

  var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;

  var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var re2_4 = /^(.+?)(s|t)(ion)$/;

  var re_5 = /^(.+?)e$/;
  var re_5_1 = /ll$/;
  var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var porterStemmer = function porterStemmer(w) {
    var stem,
      suffix,
      firstch,
      re,
      re2,
      re3,
      re4;

    if (w.length < 3) { return w; }

    firstch = w.substr(0,1);
    if (firstch == "y") {
      w = firstch.toUpperCase() + w.substr(1);
    }

    // Step 1a
    re = re_1a
    re2 = re2_1a;

    if (re.test(w)) { w = w.replace(re,"$1$2"); }
    else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }

    // Step 1b
    re = re_1b;
    re2 = re2_1b;
    if (re.test(w)) {
      var fp = re.exec(w);
      re = re_mgr0;
      if (re.test(fp[1])) {
        re = re_1b_2;
        w = w.replace(re,"");
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = re_s_v;
      if (re2.test(stem)) {
        w = stem;
        re2 = re2_1b_2;
        re3 = re3_1b_2;
        re4 = re4_1b_2;
        if (re2.test(w)) { w = w + "e"; }
        else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }
        else if (re4.test(w)) { w = w + "e"; }
      }
    }

    // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
    re = re_1c;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      w = stem + "i";
    }

    // Step 2
    re = re_2;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3
    re = re_3;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4
    re = re_4;
    re2 = re2_4;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      if (re.test(stem)) {
        w = stem;
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1] + fp[2];
      re2 = re_mgr1;
      if (re2.test(stem)) {
        w = stem;
      }
    }

    // Step 5
    re = re_5;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      re2 = re_meq1;
      re3 = re3_5;
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
        w = stem;
      }
    }

    re = re_5_1;
    re2 = re_mgr1;
    if (re.test(w) && re2.test(w)) {
      re = re_1b_2;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y

    if (firstch == "y") {
      w = firstch.toLowerCase() + w.substr(1);
    }

    return w;
  };

  return function (token) {
    return token.update(porterStemmer);
  }
})();

lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
/*!
 * lunr.stopWordFilter
 * Copyright (C) 2019 Oliver Nightingale
 */

/**
 * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
 * list of stop words.
 *
 * The built in lunr.stopWordFilter is built using this generator and can be used
 * to generate custom stopWordFilters for applications or non English languages.
 *
 * @function
 * @param {Array} stopWords The list of stop words to filter out.
 * @returns {lunr.PipelineFunction}
 * @see lunr.Pipeline
 * @see lunr.stopWordFilter
 */
lunr.generateStopWordFilter = function (stopWords) {
  var words = stopWords.reduce(function (memo, stopWord) {
    memo[stopWord] = stopWord
    return memo
  }, {})

  return function (token) {
    // Returning undefined for a stop word drops it from the pipeline.
    if (token && words[token.toString()] !== token.toString()) return token
  }
}

/**
 * lunr.stopWordFilter is an English language stop word list filter, any words
 * contained in the list will not be passed through the filter.
 *
 * This is intended to be used in the Pipeline. If the token does not pass the
 * filter then undefined will be returned.
 *
 * @function
 * @implements {lunr.PipelineFunction}
 * @params {lunr.Token} token - A token to check for being a stop word.
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 */
1191 * @returns {lunr.Token} 1192 * @see {@link lunr.Pipeline} 1193 */ 1194lunr.stopWordFilter = lunr.generateStopWordFilter([ 1195 'a', 1196 'able', 1197 'about', 1198 'across', 1199 'after', 1200 'all', 1201 'almost', 1202 'also', 1203 'am', 1204 'among', 1205 'an', 1206 'and', 1207 'any', 1208 'are', 1209 'as', 1210 'at', 1211 'be', 1212 'because', 1213 'been', 1214 'but', 1215 'by', 1216 'can', 1217 'cannot', 1218 'could', 1219 'dear', 1220 'did', 1221 'do', 1222 'does', 1223 'either', 1224 'else', 1225 'ever', 1226 'every', 1227 'for', 1228 'from', 1229 'get', 1230 'got', 1231 'had', 1232 'has', 1233 'have', 1234 'he', 1235 'her', 1236 'hers', 1237 'him', 1238 'his', 1239 'how', 1240 'however', 1241 'i', 1242 'if', 1243 'in', 1244 'into', 1245 'is', 1246 'it', 1247 'its', 1248 'just', 1249 'least', 1250 'let', 1251 'like', 1252 'likely', 1253 'may', 1254 'me', 1255 'might', 1256 'most', 1257 'must', 1258 'my', 1259 'neither', 1260 'no', 1261 'nor', 1262 'not', 1263 'of', 1264 'off', 1265 'often', 1266 'on', 1267 'only', 1268 'or', 1269 'other', 1270 'our', 1271 'own', 1272 'rather', 1273 'said', 1274 'say', 1275 'says', 1276 'she', 1277 'should', 1278 'since', 1279 'so', 1280 'some', 1281 'than', 1282 'that', 1283 'the', 1284 'their', 1285 'them', 1286 'then', 1287 'there', 1288 'these', 1289 'they', 1290 'this', 1291 'tis', 1292 'to', 1293 'too', 1294 'twas', 1295 'us', 1296 'wants', 1297 'was', 1298 'we', 1299 'were', 1300 'what', 1301 'when', 1302 'where', 1303 'which', 1304 'while', 1305 'who', 1306 'whom', 1307 'why', 1308 'will', 1309 'with', 1310 'would', 1311 'yet', 1312 'you', 1313 'your' 1314]) 1315 1316lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter') 1317/*! 1318 * lunr.trimmer 1319 * Copyright (C) 2019 Oliver Nightingale 1320 */ 1321 1322/** 1323 * lunr.trimmer is a pipeline function for trimming non word 1324 * characters from the beginning and end of tokens before they 1325 * enter the index. 
1326 * 1327 * This implementation may not work correctly for non latin 1328 * characters and should either be removed or adapted for use 1329 * with languages with non-latin characters. 1330 * 1331 * @static 1332 * @implements {lunr.PipelineFunction} 1333 * @param {lunr.Token} token The token to pass through the filter 1334 * @returns {lunr.Token} 1335 * @see lunr.Pipeline 1336 */ 1337lunr.trimmer = function (token) { 1338 return token.update(function (s) { 1339 return s.replace(/^\W+/, '').replace(/\W+$/, '') 1340 }) 1341} 1342 1343lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer') 1344/*! 1345 * lunr.TokenSet 1346 * Copyright (C) 2019 Oliver Nightingale 1347 */ 1348 1349/** 1350 * A token set is used to store the unique list of all tokens 1351 * within an index. Token sets are also used to represent an 1352 * incoming query to the index, this query token set and index 1353 * token set are then intersected to find which tokens to look 1354 * up in the inverted index. 1355 * 1356 * A token set can hold multiple tokens, as in the case of the 1357 * index token set, or it can hold a single token as in the 1358 * case of a simple query token set. 1359 * 1360 * Additionally token sets are used to perform wildcard matching. 1361 * Leading, contained and trailing wildcards are supported, and 1362 * from this edit distance matching can also be provided. 1363 * 1364 * Token sets are implemented as a minimal finite state automata, 1365 * where both common prefixes and suffixes are shared between tokens. 1366 * This helps to reduce the space used for storing the token set. 1367 * 1368 * @constructor 1369 */ 1370lunr.TokenSet = function () { 1371 this.final = false 1372 this.edges = {} 1373 this.id = lunr.TokenSet._nextId 1374 lunr.TokenSet._nextId += 1 1375} 1376 1377/** 1378 * Keeps track of the next, auto increment, identifier to assign 1379 * to a new tokenSet. 1380 * 1381 * TokenSets require a unique identifier to be correctly minimised. 
1382 * 1383 * @private 1384 */ 1385lunr.TokenSet._nextId = 1 1386 1387/** 1388 * Creates a TokenSet instance from the given sorted array of words. 1389 * 1390 * @param {String[]} arr - A sorted array of strings to create the set from. 1391 * @returns {lunr.TokenSet} 1392 * @throws Will throw an error if the input array is not sorted. 1393 */ 1394lunr.TokenSet.fromArray = function (arr) { 1395 var builder = new lunr.TokenSet.Builder 1396 1397 for (var i = 0, len = arr.length; i < len; i++) { 1398 builder.insert(arr[i]) 1399 } 1400 1401 builder.finish() 1402 return builder.root 1403} 1404 1405/** 1406 * Creates a token set from a query clause. 1407 * 1408 * @private 1409 * @param {Object} clause - A single clause from lunr.Query. 1410 * @param {string} clause.term - The query clause term. 1411 * @param {number} [clause.editDistance] - The optional edit distance for the term. 1412 * @returns {lunr.TokenSet} 1413 */ 1414lunr.TokenSet.fromClause = function (clause) { 1415 if ('editDistance' in clause) { 1416 return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance) 1417 } else { 1418 return lunr.TokenSet.fromString(clause.term) 1419 } 1420} 1421 1422/** 1423 * Creates a token set representing a single string with a specified 1424 * edit distance. 1425 * 1426 * Insertions, deletions, substitutions and transpositions are each 1427 * treated as an edit distance of 1. 1428 * 1429 * Increasing the allowed edit distance will have a dramatic impact 1430 * on the performance of both creating and intersecting these TokenSets. 1431 * It is advised to keep the edit distance less than 3. 1432 * 1433 * @param {string} str - The string to create the token set from. 1434 * @param {number} editDistance - The allowed edit distance to match. 
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromFuzzyString = function (str, editDistance) {
  var root = new lunr.TokenSet

  // Explicit work stack instead of recursion; each frame is a partially
  // consumed string positioned at a node with a budget of edits left.
  var stack = [{
    node: root,
    editsRemaining: editDistance,
    str: str
  }]

  while (stack.length) {
    var frame = stack.pop()

    // no edit
    if (frame.str.length > 0) {
      var char = frame.str.charAt(0),
          noEditNode

      if (char in frame.node.edges) {
        noEditNode = frame.node.edges[char]
      } else {
        noEditNode = new lunr.TokenSet
        frame.node.edges[char] = noEditNode
      }

      if (frame.str.length == 1) {
        noEditNode.final = true
      }

      stack.push({
        node: noEditNode,
        editsRemaining: frame.editsRemaining,
        str: frame.str.slice(1)
      })
    }

    // Every branch below consumes one edit, so stop here when the budget
    // for this frame is exhausted.
    if (frame.editsRemaining == 0) {
      continue
    }

    // insertion ("*" matches any single inserted character)
    if ("*" in frame.node.edges) {
      var insertionNode = frame.node.edges["*"]
    } else {
      var insertionNode = new lunr.TokenSet
      frame.node.edges["*"] = insertionNode
    }

    if (frame.str.length == 0) {
      insertionNode.final = true
    }

    stack.push({
      node: insertionNode,
      editsRemaining: frame.editsRemaining - 1,
      str: frame.str
    })

    // deletion
    // can only do a deletion if we have enough edits remaining
    // and if there are characters left to delete in the string
    if (frame.str.length > 1) {
      stack.push({
        node: frame.node,
        editsRemaining: frame.editsRemaining - 1,
        str: frame.str.slice(1)
      })
    }

    // deletion
    // just removing the last character from the str
    if (frame.str.length == 1) {
      frame.node.final = true
    }

    // substitution
    // can only do a substitution if we have enough edits remaining
    // and if there are characters left to substitute
    if (frame.str.length >= 1) {
      if ("*" in frame.node.edges) {
        var substitutionNode = frame.node.edges["*"]
      } else {
        var substitutionNode = new lunr.TokenSet
        frame.node.edges["*"] = substitutionNode
      }

      if (frame.str.length == 1) {
        substitutionNode.final = true
      }

      stack.push({
        node: substitutionNode,
        editsRemaining: frame.editsRemaining - 1,
        str: frame.str.slice(1)
      })
    }

    // transposition
    // can only do a transposition if there are edits remaining
    // and there are enough characters to transpose
    if (frame.str.length > 1) {
      var charA = frame.str.charAt(0),
          charB = frame.str.charAt(1),
          transposeNode

      if (charB in frame.node.edges) {
        transposeNode = frame.node.edges[charB]
      } else {
        transposeNode = new lunr.TokenSet
        frame.node.edges[charB] = transposeNode
      }

      if (frame.str.length == 1) {
        transposeNode.final = true
      }

      stack.push({
        node: transposeNode,
        editsRemaining: frame.editsRemaining - 1,
        str: charA + frame.str.slice(2)
      })
    }
  }

  return root
}

/**
 * Creates a TokenSet from a string.
 *
 * The string may contain one or more wildcard characters (*)
 * that will allow wildcard matching when intersecting with
 * another TokenSet.
 *
 * @param {string} str - The string to create a TokenSet from.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromString = function (str) {
  var node = new lunr.TokenSet,
      root = node

  /*
   * Iterates through all characters within the passed string
   * appending a node for each character.
   *
   * When a wildcard character is found then a self
   * referencing edge is introduced to continually match
   * any number of any characters.
   */
  for (var i = 0, len = str.length; i < len; i++) {
    var char = str[i],
        final = (i == len - 1)

    if (char == "*") {
      node.edges[char] = node
      node.final = final

    } else {
      var next = new lunr.TokenSet
      next.final = final

      node.edges[char] = next
      node = next
    }
  }

  return root
}

/**
 * Converts this TokenSet into an array of strings
 * contained within the TokenSet.
 *
 * This is not intended to be used on a TokenSet that
 * contains wildcards, in these cases the results are
 * undefined and are likely to cause an infinite loop.
 *
 * @returns {string[]}
 */
lunr.TokenSet.prototype.toArray = function () {
  var words = []

  // Depth-first walk of the automaton accumulating the prefix that led
  // to each node; final nodes mark complete words.
  var stack = [{
    prefix: "",
    node: this
  }]

  while (stack.length) {
    var frame = stack.pop(),
        edges = Object.keys(frame.node.edges),
        len = edges.length

    if (frame.node.final) {
      /* In Safari, at this point the prefix is sometimes corrupted, see:
       * https://github.com/olivernn/lunr.js/issues/279 Calling any
       * String.prototype method forces Safari to "cast" this string to what
       * it's supposed to be, fixing the bug. */
      frame.prefix.charAt(0)
      words.push(frame.prefix)
    }

    for (var i = 0; i < len; i++) {
      var edge = edges[i]

      stack.push({
        prefix: frame.prefix.concat(edge),
        node: frame.node.edges[edge]
      })
    }
  }

  return words
}

/**
 * Generates a string representation of a TokenSet.
 *
 * This is intended to allow TokenSets to be used as keys
 * in objects, largely to aid the construction and minimisation
 * of a TokenSet. As such it is not designed to be a human
 * friendly representation of the TokenSet.
 *
 * @returns {string}
 */
lunr.TokenSet.prototype.toString = function () {
  // NOTE: Using Object.keys here as this.edges is very likely
  // to enter 'hash-mode' with many keys being added
  //
  // avoiding a for-in loop here as it leads to the function
  // being de-optimised (at least in V8). From some simple
  // benchmarks the performance is comparable, but allowing
  // V8 to optimize may mean easy performance wins in the future.

  // _str is set by the builder once a node is minimised and can no
  // longer change, so it is safe to use as a cache here.
  if (this._str) {
    return this._str
  }

  var str = this.final ? '1' : '0',
      labels = Object.keys(this.edges).sort(),
      len = labels.length

  for (var i = 0; i < len; i++) {
    var label = labels[i],
        node = this.edges[label]

    // Two nodes get equal strings only if they have the same finality and
    // identical transitions to identical (same id) targets.
    str = str + label + node.id
  }

  return str
}

/**
 * Returns a new TokenSet that is the intersection of
 * this TokenSet and the passed TokenSet.
 *
 * This intersection will take into account any wildcards
 * contained within the TokenSet.
 *
 * @param {lunr.TokenSet} b - Another TokenSet to intersect with.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.prototype.intersect = function (b) {
  var output = new lunr.TokenSet,
      frame = undefined

  // Walk both automata in lock-step, building the product automaton of
  // states reachable in both at once.
  var stack = [{
    qNode: b,
    output: output,
    node: this
  }]

  while (stack.length) {
    frame = stack.pop()

    // NOTE: As with the #toString method, we are using
    // Object.keys and a for loop instead of a for-in loop
    // as both of these objects enter 'hash' mode, causing
    // the function to be de-optimised in V8
    var qEdges = Object.keys(frame.qNode.edges),
        qLen = qEdges.length,
        nEdges = Object.keys(frame.node.edges),
        nLen = nEdges.length

    for (var q = 0; q < qLen; q++) {
      var qEdge = qEdges[q]

      for (var n = 0; n < nLen; n++) {
        var nEdge = nEdges[n]

        // A query wildcard edge matches every index edge.
        if (nEdge == qEdge || qEdge == '*') {
          var node = frame.node.edges[nEdge],
              qNode = frame.qNode.edges[qEdge],
              final = node.final && qNode.final,
              next = undefined

          if (nEdge in frame.output.edges) {
            // an edge already exists for this character
            // no need to create a new node, just set the finality
            // bit unless this node is already final
            next = frame.output.edges[nEdge]
            next.final = next.final || final

          } else {
            // no edge exists yet, must create one
            // set the finality bit and insert it
            // into the output
            next = new lunr.TokenSet
            next.final = final
            frame.output.edges[nEdge] = next
          }

          stack.push({
            qNode: qNode,
            output: next,
            node: node
          })
        }
      }
    }
  }

  return output
}
// Incrementally builds a minimal TokenSet automaton from words inserted
// in sorted order, sharing common prefixes and suffixes between words.
lunr.TokenSet.Builder = function () {
  this.previousWord = ""
  this.root = new lunr.TokenSet
  // Nodes on the path of the last inserted word that may still gain edges.
  this.uncheckedNodes = []
  // Minimised nodes keyed by their canonical string (see TokenSet#toString).
  this.minimizedNodes = {}
}

// Adds a word to the automaton; words MUST arrive in sorted order.
lunr.TokenSet.Builder.prototype.insert = function (word) {
  var node,
      commonPrefix = 0

  if (word < this.previousWord) {
    throw new Error ("Out of order word insertion")
  }

  // Find how many leading characters this word shares with the previous one.
  for (var i = 0; i < word.length && i < this.previousWord.length; i++) {
    if (word[i] != this.previousWord[i]) break
    commonPrefix++
  }

  // Everything past the shared prefix of the previous word can no longer
  // change, so it is safe to minimise it now.
  this.minimize(commonPrefix)

  if (this.uncheckedNodes.length == 0) {
    node = this.root
  } else {
    node = this.uncheckedNodes[this.uncheckedNodes.length - 1].child
  }

  // Append fresh nodes for the unshared suffix of the new word.
  for (var i = commonPrefix; i < word.length; i++) {
    var nextNode = new lunr.TokenSet,
        char = word[i]

    node.edges[char] = nextNode

    this.uncheckedNodes.push({
      parent: node,
      char: char,
      child: nextNode
    })

    node = nextNode
  }

  node.final = true
  this.previousWord = word
}

// Minimises any remaining unchecked nodes after the last insert.
lunr.TokenSet.Builder.prototype.finish = function () {
  this.minimize(0)
}

// Replaces unchecked nodes (deepest first, down to index downTo) with an
// equivalent already-minimised node where one exists.
lunr.TokenSet.Builder.prototype.minimize = function (downTo) {
  for (var i = this.uncheckedNodes.length - 1; i >= downTo; i--) {
    var node = this.uncheckedNodes[i],
        childKey = node.child.toString()

    if (childKey in this.minimizedNodes) {
      node.parent.edges[node.char] = this.minimizedNodes[childKey]
    } else {
      // Cache the key for this node since
      // we know it can't change anymore
      node.child._str = childKey

      this.minimizedNodes[childKey] = node.child
    }

    this.uncheckedNodes.pop()
  }
}
/*!
 * lunr.Index
 * Copyright (C) 2019 Oliver Nightingale
 */

/**
 * An index contains the built index of all documents and provides a query interface
 * to the index.
 *
 * Usually instances of lunr.Index will not be created using this constructor, instead
 * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be
 * used to load previously built and serialized indexes.
 *
 * @constructor
 * @param {Object} attrs - The attributes of the built search index.
 * @param {Object} attrs.invertedIndex - An index of term/field to document reference.
 * @param {Object<string, lunr.Vector>} attrs.fieldVectors - Field vectors
 * @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens.
 * @param {string[]} attrs.fields - The names of indexed document fields.
 * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.
 */
lunr.Index = function (attrs) {
  this.invertedIndex = attrs.invertedIndex
  this.fieldVectors = attrs.fieldVectors
  this.tokenSet = attrs.tokenSet
  this.fields = attrs.fields
  this.pipeline = attrs.pipeline
}

/**
 * A result contains details of a document matching a search query.
 * @typedef {Object} lunr.Index~Result
 * @property {string} ref - The reference of the document this result represents.
 * @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
 * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.
 */

/**
 * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
 * query language which itself is parsed into an instance of lunr.Query.
 *
 * For programmatically building queries it is advised to directly use lunr.Query, the query language
 * is best used for human entered text rather than program generated text.
 *
 * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
 * and will be combined with OR, e.g. `hello world` will match documents that contain either 'hello'
 * or 'world', though those that contain both will rank higher in the results.
1875 * 1876 * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can 1877 * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding 1878 * wildcards will increase the number of documents that will be found but can also have a negative 1879 * impact on query performance, especially with wildcards at the beginning of a term. 1880 * 1881 * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term 1882 * hello in the title field will match this query. Using a field not present in the index will lead 1883 * to an error being thrown. 1884 * 1885 * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term 1886 * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported 1887 * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2. 1888 * Avoid large values for edit distance to improve query performance. 1889 * 1890 * Each term also supports a presence modifier. By default a term's presence in document is optional, however 1891 * this can be changed to either required or prohibited. For a term's presence to be required in a document the 1892 * term should be prefixed with a '+', e.g. `+foo bar` is a search for documents that must contain 'foo' and 1893 * optionally contain 'bar'. Conversely a leading '-' sets the terms presence to prohibited, i.e. it must not 1894 * appear in a document, e.g. `-foo bar` is a search for documents that do not contain 'foo' but may contain 'bar'. 1895 * 1896 * To escape special characters the backslash character '\' can be used, this allows searches to include 1897 * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead 1898 * of attempting to apply a boost of 2 to the search term "foo". 
1899 * 1900 * @typedef {string} lunr.Index~QueryString 1901 * @example <caption>Simple single term query</caption> 1902 * hello 1903 * @example <caption>Multiple term query</caption> 1904 * hello world 1905 * @example <caption>term scoped to a field</caption> 1906 * title:hello 1907 * @example <caption>term with a boost of 10</caption> 1908 * hello^10 1909 * @example <caption>term with an edit distance of 2</caption> 1910 * hello~2 1911 * @example <caption>terms with presence modifiers</caption> 1912 * -foo +bar baz 1913 */ 1914 1915/** 1916 * Performs a search against the index using lunr query syntax. 1917 * 1918 * Results will be returned sorted by their score, the most relevant results 1919 * will be returned first. For details on how the score is calculated, please see 1920 * the {@link https://lunrjs.com/guides/searching.html#scoring|guide}. 1921 * 1922 * For more programmatic querying use lunr.Index#query. 1923 * 1924 * @param {lunr.Index~QueryString} queryString - A string containing a lunr query. 1925 * @throws {lunr.QueryParseError} If the passed query string cannot be parsed. 1926 * @returns {lunr.Index~Result[]} 1927 */ 1928lunr.Index.prototype.search = function (queryString) { 1929 return this.query(function (query) { 1930 var parser = new lunr.QueryParser(queryString, query) 1931 parser.parse() 1932 }) 1933} 1934 1935/** 1936 * A query builder callback provides a query object to be used to express 1937 * the query to perform on the index. 1938 * 1939 * @callback lunr.Index~queryBuilder 1940 * @param {lunr.Query} query - The query object to build up. 1941 * @this lunr.Query 1942 */ 1943 1944/** 1945 * Performs a query against the index using the yielded lunr.Query object. 1946 * 1947 * If performing programmatic queries against the index, this method is preferred 1948 * over lunr.Index#search so as to avoid the additional query parsing overhead. 
 *
 * A query object is yielded to the supplied function which should be used to
 * express the query to be run against the index.
 *
 * Note that although this function takes a callback parameter it is _not_ an
 * asynchronous operation, the callback is just yielded a query object to be
 * customized.
 *
 * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.
 * @returns {lunr.Index~Result[]}
 */
lunr.Index.prototype.query = function (fn) {
  // for each query clause
  // * process terms
  // * expand terms from token set
  // * find matching documents and metadata
  // * get document vectors
  // * score documents

  var query = new lunr.Query(this.fields),
      matchingFields = Object.create(null),
      queryVectors = Object.create(null),
      termFieldCache = Object.create(null),
      requiredMatches = Object.create(null),
      prohibitedMatches = Object.create(null)

  /*
   * To support field level boosts a query vector is created per
   * field. An empty vector is eagerly created to support negated
   * queries.
   */
  for (var i = 0; i < this.fields.length; i++) {
    queryVectors[this.fields[i]] = new lunr.Vector
  }

  // Yield the query object to the caller-supplied builder.
  fn.call(query, query)

  for (var i = 0; i < query.clauses.length; i++) {
    /*
     * Unless the pipeline has been disabled for this term, which is
     * the case for terms with wildcards, we need to pass the clause
     * term through the search pipeline. A pipeline returns an array
     * of processed terms. Pipeline functions may expand the passed
     * term, which means we may end up performing multiple index lookups
     * for a single query term.
     */
    var clause = query.clauses[i],
        terms = null,
        clauseMatches = lunr.Set.complete

    if (clause.usePipeline) {
      terms = this.pipeline.runString(clause.term, {
        fields: clause.fields
      })
    } else {
      terms = [clause.term]
    }

    for (var m = 0; m < terms.length; m++) {
      var term = terms[m]

      /*
       * Each term returned from the pipeline needs to use the same query
       * clause object, e.g. the same boost and or edit distance. The
       * simplest way to do this is to re-use the clause object but mutate
       * its term property.
       */
      clause.term = term

      /*
       * From the term in the clause we create a token set which will then
       * be used to intersect the indexes token set to get a list of terms
       * to lookup in the inverted index
       */
      var termTokenSet = lunr.TokenSet.fromClause(clause),
          expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()

      /*
       * If a term marked as required does not exist in the tokenSet it is
       * impossible for the search to return any matches. We set all the field
       * scoped required matches set to empty and stop examining any further
       * clauses.
       */
      if (expandedTerms.length === 0 && clause.presence === lunr.Query.presence.REQUIRED) {
        for (var k = 0; k < clause.fields.length; k++) {
          var field = clause.fields[k]
          requiredMatches[field] = lunr.Set.empty
        }

        break
      }

      for (var j = 0; j < expandedTerms.length; j++) {
        /*
         * For each term get the posting and termIndex, this is required for
         * building the query vector.
         */
        var expandedTerm = expandedTerms[j],
            posting = this.invertedIndex[expandedTerm],
            termIndex = posting._index

        for (var k = 0; k < clause.fields.length; k++) {
          /*
           * For each field that this query term is scoped by (by default
           * all fields are in scope) we need to get all the document refs
           * that have this term in that field.
           *
           * The posting is the entry in the invertedIndex for the matching
           * term from above.
           */
          var field = clause.fields[k],
              fieldPosting = posting[field],
              matchingDocumentRefs = Object.keys(fieldPosting),
              termField = expandedTerm + "/" + field,
              matchingDocumentsSet = new lunr.Set(matchingDocumentRefs)

          /*
           * if the presence of this term is required ensure that the matching
           * documents are added to the set of required matches for this clause.
           *
           */
          if (clause.presence == lunr.Query.presence.REQUIRED) {
            clauseMatches = clauseMatches.union(matchingDocumentsSet)

            if (requiredMatches[field] === undefined) {
              requiredMatches[field] = lunr.Set.complete
            }
          }

          /*
           * if the presence of this term is prohibited ensure that the matching
           * documents are added to the set of prohibited matches for this field,
           * creating that set if it does not yet exist.
           */
          if (clause.presence == lunr.Query.presence.PROHIBITED) {
            if (prohibitedMatches[field] === undefined) {
              prohibitedMatches[field] = lunr.Set.empty
            }

            prohibitedMatches[field] = prohibitedMatches[field].union(matchingDocumentsSet)

            /*
             * Prohibited matches should not be part of the query vector used for
             * similarity scoring and no metadata should be extracted so we continue
             * to the next field
             */
            continue
          }

          /*
           * The query field vector is populated using the termIndex found for
           * the term and a unit value with the appropriate boost applied.
           * Using upsert because there could already be an entry in the vector
           * for the term we are working with. In that case we just add the scores
           * together.
           */
          queryVectors[field].upsert(termIndex, clause.boost, function (a, b) { return a + b })

          /**
           * If we've already seen this term, field combo then we've already collected
           * the matching documents and metadata, no need to go through all that again
           */
          if (termFieldCache[termField]) {
            continue
          }

          for (var l = 0; l < matchingDocumentRefs.length; l++) {
            /*
             * All metadata for this term/field/document triple
             * are then extracted and collected into an instance
             * of lunr.MatchData ready to be returned in the query
             * results
             */
            var matchingDocumentRef = matchingDocumentRefs[l],
                matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field),
                metadata = fieldPosting[matchingDocumentRef],
                fieldMatch

            if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) {
              matchingFields[matchingFieldRef] = new lunr.MatchData (expandedTerm, field, metadata)
            } else {
              fieldMatch.add(expandedTerm, field, metadata)
            }

          }

          termFieldCache[termField] = true
        }
      }
    }

    /**
     * If the presence was required we need to update the requiredMatches field sets.
     * We do this after all fields for the term have collected their matches because
     * the clause terms presence is required in _any_ of the fields not _all_ of the
     * fields.
     */
    if (clause.presence === lunr.Query.presence.REQUIRED) {
      for (var k = 0; k < clause.fields.length; k++) {
        var field = clause.fields[k]
        requiredMatches[field] = requiredMatches[field].intersect(clauseMatches)
      }
    }
  }

  /**
   * Need to combine the field scoped required and prohibited
   * matching documents into a global set of required and prohibited
   * matches
   */
  var allRequiredMatches = lunr.Set.complete,
      allProhibitedMatches = lunr.Set.empty

  for (var i = 0; i < this.fields.length; i++) {
    var field = this.fields[i]

    if (requiredMatches[field]) {
      allRequiredMatches = allRequiredMatches.intersect(requiredMatches[field])
    }

    if (prohibitedMatches[field]) {
      allProhibitedMatches = allProhibitedMatches.union(prohibitedMatches[field])
    }
  }

  var matchingFieldRefs = Object.keys(matchingFields),
      results = [],
      matches = Object.create(null)

  /*
   * If the query is negated (contains only prohibited terms)
   * we need to get _all_ fieldRefs currently existing in the
   * index. This is only done when we know that the query is
   * entirely prohibited terms to avoid any cost of getting all
   * fieldRefs unnecessarily.
   *
   * Additionally, blank MatchData must be created to correctly
   * populate the results.
   */
  if (query.isNegated()) {
    matchingFieldRefs = Object.keys(this.fieldVectors)

    for (var i = 0; i < matchingFieldRefs.length; i++) {
      var matchingFieldRef = matchingFieldRefs[i]
      var fieldRef = lunr.FieldRef.fromString(matchingFieldRef)
      matchingFields[matchingFieldRef] = new lunr.MatchData
    }
  }

  for (var i = 0; i < matchingFieldRefs.length; i++) {
    /*
     * Currently we have document fields that match the query, but we
     * need to return documents. The matchData and scores are combined
     * from multiple fields belonging to the same document.
     *
     * Scores are calculated by field, using the query vectors created
     * above, and combined into a final document score using addition.
     */
    var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]),
        docRef = fieldRef.docRef

    if (!allRequiredMatches.contains(docRef)) {
      continue
    }

    if (allProhibitedMatches.contains(docRef)) {
      continue
    }

    var fieldVector = this.fieldVectors[fieldRef],
        score = queryVectors[fieldRef.fieldName].similarity(fieldVector),
        docMatch

    if ((docMatch = matches[docRef]) !== undefined) {
      docMatch.score += score
      docMatch.matchData.combine(matchingFields[fieldRef])
    } else {
      var match = {
        ref: docRef,
        score: score,
        matchData: matchingFields[fieldRef]
      }
      matches[docRef] = match
      results.push(match)
    }
  }

  /*
   * Sort the results objects by score, highest first.
   */
  return results.sort(function (a, b) {
    return b.score - a.score
  })
}

/**
 * Prepares the index for JSON serialization.
 *
 * The schema for this JSON blob will be described in a
 * separate JSON schema file.
2249 * 2250 * @returns {Object} 2251 */ 2252lunr.Index.prototype.toJSON = function () { 2253 var invertedIndex = Object.keys(this.invertedIndex) 2254 .sort() 2255 .map(function (term) { 2256 return [term, this.invertedIndex[term]] 2257 }, this) 2258 2259 var fieldVectors = Object.keys(this.fieldVectors) 2260 .map(function (ref) { 2261 return [ref, this.fieldVectors[ref].toJSON()] 2262 }, this) 2263 2264 return { 2265 version: lunr.version, 2266 fields: this.fields, 2267 fieldVectors: fieldVectors, 2268 invertedIndex: invertedIndex, 2269 pipeline: this.pipeline.toJSON() 2270 } 2271} 2272 2273/** 2274 * Loads a previously serialized lunr.Index 2275 * 2276 * @param {Object} serializedIndex - A previously serialized lunr.Index 2277 * @returns {lunr.Index} 2278 */ 2279lunr.Index.load = function (serializedIndex) { 2280 var attrs = {}, 2281 fieldVectors = {}, 2282 serializedVectors = serializedIndex.fieldVectors, 2283 invertedIndex = Object.create(null), 2284 serializedInvertedIndex = serializedIndex.invertedIndex, 2285 tokenSetBuilder = new lunr.TokenSet.Builder, 2286 pipeline = lunr.Pipeline.load(serializedIndex.pipeline) 2287 2288 if (serializedIndex.version != lunr.version) { 2289 lunr.utils.warn("Version mismatch when loading serialised index. 
Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'") 2290 } 2291 2292 for (var i = 0; i < serializedVectors.length; i++) { 2293 var tuple = serializedVectors[i], 2294 ref = tuple[0], 2295 elements = tuple[1] 2296 2297 fieldVectors[ref] = new lunr.Vector(elements) 2298 } 2299 2300 for (var i = 0; i < serializedInvertedIndex.length; i++) { 2301 var tuple = serializedInvertedIndex[i], 2302 term = tuple[0], 2303 posting = tuple[1] 2304 2305 tokenSetBuilder.insert(term) 2306 invertedIndex[term] = posting 2307 } 2308 2309 tokenSetBuilder.finish() 2310 2311 attrs.fields = serializedIndex.fields 2312 2313 attrs.fieldVectors = fieldVectors 2314 attrs.invertedIndex = invertedIndex 2315 attrs.tokenSet = tokenSetBuilder.root 2316 attrs.pipeline = pipeline 2317 2318 return new lunr.Index(attrs) 2319} 2320/*! 2321 * lunr.Builder 2322 * Copyright (C) 2019 Oliver Nightingale 2323 */ 2324 2325/** 2326 * lunr.Builder performs indexing on a set of documents and 2327 * returns instances of lunr.Index ready for querying. 2328 * 2329 * All configuration of the index is done via the builder, the 2330 * fields to index, the document reference, the text processing 2331 * pipeline and document scoring parameters are all set on the 2332 * builder before indexing. 2333 * 2334 * @constructor 2335 * @property {string} _ref - Internal reference to the document reference field. 2336 * @property {string[]} _fields - Internal reference to the document fields to index. 2337 * @property {object} invertedIndex - The inverted index maps terms to document fields. 2338 * @property {object} documentTermFrequencies - Keeps track of document term frequencies. 2339 * @property {object} documentLengths - Keeps track of the length of documents added to the index. 2340 * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing. 
2341 * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing. 2342 * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index. 2343 * @property {number} documentCount - Keeps track of the total number of documents indexed. 2344 * @property {number} _b - A parameter to control field length normalization, setting this to 0 disabled normalization, 1 fully normalizes field lengths, the default value is 0.75. 2345 * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2. 2346 * @property {number} termIndex - A counter incremented for each unique term, used to identify a terms position in the vector space. 2347 * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index. 2348 */ 2349lunr.Builder = function () { 2350 this._ref = "id" 2351 this._fields = Object.create(null) 2352 this._documents = Object.create(null) 2353 this.invertedIndex = Object.create(null) 2354 this.fieldTermFrequencies = {} 2355 this.fieldLengths = {} 2356 this.tokenizer = lunr.tokenizer 2357 this.pipeline = new lunr.Pipeline 2358 this.searchPipeline = new lunr.Pipeline 2359 this.documentCount = 0 2360 this._b = 0.75 2361 this._k1 = 1.2 2362 this.termIndex = 0 2363 this.metadataWhitelist = [] 2364} 2365 2366/** 2367 * Sets the document field used as the document reference. Every document must have this field. 2368 * The type of this field in the document should be a string, if it is not a string it will be 2369 * coerced into a string by calling toString. 2370 * 2371 * The default ref is 'id'. 2372 * 2373 * The ref should _not_ be changed during indexing, it should be set before any documents are 2374 * added to the index. Changing it during indexing can lead to inconsistent results. 
2375 * 2376 * @param {string} ref - The name of the reference field in the document. 2377 */ 2378lunr.Builder.prototype.ref = function (ref) { 2379 this._ref = ref 2380} 2381 2382/** 2383 * A function that is used to extract a field from a document. 2384 * 2385 * Lunr expects a field to be at the top level of a document, if however the field 2386 * is deeply nested within a document an extractor function can be used to extract 2387 * the right field for indexing. 2388 * 2389 * @callback fieldExtractor 2390 * @param {object} doc - The document being added to the index. 2391 * @returns {?(string|object|object[])} obj - The object that will be indexed for this field. 2392 * @example <caption>Extracting a nested field</caption> 2393 * function (doc) { return doc.nested.field } 2394 */ 2395 2396/** 2397 * Adds a field to the list of document fields that will be indexed. Every document being 2398 * indexed should have this field. Null values for this field in indexed documents will 2399 * not cause errors but will limit the chance of that document being retrieved by searches. 2400 * 2401 * All fields should be added before adding documents to the index. Adding fields after 2402 * a document has been indexed will have no effect on already indexed documents. 2403 * 2404 * Fields can be boosted at build time. This allows terms within that field to have more 2405 * importance when ranking search results. Use a field boost to specify that matches within 2406 * one field are more important than other fields. 2407 * 2408 * @param {string} fieldName - The name of a field to index in all documents. 2409 * @param {object} attributes - Optional attributes associated with this field. 2410 * @param {number} [attributes.boost=1] - Boost applied to all terms within this field. 2411 * @param {fieldExtractor} [attributes.extractor] - Function to extract a field from a document. 
2412 * @throws {RangeError} fieldName cannot contain unsupported characters '/' 2413 */ 2414lunr.Builder.prototype.field = function (fieldName, attributes) { 2415 if (/\//.test(fieldName)) { 2416 throw new RangeError ("Field '" + fieldName + "' contains illegal character '/'") 2417 } 2418 2419 this._fields[fieldName] = attributes || {} 2420} 2421 2422/** 2423 * A parameter to tune the amount of field length normalisation that is applied when 2424 * calculating relevance scores. A value of 0 will completely disable any normalisation 2425 * and a value of 1 will fully normalise field lengths. The default is 0.75. Values of b 2426 * will be clamped to the range 0 - 1. 2427 * 2428 * @param {number} number - The value to set for this tuning parameter. 2429 */ 2430lunr.Builder.prototype.b = function (number) { 2431 if (number < 0) { 2432 this._b = 0 2433 } else if (number > 1) { 2434 this._b = 1 2435 } else { 2436 this._b = number 2437 } 2438} 2439 2440/** 2441 * A parameter that controls the speed at which a rise in term frequency results in term 2442 * frequency saturation. The default value is 1.2. Setting this to a higher value will give 2443 * slower saturation levels, a lower value will result in quicker saturation. 2444 * 2445 * @param {number} number - The value to set for this tuning parameter. 2446 */ 2447lunr.Builder.prototype.k1 = function (number) { 2448 this._k1 = number 2449} 2450 2451/** 2452 * Adds a document to the index. 2453 * 2454 * Before adding fields to the index the index should have been fully setup, with the document 2455 * ref and all fields to index already having been specified. 2456 * 2457 * The document must have a field name as specified by the ref (by default this is 'id') and 2458 * it should have all fields defined for indexing, though null or undefined values will not 2459 * cause errors. 2460 * 2461 * Entire documents can be boosted at build time. 
Applying a boost to a document indicates that 2462 * this document should rank higher in search results than other documents. 2463 * 2464 * @param {object} doc - The document to add to the index. 2465 * @param {object} attributes - Optional attributes associated with this document. 2466 * @param {number} [attributes.boost=1] - Boost applied to all terms within this document. 2467 */ 2468lunr.Builder.prototype.add = function (doc, attributes) { 2469 var docRef = doc[this._ref], 2470 fields = Object.keys(this._fields) 2471 2472 this._documents[docRef] = attributes || {} 2473 this.documentCount += 1 2474 2475 for (var i = 0; i < fields.length; i++) { 2476 var fieldName = fields[i], 2477 extractor = this._fields[fieldName].extractor, 2478 field = extractor ? extractor(doc) : doc[fieldName], 2479 tokens = this.tokenizer(field, { 2480 fields: [fieldName] 2481 }), 2482 terms = this.pipeline.run(tokens), 2483 fieldRef = new lunr.FieldRef (docRef, fieldName), 2484 fieldTerms = Object.create(null) 2485 2486 this.fieldTermFrequencies[fieldRef] = fieldTerms 2487 this.fieldLengths[fieldRef] = 0 2488 2489 // store the length of this field for this document 2490 this.fieldLengths[fieldRef] += terms.length 2491 2492 // calculate term frequencies for this field 2493 for (var j = 0; j < terms.length; j++) { 2494 var term = terms[j] 2495 2496 if (fieldTerms[term] == undefined) { 2497 fieldTerms[term] = 0 2498 } 2499 2500 fieldTerms[term] += 1 2501 2502 // add to inverted index 2503 // create an initial posting if one doesn't exist 2504 if (this.invertedIndex[term] == undefined) { 2505 var posting = Object.create(null) 2506 posting["_index"] = this.termIndex 2507 this.termIndex += 1 2508 2509 for (var k = 0; k < fields.length; k++) { 2510 posting[fields[k]] = Object.create(null) 2511 } 2512 2513 this.invertedIndex[term] = posting 2514 } 2515 2516 // add an entry for this term/fieldName/docRef to the invertedIndex 2517 if (this.invertedIndex[term][fieldName][docRef] == undefined) { 
2518 this.invertedIndex[term][fieldName][docRef] = Object.create(null) 2519 } 2520 2521 // store all whitelisted metadata about this token in the 2522 // inverted index 2523 for (var l = 0; l < this.metadataWhitelist.length; l++) { 2524 var metadataKey = this.metadataWhitelist[l], 2525 metadata = term.metadata[metadataKey] 2526 2527 if (this.invertedIndex[term][fieldName][docRef][metadataKey] == undefined) { 2528 this.invertedIndex[term][fieldName][docRef][metadataKey] = [] 2529 } 2530 2531 this.invertedIndex[term][fieldName][docRef][metadataKey].push(metadata) 2532 } 2533 } 2534 2535 } 2536} 2537 2538/** 2539 * Calculates the average document length for this index 2540 * 2541 * @private 2542 */ 2543lunr.Builder.prototype.calculateAverageFieldLengths = function () { 2544 2545 var fieldRefs = Object.keys(this.fieldLengths), 2546 numberOfFields = fieldRefs.length, 2547 accumulator = {}, 2548 documentsWithField = {} 2549 2550 for (var i = 0; i < numberOfFields; i++) { 2551 var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]), 2552 field = fieldRef.fieldName 2553 2554 documentsWithField[field] || (documentsWithField[field] = 0) 2555 documentsWithField[field] += 1 2556 2557 accumulator[field] || (accumulator[field] = 0) 2558 accumulator[field] += this.fieldLengths[fieldRef] 2559 } 2560 2561 var fields = Object.keys(this._fields) 2562 2563 for (var i = 0; i < fields.length; i++) { 2564 var fieldName = fields[i] 2565 accumulator[fieldName] = accumulator[fieldName] / documentsWithField[fieldName] 2566 } 2567 2568 this.averageFieldLength = accumulator 2569} 2570 2571/** 2572 * Builds a vector space model of every document using lunr.Vector 2573 * 2574 * @private 2575 */ 2576lunr.Builder.prototype.createFieldVectors = function () { 2577 var fieldVectors = {}, 2578 fieldRefs = Object.keys(this.fieldTermFrequencies), 2579 fieldRefsLength = fieldRefs.length, 2580 termIdfCache = Object.create(null) 2581 2582 for (var i = 0; i < fieldRefsLength; i++) { 2583 var fieldRef = 
lunr.FieldRef.fromString(fieldRefs[i]), 2584 fieldName = fieldRef.fieldName, 2585 fieldLength = this.fieldLengths[fieldRef], 2586 fieldVector = new lunr.Vector, 2587 termFrequencies = this.fieldTermFrequencies[fieldRef], 2588 terms = Object.keys(termFrequencies), 2589 termsLength = terms.length 2590 2591 2592 var fieldBoost = this._fields[fieldName].boost || 1, 2593 docBoost = this._documents[fieldRef.docRef].boost || 1 2594 2595 for (var j = 0; j < termsLength; j++) { 2596 var term = terms[j], 2597 tf = termFrequencies[term], 2598 termIndex = this.invertedIndex[term]._index, 2599 idf, score, scoreWithPrecision 2600 2601 if (termIdfCache[term] === undefined) { 2602 idf = lunr.idf(this.invertedIndex[term], this.documentCount) 2603 termIdfCache[term] = idf 2604 } else { 2605 idf = termIdfCache[term] 2606 } 2607 2608 score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (fieldLength / this.averageFieldLength[fieldName])) + tf) 2609 score *= fieldBoost 2610 score *= docBoost 2611 scoreWithPrecision = Math.round(score * 1000) / 1000 2612 // Converts 1.23456789 to 1.234. 2613 // Reducing the precision so that the vectors take up less 2614 // space when serialised. Doing it now so that they behave 2615 // the same before and after serialisation. Also, this is 2616 // the fastest approach to reducing a number's precision in 2617 // JavaScript. 2618 2619 fieldVector.insert(termIndex, scoreWithPrecision) 2620 } 2621 2622 fieldVectors[fieldRef] = fieldVector 2623 } 2624 2625 this.fieldVectors = fieldVectors 2626} 2627 2628/** 2629 * Creates a token set of all tokens in the index using lunr.TokenSet 2630 * 2631 * @private 2632 */ 2633lunr.Builder.prototype.createTokenSet = function () { 2634 this.tokenSet = lunr.TokenSet.fromArray( 2635 Object.keys(this.invertedIndex).sort() 2636 ) 2637} 2638 2639/** 2640 * Builds the index, creating an instance of lunr.Index. 
2641 * 2642 * This completes the indexing process and should only be called 2643 * once all documents have been added to the index. 2644 * 2645 * @returns {lunr.Index} 2646 */ 2647lunr.Builder.prototype.build = function () { 2648 this.calculateAverageFieldLengths() 2649 this.createFieldVectors() 2650 this.createTokenSet() 2651 2652 return new lunr.Index({ 2653 invertedIndex: this.invertedIndex, 2654 fieldVectors: this.fieldVectors, 2655 tokenSet: this.tokenSet, 2656 fields: Object.keys(this._fields), 2657 pipeline: this.searchPipeline 2658 }) 2659} 2660 2661/** 2662 * Applies a plugin to the index builder. 2663 * 2664 * A plugin is a function that is called with the index builder as its context. 2665 * Plugins can be used to customise or extend the behaviour of the index 2666 * in some way. A plugin is just a function, that encapsulated the custom 2667 * behaviour that should be applied when building the index. 2668 * 2669 * The plugin function will be called with the index builder as its argument, additional 2670 * arguments can also be passed when calling use. The function will be called 2671 * with the index builder as its context. 2672 * 2673 * @param {Function} plugin The plugin to apply. 2674 */ 2675lunr.Builder.prototype.use = function (fn) { 2676 var args = Array.prototype.slice.call(arguments, 1) 2677 args.unshift(this) 2678 fn.apply(this, args) 2679} 2680/** 2681 * Contains and collects metadata about a matching document. 2682 * A single instance of lunr.MatchData is returned as part of every 2683 * lunr.Index~Result. 2684 * 2685 * @constructor 2686 * @param {string} term - The term this match data is associated with 2687 * @param {string} field - The field in which the term was found 2688 * @param {object} metadata - The metadata recorded about this term in this field 2689 * @property {object} metadata - A cloned collection of metadata associated with this document. 
2690 * @see {@link lunr.Index~Result} 2691 */ 2692lunr.MatchData = function (term, field, metadata) { 2693 var clonedMetadata = Object.create(null), 2694 metadataKeys = Object.keys(metadata || {}) 2695 2696 // Cloning the metadata to prevent the original 2697 // being mutated during match data combination. 2698 // Metadata is kept in an array within the inverted 2699 // index so cloning the data can be done with 2700 // Array#slice 2701 for (var i = 0; i < metadataKeys.length; i++) { 2702 var key = metadataKeys[i] 2703 clonedMetadata[key] = metadata[key].slice() 2704 } 2705 2706 this.metadata = Object.create(null) 2707 2708 if (term !== undefined) { 2709 this.metadata[term] = Object.create(null) 2710 this.metadata[term][field] = clonedMetadata 2711 } 2712} 2713 2714/** 2715 * An instance of lunr.MatchData will be created for every term that matches a 2716 * document. However only one instance is required in a lunr.Index~Result. This 2717 * method combines metadata from another instance of lunr.MatchData with this 2718 * objects metadata. 2719 * 2720 * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one. 
2721 * @see {@link lunr.Index~Result} 2722 */ 2723lunr.MatchData.prototype.combine = function (otherMatchData) { 2724 var terms = Object.keys(otherMatchData.metadata) 2725 2726 for (var i = 0; i < terms.length; i++) { 2727 var term = terms[i], 2728 fields = Object.keys(otherMatchData.metadata[term]) 2729 2730 if (this.metadata[term] == undefined) { 2731 this.metadata[term] = Object.create(null) 2732 } 2733 2734 for (var j = 0; j < fields.length; j++) { 2735 var field = fields[j], 2736 keys = Object.keys(otherMatchData.metadata[term][field]) 2737 2738 if (this.metadata[term][field] == undefined) { 2739 this.metadata[term][field] = Object.create(null) 2740 } 2741 2742 for (var k = 0; k < keys.length; k++) { 2743 var key = keys[k] 2744 2745 if (this.metadata[term][field][key] == undefined) { 2746 this.metadata[term][field][key] = otherMatchData.metadata[term][field][key] 2747 } else { 2748 this.metadata[term][field][key] = this.metadata[term][field][key].concat(otherMatchData.metadata[term][field][key]) 2749 } 2750 2751 } 2752 } 2753 } 2754} 2755 2756/** 2757 * Add metadata for a term/field pair to this instance of match data. 
2758 * 2759 * @param {string} term - The term this match data is associated with 2760 * @param {string} field - The field in which the term was found 2761 * @param {object} metadata - The metadata recorded about this term in this field 2762 */ 2763lunr.MatchData.prototype.add = function (term, field, metadata) { 2764 if (!(term in this.metadata)) { 2765 this.metadata[term] = Object.create(null) 2766 this.metadata[term][field] = metadata 2767 return 2768 } 2769 2770 if (!(field in this.metadata[term])) { 2771 this.metadata[term][field] = metadata 2772 return 2773 } 2774 2775 var metadataKeys = Object.keys(metadata) 2776 2777 for (var i = 0; i < metadataKeys.length; i++) { 2778 var key = metadataKeys[i] 2779 2780 if (key in this.metadata[term][field]) { 2781 this.metadata[term][field][key] = this.metadata[term][field][key].concat(metadata[key]) 2782 } else { 2783 this.metadata[term][field][key] = metadata[key] 2784 } 2785 } 2786} 2787/** 2788 * A lunr.Query provides a programmatic way of defining queries to be performed 2789 * against a {@link lunr.Index}. 2790 * 2791 * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method 2792 * so the query object is pre-initialized with the right index fields. 2793 * 2794 * @constructor 2795 * @property {lunr.Query~Clause[]} clauses - An array of query clauses. 2796 * @property {string[]} allFields - An array of all available fields in a lunr.Index. 2797 */ 2798lunr.Query = function (allFields) { 2799 this.clauses = [] 2800 this.allFields = allFields 2801} 2802 2803/** 2804 * Constants for indicating what kind of automatic wildcard insertion will be used when constructing a query clause. 2805 * 2806 * This allows wildcards to be added to the beginning and end of a term without having to manually do any string 2807 * concatenation. 2808 * 2809 * The wildcard constants can be bitwise combined to select both leading and trailing wildcards. 
2810 * 2811 * @constant 2812 * @default 2813 * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour 2814 * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists 2815 * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists 2816 * @see lunr.Query~Clause 2817 * @see lunr.Query#clause 2818 * @see lunr.Query#term 2819 * @example <caption>query term with trailing wildcard</caption> 2820 * query.term('foo', { wildcard: lunr.Query.wildcard.TRAILING }) 2821 * @example <caption>query term with leading and trailing wildcard</caption> 2822 * query.term('foo', { 2823 * wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING 2824 * }) 2825 */ 2826 2827lunr.Query.wildcard = new String ("*") 2828lunr.Query.wildcard.NONE = 0 2829lunr.Query.wildcard.LEADING = 1 2830lunr.Query.wildcard.TRAILING = 2 2831 2832/** 2833 * Constants for indicating what kind of presence a term must have in matching documents. 2834 * 2835 * @constant 2836 * @enum {number} 2837 * @see lunr.Query~Clause 2838 * @see lunr.Query#clause 2839 * @see lunr.Query#term 2840 * @example <caption>query term with required presence</caption> 2841 * query.term('foo', { presence: lunr.Query.presence.REQUIRED }) 2842 */ 2843lunr.Query.presence = { 2844 /** 2845 * Term's presence in a document is optional, this is the default value. 2846 */ 2847 OPTIONAL: 1, 2848 2849 /** 2850 * Term's presence in a document is required, documents that do not contain 2851 * this term will not be returned. 2852 */ 2853 REQUIRED: 2, 2854 2855 /** 2856 * Term's presence in a document is prohibited, documents that do contain 2857 * this term will not be returned. 2858 */ 2859 PROHIBITED: 3 2860} 2861 2862/** 2863 * A single clause in a {@link lunr.Query} contains a term and details on how to 2864 * match that term against a {@link lunr.Index}. 
2865 * 2866 * @typedef {Object} lunr.Query~Clause 2867 * @property {string[]} fields - The fields in an index this clause should be matched against. 2868 * @property {number} [boost=1] - Any boost that should be applied when matching this clause. 2869 * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be. 2870 * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline. 2871 * @property {number} [wildcard=lunr.Query.wildcard.NONE] - Whether the term should have wildcards appended or prepended. 2872 * @property {number} [presence=lunr.Query.presence.OPTIONAL] - The terms presence in any matching documents. 2873 */ 2874 2875/** 2876 * Adds a {@link lunr.Query~Clause} to this query. 2877 * 2878 * Unless the clause contains the fields to be matched all fields will be matched. In addition 2879 * a default boost of 1 is applied to the clause. 2880 * 2881 * @param {lunr.Query~Clause} clause - The clause to add to this query. 
2882 * @see lunr.Query~Clause 2883 * @returns {lunr.Query} 2884 */ 2885lunr.Query.prototype.clause = function (clause) { 2886 if (!('fields' in clause)) { 2887 clause.fields = this.allFields 2888 } 2889 2890 if (!('boost' in clause)) { 2891 clause.boost = 1 2892 } 2893 2894 if (!('usePipeline' in clause)) { 2895 clause.usePipeline = true 2896 } 2897 2898 if (!('wildcard' in clause)) { 2899 clause.wildcard = lunr.Query.wildcard.NONE 2900 } 2901 2902 if ((clause.wildcard & lunr.Query.wildcard.LEADING) && (clause.term.charAt(0) != lunr.Query.wildcard)) { 2903 clause.term = "*" + clause.term 2904 } 2905 2906 if ((clause.wildcard & lunr.Query.wildcard.TRAILING) && (clause.term.slice(-1) != lunr.Query.wildcard)) { 2907 clause.term = "" + clause.term + "*" 2908 } 2909 2910 if (!('presence' in clause)) { 2911 clause.presence = lunr.Query.presence.OPTIONAL 2912 } 2913 2914 this.clauses.push(clause) 2915 2916 return this 2917} 2918 2919/** 2920 * A negated query is one in which every clause has a presence of 2921 * prohibited. These queries require some special processing to return 2922 * the expected results. 2923 * 2924 * @returns boolean 2925 */ 2926lunr.Query.prototype.isNegated = function () { 2927 for (var i = 0; i < this.clauses.length; i++) { 2928 if (this.clauses[i].presence != lunr.Query.presence.PROHIBITED) { 2929 return false 2930 } 2931 } 2932 2933 return true 2934} 2935 2936/** 2937 * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause} 2938 * to the list of clauses that make up this query. 2939 * 2940 * The term is used as is, i.e. no tokenization will be performed by this method. Instead conversion 2941 * to a token or token-like string should be done before calling this method. 2942 * 2943 * The term will be converted to a string by calling `toString`. Multiple terms can be passed as an 2944 * array, each term in the array will share the same options. 
2945 * 2946 * @param {object|object[]} term - The term(s) to add to the query. 2947 * @param {object} [options] - Any additional properties to add to the query clause. 2948 * @returns {lunr.Query} 2949 * @see lunr.Query#clause 2950 * @see lunr.Query~Clause 2951 * @example <caption>adding a single term to a query</caption> 2952 * query.term("foo") 2953 * @example <caption>adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard</caption> 2954 * query.term("foo", { 2955 * fields: ["title"], 2956 * boost: 10, 2957 * wildcard: lunr.Query.wildcard.TRAILING 2958 * }) 2959 * @example <caption>using lunr.tokenizer to convert a string to tokens before using them as terms</caption> 2960 * query.term(lunr.tokenizer("foo bar")) 2961 */ 2962lunr.Query.prototype.term = function (term, options) { 2963 if (Array.isArray(term)) { 2964 term.forEach(function (t) { this.term(t, lunr.utils.clone(options)) }, this) 2965 return this 2966 } 2967 2968 var clause = options || {} 2969 clause.term = term.toString() 2970 2971 this.clause(clause) 2972 2973 return this 2974} 2975lunr.QueryParseError = function (message, start, end) { 2976 this.name = "QueryParseError" 2977 this.message = message 2978 this.start = start 2979 this.end = end 2980} 2981 2982lunr.QueryParseError.prototype = new Error 2983lunr.QueryLexer = function (str) { 2984 this.lexemes = [] 2985 this.str = str 2986 this.length = str.length 2987 this.pos = 0 2988 this.start = 0 2989 this.escapeCharPositions = [] 2990} 2991 2992lunr.QueryLexer.prototype.run = function () { 2993 var state = lunr.QueryLexer.lexText 2994 2995 while (state) { 2996 state = state(this) 2997 } 2998} 2999 3000lunr.QueryLexer.prototype.sliceString = function () { 3001 var subSlices = [], 3002 sliceStart = this.start, 3003 sliceEnd = this.pos 3004 3005 for (var i = 0; i < this.escapeCharPositions.length; i++) { 3006 sliceEnd = this.escapeCharPositions[i] 3007 subSlices.push(this.str.slice(sliceStart, sliceEnd)) 
3008 sliceStart = sliceEnd + 1 3009 } 3010 3011 subSlices.push(this.str.slice(sliceStart, this.pos)) 3012 this.escapeCharPositions.length = 0 3013 3014 return subSlices.join('') 3015} 3016 3017lunr.QueryLexer.prototype.emit = function (type) { 3018 this.lexemes.push({ 3019 type: type, 3020 str: this.sliceString(), 3021 start: this.start, 3022 end: this.pos 3023 }) 3024 3025 this.start = this.pos 3026} 3027 3028lunr.QueryLexer.prototype.escapeCharacter = function () { 3029 this.escapeCharPositions.push(this.pos - 1) 3030 this.pos += 1 3031} 3032 3033lunr.QueryLexer.prototype.next = function () { 3034 if (this.pos >= this.length) { 3035 return lunr.QueryLexer.EOS 3036 } 3037 3038 var char = this.str.charAt(this.pos) 3039 this.pos += 1 3040 return char 3041} 3042 3043lunr.QueryLexer.prototype.width = function () { 3044 return this.pos - this.start 3045} 3046 3047lunr.QueryLexer.prototype.ignore = function () { 3048 if (this.start == this.pos) { 3049 this.pos += 1 3050 } 3051 3052 this.start = this.pos 3053} 3054 3055lunr.QueryLexer.prototype.backup = function () { 3056 this.pos -= 1 3057} 3058 3059lunr.QueryLexer.prototype.acceptDigitRun = function () { 3060 var char, charCode 3061 3062 do { 3063 char = this.next() 3064 charCode = char.charCodeAt(0) 3065 } while (charCode > 47 && charCode < 58) 3066 3067 if (char != lunr.QueryLexer.EOS) { 3068 this.backup() 3069 } 3070} 3071 3072lunr.QueryLexer.prototype.more = function () { 3073 return this.pos < this.length 3074} 3075 3076lunr.QueryLexer.EOS = 'EOS' 3077lunr.QueryLexer.FIELD = 'FIELD' 3078lunr.QueryLexer.TERM = 'TERM' 3079lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE' 3080lunr.QueryLexer.BOOST = 'BOOST' 3081lunr.QueryLexer.PRESENCE = 'PRESENCE' 3082 3083lunr.QueryLexer.lexField = function (lexer) { 3084 lexer.backup() 3085 lexer.emit(lunr.QueryLexer.FIELD) 3086 lexer.ignore() 3087 return lunr.QueryLexer.lexText 3088} 3089 3090lunr.QueryLexer.lexTerm = function (lexer) { 3091 if (lexer.width() > 1) { 3092 
lexer.backup() 3093 lexer.emit(lunr.QueryLexer.TERM) 3094 } 3095 3096 lexer.ignore() 3097 3098 if (lexer.more()) { 3099 return lunr.QueryLexer.lexText 3100 } 3101} 3102 3103lunr.QueryLexer.lexEditDistance = function (lexer) { 3104 lexer.ignore() 3105 lexer.acceptDigitRun() 3106 lexer.emit(lunr.QueryLexer.EDIT_DISTANCE) 3107 return lunr.QueryLexer.lexText 3108} 3109 3110lunr.QueryLexer.lexBoost = function (lexer) { 3111 lexer.ignore() 3112 lexer.acceptDigitRun() 3113 lexer.emit(lunr.QueryLexer.BOOST) 3114 return lunr.QueryLexer.lexText 3115} 3116 3117lunr.QueryLexer.lexEOS = function (lexer) { 3118 if (lexer.width() > 0) { 3119 lexer.emit(lunr.QueryLexer.TERM) 3120 } 3121} 3122 3123// This matches the separator used when tokenising fields 3124// within a document. These should match otherwise it is 3125// not possible to search for some tokens within a document. 3126// 3127// It is possible for the user to change the separator on the 3128// tokenizer so it _might_ clash with any other of the special 3129// characters already used within the search string, e.g. :. 3130// 3131// This means that it is possible to change the separator in 3132// such a way that makes some words unsearchable using a search 3133// string. 
3134lunr.QueryLexer.termSeparator = lunr.tokenizer.separator 3135 3136lunr.QueryLexer.lexText = function (lexer) { 3137 while (true) { 3138 var char = lexer.next() 3139 3140 if (char == lunr.QueryLexer.EOS) { 3141 return lunr.QueryLexer.lexEOS 3142 } 3143 3144 // Escape character is '\' 3145 if (char.charCodeAt(0) == 92) { 3146 lexer.escapeCharacter() 3147 continue 3148 } 3149 3150 if (char == ":") { 3151 return lunr.QueryLexer.lexField 3152 } 3153 3154 if (char == "~") { 3155 lexer.backup() 3156 if (lexer.width() > 0) { 3157 lexer.emit(lunr.QueryLexer.TERM) 3158 } 3159 return lunr.QueryLexer.lexEditDistance 3160 } 3161 3162 if (char == "^") { 3163 lexer.backup() 3164 if (lexer.width() > 0) { 3165 lexer.emit(lunr.QueryLexer.TERM) 3166 } 3167 return lunr.QueryLexer.lexBoost 3168 } 3169 3170 // "+" indicates term presence is required 3171 // checking for length to ensure that only 3172 // leading "+" are considered 3173 if (char == "+" && lexer.width() === 1) { 3174 lexer.emit(lunr.QueryLexer.PRESENCE) 3175 return lunr.QueryLexer.lexText 3176 } 3177 3178 // "-" indicates term presence is prohibited 3179 // checking for length to ensure that only 3180 // leading "-" are considered 3181 if (char == "-" && lexer.width() === 1) { 3182 lexer.emit(lunr.QueryLexer.PRESENCE) 3183 return lunr.QueryLexer.lexText 3184 } 3185 3186 if (char.match(lunr.QueryLexer.termSeparator)) { 3187 return lunr.QueryLexer.lexTerm 3188 } 3189 } 3190} 3191 3192lunr.QueryParser = function (str, query) { 3193 this.lexer = new lunr.QueryLexer (str) 3194 this.query = query 3195 this.currentClause = {} 3196 this.lexemeIdx = 0 3197} 3198 3199lunr.QueryParser.prototype.parse = function () { 3200 this.lexer.run() 3201 this.lexemes = this.lexer.lexemes 3202 3203 var state = lunr.QueryParser.parseClause 3204 3205 while (state) { 3206 state = state(this) 3207 } 3208 3209 return this.query 3210} 3211 3212lunr.QueryParser.prototype.peekLexeme = function () { 3213 return this.lexemes[this.lexemeIdx] 3214} 3215 
3216lunr.QueryParser.prototype.consumeLexeme = function () { 3217 var lexeme = this.peekLexeme() 3218 this.lexemeIdx += 1 3219 return lexeme 3220} 3221 3222lunr.QueryParser.prototype.nextClause = function () { 3223 var completedClause = this.currentClause 3224 this.query.clause(completedClause) 3225 this.currentClause = {} 3226} 3227 3228lunr.QueryParser.parseClause = function (parser) { 3229 var lexeme = parser.peekLexeme() 3230 3231 if (lexeme == undefined) { 3232 return 3233 } 3234 3235 switch (lexeme.type) { 3236 case lunr.QueryLexer.PRESENCE: 3237 return lunr.QueryParser.parsePresence 3238 case lunr.QueryLexer.FIELD: 3239 return lunr.QueryParser.parseField 3240 case lunr.QueryLexer.TERM: 3241 return lunr.QueryParser.parseTerm 3242 default: 3243 var errorMessage = "expected either a field or a term, found " + lexeme.type 3244 3245 if (lexeme.str.length >= 1) { 3246 errorMessage += " with value '" + lexeme.str + "'" 3247 } 3248 3249 throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 3250 } 3251} 3252 3253lunr.QueryParser.parsePresence = function (parser) { 3254 var lexeme = parser.consumeLexeme() 3255 3256 if (lexeme == undefined) { 3257 return 3258 } 3259 3260 switch (lexeme.str) { 3261 case "-": 3262 parser.currentClause.presence = lunr.Query.presence.PROHIBITED 3263 break 3264 case "+": 3265 parser.currentClause.presence = lunr.Query.presence.REQUIRED 3266 break 3267 default: 3268 var errorMessage = "unrecognised presence operator'" + lexeme.str + "'" 3269 throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 3270 } 3271 3272 var nextLexeme = parser.peekLexeme() 3273 3274 if (nextLexeme == undefined) { 3275 var errorMessage = "expecting term or field, found nothing" 3276 throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 3277 } 3278 3279 switch (nextLexeme.type) { 3280 case lunr.QueryLexer.FIELD: 3281 return lunr.QueryParser.parseField 3282 case lunr.QueryLexer.TERM: 3283 return 
lunr.QueryParser.parseTerm 3284 default: 3285 var errorMessage = "expecting term or field, found '" + nextLexeme.type + "'" 3286 throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 3287 } 3288} 3289 3290lunr.QueryParser.parseField = function (parser) { 3291 var lexeme = parser.consumeLexeme() 3292 3293 if (lexeme == undefined) { 3294 return 3295 } 3296 3297 if (parser.query.allFields.indexOf(lexeme.str) == -1) { 3298 var possibleFields = parser.query.allFields.map(function (f) { return "'" + f + "'" }).join(', '), 3299 errorMessage = "unrecognised field '" + lexeme.str + "', possible fields: " + possibleFields 3300 3301 throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 3302 } 3303 3304 parser.currentClause.fields = [lexeme.str] 3305 3306 var nextLexeme = parser.peekLexeme() 3307 3308 if (nextLexeme == undefined) { 3309 var errorMessage = "expecting term, found nothing" 3310 throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 3311 } 3312 3313 switch (nextLexeme.type) { 3314 case lunr.QueryLexer.TERM: 3315 return lunr.QueryParser.parseTerm 3316 default: 3317 var errorMessage = "expecting term, found '" + nextLexeme.type + "'" 3318 throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 3319 } 3320} 3321 3322lunr.QueryParser.parseTerm = function (parser) { 3323 var lexeme = parser.consumeLexeme() 3324 3325 if (lexeme == undefined) { 3326 return 3327 } 3328 3329 parser.currentClause.term = lexeme.str.toLowerCase() 3330 3331 if (lexeme.str.indexOf("*") != -1) { 3332 parser.currentClause.usePipeline = false 3333 } 3334 3335 var nextLexeme = parser.peekLexeme() 3336 3337 if (nextLexeme == undefined) { 3338 parser.nextClause() 3339 return 3340 } 3341 3342 switch (nextLexeme.type) { 3343 case lunr.QueryLexer.TERM: 3344 parser.nextClause() 3345 return lunr.QueryParser.parseTerm 3346 case lunr.QueryLexer.FIELD: 3347 parser.nextClause() 3348 return lunr.QueryParser.parseField 3349 
case lunr.QueryLexer.EDIT_DISTANCE: 3350 return lunr.QueryParser.parseEditDistance 3351 case lunr.QueryLexer.BOOST: 3352 return lunr.QueryParser.parseBoost 3353 case lunr.QueryLexer.PRESENCE: 3354 parser.nextClause() 3355 return lunr.QueryParser.parsePresence 3356 default: 3357 var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" 3358 throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 3359 } 3360} 3361 3362lunr.QueryParser.parseEditDistance = function (parser) { 3363 var lexeme = parser.consumeLexeme() 3364 3365 if (lexeme == undefined) { 3366 return 3367 } 3368 3369 var editDistance = parseInt(lexeme.str, 10) 3370 3371 if (isNaN(editDistance)) { 3372 var errorMessage = "edit distance must be numeric" 3373 throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 3374 } 3375 3376 parser.currentClause.editDistance = editDistance 3377 3378 var nextLexeme = parser.peekLexeme() 3379 3380 if (nextLexeme == undefined) { 3381 parser.nextClause() 3382 return 3383 } 3384 3385 switch (nextLexeme.type) { 3386 case lunr.QueryLexer.TERM: 3387 parser.nextClause() 3388 return lunr.QueryParser.parseTerm 3389 case lunr.QueryLexer.FIELD: 3390 parser.nextClause() 3391 return lunr.QueryParser.parseField 3392 case lunr.QueryLexer.EDIT_DISTANCE: 3393 return lunr.QueryParser.parseEditDistance 3394 case lunr.QueryLexer.BOOST: 3395 return lunr.QueryParser.parseBoost 3396 case lunr.QueryLexer.PRESENCE: 3397 parser.nextClause() 3398 return lunr.QueryParser.parsePresence 3399 default: 3400 var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" 3401 throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 3402 } 3403} 3404 3405lunr.QueryParser.parseBoost = function (parser) { 3406 var lexeme = parser.consumeLexeme() 3407 3408 if (lexeme == undefined) { 3409 return 3410 } 3411 3412 var boost = parseInt(lexeme.str, 10) 3413 3414 if (isNaN(boost)) { 3415 var errorMessage = "boost must be numeric" 
3416 throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 3417 } 3418 3419 parser.currentClause.boost = boost 3420 3421 var nextLexeme = parser.peekLexeme() 3422 3423 if (nextLexeme == undefined) { 3424 parser.nextClause() 3425 return 3426 } 3427 3428 switch (nextLexeme.type) { 3429 case lunr.QueryLexer.TERM: 3430 parser.nextClause() 3431 return lunr.QueryParser.parseTerm 3432 case lunr.QueryLexer.FIELD: 3433 parser.nextClause() 3434 return lunr.QueryParser.parseField 3435 case lunr.QueryLexer.EDIT_DISTANCE: 3436 return lunr.QueryParser.parseEditDistance 3437 case lunr.QueryLexer.BOOST: 3438 return lunr.QueryParser.parseBoost 3439 case lunr.QueryLexer.PRESENCE: 3440 parser.nextClause() 3441 return lunr.QueryParser.parsePresence 3442 default: 3443 var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" 3444 throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 3445 } 3446} 3447 3448 /** 3449 * export the module via AMD, CommonJS or as a browser global 3450 * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js 3451 */ 3452 ;(function (root, factory) { 3453 if (typeof define === 'function' && define.amd) { 3454 // AMD. Register as an anonymous module. 3455 define(factory) 3456 } else if (typeof exports === 'object') { 3457 /** 3458 * Node. Does not work with strict CommonJS, but 3459 * only CommonJS-like enviroments that support module.exports, 3460 * like Node. 3461 */ 3462 module.exports = factory() 3463 } else { 3464 // Browser globals (root is window) 3465 root.lunr = factory() 3466 } 3467 }(this, function () { 3468 /** 3469 * Just return a value to define the module export. 3470 * This example returns an object, but the module 3471 * can return a function as the exported value. 3472 */ 3473 return lunr 3474 })) 3475})(); 3476