From 6077d7b76d010ade046a32711536cab3601e4910 Mon Sep 17 00:00:00 2001 From: Matthijs Kuiper Date: Tue, 16 May 2017 10:33:07 +0200 Subject: [PATCH] Update to Lunr 2.0.3 --- lib/middleman-search/search-index-resource.rb | 54 +- vendor/assets/javascripts/lunr.js | 2787 +++++++++++++++++ vendor/assets/javascripts/lunr.min.js | 9 +- 3 files changed, 2818 insertions(+), 32 deletions(-) create mode 100755 vendor/assets/javascripts/lunr.js diff --git a/lib/middleman-search/search-index-resource.rb b/lib/middleman-search/search-index-resource.rb index 54e8258..4263459 100644 --- a/lib/middleman-search/search-index-resource.rb +++ b/lib/middleman-search/search-index-resource.rb @@ -45,11 +45,15 @@ def build_index # Register pipeline functions pipeline = context.eval('lunr.Pipeline') + @pipeline.each do |name, function| context[name] = context.eval("(#{function})") pipeline.registerFunction(context[name], name) end + # Reference to resource map + store = Hash.new + # Build lunr based on config lunr = context.eval('lunr') lunr_conf = proc do |this| @@ -68,41 +72,38 @@ def build_index next if opts[:index] == false this.field(field, {:boost => opts[:boost]}) end - end - # Get lunr index - index = lunr.call(lunr_conf) + # Iterate over all resources and build index + @app.sitemap.resources.each_with_index do |resource, id| + begin + catch(:skip) do + next if resource.data['index'] == false + next unless @resources_to_index.any? {|whitelisted| resource.path.start_with? whitelisted } - # Ref to resource map - store = Hash.new + to_index = Hash.new + to_store = Hash.new - # Iterate over all resources and build index - @app.sitemap.resources.each_with_index do |resource, id| - begin - catch(:skip) do - next if resource.data['index'] == false - next unless @resources_to_index.any? {|whitelisted| resource.path.start_with? whitelisted } - - to_index = Hash.new - to_store = Hash.new - - @fields.each do |field, opts| - value = value_for(resource, field, opts) - throw(:skip) if value.blank? && opts[:required] - to_index[field] = value unless opts[:index] == false - to_store[field] = value if opts[:store] - end + @fields.each do |field, opts| + value = value_for(resource, field, opts) + throw(:skip) if value.blank? && opts[:required] + to_index[field] = value unless opts[:index] == false + to_store[field] = value if opts[:store] + end - @callback.call(to_index, to_store, resource) if @callback + @callback.call(to_index, to_store, resource) if @callback - index.add(to_index.merge(id: id)) - store[id] = to_store + this.add(to_index.merge(id: id)) + store[id] = to_store + end + rescue => ex + @app.logger.warn "Error processing resource for index: #{resource.path}\n#{ex}\n #{ex.backtrace.join("\n ")}" end - rescue => ex - @app.logger.warn "Error processing resource for index: #{resource.path}\n#{ex}\n #{ex.backtrace.join("\n ")}" end end + # Get lunr index + index = lunr.call(lunr_conf) + # Generate JSON output "{\"index\": #{index.indexJson()}, \"docs\": #{store.to_json}}" end @@ -118,7 +119,6 @@ def ignored? def value_for(resource, field, opts={}) case field.to_s when 'content' - html = resource.render( { :layout => false }, { :current_path => resource.path } ) Nokogiri::HTML(html).xpath("//text()").text when 'url' diff --git a/vendor/assets/javascripts/lunr.js b/vendor/assets/javascripts/lunr.js new file mode 100755 index 0000000..d5d1fc0 --- /dev/null +++ b/vendor/assets/javascripts/lunr.js @@ -0,0 +1,2787 @@ +/** + * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.0.3 + * Copyright (C) 2017 Oliver Nightingale + * @license MIT + */ + +;(function(){ + +/** + * A convenience function for configuring and constructing + * a new lunr Index. + * + * A lunr.Builder instance is created and the pipeline setup + * with a trimmer, stop word filter and stemmer. + * + * This builder object is yielded to the configuration function + * that is passed as a parameter, allowing the list of fields + * and other builder parameters to be customised. + * + * All documents _must_ be added within the passed config function. + * + * @example + * var idx = lunr(function () { + * this.field('title') + * this.field('body') + * this.ref('id') + * + * documents.forEach(function (doc) { + * this.add(doc) + * }, this) + * }) + * + * @see {@link lunr.Builder} + * @see {@link lunr.Pipeline} + * @see {@link lunr.trimmer} + * @see {@link lunr.stopWordFilter} + * @see {@link lunr.stemmer} + * @namespace {function} lunr + */ +var lunr = function (config) { + var builder = new lunr.Builder + + builder.pipeline.add( + lunr.trimmer, + lunr.stopWordFilter, + lunr.stemmer + ) + + builder.searchPipeline.add( + lunr.stemmer + ) + + config.call(builder, builder) + return builder.build() +} + +lunr.version = "2.0.3" +/*! + * lunr.utils + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * A namespace containing utils for the rest of the lunr library + */ +lunr.utils = {} + +/** + * Print a warning message to the console. + * + * @param {String} message The message to be printed. + * @memberOf Utils + */ +lunr.utils.warn = (function (global) { + /* eslint-disable no-console */ + return function (message) { + if (global.console && console.warn) { + console.warn(message) + } + } + /* eslint-enable no-console */ +})(this) + +/** + * Convert an object to a string. + * + * In the case of `null` and `undefined` the function returns + * the empty string, in all other cases the result of calling + * `toString` on the passed object is returned. + * + * @param {Any} obj The object to convert to a string. + * @return {String} string representation of the passed object. + * @memberOf Utils + */ +lunr.utils.asString = function (obj) { + if (obj === void 0 || obj === null) { + return "" + } else { + return obj.toString() + } +} +/** + * A function to calculate the inverse document frequency for + * a posting. This is shared between the builder and the index + * + * @private + * @param {object} posting - The posting for a given term + * @param {number} documentCount - The total number of documents. + */ +lunr.idf = function (posting, documentCount) { + var documentsWithTerm = 0 + + for (var fieldName in posting) { + if (fieldName == '_index') continue // Ignore the term index, its not a field + documentsWithTerm += Object.keys(posting[fieldName]).length + } + + return (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5) +} + +/** + * A token wraps a string representation of a token + * as it is passed through the text processing pipeline. + * + * @constructor + * @param {string} [str=''] - The string token being wrapped. + * @param {object} [metadata={}] - Metadata associated with this token. + */ +lunr.Token = function (str, metadata) { + this.str = str || "" + this.metadata = metadata || {} +} + +/** + * Returns the token string that is being wrapped by this object. + * + * @returns {string} + */ +lunr.Token.prototype.toString = function () { + return this.str +} + +/** + * A token update function is used when updating or optionally + * when cloning a token. + * + * @callback lunr.Token~updateFunction + * @param {string} str - The string representation of the token. + * @param {Object} metadata - All metadata associated with this token. + */ + +/** + * Applies the given function to the wrapped string token. + * + * @example + * token.update(function (str, metadata) { + * return str.toUpperCase() + * }) + * + * @param {lunr.Token~updateFunction} fn - A function to apply to the token string. + * @returns {lunr.Token} + */ +lunr.Token.prototype.update = function (fn) { + this.str = fn(this.str, this.metadata) + return this +} + +/** + * Creates a clone of this token. Optionally a function can be + * applied to the cloned token. + * + * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token. + * @returns {lunr.Token} + */ +lunr.Token.prototype.clone = function (fn) { + fn = fn || function (s) { return s } + return new lunr.Token (fn(this.str, this.metadata), this.metadata) +} +/*! + * lunr.tokenizer + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * A function for splitting a string into tokens ready to be inserted into + * the search index. Uses `lunr.tokenizer.separator` to split strings, change + * the value of this property to change how strings are split into tokens. + * + * This tokenizer will convert its parameter to a string by calling `toString` and + * then will split this string on the character in `lunr.tokenizer.separator`. + * Arrays will have their elements converted to strings and wrapped in a lunr.Token. + * + * @static + * @param {?(string|object|object[])} obj - The object to convert into tokens + * @returns {lunr.Token[]} + */ +lunr.tokenizer = function (obj) { + if (obj == null || obj == undefined) { + return [] + } + + if (Array.isArray(obj)) { + return obj.map(function (t) { + return new lunr.Token(lunr.utils.asString(t).toLowerCase()) + }) + } + + var str = obj.toString().trim().toLowerCase(), + len = str.length, + tokens = [] + + for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) { + var char = str.charAt(sliceEnd), + sliceLength = sliceEnd - sliceStart + + if ((char.match(lunr.tokenizer.separator) || sliceEnd == len)) { + + if (sliceLength > 0) { + tokens.push( + new lunr.Token (str.slice(sliceStart, sliceEnd), { + position: [sliceStart, sliceLength], + index: tokens.length + }) + ) + } + + sliceStart = sliceEnd + 1 + } + + } + + return tokens +} + +/** + * The separator used to split a string into tokens. Override this property to change the behaviour of + * `lunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens. + * + * @static + * @see lunr.tokenizer + */ +lunr.tokenizer.separator = /[\s\-]+/ +/*! + * lunr.Pipeline + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * lunr.Pipelines maintain an ordered list of functions to be applied to all + * tokens in documents entering the search index and queries being ran against + * the index. + * + * An instance of lunr.Index created with the lunr shortcut will contain a + * pipeline with a stop word filter and an English language stemmer. Extra + * functions can be added before or after either of these functions or these + * default functions can be removed. + * + * When run the pipeline will call each function in turn, passing a token, the + * index of that token in the original list of all tokens and finally a list of + * all the original tokens. + * + * The output of functions in the pipeline will be passed to the next function + * in the pipeline. To exclude a token from entering the index the function + * should return undefined, the rest of the pipeline will not be called with + * this token. + * + * For serialisation of pipelines to work, all functions used in an instance of + * a pipeline should be registered with lunr.Pipeline. Registered functions can + * then be loaded. If trying to load a serialised pipeline that uses functions + * that are not registered an error will be thrown. + * + * If not planning on serialising the pipeline then registering pipeline functions + * is not necessary. + * + * @constructor + */ +lunr.Pipeline = function () { + this._stack = [] +} + +lunr.Pipeline.registeredFunctions = Object.create(null) + +/** + * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token + * string as well as all known metadata. A pipeline function can mutate the token string + * or mutate (or add) metadata for a given token. + * + * A pipeline function can indicate that the passed token should be discarded by returning + * null. This token will not be passed to any downstream pipeline functions and will not be + * added to the index. + * + * Multiple tokens can be returned by returning an array of tokens. Each token will be passed + * to any downstream pipeline functions and all will returned tokens will be added to the index. + * + * Any number of pipeline functions may be chained together using a lunr.Pipeline. + * + * @interface lunr.PipelineFunction + * @param {lunr.Token} token - A token from the document being processed. + * @param {number} i - The index of this token in the complete list of tokens for this document/field. + * @param {lunr.Token[]} tokens - All tokens for this document/field. + * @returns {(?lunr.Token|lunr.Token[])} + */ + +/** + * Register a function with the pipeline. + * + * Functions that are used in the pipeline should be registered if the pipeline + * needs to be serialised, or a serialised pipeline needs to be loaded. + * + * Registering a function does not add it to a pipeline, functions must still be + * added to instances of the pipeline for them to be used when running a pipeline. + * + * @param {lunr.PipelineFunction} fn - The function to check for. + * @param {String} label - The label to register this function with + */ +lunr.Pipeline.registerFunction = function (fn, label) { + if (label in this.registeredFunctions) { + lunr.utils.warn('Overwriting existing registered function: ' + label) + } + + fn.label = label + lunr.Pipeline.registeredFunctions[fn.label] = fn +} + +/** + * Warns if the function is not registered as a Pipeline function. + * + * @param {lunr.PipelineFunction} fn - The function to check for. + * @private + */ +lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) { + var isRegistered = fn.label && (fn.label in this.registeredFunctions) + + if (!isRegistered) { + lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn) + } +} + +/** + * Loads a previously serialised pipeline. + * + * All functions to be loaded must already be registered with lunr.Pipeline. + * If any function from the serialised data has not been registered then an + * error will be thrown. + * + * @param {Object} serialised - The serialised pipeline to load. + * @returns {lunr.Pipeline} + */ +lunr.Pipeline.load = function (serialised) { + var pipeline = new lunr.Pipeline + + serialised.forEach(function (fnName) { + var fn = lunr.Pipeline.registeredFunctions[fnName] + + if (fn) { + pipeline.add(fn) + } else { + throw new Error('Cannot load unregistered function: ' + fnName) + } + }) + + return pipeline +} + +/** + * Adds new functions to the end of the pipeline. + * + * Logs a warning if the function has not been registered. + * + * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline. + */ +lunr.Pipeline.prototype.add = function () { + var fns = Array.prototype.slice.call(arguments) + + fns.forEach(function (fn) { + lunr.Pipeline.warnIfFunctionNotRegistered(fn) + this._stack.push(fn) + }, this) +} + +/** + * Adds a single function after a function that already exists in the + * pipeline. + * + * Logs a warning if the function has not been registered. + * + * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline. + * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline. + */ +lunr.Pipeline.prototype.after = function (existingFn, newFn) { + lunr.Pipeline.warnIfFunctionNotRegistered(newFn) + + var pos = this._stack.indexOf(existingFn) + if (pos == -1) { + throw new Error('Cannot find existingFn') + } + + pos = pos + 1 + this._stack.splice(pos, 0, newFn) +} + +/** + * Adds a single function before a function that already exists in the + * pipeline. + * + * Logs a warning if the function has not been registered. + * + * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline. + * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline. + */ +lunr.Pipeline.prototype.before = function (existingFn, newFn) { + lunr.Pipeline.warnIfFunctionNotRegistered(newFn) + + var pos = this._stack.indexOf(existingFn) + if (pos == -1) { + throw new Error('Cannot find existingFn') + } + + this._stack.splice(pos, 0, newFn) +} + +/** + * Removes a function from the pipeline. + * + * @param {lunr.PipelineFunction} fn The function to remove from the pipeline. + */ +lunr.Pipeline.prototype.remove = function (fn) { + var pos = this._stack.indexOf(fn) + if (pos == -1) { + return + } + + this._stack.splice(pos, 1) +} + +/** + * Runs the current list of functions that make up the pipeline against the + * passed tokens. + * + * @param {Array} tokens The tokens to run through the pipeline. + * @returns {Array} + */ +lunr.Pipeline.prototype.run = function (tokens) { + var stackLength = this._stack.length + + for (var i = 0; i < stackLength; i++) { + var fn = this._stack[i] + + tokens = tokens.reduce(function (memo, token, j) { + var result = fn(token, j, tokens) + + if (result === void 0 || result === '') return memo + + return memo.concat(result) + }, []) + } + + return tokens +} + +/** + * Convenience method for passing a string through a pipeline and getting + * strings out. This method takes care of wrapping the passed string in a + * token and mapping the resulting tokens back to strings. + * + * @param {string} str - The string to pass through the pipeline. + * @returns {string[]} + */ +lunr.Pipeline.prototype.runString = function (str) { + var token = new lunr.Token (str) + + return this.run([token]).map(function (t) { + return t.toString() + }) +} + +/** + * Resets the pipeline by removing any existing processors. + * + */ +lunr.Pipeline.prototype.reset = function () { + this._stack = [] +} + +/** + * Returns a representation of the pipeline ready for serialisation. + * + * Logs a warning if the function has not been registered. + * + * @returns {Array} + */ +lunr.Pipeline.prototype.toJSON = function () { + return this._stack.map(function (fn) { + lunr.Pipeline.warnIfFunctionNotRegistered(fn) + + return fn.label + }) +} +/*! + * lunr.Vector + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * A vector is used to construct the vector space of documents and queries. These + * vectors support operations to determine the similarity between two documents or + * a document and a query. + * + * Normally no parameters are required for initializing a vector, but in the case of + * loading a previously dumped vector the raw elements can be provided to the constructor. + * + * For performance reasons vectors are implemented with a flat array, where an elements + * index is immediately followed by its value. E.g. [index, value, index, value]. This + * allows the underlying array to be as sparse as possible and still offer decent + * performance when being used for vector calculations. + * + * @constructor + * @param {Number[]} [elements] - The flat list of element index and element value pairs. + */ +lunr.Vector = function (elements) { + this._magnitude = 0 + this.elements = elements || [] +} + + +/** + * Calculates the position within the vector to insert a given index. + * + * This is used internally by insert and upsert. If there are duplicate indexes then + * the position is returned as if the value for that index were to be updated, but it + * is the callers responsibility to check whether there is a duplicate at that index + * + * @param {Number} insertIdx - The index at which the element should be inserted. + * @returns {Number} + */ +lunr.Vector.prototype.positionForIndex = function (index) { + // For an empty vector the tuple can be inserted at the beginning + if (this.elements.length == 0) { + return 0 + } + + var start = 0, + end = this.elements.length / 2, + sliceLength = end - start, + pivotPoint = Math.floor(sliceLength / 2), + pivotIndex = this.elements[pivotPoint * 2] + + while (sliceLength > 1) { + if (pivotIndex < index) { + start = pivotPoint + } + + if (pivotIndex > index) { + end = pivotPoint + } + + if (pivotIndex == index) { + break + } + + sliceLength = end - start + pivotPoint = start + Math.floor(sliceLength / 2) + pivotIndex = this.elements[pivotPoint * 2] + } + + if (pivotIndex == index) { + return pivotPoint * 2 + } + + if (pivotIndex > index) { + return pivotPoint * 2 + } + + if (pivotIndex < index) { + return (pivotPoint + 1) * 2 + } +} + +/** + * Inserts an element at an index within the vector. + * + * Does not allow duplicates, will throw an error if there is already an entry + * for this index. + * + * @param {Number} insertIdx - The index at which the element should be inserted. + * @param {Number} val - The value to be inserted into the vector. + */ +lunr.Vector.prototype.insert = function (insertIdx, val) { + this.upsert(insertIdx, val, function () { + throw "duplicate index" + }) +} + +/** + * Inserts or updates an existing index within the vector. + * + * @param {Number} insertIdx - The index at which the element should be inserted. + * @param {Number} val - The value to be inserted into the vector. + * @param {function} fn - A function that is called for updates, the existing value and the + * requested value are passed as arguments + */ +lunr.Vector.prototype.upsert = function (insertIdx, val, fn) { + this._magnitude = 0 + var position = this.positionForIndex(insertIdx) + + if (this.elements[position] == insertIdx) { + this.elements[position + 1] = fn(this.elements[position + 1], val) + } else { + this.elements.splice(position, 0, insertIdx, val) + } +} + +/** + * Calculates the magnitude of this vector. + * + * @returns {Number} + */ +lunr.Vector.prototype.magnitude = function () { + if (this._magnitude) return this._magnitude + + var sumOfSquares = 0, + elementsLength = this.elements.length + + for (var i = 1; i < elementsLength; i += 2) { + var val = this.elements[i] + sumOfSquares += val * val + } + + return this._magnitude = Math.sqrt(sumOfSquares) +} + +/** + * Calculates the dot product of this vector and another vector. + * + * @param {lunr.Vector} otherVector - The vector to compute the dot product with. + * @returns {Number} + */ +lunr.Vector.prototype.dot = function (otherVector) { + var dotProduct = 0, + a = this.elements, b = otherVector.elements, + aLen = a.length, bLen = b.length, + aVal = 0, bVal = 0, + i = 0, j = 0 + + while (i < aLen && j < bLen) { + aVal = a[i], bVal = b[j] + if (aVal < bVal) { + i += 2 + } else if (aVal > bVal) { + j += 2 + } else if (aVal == bVal) { + dotProduct += a[i + 1] * b[j + 1] + i += 2 + j += 2 + } + } + + return dotProduct +} + +/** + * Calculates the cosine similarity between this vector and another + * vector. + * + * @param {lunr.Vector} otherVector - The other vector to calculate the + * similarity with. + * @returns {Number} + */ +lunr.Vector.prototype.similarity = function (otherVector) { + return this.dot(otherVector) / (this.magnitude() * otherVector.magnitude()) +} + +/** + * Converts the vector to an array of the elements within the vector. + * + * @returns {Number[]} + */ +lunr.Vector.prototype.toArray = function () { + var output = new Array (this.elements.length / 2) + + for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) { + output[j] = this.elements[i] + } + + return output +} + +/** + * A JSON serializable representation of the vector. + * + * @returns {Number[]} + */ +lunr.Vector.prototype.toJSON = function () { + return this.elements +} +/* eslint-disable */ +/*! + * lunr.stemmer + * Copyright (C) 2017 Oliver Nightingale + * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt + */ + +/** + * lunr.stemmer is an english language stemmer, this is a JavaScript + * implementation of the PorterStemmer taken from http://tartarus.org/~martin + * + * @static + * @implements {lunr.PipelineFunction} + * @param {lunr.Token} token - The string to stem + * @returns {lunr.Token} + * @see {@link lunr.Pipeline} + */ +lunr.stemmer = (function(){ + var step2list = { + "ational" : "ate", + "tional" : "tion", + "enci" : "ence", + "anci" : "ance", + "izer" : "ize", + "bli" : "ble", + "alli" : "al", + "entli" : "ent", + "eli" : "e", + "ousli" : "ous", + "ization" : "ize", + "ation" : "ate", + "ator" : "ate", + "alism" : "al", + "iveness" : "ive", + "fulness" : "ful", + "ousness" : "ous", + "aliti" : "al", + "iviti" : "ive", + "biliti" : "ble", + "logi" : "log" + }, + + step3list = { + "icate" : "ic", + "ative" : "", + "alize" : "al", + "iciti" : "ic", + "ical" : "ic", + "ful" : "", + "ness" : "" + }, + + c = "[^aeiou]", // consonant + v = "[aeiouy]", // vowel + C = c + "[^aeiouy]*", // consonant sequence + V = v + "[aeiou]*", // vowel sequence + + mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0 + meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1 + mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1 + s_v = "^(" + C + ")?" + v; // vowel in stem + + var re_mgr0 = new RegExp(mgr0); + var re_mgr1 = new RegExp(mgr1); + var re_meq1 = new RegExp(meq1); + var re_s_v = new RegExp(s_v); + + var re_1a = /^(.+?)(ss|i)es$/; + var re2_1a = /^(.+?)([^s])s$/; + var re_1b = /^(.+?)eed$/; + var re2_1b = /^(.+?)(ed|ing)$/; + var re_1b_2 = /.$/; + var re2_1b_2 = /(at|bl|iz)$/; + var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$"); + var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + + var re_1c = /^(.+?[^aeiou])y$/; + var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + + var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + + var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + var re2_4 = /^(.+?)(s|t)(ion)$/; + + var re_5 = /^(.+?)e$/; + var re_5_1 = /ll$/; + var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + + var porterStemmer = function porterStemmer(w) { + var stem, + suffix, + firstch, + re, + re2, + re3, + re4; + + if (w.length < 3) { return w; } + + firstch = w.substr(0,1); + if (firstch == "y") { + w = firstch.toUpperCase() + w.substr(1); + } + + // Step 1a + re = re_1a + re2 = re2_1a; + + if (re.test(w)) { w = w.replace(re,"$1$2"); } + else if (re2.test(w)) { w = w.replace(re2,"$1$2"); } + + // Step 1b + re = re_1b; + re2 = re2_1b; + if (re.test(w)) { + var fp = re.exec(w); + re = re_mgr0; + if (re.test(fp[1])) { + re = re_1b_2; + w = w.replace(re,""); + } + } else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = re_s_v; + if (re2.test(stem)) { + w = stem; + re2 = re2_1b_2; + re3 = re3_1b_2; + re4 = re4_1b_2; + if (re2.test(w)) { w = w + "e"; } + else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); } + else if (re4.test(w)) { w = w + "e"; } + } + } + + // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say) + re = re_1c; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + w = stem + "i"; + } + + // Step 2 + re = re_2; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = re_mgr0; + if (re.test(stem)) { + w = stem + step2list[suffix]; + } + } + + // Step 3 + re = re_3; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = re_mgr0; + if (re.test(stem)) { + w = stem + step3list[suffix]; + } + } + + // Step 4 + re = re_4; + re2 = re2_4; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = re_mgr1; + if (re.test(stem)) { + w = stem; + } + } else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = re_mgr1; + if (re2.test(stem)) { + w = stem; + } + } + + // Step 5 + re = re_5; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = re_mgr1; + re2 = re_meq1; + re3 = re3_5; + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) { + w = stem; + } + } + + re = re_5_1; + re2 = re_mgr1; + if (re.test(w) && re2.test(w)) { + re = re_1b_2; + w = w.replace(re,""); + } + + // and turn initial Y back to y + + if (firstch == "y") { + w = firstch.toLowerCase() + w.substr(1); + } + + return w; + }; + + return function (token) { + return token.update(porterStemmer); + } +})(); + +lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer') +/*! + * lunr.stopWordFilter + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * lunr.generateStopWordFilter builds a stopWordFilter function from the provided + * list of stop words. + * + * The built in lunr.stopWordFilter is built using this generator and can be used + * to generate custom stopWordFilters for applications or non English languages. + * + * @param {Array} token The token to pass through the filter + * @returns {lunr.PipelineFunction} + * @see lunr.Pipeline + * @see lunr.stopWordFilter + */ +lunr.generateStopWordFilter = function (stopWords) { + var words = stopWords.reduce(function (memo, stopWord) { + memo[stopWord] = stopWord + return memo + }, {}) + + return function (token) { + if (token && words[token.toString()] !== token.toString()) return token + } +} + +/** + * lunr.stopWordFilter is an English language stop word list filter, any words + * contained in the list will not be passed through the filter. + * + * This is intended to be used in the Pipeline. If the token does not pass the + * filter then undefined will be returned. + * + * @implements {lunr.PipelineFunction} + * @params {lunr.Token} token - A token to check for being a stop word. + * @returns {lunr.Token} + * @see {@link lunr.Pipeline} + */ +lunr.stopWordFilter = lunr.generateStopWordFilter([ + 'a', + 'able', + 'about', + 'across', + 'after', + 'all', + 'almost', + 'also', + 'am', + 'among', + 'an', + 'and', + 'any', + 'are', + 'as', + 'at', + 'be', + 'because', + 'been', + 'but', + 'by', + 'can', + 'cannot', + 'could', + 'dear', + 'did', + 'do', + 'does', + 'either', + 'else', + 'ever', + 'every', + 'for', + 'from', + 'get', + 'got', + 'had', + 'has', + 'have', + 'he', + 'her', + 'hers', + 'him', + 'his', + 'how', + 'however', + 'i', + 'if', + 'in', + 'into', + 'is', + 'it', + 'its', + 'just', + 'least', + 'let', + 'like', + 'likely', + 'may', + 'me', + 'might', + 'most', + 'must', + 'my', + 'neither', + 'no', + 'nor', + 'not', + 'of', + 'off', + 'often', + 'on', + 'only', + 'or', + 'other', + 'our', + 'own', + 'rather', + 'said', + 'say', + 'says', + 'she', + 'should', + 'since', + 'so', + 'some', + 'than', + 'that', + 'the', + 'their', + 'them', + 'then', + 'there', + 'these', + 'they', + 'this', + 'tis', + 'to', + 'too', + 'twas', + 'us', + 'wants', + 'was', + 'we', + 'were', + 'what', + 'when', + 'where', + 'which', + 'while', + 'who', + 'whom', + 'why', + 'will', + 'with', + 'would', + 'yet', + 'you', + 'your' +]) + +lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter') +/*! + * lunr.trimmer + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * lunr.trimmer is a pipeline function for trimming non word + * characters from the beginning and end of tokens before they + * enter the index. + * + * This implementation may not work correctly for non latin + * characters and should either be removed or adapted for use + * with languages with non-latin characters. + * + * @static + * @implements {lunr.PipelineFunction} + * @param {lunr.Token} token The token to pass through the filter + * @returns {lunr.Token} + * @see lunr.Pipeline + */ +lunr.trimmer = function (token) { + return token.update(function (s) { + return s.replace(/^\W+/, '').replace(/\W+$/, '') + }) +} + +lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer') +/*! + * lunr.TokenSet + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * A token set is used to store the unique list of all tokens + * within an index. Token sets are also used to represent an + * incoming query to the index, this query token set and index + * token set are then intersected to find which tokens to look + * up in the inverted index. + * + * A token set can hold multiple tokens, as in the case of the + * index token set, or it can hold a single token as in the + * case of a simple query token set. + * + * Additionally token sets are used to perform wildcard matching. + * Leading, contained and trailing wildcards are supported, and + * from this edit distance matching can also be provided. + * + * Token sets are implemented as a minimal finite state automata, + * where both common prefixes and suffixes are shared between tokens. + * This helps to reduce the space used for storing the token set. + * + * @constructor + */ +lunr.TokenSet = function () { + this.final = false + this.edges = {} + this.id = lunr.TokenSet._nextId + lunr.TokenSet._nextId += 1 +} + +/** + * Keeps track of the next, auto increment, identifier to assign + * to a new tokenSet. + * + * TokenSets require a unique identifier to be correctly minimised. + * + * @private + */ +lunr.TokenSet._nextId = 1 + +/** + * Creates a TokenSet instance from the given sorted array of words. + * + * @param {String[]} arr - A sorted array of strings to create the set from. + * @returns {lunr.TokenSet} + * @throws Will throw an error if the input array is not sorted. + */ +lunr.TokenSet.fromArray = function (arr) { + var builder = new lunr.TokenSet.Builder + + for (var i = 0, len = arr.length; i < len; i++) { + builder.insert(arr[i]) + } + + builder.finish() + return builder.root +} + +/** + * Creates a token set from a query clause. + * + * @private + * @param {Object} clause - A single clause from lunr.Query. + * @param {string} clause.term - The query clause term. + * @param {number} [clause.editDistance] - The optional edit distance for the term. + * @returns {lunr.TokenSet} + */ +lunr.TokenSet.fromClause = function (clause) { + if ('editDistance' in clause) { + return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance) + } else { + return lunr.TokenSet.fromString(clause.term) + } +} + +/** + * Creates a token set representing a single string with a specified + * edit distance. + * + * Insertions, deletions, substitutions and transpositions are each + * treated as an edit distance of 1. + * + * Increasing the allowed edit distance will have a dramatic impact + * on the performance of both creating and intersecting these TokenSets. + * It is advised to keep the edit distance less than 3. + * + * @param {string} str - The string to create the token set from. + * @param {number} editDistance - The allowed edit distance to match. + * @returns {lunr.Vector} + */ +lunr.TokenSet.fromFuzzyString = function (str, editDistance) { + var root = new lunr.TokenSet + + var stack = [{ + node: root, + editsRemaining: editDistance, + str: str + }] + + while (stack.length) { + var frame = stack.pop() + + // no edit + if (frame.str.length > 0) { + var char = frame.str.charAt(0), + noEditNode + + if (char in frame.node.edges) { + noEditNode = frame.node.edges[char] + } else { + noEditNode = new lunr.TokenSet + frame.node.edges[char] = noEditNode + } + + if (frame.str.length == 1) { + noEditNode.final = true + } else { + stack.push({ + node: noEditNode, + editsRemaining: frame.editsRemaining, + str: frame.str.slice(1) + }) + } + } + + // deletion + // can only do a deletion if we have enough edits remaining + // and if there are characters left to delete in the string + if (frame.editsRemaining > 0 && frame.str.length > 1) { + var char = frame.str.charAt(1), + deletionNode + + if (char in frame.node.edges) { + deletionNode = frame.node.edges[char] + } else { + deletionNode = new lunr.TokenSet + frame.node.edges[char] = deletionNode + } + + if (frame.str.length <= 2) { + deletionNode.final = true + } else { + stack.push({ + node: deletionNode, + editsRemaining: frame.editsRemaining - 1, + str: frame.str.slice(2) + }) + } + } + + // deletion + // just removing the last character from the str + if (frame.editsRemaining > 0 && frame.str.length == 1) { + frame.node.final = true + } + + // substitution + // can only do a substitution if we have enough edits remaining + // and if there are characters left to substitute + if (frame.editsRemaining > 0 && frame.str.length >= 1) { + if ("*" in frame.node.edges) { + var substitutionNode = frame.node.edges["*"] + } else { + var substitutionNode = new lunr.TokenSet + frame.node.edges["*"] = substitutionNode + } + + if (frame.str.length == 1) { + substitutionNode.final = true + } else { + stack.push({ + node: substitutionNode, + editsRemaining: frame.editsRemaining - 1, + str: frame.str.slice(1) + }) + } + } + + // insertion + // can only do insertion if there are edits remaining + if (frame.editsRemaining > 0) { + if ("*" in frame.node.edges) { + var insertionNode = frame.node.edges["*"] + } else { + var insertionNode = new lunr.TokenSet + frame.node.edges["*"] = insertionNode + } + + if (frame.str.length == 0) { + insertionNode.final = true + } else { + stack.push({ + node: insertionNode, + editsRemaining: frame.editsRemaining - 1, + str: frame.str + }) + } + } + + // transposition + // can only do a transposition if there are edits remaining + // and there are enough characters to transpose + if (frame.editsRemaining > 0 && frame.str.length > 1) { + var charA = frame.str.charAt(0), + charB = frame.str.charAt(1), + transposeNode + + if (charB in frame.node.edges) { + transposeNode = frame.node.edges[charB] + } else { + transposeNode = new lunr.TokenSet + frame.node.edges[charB] = transposeNode + } + + if (frame.str.length == 1) { + transposeNode.final = true + } else { + stack.push({ + node: transposeNode, + editsRemaining: frame.editsRemaining - 1, + str: charA + frame.str.slice(2) + }) + } + } + } + + return root +} + +/** + * Creates a TokenSet from a string. + * + * The string may contain one or more wildcard characters (*) + * that will allow wildcard matching when intersecting with + * another TokenSet. + * + * @param {string} str - The string to create a TokenSet from. + * @returns {lunr.TokenSet} + */ +lunr.TokenSet.fromString = function (str) { + var node = new lunr.TokenSet, + root = node, + wildcardFound = false + + /* + * Iterates through all characters within the passed string + * appending a node for each character. + * + * As soon as a wildcard character is found then a self + * referencing edge is introduced to continually match + * any number of any characters. + */ + for (var i = 0, len = str.length; i < len; i++) { + var char = str[i], + final = (i == len - 1) + + if (char == "*") { + wildcardFound = true + node.edges[char] = node + node.final = final + + } else { + var next = new lunr.TokenSet + next.final = final + + node.edges[char] = next + node = next + + // TODO: is this needed anymore? + if (wildcardFound) { + node.edges["*"] = root + } + } + } + + return root +} + +/** + * Converts this TokenSet into an array of strings + * contained within the TokenSet. + * + * @returns {string[]} + */ +lunr.TokenSet.prototype.toArray = function () { + var words = [] + + var stack = [{ + prefix: "", + node: this + }] + + while (stack.length) { + var frame = stack.pop(), + edges = Object.keys(frame.node.edges), + len = edges.length + + if (frame.node.final) { + words.push(frame.prefix) + } + + for (var i = 0; i < len; i++) { + var edge = edges[i] + + stack.push({ + prefix: frame.prefix.concat(edge), + node: frame.node.edges[edge] + }) + } + } + + return words +} + +/** + * Generates a string representation of a TokenSet. + * + * This is intended to allow TokenSets to be used as keys + * in objects, largely to aid the construction and minimisation + * of a TokenSet. As such it is not designed to be a human + * friendly representation of the TokenSet. + * + * @returns {string} + */ +lunr.TokenSet.prototype.toString = function () { + // NOTE: Using Object.keys here as this.edges is very likely + // to enter 'hash-mode' with many keys being added + // + // avoiding a for-in loop here as it leads to the function + // being de-optimised (at least in V8). From some simple + // benchmarks the performance is comparable, but allowing + // V8 to optimize may mean easy performance wins in the future. + + if (this._str) { + return this._str + } + + var str = this.final ? '1' : '0', + labels = Object.keys(this.edges).sort(), + len = labels.length + + for (var i = 0; i < len; i++) { + var label = labels[i], + node = this.edges[label] + + str = str + label + node.id + } + + return str +} + +/** + * Returns a new TokenSet that is the intersection of + * this TokenSet and the passed TokenSet. + * + * This intersection will take into account any wildcards + * contained within the TokenSet. + * + * @param {lunr.TokenSet} b - An other TokenSet to intersect with. + * @returns {lunr.TokenSet} + */ +lunr.TokenSet.prototype.intersect = function (b) { + var output = new lunr.TokenSet, + frame = undefined + + var stack = [{ + qNode: b, + output: output, + node: this + }] + + while (stack.length) { + frame = stack.pop() + + // NOTE: As with the #toString method, we are using + // Object.keys and a for loop instead of a for-in loop + // as both of these objects enter 'hash' mode, causing + // the function to be de-optimised in V8 + var qEdges = Object.keys(frame.qNode.edges), + qLen = qEdges.length, + nEdges = Object.keys(frame.node.edges), + nLen = nEdges.length + + for (var q = 0; q < qLen; q++) { + var qEdge = qEdges[q] + + for (var n = 0; n < nLen; n++) { + var nEdge = nEdges[n] + + if (nEdge == qEdge || qEdge == '*') { + var node = frame.node.edges[nEdge], + qNode = frame.qNode.edges[qEdge], + final = node.final && qNode.final, + next = undefined + + if (nEdge in frame.output.edges) { + // an edge already exists for this character + // no need to create a new node, just set the finality + // bit unless this node is already final + next = frame.output.edges[nEdge] + next.final = next.final || final + + } else { + // no edge exists yet, must create one + // set the finality bit and insert it + // into the output + next = new lunr.TokenSet + next.final = final + frame.output.edges[nEdge] = next + } + + stack.push({ + qNode: qNode, + output: next, + node: node + }) + } + } + } + } + + return output +} +lunr.TokenSet.Builder = function () { + this.previousWord = "" + this.root = new lunr.TokenSet + this.uncheckedNodes = [] + this.minimizedNodes = {} +} + +lunr.TokenSet.Builder.prototype.insert = function (word) { + var node, + commonPrefix = 0 + + if (word < this.previousWord) { + throw new Error ("Out of order word insertion") + } + + for (var i = 0; i < word.length && i < this.previousWord.length; i++) { + if (word[i] != this.previousWord[i]) break + commonPrefix++ + } + + this.minimize(commonPrefix) + + if (this.uncheckedNodes.length == 0) { + node = this.root + } else { + node = this.uncheckedNodes[this.uncheckedNodes.length - 1].child + } + + for (var i = commonPrefix; i < word.length; i++) { + var nextNode = new lunr.TokenSet, + char = word[i] + + node.edges[char] = nextNode + + this.uncheckedNodes.push({ + parent: node, + char: char, + child: nextNode + }) + + node = nextNode + } + + node.final = true + this.previousWord = word +} + +lunr.TokenSet.Builder.prototype.finish = function () { + this.minimize(0) +} + +lunr.TokenSet.Builder.prototype.minimize = function (downTo) { + for (var i = this.uncheckedNodes.length - 1; i >= downTo; i--) { + var node = this.uncheckedNodes[i], + childKey = node.child.toString() + + if (childKey in this.minimizedNodes) { + node.parent.edges[node.char] = this.minimizedNodes[childKey] + } else { + // Cache the key for this node since + // we know it can't change anymore + node.child._str = childKey + + this.minimizedNodes[childKey] = node.child + } + + this.uncheckedNodes.pop() + } +} +/*! + * lunr.Index + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * An index contains the built index of all documents and provides a query interface + * to the index. + * + * Usually instances of lunr.Index will not be created using this constructor, instead + * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be + * used to load previously built and serialized indexes. + * + * @constructor + * @param {Object} attrs - The attributes of the built search index. + * @param {Object} attrs.invertedIndex - An index of term/field to document reference. + * @param {Object} attrs.documentVectors - Document vectors keyed by document reference. + * @param {lunr.TokenSet} attrs.tokenSet - An set of all corpus tokens. + * @param {number} attrs.documentCount - The total number of documents held in the index. + * @param {number} attrs.averageDocumentLength - The average length of all documents in the index. + * @param {number} attrs.b - A parameter for the document scoring algorithm. + * @param {number} attrs.k1 - A parameter for the document scoring algorithm. + * @param {string[]} attrs.fields - The names of indexed document fields. + * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms. + */ +lunr.Index = function (attrs) { + this.invertedIndex = attrs.invertedIndex + this.documentVectors = attrs.documentVectors + this.tokenSet = attrs.tokenSet + this.documentCount = attrs.documentCount + this.averageDocumentLength = attrs.averageDocumentLength + this.b = attrs.b + this.k1 = attrs.k1 + this.fields = attrs.fields + this.pipeline = attrs.pipeline +} + +/** + * A result contains details of a document matching a search query. + * @typedef {Object} lunr.Index~Result + * @property {string} ref - The reference of the document this result represents. + * @property {number} score - A number between 0 and 1 representing how similar this document is to the query. + * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match. + */ + +/** + * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple + * query language which itself is parsed into an instance of lunr.Query. + * + * For programmatically building queries it is advised to directly use lunr.Query, the query language + * is best used for human entered text rather than program generated text. + * + * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported + * and will be combined with OR, e.g `hello world` will match documents that contain either 'hello' + * or 'world', though those that contain both will rank higher in the results. + * + * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can + * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding + * wildcards will increase the number of documents that will be found but can also have a negative + * impact on query performance, especially with wildcards at the beginning of a term. + * + * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term + * hello in the title field will match this query. Using a field not present in the index will lead + * to an error being thrown. + * + * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term + * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported + * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2. + * Avoid large values for edit distance to improve query performance. + * + * @typedef {string} lunr.Index~QueryString + * @example Simple single term query + * hello + * @example Multiple term query + * hello world + * @example term scoped to a field + * title:hello + * @example term with a boost of 10 + * hello^10 + * @example term with an edit distance of 2 + * hello~2 + */ + +/** + * Performs a search against the index using lunr query syntax. + * + * Results will be returned sorted by their score, the most relevant results + * will be returned first. + * + * For more programmatic querying use lunr.Index#query. + * + * @param {lunr.Index~QueryString} queryString - A string containing a lunr query. + * @throws {lunr.QueryParseError} If the passed query string cannot be parsed. + * @returns {lunr.Index~Result[]} + */ +lunr.Index.prototype.search = function (queryString) { + return this.query(function (query) { + var parser = new lunr.QueryParser(queryString, query) + parser.parse() + }) +} + +/** + * A query builder callback provides a query object to be used to express + * the query to perform on the index. + * + * @callback lunr.Index~queryBuilder + * @param {lunr.Query} query - The query object to build up. + * @this lunr.Query + */ + +/** + * Performs a query against the index using the yielded lunr.Query object. + * + * If performing programmatic queries against the index, this method is preferred + * over lunr.Index#search so as to avoid the additional query parsing overhead. + * + * A query object is yielded to the supplied function which should be used to + * express the query to be run against the index. + * + * Note that although this function takes a callback parameter it is _not_ an + * asynchronous operation, the callback is just yielded a query object to be + * customized. + * + * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query. + * @returns {lunr.Index~Result[]} + */ +lunr.Index.prototype.query = function (fn) { + // for each query clause + // * process terms + // * expand terms from token set + // * find matching documents and metadata + // * get document vectors + // * score documents + + var query = new lunr.Query(this.fields), + matchingDocuments = Object.create(null), + queryVector = new lunr.Vector + + fn.call(query, query) + + for (var i = 0; i < query.clauses.length; i++) { + /* + * Unless the pipeline has been disabled for this term, which is + * the case for terms with wildcards, we need to pass the clause + * term through the search pipeline. A pipeline returns an array + * of processed terms. Pipeline functions may expand the passed + * term, which means we may end up performing multiple index lookups + * for a single query term. + */ + var clause = query.clauses[i], + terms = null + + if (clause.usePipeline) { + terms = this.pipeline.runString(clause.term) + } else { + terms = [clause.term] + } + + for (var m = 0; m < terms.length; m++) { + var term = terms[m] + + /* + * Each term returned from the pipeline needs to use the same query + * clause object, e.g. the same boost and or edit distance. The + * simplest way to do this is to re-use the clause object but mutate + * its term property. + */ + clause.term = term + + /* + * From the term in the clause we create a token set which will then + * be used to intersect the indexes token set to get a list of terms + * to lookup in the inverted index + */ + var termTokenSet = lunr.TokenSet.fromClause(clause), + expandedTerms = this.tokenSet.intersect(termTokenSet).toArray() + + for (var j = 0; j < expandedTerms.length; j++) { + /* + * For each term calculate the score as the term relates to the + * query using the same calculation used to score documents during + * indexing. This score will be used to build a vector space + * representation of the query. + * + * Also need to discover the terms index to insert into the query + * vector at the right position + */ + var expandedTerm = expandedTerms[j], + posting = this.invertedIndex[expandedTerm], + termIndex = posting._index, + idf = lunr.idf(posting, this.documentCount), + tf = 1, + score = idf * ((this.k1 + 1) * tf) / (this.k1 * (1 - this.b + this.b * (query.clauses.length / this.averageDocumentLength)) + tf) + + /* + * Upserting the found query term, along with its term index + * into the vector representing the query. It is here that + * any boosts are applied to the score. They could have been + * applied when calculating the score above, but that expression + * is already quite busy. + * + * Using upsert because there could already be an entry in the vector + * for the term we are working with. In that case we just add the scores + * together. + */ + queryVector.upsert(termIndex, score * clause.boost, function (a, b) { return a + b }) + + for (var k = 0; k < clause.fields.length; k++) { + /* + * For each field that this query term is scoped by (by default + * all fields are in scope) we need to get all the document refs + * that have this term in that field. + * + * The posting is the entry in the invertedIndex for the matching + * term from above. + */ + var field = clause.fields[k], + fieldPosting = posting[field], + matchingDocumentRefs = Object.keys(fieldPosting) + + for (var l = 0; l < matchingDocumentRefs.length; l++) { + /* + * All metadata for this term/field/document triple + * are then extracted and collected into an instance + * of lunr.MatchData ready to be returned in the query + * results + */ + var matchingDocumentRef = matchingDocumentRefs[l], + documentMetadata, matchData + + documentMetadata = fieldPosting[matchingDocumentRef] + matchData = new lunr.MatchData (expandedTerm, field, documentMetadata) + + if (matchingDocumentRef in matchingDocuments) { + matchingDocuments[matchingDocumentRef].combine(matchData) + } else { + matchingDocuments[matchingDocumentRef] = matchData + } + + } + } + } + } + } + + var matchingDocumentRefs = Object.keys(matchingDocuments), + results = [] + + for (var i = 0; i < matchingDocumentRefs.length; i++) { + /* + * With all the matching documents found they now need + * to be sorted by their relevance to the query. This + * is done by retrieving the documents vector representation + * and then finding its similarity with the query vector + * that was constructed earlier. + * + * This score, along with the document ref and any metadata + * we collected into a lunr.MatchData instance are stored + * in the results array ready for returning to the caller + */ + var ref = matchingDocumentRefs[i], + documentVector = this.documentVectors[ref], + score = queryVector.similarity(documentVector) + + results.push({ + ref: ref, + score: score, + matchData: matchingDocuments[ref] + }) + } + + return results.sort(function (a, b) { + return b.score - a.score + }) +} + +/** + * Prepares the index for JSON serialization. + * + * The schema for this JSON blob will be described in a + * separate JSON schema file. + * + * @returns {Object} + */ +lunr.Index.prototype.toJSON = function () { + var invertedIndex = Object.keys(this.invertedIndex) + .sort() + .map(function (term) { + return [term, this.invertedIndex[term]] + }, this) + + var documentVectors = Object.keys(this.documentVectors) + .map(function (ref) { + return [ref, this.documentVectors[ref].toJSON()] + }, this) + + return { + version: lunr.version, + averageDocumentLength: this.averageDocumentLength, + b: this.b, + k1: this.k1, + fields: this.fields, + documentVectors: documentVectors, + invertedIndex: invertedIndex, + pipeline: this.pipeline.toJSON() + } +} + +/** + * Loads a previously serialized lunr.Index + * + * @param {Object} serializedIndex - A previously serialized lunr.Index + * @returns {lunr.Index} + */ +lunr.Index.load = function (serializedIndex) { + var attrs = {}, + documentVectors = {}, + serializedVectors = serializedIndex.documentVectors, + documentCount = 0, + invertedIndex = {}, + serializedInvertedIndex = serializedIndex.invertedIndex, + tokenSetBuilder = new lunr.TokenSet.Builder, + pipeline = lunr.Pipeline.load(serializedIndex.pipeline) + + if (serializedIndex.version != lunr.version) { + lunr.utils.warn("Version mismatch when loading serialised index. Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'") + } + + for (var i = 0; i < serializedVectors.length; i++, documentCount++) { + var tuple = serializedVectors[i], + ref = tuple[0], + elements = tuple[1] + + documentVectors[ref] = new lunr.Vector(elements) + } + + for (var i = 0; i < serializedInvertedIndex.length; i++) { + var tuple = serializedInvertedIndex[i], + term = tuple[0], + posting = tuple[1] + + tokenSetBuilder.insert(term) + invertedIndex[term] = posting + } + + tokenSetBuilder.finish() + + attrs.b = serializedIndex.b + attrs.k1 = serializedIndex.k1 + attrs.fields = serializedIndex.fields + attrs.averageDocumentLength = serializedIndex.averageDocumentLength + + attrs.documentCount = documentCount + attrs.documentVectors = documentVectors + attrs.invertedIndex = invertedIndex + attrs.tokenSet = tokenSetBuilder.root + attrs.pipeline = pipeline + + return new lunr.Index(attrs) +} +/*! + * lunr.Builder + * Copyright (C) 2017 Oliver Nightingale + */ + +/** + * lunr.Builder performs indexing on a set of documents and + * returns instances of lunr.Index ready for querying. + * + * All configuration of the index is done via the builder, the + * fields to index, the document reference, the text processing + * pipeline and document scoring parameters are all set on the + * builder before indexing. + * + * @constructor + * @property {string} _ref - Internal reference to the document reference field. + * @property {string[]} _fields - Internal reference to the document fields to index. + * @property {object} invertedIndex - The inverted index maps terms to document fields. + * @property {object} documentTermFrequencies - Keeps track of document term frequencies. + * @property {object} documentLengths - Keeps track of the length of documents added to the index. + * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing. + * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing. + * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index. + * @property {number} documentCount - Keeps track of the total number of documents indexed. + * @property {number} _b - A parameter to control field length normalization, setting this to 0 disabled normalization, 1 fully normalizes field lengths, the default value is 0.75. + * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2. + * @property {number} termIndex - A counter incremented for each unique term, used to identify a terms position in the vector space. + * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index. + */ +lunr.Builder = function () { + this._ref = "id" + this._fields = [] + this.invertedIndex = Object.create(null) + this.documentTermFrequencies = {} + this.documentLengths = {} + this.tokenizer = lunr.tokenizer + this.pipeline = new lunr.Pipeline + this.searchPipeline = new lunr.Pipeline + this.documentCount = 0 + this._b = 0.75 + this._k1 = 1.2 + this.termIndex = 0 + this.metadataWhitelist = [] +} + +/** + * Sets the document field used as the document reference. Every document must have this field. + * The type of this field in the document should be a string, if it is not a string it will be + * coerced into a string by calling toString. + * + * The default ref is 'id'. + * + * The ref should _not_ be changed during indexing, it should be set before any documents are + * added to the index. Changing it during indexing can lead to inconsistent results. + * + * @param {string} ref - The name of the reference field in the document. + */ +lunr.Builder.prototype.ref = function (ref) { + this._ref = ref +} + +/** + * Adds a field to the list of document fields that will be indexed. Every document being + * indexed should have this field. Null values for this field in indexed documents will + * not cause errors but will limit the chance of that document being retrieved by searches. + * + * All fields should be added before adding documents to the index. Adding fields after + * a document has been indexed will have no effect on already indexed documents. + * + * @param {string} field - The name of a field to index in all documents. + */ +lunr.Builder.prototype.field = function (field) { + this._fields.push(field) +} + +/** + * A parameter to tune the amount of field length normalisation that is applied when + * calculating relevance scores. A value of 0 will completely disable any normalisation + * and a value of 1 will fully normalise field lengths. The default is 0.75. Values of b + * will be clamped to the range 0 - 1. + * + * @param {number} number - The value to set for this tuning parameter. + */ +lunr.Builder.prototype.b = function (number) { + if (number < 0) { + this._b = 0 + } else if (number > 1) { + this._b = 1 + } else { + this._b = number + } +} + +/** + * A parameter that controls the speed at which a rise in term frequency results in term + * frequency saturation. The default value is 1.2. Setting this to a higher value will give + * slower saturation levels, a lower value will result in quicker saturation. + * + * @param {number} number - The value to set for this tuning parameter. + */ +lunr.Builder.prototype.k1 = function (number) { + this._k1 = number +} + +/** + * Adds a document to the index. + * + * Before adding fields to the index the index should have been fully setup, with the document + * ref and all fields to index already having been specified. + * + * The document must have a field name as specified by the ref (by default this is 'id') and + * it should have all fields defined for indexing, though null or undefined values will not + * cause errors. + * + * @param {object} doc - The document to add to the index. + */ +lunr.Builder.prototype.add = function (doc) { + var docRef = doc[this._ref], + documentTerms = {} + + this.documentCount += 1 + this.documentTermFrequencies[docRef] = documentTerms + this.documentLengths[docRef] = 0 + + for (var i = 0; i < this._fields.length; i++) { + var fieldName = this._fields[i], + field = doc[fieldName], + tokens = this.tokenizer(field), + terms = this.pipeline.run(tokens) + + // store the length of this field for this document + this.documentLengths[docRef] += terms.length + + // calculate term frequencies for this field + for (var j = 0; j < terms.length; j++) { + var term = terms[j] + + if (documentTerms[term] == undefined) { + documentTerms[term] = 0 + } + + documentTerms[term] += 1 + + // add to inverted index + // create an initial posting if one doesn't exist + if (this.invertedIndex[term] == undefined) { + var posting = Object.create(null) + posting["_index"] = this.termIndex + this.termIndex += 1 + + for (var k = 0; k < this._fields.length; k++) { + posting[this._fields[k]] = Object.create(null) + } + + this.invertedIndex[term] = posting + } + + // add an entry for this term/fieldName/docRef to the invertedIndex + if (this.invertedIndex[term][fieldName][docRef] == undefined) { + this.invertedIndex[term][fieldName][docRef] = Object.create(null) + } + + // store all whitelisted metadata about this token in the + // inverted index + for (var l = 0; l < this.metadataWhitelist.length; l++) { + var metadataKey = this.metadataWhitelist[l], + metadata = term.metadata[metadataKey] + + if (this.invertedIndex[term][fieldName][docRef][metadataKey] == undefined) { + this.invertedIndex[term][fieldName][docRef][metadataKey] = [] + } + + this.invertedIndex[term][fieldName][docRef][metadataKey].push(metadata) + } + } + + } +} + +/** + * Calculates the average document length for this index + * + * @private + */ +lunr.Builder.prototype.calculateAverageDocumentLengths = function () { + + var documentRefs = Object.keys(this.documentLengths), + numberOfDocuments = documentRefs.length, + allDocumentsLength = 0 + + for (var i = 0; i < numberOfDocuments; i++) { + var documentRef = documentRefs[i] + allDocumentsLength += this.documentLengths[documentRef] + } + + this.averageDocumentLength = allDocumentsLength / numberOfDocuments +} + +/** + * Builds a vector space model of every document using lunr.Vector + * + * @private + */ +lunr.Builder.prototype.createDocumentVectors = function () { + var documentVectors = {}, + docRefs = Object.keys(this.documentTermFrequencies), + docRefsLength = docRefs.length + + for (var i = 0; i < docRefsLength; i++) { + var docRef = docRefs[i], + documentLength = this.documentLengths[docRef], + documentVector = new lunr.Vector, + termFrequencies = this.documentTermFrequencies[docRef], + terms = Object.keys(termFrequencies), + termsLength = terms.length + + for (var j = 0; j < termsLength; j++) { + var term = terms[j], + tf = termFrequencies[term], + termIndex = this.invertedIndex[term]._index, + idf = lunr.idf(this.invertedIndex[term], this.documentCount), + score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (documentLength / this.averageDocumentLength)) + tf), + scoreWithPrecision = Math.round(score * 1000) / 1000 + // Converts 1.23456789 to 1.234. + // Reducing the precision so that the vectors take up less + // space when serialised. Doing it now so that they behave + // the same before and after serialisation. Also, this is + // the fastest approach to reducing a number's precision in + // JavaScript. + + documentVector.insert(termIndex, scoreWithPrecision) + } + + documentVectors[docRef] = documentVector + } + + this.documentVectors = documentVectors +} + +/** + * Creates a token set of all tokens in the index using lunr.TokenSet + * + * @private + */ +lunr.Builder.prototype.createTokenSet = function () { + this.tokenSet = lunr.TokenSet.fromArray( + Object.keys(this.invertedIndex).sort() + ) +} + +/** + * Builds the index, creating an instance of lunr.Index. + * + * This completes the indexing process and should only be called + * once all documents have been added to the index. + * + * @private + * @returns {lunr.Index} + */ +lunr.Builder.prototype.build = function () { + this.calculateAverageDocumentLengths() + this.createDocumentVectors() + this.createTokenSet() + + return new lunr.Index({ + invertedIndex: this.invertedIndex, + documentVectors: this.documentVectors, + tokenSet: this.tokenSet, + averageDocumentLength: this.averageDocumentLength, + documentCount: this.documentCount, + fields: this._fields, + pipeline: this.searchPipeline, + b: this._b, + k1: this._k1 + }) +} + +/** + * Applies a plugin to the index builder. + * + * A plugin is a function that is called with the index builder as its context. + * Plugins can be used to customise or extend the behaviour of the index + * in some way. A plugin is just a function, that encapsulated the custom + * behaviour that should be applied when building the index. + * + * The plugin function will be called with the index builder as its argument, additional + * arguments can also be passed when calling use. The function will be called + * with the index builder as its context. + * + * @param {Function} plugin The plugin to apply. + */ +lunr.Builder.prototype.use = function (fn) { + var args = Array.prototype.slice.call(arguments, 1) + args.unshift(this) + fn.apply(this, args) +} +/** + * Contains and collects metadata about a matching document. + * A single instance of lunr.MatchData is returned as part of every + * lunr.Index~Result. + * + * @constructor + * @property {object} metadata - A collection of metadata associated with this document. + * @see {@link lunr.Index~Result} + */ +lunr.MatchData = function (term, field, metadata) { + this.metadata = {} + this.metadata[term] = {} + this.metadata[term][field] = metadata +} + +/** + * An instance of lunr.MatchData will be created for every term that matches a + * document. However only one instance is required in a lunr.Index~Result. This + * method combines metadata from another instance of lunr.MatchData with this + * objects metadata. + * + * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one. + * @see {@link lunr.Index~Result} + */ +lunr.MatchData.prototype.combine = function (otherMatchData) { + var terms = Object.keys(otherMatchData.metadata) + + for (var i = 0; i < terms.length; i++) { + var term = terms[i], + fields = Object.keys(otherMatchData.metadata[term]) + + if (this.metadata[term] == undefined) { + this.metadata[term] = {} + } + + for (var j = 0; j < fields.length; j++) { + var field = fields[j], + keys = Object.keys(otherMatchData.metadata[term][field]) + + if (this.metadata[term][field] == undefined) { + this.metadata[term][field] = {} + } + + for (var k = 0; k < keys.length; k++) { + var key = keys[k] + + if (this.metadata[term][field][key] == undefined) { + this.metadata[term][field][key] = otherMatchData.metadata[term][field][key] + } else { + this.metadata[term][field][key] = this.metadata[term][field][key].concat(otherMatchData.metadata[term][field][key]) + } + + } + } + } +} +/** + * A lunr.Query provides a programmatic way of defining queries to be performed + * against a {@link lunr.Index}. + * + * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method + * so the query object is pre-initialized with the right index fields. + * + * @constructor + * @property {lunr.Query~Clause[]} clauses - An array of query clauses. + * @property {string[]} allFields - An array of all available fields in a lunr.Index. + */ +lunr.Query = function (allFields) { + this.clauses = [] + this.allFields = allFields +} + +/** + * A single clause in a {@link lunr.Query} contains a term and details on how to + * match that term against a {@link lunr.Index}. + * + * @typedef {Object} lunr.Query~Clause + * @property {string[]} fields - The fields in an index this clause should be matched against. + * @property {number} boost - Any boost that should be applied when matching this clause. + * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be. + * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline. + */ + +/** + * Adds a {@link lunr.Query~Clause} to this query. + * + * Unless the clause contains the fields to be matched all fields will be matched. In addition + * a default boost of 1 is applied to the clause. + * + * @param {lunr.Query~Clause} clause - The clause to add to this query. + * @returns {lunr.Query} + */ +lunr.Query.prototype.clause = function (clause) { + if (!('fields' in clause)) { + clause.fields = this.allFields + } + + if (!('boost' in clause)) { + clause.boost = 1 + } + + if (!('usePipeline' in clause)) { + clause.usePipeline = true + } + + this.clauses.push(clause) + + return this +} + +/** + * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause} + * to the list of clauses that make up this query. + * + * @param {string} term - The term to add to the query. + * @param {Object} [options] - Any additional properties to add to the query clause. + * @returns {lunr.Query} + */ +lunr.Query.prototype.term = function (term, options) { + var clause = options || {} + clause.term = term + + this.clause(clause) + + return this +} +lunr.QueryParseError = function (message, start, end) { + this.name = "QueryParseError" + this.message = message + this.start = start + this.end = end +} + +lunr.QueryParseError.prototype = new Error +lunr.QueryLexer = function (str) { + this.lexemes = [] + this.str = str + this.length = str.length + this.pos = 0 + this.start = 0 +} + +lunr.QueryLexer.prototype.run = function () { + var state = lunr.QueryLexer.lexText + + while (state) { + state = state(this) + } +} + +lunr.QueryLexer.prototype.emit = function (type) { + this.lexemes.push({ + type: type, + str: this.str.slice(this.start, this.pos), + start: this.start, + end: this.pos + }) + + this.start = this.pos +} + +lunr.QueryLexer.prototype.next = function () { + if (this.pos == this.length) { + return lunr.QueryLexer.EOS + } + + var char = this.str.charAt(this.pos) + this.pos += 1 + return char +} + +lunr.QueryLexer.prototype.width = function () { + return this.pos - this.start +} + +lunr.QueryLexer.prototype.ignore = function () { + if (this.start == this.pos) { + this.pos += 1 + } + + this.start = this.pos +} + +lunr.QueryLexer.prototype.backup = function () { + this.pos -= 1 +} + +lunr.QueryLexer.prototype.acceptDigitRun = function () { + var char, charCode + + do { + char = this.next() + charCode = char.charCodeAt(0) + } while (charCode > 47 && charCode < 58) + + if (char != lunr.QueryLexer.EOS) { + this.backup() + } +} + +lunr.QueryLexer.prototype.more = function () { + return this.pos < this.length +} + +lunr.QueryLexer.EOS = 'EOS' +lunr.QueryLexer.FIELD = 'FIELD' +lunr.QueryLexer.TERM = 'TERM' +lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE' +lunr.QueryLexer.BOOST = 'BOOST' + +lunr.QueryLexer.lexField = function (lexer) { + lexer.backup() + lexer.emit(lunr.QueryLexer.FIELD) + lexer.ignore() + return lunr.QueryLexer.lexText +} + +lunr.QueryLexer.lexTerm = function (lexer) { + if (lexer.width() > 1) { + lexer.backup() + lexer.emit(lunr.QueryLexer.TERM) + } + + lexer.ignore() + + if (lexer.more()) { + return lunr.QueryLexer.lexText + } +} + +lunr.QueryLexer.lexEditDistance = function (lexer) { + lexer.ignore() + lexer.acceptDigitRun() + lexer.emit(lunr.QueryLexer.EDIT_DISTANCE) + return lunr.QueryLexer.lexText +} + +lunr.QueryLexer.lexBoost = function (lexer) { + lexer.ignore() + lexer.acceptDigitRun() + lexer.emit(lunr.QueryLexer.BOOST) + return lunr.QueryLexer.lexText +} + +lunr.QueryLexer.lexEOS = function (lexer) { + if (lexer.width() > 0) { + lexer.emit(lunr.QueryLexer.TERM) + } +} + +// This matches the separator used when tokenising fields +// within a document. These should match otherwise it is +// not possible to search for some tokens within a document. +// +// It is possible for the user to change the separator on the +// tokenizer so it _might_ clash with any other of the special +// characters already used within the search string, e.g. :. +// +// This means that it is possible to change the separator in +// such a way that makes some words unsearchable using a search +// string. +lunr.QueryLexer.termSeparator = lunr.tokenizer.separator + +lunr.QueryLexer.lexText = function (lexer) { + while (true) { + var char = lexer.next() + + if (char == lunr.QueryLexer.EOS) { + return lunr.QueryLexer.lexEOS + } + + if (char == ":") { + return lunr.QueryLexer.lexField + } + + if (char == "~") { + lexer.backup() + if (lexer.width() > 0) { + lexer.emit(lunr.QueryLexer.TERM) + } + return lunr.QueryLexer.lexEditDistance + } + + if (char == "^") { + lexer.backup() + if (lexer.width() > 0) { + lexer.emit(lunr.QueryLexer.TERM) + } + return lunr.QueryLexer.lexBoost + } + + if (char.match(lunr.QueryLexer.termSeparator)) { + return lunr.QueryLexer.lexTerm + } + } +} + +lunr.QueryParser = function (str, query) { + this.lexer = new lunr.QueryLexer (str) + this.query = query + this.currentClause = {} + this.lexemeIdx = 0 +} + +lunr.QueryParser.prototype.parse = function () { + this.lexer.run() + this.lexemes = this.lexer.lexemes + + var state = lunr.QueryParser.parseFieldOrTerm + + while (state) { + state = state(this) + } + + return this.query +} + +lunr.QueryParser.prototype.peekLexeme = function () { + return this.lexemes[this.lexemeIdx] +} + +lunr.QueryParser.prototype.consumeLexeme = function () { + var lexeme = this.peekLexeme() + this.lexemeIdx += 1 + return lexeme +} + +lunr.QueryParser.prototype.nextClause = function () { + var completedClause = this.currentClause + this.query.clause(completedClause) + this.currentClause = {} +} + +lunr.QueryParser.parseFieldOrTerm = function (parser) { + var lexeme = parser.peekLexeme() + + if (lexeme == undefined) { + return + } + + switch (lexeme.type) { + case lunr.QueryLexer.FIELD: + return lunr.QueryParser.parseField + case lunr.QueryLexer.TERM: + return lunr.QueryParser.parseTerm + default: + var errorMessage = "expected either a field or a term, found " + lexeme.type + " with value '" + lexeme.str + "'" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } +} + +lunr.QueryParser.parseField = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + if (parser.query.allFields.indexOf(lexeme.str) == -1) { + var possibleFields = parser.query.allFields.map(function (f) { return "'" + f + "'" }).join(), + errorMessage = "unrecognised field '" + lexeme.str + "', possible fields: " + possibleFields + + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + parser.currentClause.fields = [lexeme.str] + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + var errorMessage = "expecting term, found nothing" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + return lunr.QueryParser.parseTerm + default: + var errorMessage = "expecting a field, found '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + +lunr.QueryParser.parseTerm = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + parser.currentClause.term = lexeme.str.toLowerCase() + + if (lexeme.str.indexOf("*") != -1) { + parser.currentClause.usePipeline = false + } + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + parser.nextClause() + return + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + parser.nextClause() + return lunr.QueryParser.parseTerm + case lunr.QueryLexer.FIELD: + parser.nextClause() + return lunr.QueryParser.parseField + case lunr.QueryLexer.EDIT_DISTANCE: + return lunr.QueryParser.parseEditDistance + case lunr.QueryLexer.BOOST: + return lunr.QueryParser.parseBoost + default: + var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + +lunr.QueryParser.parseEditDistance = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + var editDistance = parseInt(lexeme.str, 10) + + if (isNaN(editDistance)) { + var errorMessage = "edit distance must be numeric" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + parser.currentClause.editDistance = editDistance + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + parser.nextClause() + return + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + parser.nextClause() + return lunr.QueryParser.parseTerm + case lunr.QueryLexer.FIELD: + parser.nextClause() + return lunr.QueryParser.parseField + case lunr.QueryLexer.EDIT_DISTANCE: + return lunr.QueryParser.parseEditDistance + case lunr.QueryLexer.BOOST: + return lunr.QueryParser.parseBoost + default: + var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + +lunr.QueryParser.parseBoost = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + var boost = parseInt(lexeme.str, 10) + + if (isNaN(boost)) { + var errorMessage = "boost must be numeric" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + parser.currentClause.boost = boost + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + parser.nextClause() + return + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + parser.nextClause() + return lunr.QueryParser.parseTerm + case lunr.QueryLexer.FIELD: + parser.nextClause() + return lunr.QueryParser.parseField + case lunr.QueryLexer.EDIT_DISTANCE: + return lunr.QueryParser.parseEditDistance + case lunr.QueryLexer.BOOST: + return lunr.QueryParser.parseBoost + default: + var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + + /** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ + ;(function (root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like enviroments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + root.lunr = factory() + } + }(this, function () { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return lunr + })) +})(); diff --git a/vendor/assets/javascripts/lunr.min.js b/vendor/assets/javascripts/lunr.min.js index 884d1f2..37cc3ab 100644 --- a/vendor/assets/javascripts/lunr.min.js +++ b/vendor/assets/javascripts/lunr.min.js @@ -1,7 +1,6 @@ /** - * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 0.7.0 - * Copyright (C) 2016 Oliver Nightingale - * MIT Licensed - * @license + * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.0.3 + * Copyright (C) 2017 Oliver Nightingale + * @license MIT */ -!function(){var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.7.0",t.utils={},t.utils.warn=function(t){return function(e){t.console&&console.warn&&console.warn(e)}}(this),t.utils.asString=function(t){return void 0===t||null===t?"":t.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var t=Array.prototype.slice.call(arguments),e=t.pop(),n=t;if("function"!=typeof e)throw new TypeError("last argument must be a function");n.forEach(function(t){this.hasHandler(t)||(this.events[t]=[]),this.events[t].push(e)},this)},t.EventEmitter.prototype.removeListener=function(t,e){if(this.hasHandler(t)){var n=this.events[t].indexOf(e);this.events[t].splice(n,1),this.events[t].length||delete this.events[t]}},t.EventEmitter.prototype.emit=function(t){if(this.hasHandler(t)){var e=Array.prototype.slice.call(arguments,1);this.events[t].forEach(function(t){t.apply(void 0,e)})}},t.EventEmitter.prototype.hasHandler=function(t){return t in this.events},t.tokenizer=function(e){return arguments.length&&null!=e&&void 0!=e?Array.isArray(e)?e.map(function(e){return t.utils.asString(e).toLowerCase()}):e.toString().trim().toLowerCase().split(t.tokenizer.seperator):[]},t.tokenizer.seperator=/[\s\-]+/,t.tokenizer.load=function(t){var e=this.registeredFunctions[t];if(!e)throw new Error("Cannot load un-registered function: "+t);return e},t.tokenizer.label="default",t.tokenizer.registeredFunctions={"default":t.tokenizer},t.tokenizer.registerFunction=function(e,n){n in this.registeredFunctions&&t.utils.warn("Overwriting existing tokenizer: "+n),e.label=n,this.registeredFunctions[n]=e},t.Pipeline=function(){this._stack=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in this.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[e.label]=e},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.registeredFunctions[e];if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._stack.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._stack.indexOf(e);if(-1==i)throw new Error("Cannot find existingFn");i+=1,this._stack.splice(i,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._stack.indexOf(e);if(-1==i)throw new Error("Cannot find existingFn");this._stack.splice(i,0,n)},t.Pipeline.prototype.remove=function(t){var e=this._stack.indexOf(t);-1!=e&&this._stack.splice(e,1)},t.Pipeline.prototype.run=function(t){for(var e=[],n=t.length,i=this._stack.length,r=0;n>r;r++){for(var o=t[r],s=0;i>s&&(o=this._stack[s](o,r,t),void 0!==o&&""!==o);s++);void 0!==o&&""!==o&&e.push(o)}return e},t.Pipeline.prototype.reset=function(){this._stack=[]},t.Pipeline.prototype.toJSON=function(){return this._stack.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Vector=function(){this._magnitude=null,this.list=void 0,this.length=0},t.Vector.Node=function(t,e,n){this.idx=t,this.val=e,this.next=n},t.Vector.prototype.insert=function(e,n){this._magnitude=void 0;var i=this.list;if(!i)return this.list=new t.Vector.Node(e,n,i),this.length++;if(en.idx?n=n.next:(i+=e.val*n.val,e=e.next,n=n.next);return i},t.Vector.prototype.similarity=function(t){return this.dot(t)/(this.magnitude()*t.magnitude())},t.SortedSet=function(){this.length=0,this.elements=[]},t.SortedSet.load=function(t){var e=new this;return e.elements=t,e.length=t.length,e},t.SortedSet.prototype.add=function(){var t,e;for(t=0;t1;){if(o===t)return r;t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r]}return o===t?r:-1},t.SortedSet.prototype.locationFor=function(t){for(var e=0,n=this.elements.length,i=n-e,r=e+Math.floor(i/2),o=this.elements[r];i>1;)t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r];return o>t?r:t>o?r+1:void 0},t.SortedSet.prototype.intersect=function(e){for(var n=new t.SortedSet,i=0,r=0,o=this.length,s=e.length,a=this.elements,h=e.elements;;){if(i>o-1||r>s-1)break;a[i]!==h[r]?a[i]h[r]&&r++:(n.add(a[i]),i++,r++)}return n},t.SortedSet.prototype.clone=function(){var e=new t.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},t.SortedSet.prototype.union=function(t){var e,n,i;this.length>=t.length?(e=this,n=t):(e=t,n=this),i=e.clone();for(var r=0,o=n.toArray();rp;p++)c[p]===a&&d++;h+=d/f*l.boost}}this.tokenStore.add(a,{ref:o,tf:h})}n&&this.eventEmitter.emit("add",e,this)},t.Index.prototype.remove=function(t,e){var n=t[this._ref],e=void 0===e?!0:e;if(this.documentStore.has(n)){var i=this.documentStore.get(n);this.documentStore.remove(n),i.forEach(function(t){this.tokenStore.remove(t,n)},this),e&&this.eventEmitter.emit("remove",t,this)}},t.Index.prototype.update=function(t,e){var e=void 0===e?!0:e;this.remove(t,!1),this.add(t,!1),e&&this.eventEmitter.emit("update",t,this)},t.Index.prototype.idf=function(t){var e="@"+t;if(Object.prototype.hasOwnProperty.call(this._idfCache,e))return this._idfCache[e];var n=this.tokenStore.count(t),i=1;return n>0&&(i=1+Math.log(this.documentStore.length/n)),this._idfCache[e]=i},t.Index.prototype.search=function(e){var n=this.pipeline.run(this.tokenizerFn(e)),i=new t.Vector,r=[],o=this._fields.reduce(function(t,e){return t+e.boost},0),s=n.some(function(t){return this.tokenStore.has(t)},this);if(!s)return[];n.forEach(function(e,n,s){var a=1/s.length*this._fields.length*o,h=this,u=this.tokenStore.expand(e).reduce(function(n,r){var o=h.corpusTokens.indexOf(r),s=h.idf(r),u=1,l=new t.SortedSet;if(r!==e){var c=Math.max(3,r.length-e.length);u=1/Math.log(c)}o>-1&&i.insert(o,a*s*u);for(var f=h.tokenStore.get(r),d=Object.keys(f),p=d.length,v=0;p>v;v++)l.add(f[d[v]].ref);return n.union(l)},new t.SortedSet);r.push(u)},this);var a=r.reduce(function(t,e){return t.intersect(e)});return a.map(function(t){return{ref:t,score:i.similarity(this.documentVector(t))}},this).sort(function(t,e){return e.score-t.score})},t.Index.prototype.documentVector=function(e){for(var n=this.documentStore.get(e),i=n.length,r=new t.Vector,o=0;i>o;o++){var s=n.elements[o],a=this.tokenStore.get(s)[e].tf,h=this.idf(s);r.insert(this.corpusTokens.indexOf(s),a*h)}return r},t.Index.prototype.toJSON=function(){return{version:t.version,fields:this._fields,ref:this._ref,tokenizer:this.tokenizerFn.label,documentStore:this.documentStore.toJSON(),tokenStore:this.tokenStore.toJSON(),corpusTokens:this.corpusTokens.toJSON(),pipeline:this.pipeline.toJSON()}},t.Index.prototype.use=function(t){var e=Array.prototype.slice.call(arguments,1);e.unshift(this),t.apply(this,e)},t.Store=function(){this.store={},this.length=0},t.Store.load=function(e){var n=new this;return n.length=e.length,n.store=Object.keys(e.store).reduce(function(n,i){return n[i]=t.SortedSet.load(e.store[i]),n},{}),n},t.Store.prototype.set=function(t,e){this.has(t)||this.length++,this.store[t]=e},t.Store.prototype.get=function(t){return this.store[t]},t.Store.prototype.has=function(t){return t in this.store},t.Store.prototype.remove=function(t){this.has(t)&&(delete this.store[t],this.length--)},t.Store.prototype.toJSON=function(){return{store:this.store,length:this.length}},t.stemmer=function(){var t={ational:"ate",tional:"tion",enci:"ence",anci:"ance",izer:"ize",bli:"ble",alli:"al",entli:"ent",eli:"e",ousli:"ous",ization:"ize",ation:"ate",ator:"ate",alism:"al",iveness:"ive",fulness:"ful",ousness:"ous",aliti:"al",iviti:"ive",biliti:"ble",logi:"log"},e={icate:"ic",ative:"",alize:"al",iciti:"ic",ical:"ic",ful:"",ness:""},n="[^aeiou]",i="[aeiouy]",r=n+"[^aeiouy]*",o=i+"[aeiou]*",s="^("+r+")?"+o+r,a="^("+r+")?"+o+r+"("+o+")?$",h="^("+r+")?"+o+r+o+r,u="^("+r+")?"+i,l=new RegExp(s),c=new RegExp(h),f=new RegExp(a),d=new RegExp(u),p=/^(.+?)(ss|i)es$/,v=/^(.+?)([^s])s$/,g=/^(.+?)eed$/,m=/^(.+?)(ed|ing)$/,y=/.$/,S=/(at|bl|iz)$/,w=new RegExp("([^aeiouylsz])\\1$"),k=new RegExp("^"+r+i+"[^aeiouwxy]$"),x=/^(.+?[^aeiou])y$/,b=/^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,E=/^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,F=/^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,_=/^(.+?)(s|t)(ion)$/,z=/^(.+?)e$/,O=/ll$/,P=new RegExp("^"+r+i+"[^aeiouwxy]$"),T=function(n){var i,r,o,s,a,h,u;if(n.length<3)return n;if(o=n.substr(0,1),"y"==o&&(n=o.toUpperCase()+n.substr(1)),s=p,a=v,s.test(n)?n=n.replace(s,"$1$2"):a.test(n)&&(n=n.replace(a,"$1$2")),s=g,a=m,s.test(n)){var T=s.exec(n);s=l,s.test(T[1])&&(s=y,n=n.replace(s,""))}else if(a.test(n)){var T=a.exec(n);i=T[1],a=d,a.test(i)&&(n=i,a=S,h=w,u=k,a.test(n)?n+="e":h.test(n)?(s=y,n=n.replace(s,"")):u.test(n)&&(n+="e"))}if(s=x,s.test(n)){var T=s.exec(n);i=T[1],n=i+"i"}if(s=b,s.test(n)){var T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+t[r])}if(s=E,s.test(n)){var T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+e[r])}if(s=F,a=_,s.test(n)){var T=s.exec(n);i=T[1],s=c,s.test(i)&&(n=i)}else if(a.test(n)){var T=a.exec(n);i=T[1]+T[2],a=c,a.test(i)&&(n=i)}if(s=z,s.test(n)){var T=s.exec(n);i=T[1],s=c,a=f,h=P,(s.test(i)||a.test(i)&&!h.test(i))&&(n=i)}return s=O,a=c,s.test(n)&&a.test(n)&&(s=y,n=n.replace(s,"")),"y"==o&&(n=o.toLowerCase()+n.substr(1)),n};return T}(),t.Pipeline.registerFunction(t.stemmer,"stemmer"),t.generateStopWordFilter=function(t){var e=t.reduce(function(t,e){return t[e]=e,t},{});return function(t){return t&&e[t]!==t?t:void 0}},t.stopWordFilter=t.generateStopWordFilter(["a","able","about","across","after","all","almost","also","am","among","an","and","any","are","as","at","be","because","been","but","by","can","cannot","could","dear","did","do","does","either","else","ever","every","for","from","get","got","had","has","have","he","her","hers","him","his","how","however","i","if","in","into","is","it","its","just","least","let","like","likely","may","me","might","most","must","my","neither","no","nor","not","of","off","often","on","only","or","other","our","own","rather","said","say","says","she","should","since","so","some","than","that","the","their","them","then","there","these","they","this","tis","to","too","twas","us","wants","was","we","were","what","when","where","which","while","who","whom","why","will","with","would","yet","you","your"]),t.Pipeline.registerFunction(t.stopWordFilter,"stopWordFilter"),t.trimmer=function(t){return t.replace(/^\W+/,"").replace(/\W+$/,"")},t.Pipeline.registerFunction(t.trimmer,"trimmer"),t.TokenStore=function(){this.root={docs:{}},this.length=0},t.TokenStore.load=function(t){var e=new this;return e.root=t.root,e.length=t.length,e},t.TokenStore.prototype.add=function(t,e,n){var n=n||this.root,i=t.charAt(0),r=t.slice(1);return i in n||(n[i]={docs:{}}),0===r.length?(n[i].docs[e.ref]=e,void(this.length+=1)):this.add(r,e,n[i])},t.TokenStore.prototype.has=function(t){if(!t)return!1;for(var e=this.root,n=0;n=s;s++){var a=r.charAt(s),u=s-o;(a.match(e.tokenizer.separator)||s==i)&&(u>0&&n.push(new e.Token(r.slice(o,s),{position:[o,u],index:n.length})),o=s+1)}return n},e.tokenizer.separator=/[\s\-]+/,e.Pipeline=function(){this._stack=[]},e.Pipeline.registeredFunctions=Object.create(null),e.Pipeline.registerFunction=function(t,r){r in this.registeredFunctions&&e.utils.warn("Overwriting existing registered function: "+r),t.label=r,e.Pipeline.registeredFunctions[t.label]=t},e.Pipeline.warnIfFunctionNotRegistered=function(t){var r=t.label&&t.label in this.registeredFunctions;r||e.utils.warn("Function is not registered with pipeline. This may cause problems when serialising the index.\n",t)},e.Pipeline.load=function(t){var r=new e.Pipeline;return t.forEach(function(t){var i=e.Pipeline.registeredFunctions[t];if(!i)throw new Error("Cannot load unregistered function: "+t);r.add(i)}),r},e.Pipeline.prototype.add=function(){var t=Array.prototype.slice.call(arguments);t.forEach(function(t){e.Pipeline.warnIfFunctionNotRegistered(t),this._stack.push(t)},this)},e.Pipeline.prototype.after=function(t,r){e.Pipeline.warnIfFunctionNotRegistered(r);var i=this._stack.indexOf(t);if(-1==i)throw new Error("Cannot find existingFn");i+=1,this._stack.splice(i,0,r)},e.Pipeline.prototype.before=function(t,r){e.Pipeline.warnIfFunctionNotRegistered(r);var i=this._stack.indexOf(t);if(-1==i)throw new Error("Cannot find existingFn");this._stack.splice(i,0,r)},e.Pipeline.prototype.remove=function(e){var t=this._stack.indexOf(e);-1!=t&&this._stack.splice(t,1)},e.Pipeline.prototype.run=function(e){for(var t=this._stack.length,r=0;t>r;r++){var i=this._stack[r];e=e.reduce(function(t,r,n){var s=i(r,n,e);return void 0===s||""===s?t:t.concat(s)},[])}return e},e.Pipeline.prototype.runString=function(t){var r=new e.Token(t);return this.run([r]).map(function(e){return e.toString()})},e.Pipeline.prototype.reset=function(){this._stack=[]},e.Pipeline.prototype.toJSON=function(){return this._stack.map(function(t){return e.Pipeline.warnIfFunctionNotRegistered(t),t.label})},e.Vector=function(e){this._magnitude=0,this.elements=e||[]},e.Vector.prototype.positionForIndex=function(e){if(0==this.elements.length)return 0;for(var t=0,r=this.elements.length/2,i=r-t,n=Math.floor(i/2),s=this.elements[2*n];i>1&&(e>s&&(t=n),s>e&&(r=n),s!=e);)i=r-t,n=t+Math.floor(i/2),s=this.elements[2*n];return s==e?2*n:s>e?2*n:e>s?2*(n+1):void 0},e.Vector.prototype.insert=function(e,t){this.upsert(e,t,function(){throw"duplicate index"})},e.Vector.prototype.upsert=function(e,t,r){this._magnitude=0;var i=this.positionForIndex(e);this.elements[i]==e?this.elements[i+1]=r(this.elements[i+1],t):this.elements.splice(i,0,e,t)},e.Vector.prototype.magnitude=function(){if(this._magnitude)return this._magnitude;for(var e=0,t=this.elements.length,r=1;t>r;r+=2){var i=this.elements[r];e+=i*i}return this._magnitude=Math.sqrt(e)},e.Vector.prototype.dot=function(e){for(var t=0,r=this.elements,i=e.elements,n=r.length,s=i.length,o=0,a=0,u=0,h=0;n>u&&s>h;)o=r[u],a=i[h],a>o?u+=2:o>a?h+=2:o==a&&(t+=r[u+1]*i[h+1],u+=2,h+=2);return t},e.Vector.prototype.similarity=function(e){return this.dot(e)/(this.magnitude()*e.magnitude())},e.Vector.prototype.toArray=function(){for(var e=new Array(this.elements.length/2),t=1,r=0;ti;i++)r.insert(t[i]);return r.finish(),r.root},e.TokenSet.fromClause=function(t){return"editDistance"in t?e.TokenSet.fromFuzzyString(t.term,t.editDistance):e.TokenSet.fromString(t.term)},e.TokenSet.fromFuzzyString=function(t,r){for(var i=new e.TokenSet,n=[{node:i,editsRemaining:r,str:t}];n.length;){var s=n.pop();if(s.str.length>0){var o,a=s.str.charAt(0);a in s.node.edges?o=s.node.edges[a]:(o=new e.TokenSet,s.node.edges[a]=o),1==s.str.length?o["final"]=!0:n.push({node:o,editsRemaining:s.editsRemaining,str:s.str.slice(1)})}if(s.editsRemaining>0&&s.str.length>1){var u,a=s.str.charAt(1);a in s.node.edges?u=s.node.edges[a]:(u=new e.TokenSet,s.node.edges[a]=u),s.str.length<=2?u["final"]=!0:n.push({node:u,editsRemaining:s.editsRemaining-1,str:s.str.slice(2)})}if(s.editsRemaining>0&&1==s.str.length&&(s.node["final"]=!0),s.editsRemaining>0&&s.str.length>=1){if("*"in s.node.edges)var h=s.node.edges["*"];else{var h=new e.TokenSet;s.node.edges["*"]=h}1==s.str.length?h["final"]=!0:n.push({node:h,editsRemaining:s.editsRemaining-1,str:s.str.slice(1)})}if(s.editsRemaining>0){if("*"in s.node.edges)var d=s.node.edges["*"];else{var d=new e.TokenSet;s.node.edges["*"]=d}0==s.str.length?d["final"]=!0:n.push({node:d,editsRemaining:s.editsRemaining-1,str:s.str})}if(s.editsRemaining>0&&s.str.length>1){var c,l=s.str.charAt(0),p=s.str.charAt(1);p in s.node.edges?c=s.node.edges[p]:(c=new e.TokenSet,s.node.edges[p]=c),1==s.str.length?c["final"]=!0:n.push({node:c,editsRemaining:s.editsRemaining-1,str:l+s.str.slice(2)})}}return i},e.TokenSet.fromString=function(t){for(var r=new e.TokenSet,i=r,n=!1,s=0,o=t.length;o>s;s++){var a=t[s],u=s==o-1;if("*"==a)n=!0,r.edges[a]=r,r["final"]=u;else{var h=new e.TokenSet;h["final"]=u,r.edges[a]=h,r=h,n&&(r.edges["*"]=i)}}return i},e.TokenSet.prototype.toArray=function(){for(var e=[],t=[{prefix:"",node:this}];t.length;){var r=t.pop(),i=Object.keys(r.node.edges),n=i.length;r.node["final"]&&e.push(r.prefix);for(var s=0;n>s;s++){var o=i[s];t.push({prefix:r.prefix.concat(o),node:r.node.edges[o]})}}return e},e.TokenSet.prototype.toString=function(){if(this._str)return this._str;for(var e=this["final"]?"1":"0",t=Object.keys(this.edges).sort(),r=t.length,i=0;r>i;i++){var n=t[i],s=this.edges[n];e=e+n+s.id}return e},e.TokenSet.prototype.intersect=function(t){for(var r=new e.TokenSet,i=void 0,n=[{qNode:t,output:r,node:this}];n.length;){i=n.pop();for(var s=Object.keys(i.qNode.edges),o=s.length,a=Object.keys(i.node.edges),u=a.length,h=0;o>h;h++)for(var d=s[h],c=0;u>c;c++){var l=a[c];if(l==d||"*"==d){var p=i.node.edges[l],f=i.qNode.edges[d],m=p["final"]&&f["final"],y=void 0;l in i.output.edges?(y=i.output.edges[l],y["final"]=y["final"]||m):(y=new e.TokenSet,y["final"]=m,i.output.edges[l]=y),n.push({qNode:f,output:y,node:p})}}}return r},e.TokenSet.Builder=function(){this.previousWord="",this.root=new e.TokenSet,this.uncheckedNodes=[],this.minimizedNodes={}},e.TokenSet.Builder.prototype.insert=function(t){var r,i=0;if(t=e;t--){var r=this.uncheckedNodes[t],i=r.child.toString();i in this.minimizedNodes?r.parent.edges[r["char"]]=this.minimizedNodes[i]:(r.child._str=i,this.minimizedNodes[i]=r.child),this.uncheckedNodes.pop()}},e.Index=function(e){this.invertedIndex=e.invertedIndex,this.documentVectors=e.documentVectors,this.tokenSet=e.tokenSet,this.documentCount=e.documentCount,this.averageDocumentLength=e.averageDocumentLength,this.b=e.b,this.k1=e.k1,this.fields=e.fields,this.pipeline=e.pipeline},e.Index.prototype.search=function(t){return this.query(function(r){var i=new e.QueryParser(t,r);i.parse()})},e.Index.prototype.query=function(t){var r=new e.Query(this.fields),i=Object.create(null),n=new e.Vector;t.call(r,r);for(var s=0;se?this._b=0:e>1?this._b=1:this._b=e},e.Builder.prototype.k1=function(e){this._k1=e},e.Builder.prototype.add=function(e){var t=e[this._ref],r={};this.documentCount+=1,this.documentTermFrequencies[t]=r,this.documentLengths[t]=0;for(var i=0;ii;i++){var n=e[i];r+=this.documentLengths[n]}this.averageDocumentLength=r/t},e.Builder.prototype.createDocumentVectors=function(){for(var t={},r=Object.keys(this.documentTermFrequencies),i=r.length,n=0;i>n;n++){for(var s=r[n],o=this.documentLengths[s],a=new e.Vector,u=this.documentTermFrequencies[s],h=Object.keys(u),d=h.length,c=0;d>c;c++){var l=h[c],p=u[l],f=this.invertedIndex[l]._index,m=e.idf(this.invertedIndex[l],this.documentCount),y=m*((this._k1+1)*p)/(this._k1*(1-this._b+this._b*(o/this.averageDocumentLength))+p),v=Math.round(1e3*y)/1e3;a.insert(f,v)}t[s]=a}this.documentVectors=t},e.Builder.prototype.createTokenSet=function(){this.tokenSet=e.TokenSet.fromArray(Object.keys(this.invertedIndex).sort())},e.Builder.prototype.build=function(){return this.calculateAverageDocumentLengths(),this.createDocumentVectors(),this.createTokenSet(),new e.Index({invertedIndex:this.invertedIndex,documentVectors:this.documentVectors,tokenSet:this.tokenSet,averageDocumentLength:this.averageDocumentLength,documentCount:this.documentCount,fields:this._fields,pipeline:this.searchPipeline,b:this._b,k1:this._k1})},e.Builder.prototype.use=function(e){var t=Array.prototype.slice.call(arguments,1);t.unshift(this),e.apply(this,t)},e.MatchData=function(e,t,r){this.metadata={},this.metadata[e]={},this.metadata[e][t]=r},e.MatchData.prototype.combine=function(e){for(var t=Object.keys(e.metadata),r=0;r47&&58>r);t!=e.QueryLexer.EOS&&this.backup()},e.QueryLexer.prototype.more=function(){return this.pos1&&(t.backup(),t.emit(e.QueryLexer.TERM)),t.ignore(),t.more()?e.QueryLexer.lexText:void 0},e.QueryLexer.lexEditDistance=function(t){return t.ignore(),t.acceptDigitRun(),t.emit(e.QueryLexer.EDIT_DISTANCE),e.QueryLexer.lexText},e.QueryLexer.lexBoost=function(t){return t.ignore(),t.acceptDigitRun(),t.emit(e.QueryLexer.BOOST),e.QueryLexer.lexText},e.QueryLexer.lexEOS=function(t){t.width()>0&&t.emit(e.QueryLexer.TERM)},e.QueryLexer.termSeparator=e.tokenizer.separator,e.QueryLexer.lexText=function(t){for(;;){var r=t.next();if(r==e.QueryLexer.EOS)return e.QueryLexer.lexEOS;if(":"==r)return e.QueryLexer.lexField;if("~"==r)return t.backup(),t.width()>0&&t.emit(e.QueryLexer.TERM),e.QueryLexer.lexEditDistance;if("^"==r)return t.backup(),t.width()>0&&t.emit(e.QueryLexer.TERM),e.QueryLexer.lexBoost;if(r.match(e.QueryLexer.termSeparator))return e.QueryLexer.lexTerm}},e.QueryParser=function(t,r){this.lexer=new e.QueryLexer(t),this.query=r,this.currentClause={},this.lexemeIdx=0},e.QueryParser.prototype.parse=function(){this.lexer.run(),this.lexemes=this.lexer.lexemes;for(var t=e.QueryParser.parseFieldOrTerm;t;)t=t(this);return this.query},e.QueryParser.prototype.peekLexeme=function(){return this.lexemes[this.lexemeIdx]},e.QueryParser.prototype.consumeLexeme=function(){var e=this.peekLexeme();return this.lexemeIdx+=1,e},e.QueryParser.prototype.nextClause=function(){var e=this.currentClause;this.query.clause(e),this.currentClause={}},e.QueryParser.parseFieldOrTerm=function(t){var r=t.peekLexeme();if(void 0!=r)switch(r.type){case e.QueryLexer.FIELD:return e.QueryParser.parseField;case e.QueryLexer.TERM:return e.QueryParser.parseTerm;default:var i="expected either a field or a term, found "+r.type+" with value '"+r.str+"'";throw new e.QueryParseError(i,r.start,r.end)}},e.QueryParser.parseField=function(t){var r=t.consumeLexeme();if(void 0!=r){if(-1==t.query.allFields.indexOf(r.str)){var i=t.query.allFields.map(function(e){return"'"+e+"'"}).join(),n="unrecognised field '"+r.str+"', possible fields: "+i;throw new e.QueryParseError(n,r.start,r.end)}t.currentClause.fields=[r.str];var s=t.peekLexeme();if(void 0==s){var n="expecting term, found nothing";throw new e.QueryParseError(n,r.start,r.end)}switch(s.type){case e.QueryLexer.TERM:return e.QueryParser.parseTerm;default:var n="expecting a field, found '"+s.type+"'";throw new e.QueryParseError(n,s.start,s.end)}}},e.QueryParser.parseTerm=function(t){var r=t.consumeLexeme();if(void 0!=r){t.currentClause.term=r.str.toLowerCase(),-1!=r.str.indexOf("*")&&(t.currentClause.usePipeline=!1);var i=t.peekLexeme();if(void 0==i)return void t.nextClause();switch(i.type){case e.QueryLexer.TERM:return t.nextClause(),e.QueryParser.parseTerm;case e.QueryLexer.FIELD:return t.nextClause(),e.QueryParser.parseField;case e.QueryLexer.EDIT_DISTANCE:return e.QueryParser.parseEditDistance;case e.QueryLexer.BOOST:return e.QueryParser.parseBoost;default:var n="Unexpected lexeme type '"+i.type+"'";throw new e.QueryParseError(n,i.start,i.end)}}},e.QueryParser.parseEditDistance=function(t){var r=t.consumeLexeme();if(void 0!=r){var i=parseInt(r.str,10);if(isNaN(i)){var n="edit distance must be numeric";throw new e.QueryParseError(n,r.start,r.end)}t.currentClause.editDistance=i;var s=t.peekLexeme();if(void 0==s)return void t.nextClause();switch(s.type){case e.QueryLexer.TERM:return t.nextClause(),e.QueryParser.parseTerm;case e.QueryLexer.FIELD:return t.nextClause(),e.QueryParser.parseField;case e.QueryLexer.EDIT_DISTANCE:return e.QueryParser.parseEditDistance;case e.QueryLexer.BOOST:return e.QueryParser.parseBoost;default:var n="Unexpected lexeme type '"+s.type+"'";throw new e.QueryParseError(n,s.start,s.end)}}},e.QueryParser.parseBoost=function(t){var r=t.consumeLexeme();if(void 0!=r){var i=parseInt(r.str,10);if(isNaN(i)){var n="boost must be numeric";throw new e.QueryParseError(n,r.start,r.end)}t.currentClause.boost=i;var s=t.peekLexeme();if(void 0==s)return void t.nextClause();switch(s.type){case e.QueryLexer.TERM:return t.nextClause(),e.QueryParser.parseTerm;case e.QueryLexer.FIELD:return t.nextClause(),e.QueryParser.parseField;case e.QueryLexer.EDIT_DISTANCE:return e.QueryParser.parseEditDistance;case e.QueryLexer.BOOST:return e.QueryParser.parseBoost;default:var n="Unexpected lexeme type '"+s.type+"'";throw new e.QueryParseError(n,s.start,s.end)}}},function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():e.lunr=t()}(this,function(){return e})}();