2013-05-03 05:31:51 +00:00
|
|
|
/**
|
2013-05-12 15:19:25 +00:00
|
|
|
* lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 0.3.1
|
2013-05-03 05:31:51 +00:00
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
* MIT Licensed
|
|
|
|
* @license
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * Shortcut for building a lunr.Index that is ready to use.
 *
 * A new index is created and both default pipeline functions are added to
 * its pipeline in a single call, in this order:
 *
 * lunr.stopWordFilter - filters out any stop words before they enter the
 * index
 *
 * lunr.stemmer - stems the tokens before entering the index.
 *
 * If a config function is supplied it is then invoked with the new index as
 * both its `this` value and its first argument.
 *
 * Example:
 *
 *     var idx = lunr(function () {
 *       this.field('title', { boost: 10 })
 *       this.field('tags', { boost: 100 })
 *       this.field('body')
 *
 *       this.ref('cid')
 *
 *       this.pipeline.add(function () {
 *         // some custom pipeline function
 *       })
 *
 *     })
 *
 * @param {Function} config Optional function used to customise the new
 * lunr.Index; it receives the index as its context and as its first parameter.
 * @namespace
 * @module
 * @returns {lunr.Index}
 *
 */
var lunr = function (config) {
  var index = new lunr.Index

  index.pipeline.add(lunr.stopWordFilter, lunr.stemmer)

  if (config) {
    config.call(index, index)
  }

  return index
}
|
|
|
|
|
2013-05-12 15:19:25 +00:00
|
|
|
// Library version; lunr.Index.load compares it with the version recorded in
// serialised index data.
lunr.version = "0.3.1"

// Export lunr through `module.exports` when a `module` binding exists.
if (typeof module !== 'undefined') module.exports = lunr
|
|
|
|
/*!
|
|
|
|
* lunr.tokenizer
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * A function for splitting a value into tokens ready to be inserted into
 * the search index.
 *
 * Arrays are assumed to be tokenised already and are returned as-is.
 * `null` and `undefined` produce an empty token list instead of throwing.
 * Any other value is converted to a string, trimmed and split on runs of
 * whitespace; each piece has leading and trailing non-word characters removed
 * and is lower-cased. Pieces that end up empty (for example a lone "-") are
 * dropped so that empty strings never become tokens.
 *
 * @module
 * @param {String} str The string to convert into tokens
 * @returns {Array}
 */
lunr.tokenizer = function (str) {
  if (str === undefined || str === null) return []
  if (Array.isArray(str)) return str

  return String(str)
    .trim()
    .split(/\s+/)
    .map(function (token) {
      return token.replace(/^\W+/, '').replace(/\W+$/, '').toLowerCase()
    })
    .filter(function (token) {
      // punctuation-only words and blank input collapse to '' - skip them
      return token.length > 0
    })
}
|
|
|
|
/*!
|
|
|
|
* lunr.Pipeline
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * A lunr.Pipeline keeps an ordered list of functions that are applied to
 * every token of documents entering the search index and of queries run
 * against the index.
 *
 * An index created through the `lunr` shortcut starts with a stop word filter
 * and a stemmer in its pipeline. Further functions can be placed before or
 * after existing ones, and existing ones can be removed.
 *
 * When the pipeline runs, each function is called in turn with the token, the
 * index of that token in the original token list, and the original token
 * list itself.
 *
 * The value returned by one function is handed to the next one. Returning
 * undefined excludes the token: the remaining functions are not called for it
 * and it is left out of the result.
 *
 * For serialisation to work, every function used in a pipeline should be
 * registered with lunr.Pipeline. Loading a serialised pipeline that names a
 * function which has not been registered throws an error.
 *
 * Registering functions is unnecessary when the pipeline is never serialised.
 *
 * @constructor
 */
lunr.Pipeline = function () {
  // ordered list of the functions applied when the pipeline is run
  this._stack = []
}

// registry of pipeline functions, keyed by the label they were registered with
lunr.Pipeline.registeredFunctions = {}
|
|
|
|
|
|
|
|
/**
 * Register a function with the pipeline.
 *
 * Functions that are used in the pipeline should be registered if the pipeline
 * needs to be serialised, or a serialised pipeline needs to be loaded.
 *
 * Registering a function does not add it to a pipeline, functions must still be
 * added to instances of the pipeline for them to be used when running a pipeline.
 *
 * The label is stored on the function itself as `fn.label`. A warning is
 * logged, when a console is available, if the label is already taken; the
 * existing registration is then overwritten.
 *
 * @param {Function} fn The function to register.
 * @param {String} label The label to register this function with
 * @memberOf Pipeline
 */
lunr.Pipeline.registerFunction = function (fn, label) {
  // always use the one shared registry, for the check as well as the write
  var registry = lunr.Pipeline.registeredFunctions,
      alreadyRegistered = Object.prototype.hasOwnProperty.call(registry, label)

  // `typeof` guard: touching an undeclared `console` would throw a ReferenceError
  if (alreadyRegistered && typeof console !== 'undefined' && console.warn) {
    console.warn('Overwriting existing registered function: ' + label)
  }

  fn.label = label
  registry[fn.label] = fn
}
|
|
|
|
|
|
|
|
/**
 * Warns if the function is not registered as a Pipeline function.
 *
 * A function counts as registered when it carries a `label` and that label is
 * an own key of lunr.Pipeline.registeredFunctions. Nothing is logged when no
 * console (or no console.warn) is available.
 *
 * @param {Function} fn The function to check for.
 * @private
 * @memberOf Pipeline
 */
lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  var registry = lunr.Pipeline.registeredFunctions,
      // own-property check so labels like 'toString' are not matched via Object.prototype
      isRegistered = !!fn.label && Object.prototype.hasOwnProperty.call(registry, fn.label)

  // `typeof` guard: touching an undeclared `console` would throw a ReferenceError
  if (!isRegistered && typeof console !== 'undefined' && console.warn) {
    console.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
  }
}
|
|
|
|
|
|
|
|
/**
 * Loads a previously serialised pipeline.
 *
 * All functions to be loaded must already be registered with lunr.Pipeline.
 * If any function from the serialised data has not been registered then an
 * error will be thrown. Only labels that are own keys of the registry count,
 * so names inherited from Object.prototype (e.g. 'toString') are rejected.
 *
 * @param {Object} serialised The serialised pipeline to load; an array of
 * registered function labels.
 * @returns {lunr.Pipeline}
 * @throws {Error} If a label does not belong to a registered function.
 * @memberOf Pipeline
 */
lunr.Pipeline.load = function (serialised) {
  var pipeline = new lunr.Pipeline,
      registry = lunr.Pipeline.registeredFunctions

  serialised.forEach(function (fnName) {
    var fn = Object.prototype.hasOwnProperty.call(registry, fnName) ? registry[fnName] : undefined

    if (!fn) {
      throw new Error ('Cannot load un-registered function: ' + fnName)
    }

    pipeline.add(fn)
  })

  return pipeline
}
|
|
|
|
|
|
|
|
/**
 * Appends the given functions, in argument order, to the end of the pipeline.
 *
 * Each function is first passed to the registration check, which logs a
 * warning for functions that have not been registered.
 *
 * @param {Function} functions Any number of functions to add to the pipeline.
 * @memberOf Pipeline
 */
lunr.Pipeline.prototype.add = function () {
  for (var i = 0; i < arguments.length; i++) {
    var fn = arguments[i]

    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    this._stack.push(fn)
  }
}
|
|
|
|
|
|
|
|
/**
 * Adds a single function after a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {Function} existingFn A function that already exists in the pipeline.
 * @param {Function} newFn The new function to add to the pipeline.
 * @throws {Error} If existingFn is not part of the pipeline.
 * @memberOf Pipeline
 */
lunr.Pipeline.prototype.after = function (existingFn, newFn) {
  var pos = this._stack.indexOf(existingFn)

  // without this check a missing existingFn (-1 + 1 = 0) would silently put
  // newFn at the very front of the pipeline
  if (pos === -1) {
    throw new Error ('Cannot find existingFn')
  }

  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  this._stack.splice(pos + 1, 0, newFn)
}
|
|
|
|
|
|
|
|
/**
 * Adds a single function before a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {Function} existingFn A function that already exists in the pipeline.
 * @param {Function} newFn The new function to add to the pipeline.
 * @throws {Error} If existingFn is not part of the pipeline.
 * @memberOf Pipeline
 */
lunr.Pipeline.prototype.before = function (existingFn, newFn) {
  var pos = this._stack.indexOf(existingFn)

  // without this check a missing existingFn would reach splice(-1, ...) and
  // silently insert newFn just before the last function
  if (pos === -1) {
    throw new Error ('Cannot find existingFn')
  }

  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  this._stack.splice(pos, 0, newFn)
}
|
|
|
|
|
|
|
|
/**
 * Removes a function from the pipeline.
 *
 * Does nothing when the function is not part of the pipeline.
 *
 * @param {Function} fn The function to remove from the pipeline.
 * @memberOf Pipeline
 */
lunr.Pipeline.prototype.remove = function (fn) {
  var pos = this._stack.indexOf(fn)

  // splice(-1, 1) would remove the last function in the pipeline
  if (pos === -1) return

  this._stack.splice(pos, 1)
}
|
|
|
|
|
|
|
|
/**
 * Passes every token through the functions currently in the pipeline and
 * collects the results.
 *
 * Each function receives the current value of the token, the index of the
 * token in the input and the input array. As soon as a function returns
 * undefined the remaining functions are skipped for that token and the token
 * is omitted from the output.
 *
 * @param {Array} tokens The tokens to run through the pipeline.
 * @returns {Array}
 * @memberOf Pipeline
 */
lunr.Pipeline.prototype.run = function (tokens) {
  var stack = this._stack,
      stackSize = stack.length,
      total = tokens.length,
      results = []

  for (var t = 0; t < total; t++) {
    var current = tokens[t]

    for (var s = 0; s < stackSize; s++) {
      // invoked as a member of the stack array, exactly like `this._stack[s](...)`
      current = stack[s](current, t, tokens)
      if (current === void 0) break
    }

    if (current !== void 0) {
      results.push(current)
    }
  }

  return results
}
|
|
|
|
|
|
|
|
/**
 * Builds the serialisable form of the pipeline: the list of labels of the
 * functions in the pipeline, in pipeline order.
 *
 * Every function is passed through the registration check, which logs a
 * warning for functions that have not been registered.
 *
 * @returns {Array}
 * @memberOf Pipeline
 */
lunr.Pipeline.prototype.toJSON = function () {
  var labelOf = function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    return fn.label
  }

  return this._stack.map(labelOf)
}
|
|
|
|
/*!
|
|
|
|
* lunr.Vector
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * A lunr.Vector wraps an array and provides vector operations over its
 * elements.
 *
 * The passed array is stored by reference (not copied) and any index that is
 * missing from it is filled in with 0.
 *
 * @constructor
 * @param {Array} elements Elements that make up the vector.
 */
lunr.Vector = function (elements) {
  this.elements = elements

  for (var pos = 0; pos < elements.length; pos++) {
    if (pos in elements) continue

    // hole in a sparse array: treat as a zero component
    elements[pos] = 0
  }
}
|
|
|
|
|
|
|
|
/**
 * Computes the magnitude (square root of the sum of squared elements) of
 * this vector.
 *
 * The result is stored on the vector as `_magnitude` and reused by later
 * calls whenever the stored value is truthy.
 *
 * @returns {Number}
 * @memberOf Vector
 */
lunr.Vector.prototype.magnitude = function () {
  if (!this._magnitude) {
    var values = this.elements,
        total = 0

    for (var i = 0, count = values.length; i < count; i++) {
      total += values[i] * values[i]
    }

    this._magnitude = Math.sqrt(total)
  }

  return this._magnitude
}
|
|
|
|
|
|
|
|
/**
 * Computes the dot product of this vector with another vector.
 *
 * The products are summed over the indexes of this vector's elements.
 *
 * @param {lunr.Vector} otherVector The vector to compute the dot product with.
 * @returns {Number}
 * @memberOf Vector
 */
lunr.Vector.prototype.dot = function (otherVector) {
  var mine = this.elements,
      theirs = otherVector.elements,
      sum = 0

  for (var i = 0, count = mine.length; i < count; i++) {
    sum += mine[i] * theirs[i]
  }

  return sum
}
|
|
|
|
|
|
|
|
/**
 * Calculates the cosine similarity between this vector and another
 * vector.
 *
 * When either vector has a magnitude of zero the similarity is reported as 0
 * rather than the NaN that dividing by zero would produce.
 *
 * @param {lunr.Vector} otherVector The other vector to calculate the
 * similarity with.
 * @returns {Number}
 * @memberOf Vector
 */
lunr.Vector.prototype.similarity = function (otherVector) {
  var denominator = this.magnitude() * otherVector.magnitude()

  // 0 / 0 is NaN, and a NaN score cannot be ordered by a numeric sort
  if (denominator === 0) return 0

  return this.dot(otherVector) / denominator
}
|
|
|
|
|
|
|
|
/**
 * Gives back the array wrapped by this vector.
 *
 * The returned array is the vector's own `elements` array, not a copy.
 *
 * @returns {Array}
 * @memberOf Vector
 */
lunr.Vector.prototype.toArray = function () {
  var wrapped = this.elements

  return wrapped
}
|
|
|
|
/*!
|
|
|
|
* lunr.SortedSet
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * A lunr.SortedSet maintains an array of unique values kept in sorted
 * order.
 *
 * @constructor
 */
lunr.SortedSet = function () {
  // backing array holding the members
  this.elements = []
  // number of members; kept up to date by `add`
  this.length = 0
}
|
|
|
|
|
|
|
|
/**
 * Rebuilds a sorted set from previously serialised data.
 *
 * The passed array is adopted as the set's elements as-is; it is neither
 * copied nor re-sorted.
 *
 * @param {Array} serialisedData The serialised set to load.
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.load = function (serialisedData) {
  var loaded = new this

  loaded.elements = serialisedData
  loaded.length = serialisedData.length

  return loaded
}
|
|
|
|
|
|
|
|
/**
 * Inserts each passed item into the set at the position that keeps the set
 * ordered. Items for which `indexOf` reports an existing position are skipped.
 *
 * The `length` property is refreshed once, after all items were processed.
 *
 * @param {Object} The objects to add to this set.
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.add = function () {
  for (var i = 0; i < arguments.length; i++) {
    var element = arguments[i]

    if (this.indexOf(element) === -1) {
      this.elements.splice(this.locationFor(element), 0, element)
    }
  }

  this.length = this.elements.length
}
|
|
|
|
|
|
|
|
/**
 * Produces an array holding the members of this sorted set.
 *
 * The result is a shallow copy of the set's elements.
 *
 * @returns {Array}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.toArray = function () {
  var copy = this.elements.slice()

  return copy
}
|
|
|
|
|
|
|
|
/**
 * Builds a new array from the results of calling the provided function on
 * every element of this sorted set.
 *
 * Delegates to Array.prototype.map and has the same signature.
 *
 * @param {Function} fn The function that is called on each element of the
 * set.
 * @param {Object} ctx An optional object that can be used as the context
 * for the function fn.
 * @returns {Array}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.map = function (fn, ctx) {
  var members = this.elements

  return members.map(fn, ctx)
}
|
|
|
|
|
|
|
|
/**
 * Calls the provided function once for each element of the sorted set.
 *
 * Delegates to Array.prototype.forEach and has the same signature.
 *
 * @param {Function} fn The function that is called on each element of the
 * set.
 * @param {Object} ctx An optional object that can be used as the context
 * for the function fn.
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.forEach = function (fn, ctx) {
  var members = this.elements

  return members.forEach(fn, ctx)
}
|
|
|
|
|
|
|
|
/**
 * Returns the index at which a given element can be found in the
 * sorted set, or -1 if it is not present.
 *
 * Performs a binary search over the half-open range [start, end) and
 * therefore relies on the elements being sorted. -1 is also returned when the
 * element cannot be ordered against a member of the set (neither equal,
 * smaller nor greater) and when the range is empty.
 *
 * @param {Object} elem The object to locate in the sorted set.
 * @param {Number} start An optional index at which to start searching from
 * within the set.
 * @param {Number} end An optional index at which to stop search from within
 * the set.
 * @returns {Number}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.indexOf = function (elem, start, end) {
  var elements = this.elements,
      low = start || 0,
      // only a missing `end` falls back to the full length; an explicit 0 is respected
      high = (end === undefined || end === null) ? elements.length : end

  while (low < high) {
    var pivot = low + Math.floor((high - low) / 2),
        pivotElem = elements[pivot]

    if (pivotElem === elem) return pivot

    if (pivotElem < elem) {
      low = pivot + 1
    } else if (pivotElem > elem) {
      high = pivot
    } else {
      // values that cannot be ordered against each other are never "found"
      return -1
    }
  }

  return -1
}
|
|
|
|
|
|
|
|
/**
 * Returns the position within the sorted set that an element should be
 * inserted at to maintain the current order of the set.
 *
 * This function assumes that the element to search for does not already exist
 * in the sorted set. The result is the index of the first member in
 * [start, end) that is not smaller than the element, or `end` when every
 * member is smaller; for an empty set this is 0.
 *
 * @param {Object} elem The elem to find the position for in the set
 * @param {Number} start An optional index at which to start searching from
 * within the set.
 * @param {Number} end An optional index at which to stop search from within
 * the set.
 * @returns {Number}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.locationFor = function (elem, start, end) {
  var elements = this.elements,
      low = start || 0,
      // only a missing `end` falls back to the full length; an explicit 0 is respected
      high = (end === undefined || end === null) ? elements.length : end

  while (low < high) {
    var pivot = low + Math.floor((high - low) / 2)

    if (elements[pivot] < elem) {
      low = pivot + 1
    } else {
      high = pivot
    }
  }

  // always a number, including for an empty set
  return low
}
|
|
|
|
|
|
|
|
/**
 * Creates a new lunr.SortedSet that contains the elements in the intersection
 * of this set and the passed set.
 *
 * Both sets are walked in step from their smallest elements, which relies on
 * both being sorted. Neither input set is modified.
 *
 * @param {lunr.SortedSet} otherSet The set to intersect with this set.
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.intersect = function (otherSet) {
  var intersectSet = new lunr.SortedSet,
      i = 0, j = 0,
      a_len = this.length, b_len = otherSet.length,
      a = this.elements, b = otherSet.elements

  while (i < a_len && j < b_len) {
    if (a[i] === b[j]) {
      intersectSet.add(a[i])
      i++
      j++
    } else if (a[i] < b[j]) {
      i++
    } else {
      // also taken when the two values cannot be ordered against each other,
      // so every iteration advances and the loop always terminates
      j++
    }
  }

  return intersectSet
}
|
|
|
|
|
|
|
|
/**
 * Creates a copy of this set.
 *
 * The copy gets its own elements array (obtained through `toArray`), so
 * adding to the copy does not affect this set.
 *
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.clone = function () {
  var copy = new lunr.SortedSet,
      members = this.toArray()

  copy.elements = members
  copy.length = members.length

  return copy
}
|
|
|
|
|
|
|
|
/**
 * Creates a new lunr.SortedSet that contains the elements in the union
 * of this set and the passed set.
 *
 * The longer of the two sets is cloned and the members of the shorter set
 * are then added to the clone; neither input set is modified.
 *
 * @param {lunr.SortedSet} otherSet The set to union with this set.
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.union = function (otherSet) {
  var thisIsLonger = this.length >= otherSet.length,
      longSet = thisIsLonger ? this : otherSet,
      shortSet = thisIsLonger ? otherSet : this,
      unionSet = longSet.clone()

  unionSet.add.apply(unionSet, shortSet.toArray())

  return unionSet
}
|
|
|
|
|
|
|
|
/**
 * Builds the serialisable form of the sorted set, which is the array
 * produced by `toArray`.
 *
 * @returns {Array}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.toJSON = function () {
  var serialised = this.toArray()

  return serialised
}
|
|
|
|
/*!
|
|
|
|
* lunr.Index
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * lunr.Index is the object that manages a search index. It holds the
 * indexes, stores all the tokens and document lookups, and provides the main
 * user facing API of the library.
 *
 * @constructor
 */
lunr.Index = function () {
  // searchable fields as { name, boost } records, filled by `field`
  this._fields = []

  // name of the document property that identifies a document, see `ref`
  this._ref = 'id'

  // functions applied to document and query tokens
  this.pipeline = new lunr.Pipeline

  // document ref -> set of tokens of that document
  this.documentStore = new lunr.Store

  // token -> documents containing it
  this.tokenStore = new lunr.TokenStore

  // sorted set of every token added to the index
  this.corpusTokens = new lunr.SortedSet
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Loads a previously serialised index.
 *
 * Issues a warning, when a console is available, if the index being imported
 * was serialised by a different version of lunr.
 *
 * @param {Object} serialisedData The serialised index to load, as produced by
 * lunr.Index.prototype.toJSON.
 * @returns {lunr.Index}
 * @memberOf Index
 */
lunr.Index.load = function (serialisedData) {
  // `typeof` guard: touching an undeclared `console` would throw a ReferenceError
  var canWarn = typeof console !== 'undefined' && console.warn

  if (serialisedData.version !== lunr.version && canWarn) {
    console.warn('version mismatch: current ' + lunr.version + ' importing ' + serialisedData.version)
  }

  var idx = new this

  idx._fields = serialisedData.fields
  idx._ref = serialisedData.ref

  idx.documentStore = lunr.Store.load(serialisedData.documentStore)
  idx.tokenStore = lunr.TokenStore.load(serialisedData.tokenStore)
  idx.corpusTokens = lunr.SortedSet.load(serialisedData.corpusTokens)
  idx.pipeline = lunr.Pipeline.load(serialisedData.pipeline)

  return idx
}
|
|
|
|
|
|
|
|
/**
 * Registers a field that will be searchable within the documents of the
 * index.
 *
 * An optional options object can carry a `boost` that affects how much tokens
 * in this field rank in search results. When no boost is given (or it is
 * falsy) a boost of 1 is used.
 *
 * Fields should be added before any documents are added to the index, fields
 * that are added after documents are added to the index will only apply to new
 * documents added to the index.
 *
 * @param {String} fieldName The name of the field within the document that
 * should be indexed
 * @param {Object} opts Optional settings for the field; `opts.boost` is the
 * boost applied to terms in this field.
 * @returns {lunr.Index}
 * @memberOf Index
 */
lunr.Index.prototype.field = function (fieldName, opts) {
  var options = opts || {}

  this._fields.push({ name: fieldName, boost: options.boost || 1 })

  return this
}
|
|
|
|
|
|
|
|
/**
 * Chooses the document property that uniquely identifies documents added to
 * the index; until this is called the property 'id' is used.
 *
 * This should only be changed before adding documents to the index, changing
 * the ref property without resetting the index can lead to unexpected results.
 *
 * @param {String} refName The property to use to uniquely identify the
 * documents in the index.
 * @returns {lunr.Index}
 * @memberOf Index
 */
lunr.Index.prototype.ref = function (refName) {
  var index = this

  index._ref = refName

  return index
}
|
|
|
|
|
|
|
|
/**
 * Add a document to the index.
 *
 * This is the way new documents enter the index, this function will run the
 * fields from the document through the index's pipeline and then add it to
 * the index, it will then show up in search results.
 *
 * For every distinct token of the document a term frequency is stored: the
 * sum, over all fields, of the share of the field's tokens equal to that token
 * multiplied by the field's boost. Fields that are missing from the document
 * or that yield no tokens contribute nothing.
 *
 * @param {Object} doc The document to add to the index.
 * @memberOf Index
 */
lunr.Index.prototype.add = function (doc) {
  var docTokens = {},
      allDocumentTokens = new lunr.SortedSet,
      docRef = doc[this._ref]

  this._fields.forEach(function (field) {
    var value = doc[field.name],
        // a document without this field simply has no tokens for it
        isMissing = value === undefined || value === null,
        fieldTokens = isMissing ? [] : this.pipeline.run(lunr.tokenizer(value))

    docTokens[field.name] = fieldTokens
    lunr.SortedSet.prototype.add.apply(allDocumentTokens, fieldTokens)
  }, this)

  this.documentStore.set(docRef, allDocumentTokens)
  lunr.SortedSet.prototype.add.apply(this.corpusTokens, allDocumentTokens.toArray())

  for (var i = 0; i < allDocumentTokens.length; i++) {
    var token = allDocumentTokens.elements[i]

    var tf = this._fields.reduce(function (memo, field) {
      var fieldTokens = docTokens[field.name],
          fieldLength = fieldTokens.length

      // an empty field would yield 0 / 0 = NaN and poison the whole sum
      if (fieldLength === 0) return memo

      var tokenCount = fieldTokens.filter(function (t) { return t === token }).length

      return memo + (tokenCount / fieldLength * field.boost)
    }, 0)

    this.tokenStore.add(token, { ref: docRef, tf: tf })
  }
}
|
|
|
|
|
|
|
|
/**
 * Removes a document from the index.
 *
 * Use this to make sure a document no longer shows up in search results.
 *
 * Only the ref property of the passed object is used, so it may be a
 * completely different object from the one that was added as long as the ref
 * value is the same. Nothing happens when no document with that ref is stored.
 *
 * @param {Object} doc The document to remove from the index.
 * @memberOf Index
 */
lunr.Index.prototype.remove = function (doc) {
  var docRef = doc[this._ref],
      documents = this.documentStore

  if (!documents.has(docRef)) return

  // fetch the document's tokens before its store entry is dropped
  var docTokens = documents.get(docRef)

  documents.remove(docRef)

  docTokens.forEach(function (token) {
    this.tokenStore.remove(token, docRef)
  }, this)
}
|
|
|
|
|
|
|
|
/**
 * Updates a document in the index.
 *
 * When a document contained in the index changes (fields changed, added or
 * removed) it should be updated in the index so that it is matched correctly
 * against search queries.
 *
 * This method is just a wrapper around `remove` followed by `add`.
 *
 * @param {Object} doc The document to update in the index.
 * @see Index.prototype.remove
 * @see Index.prototype.add
 * @memberOf Index
 */
lunr.Index.prototype.update = function (doc) {
  var index = this

  index.remove(doc)
  index.add(doc)
}
|
|
|
|
|
|
|
|
/**
 * Calculates the inverse document frequency for a term within the index.
 *
 * The document frequency is the number of keys of the token store entry for
 * the term. When it is zero the result is 1, otherwise it is
 * 1 + ln(tokenStore.length / documentFrequency).
 *
 * @param {String} term The term to calculate the idf of.
 * @returns {Number}
 * @private
 * @memberOf Index
 */
lunr.Index.prototype.idf = function (term) {
  var documentFrequency = Object.keys(this.tokenStore.get(term)).length

  return documentFrequency === 0
    ? 1
    : 1 + Math.log(this.tokenStore.length / documentFrequency)
}
|
|
|
|
|
|
|
|
/**
 * Searches the index using the passed query.
 *
 * Queries should be a string, multiple words are allowed and will lead to an
 * AND based query, e.g. `idx.search('foo bar')` will run a search for
 * documents containing both 'foo' and 'bar'.
 *
 * Query tokens go through the same pipeline as document tokens, so any
 * language processing is applied to every query term as well. If none of the
 * resulting tokens is known to the token store an empty array is returned.
 *
 * Each query term is expanded through the token store, so that the term 'he'
 * might be expanded to 'hello' and 'help' if those terms were already included
 * in the index. Exact matches are weighted ten times higher than expansions.
 *
 * Matching documents are returned as an array of objects sorted by descending
 * score; each object contains the matching document ref, as set for this
 * index, and the similarity score of the document against the query.
 *
 * @param {String} query The query to search the index with.
 * @returns {Object}
 * @see Index.prototype.idf
 * @see Index.prototype.documentVector
 * @memberOf Index
 */
lunr.Index.prototype.search = function (query) {
  var self = this,
      queryTokens = this.pipeline.run(lunr.tokenizer(query)),
      queryArr = new Array (this.corpusTokens.length),
      documentSets = [],
      fieldBoosts = this._fields.reduce(function (sum, field) {
        return sum + field.boost
      }, 0)

  var hasSomeToken = queryTokens.some(function (token) {
    return self.tokenStore.has(token)
  })

  if (!hasSomeToken) return []

  // term frequency shared by every query token
  var queryTf = 1 / queryTokens.length * this._fields.length * fieldBoosts

  queryTokens.forEach(function (token) {
    var matchingDocs = self.tokenStore.expand(token).reduce(function (memo, key) {
      var pos = self.corpusTokens.indexOf(key),
          idf = self.idf(key),
          exactMatchBoost = (key === token ? 10 : 1),
          docsForKey = new lunr.SortedSet

      // tf-idf weight of this key in the query vector; exact matches are
      // boosted so that they rank higher than expanded terms
      if (pos > -1) {
        queryArr[pos] = queryTf * idf * exactMatchBoost
      }

      // collect the refs of all documents that contain this key
      Object.keys(self.tokenStore.get(key)).forEach(function (ref) {
        docsForKey.add(ref)
      })

      return memo.union(docsForKey)
    }, new lunr.SortedSet)

    documentSets.push(matchingDocs)
  })

  // AND semantics: keep only documents matched by every query token
  var documentSet = documentSets.reduce(function (memo, candidates) {
    return memo.intersect(candidates)
  })

  var queryVector = new lunr.Vector (queryArr)

  var results = documentSet.map(function (ref) {
    return { ref: ref, score: queryVector.similarity(self.documentVector(ref)) }
  })

  return results.sort(function (a, b) {
    return b.score - a.score
  })
}
|
|
|
|
|
|
|
|
/**
 * Builds the vector for the document matching the passed documentRef.
 *
 * The vector holds the tf-idf score of every token contained in the document,
 * placed at the position of that token in the corpus tokens. It has one
 * element per corpus token; elements for tokens the document does not contain
 * are 0.
 *
 * @param {Object} documentRef The ref to find the document with.
 * @returns {lunr.Vector}
 * @private
 * @memberOf Index
 */
lunr.Index.prototype.documentVector = function (documentRef) {
  var documentTokens = this.documentStore.get(documentRef),
      tokenTotal = documentTokens.length,
      weights = new Array (this.corpusTokens.length)

  for (var i = 0; i < tokenTotal; i++) {
    var token = documentTokens.elements[i],
        tf = this.tokenStore.get(token)[documentRef].tf,
        idf = this.idf(token),
        slot = this.corpusTokens.indexOf(token)

    weights[slot] = tf * idf
  }

  // the Vector constructor fills the untouched slots with 0
  return new lunr.Vector (weights)
}
|
|
|
|
|
|
|
|
/**
 * Builds the serialisable form of the index: the lunr version, the field and
 * ref configuration, and the serialised document store, token store, corpus
 * tokens and pipeline.
 *
 * @returns {Object}
 * @memberOf Index
 */
lunr.Index.prototype.toJSON = function () {
  var serialised = {}

  serialised.version = lunr.version
  serialised.fields = this._fields
  serialised.ref = this._ref
  serialised.documentStore = this.documentStore.toJSON()
  serialised.tokenStore = this.tokenStore.toJSON()
  serialised.corpusTokens = this.corpusTokens.toJSON()
  serialised.pipeline = this.pipeline.toJSON()

  return serialised
}
|
|
|
|
/*!
|
|
|
|
* lunr.Store
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * lunr.Store is a simple key-value store used for storing the sets of tokens
 * of the documents held in an index.
 *
 * @constructor
 * @module
 */
lunr.Store = function () {
  // number of entries in the store
  this.length = 0
  // id -> tokens lookup
  this.store = {}
}
|
|
|
|
|
|
|
|
/**
 * Rebuilds a store from previously serialised data.
 *
 * The stored length is taken over as-is and every value of the serialised
 * store is turned back into a lunr.SortedSet.
 *
 * @param {Object} serialisedData The serialised store to load.
 * @returns {lunr.Store}
 * @memberOf Store
 */
lunr.Store.load = function (serialisedData) {
  var loaded = new this,
      entries = {}

  loaded.length = serialisedData.length

  Object.keys(serialisedData.store).forEach(function (key) {
    entries[key] = lunr.SortedSet.load(serialisedData.store[key])
  })

  loaded.store = entries

  return loaded
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Stores the given tokens in the store against the given id.
|
|
|
|
*
|
|
|
|
* @param {Object} id The key used to store the tokens against.
|
|
|
|
* @param {Object} tokens The tokens to store against the key.
|
|
|
|
* @memberOf Store
|
|
|
|
*/
|
|
|
|
lunr.Store.prototype.set = function (id, tokens) {
|
|
|
|
this.store[id] = tokens
|
|
|
|
this.length = Object.keys(this.store).length
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Retrieves the tokens from the store for a given key.
|
|
|
|
*
|
|
|
|
* @param {Object} id The key to lookup and retrieve from the store.
|
|
|
|
* @returns {Object}
|
|
|
|
* @memberOf Store
|
|
|
|
*/
|
|
|
|
lunr.Store.prototype.get = function (id) {
|
|
|
|
return this.store[id]
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks whether the store contains a key.
|
|
|
|
*
|
|
|
|
* @param {Object} id The id to look up in the store.
|
|
|
|
* @returns {Boolean}
|
|
|
|
* @memberOf Store
|
|
|
|
*/
|
|
|
|
lunr.Store.prototype.has = function (id) {
|
|
|
|
return id in this.store
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Removes the value for a key in the store.
|
|
|
|
*
|
|
|
|
* @param {Object} id The id to remove from the store.
|
|
|
|
* @memberOf Store
|
|
|
|
*/
|
|
|
|
lunr.Store.prototype.remove = function (id) {
|
|
|
|
if (!this.has(id)) return
|
|
|
|
|
|
|
|
delete this.store[id]
|
|
|
|
this.length--
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a representation of the store ready for serialisation.
|
|
|
|
*
|
|
|
|
* @returns {Object}
|
|
|
|
* @memberOf Store
|
|
|
|
*/
|
|
|
|
lunr.Store.prototype.toJSON = function () {
|
|
|
|
return {
|
|
|
|
store: this.store,
|
|
|
|
length: this.length
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*!
|
|
|
|
* lunr.stemmer
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
* Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * lunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * Words shorter than three characters are returned unchanged. All regular
 * expressions used by the steps are compiled once, when the module is
 * evaluated, and shared by every call.
 *
 * @module
 * @param {String} w The string to stem
 * @returns {String}
 * @see lunr.Pipeline
 */
lunr.stemmer = (function(){
  var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    },

    step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    },

    c = "[^aeiou]",          // consonant
    v = "[aeiouy]",          // vowel
    C = c + "[^aeiouy]*",    // consonant sequence
    V = v + "[aeiou]*",      // vowel sequence

    mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
    meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
    mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
    s_v = "^(" + C + ")?" + v;                   // vowel in stem

  // Compiled once here rather than on every call. None of these use the g or
  // y flags, so test/exec keep no state between calls and sharing is safe.
  var re_mgr0 = new RegExp(mgr0),
    re_mgr1 = new RegExp(mgr1),
    re_meq1 = new RegExp(meq1),
    re_s_v = new RegExp(s_v),
    re_cvc = new RegExp("^" + C + v + "[^aeiouwxy]$"),   // consonant-vowel-consonant stem
    re_double = new RegExp("([^aeiouylsz])\\1$"),         // doubled final consonant
    re_lastChar = /.$/,

    re_1a_es = /^(.+?)(ss|i)es$/,
    re_1a_s = /^(.+?)([^s])s$/,
    re_1b_eed = /^(.+?)eed$/,
    re_1b_ed = /^(.+?)(ed|ing)$/,
    re_1b_fix = /(at|bl|iz)$/,
    re_1c = /^(.+?)y$/,
    re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,
    re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,
    re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,
    re_4_ion = /^(.+?)(s|t)(ion)$/,
    re_5_e = /^(.+?)e$/,
    re_5_ll = /ll$/;

  return function (w) {
    var stem,
      suffix,
      firstch,
      fp;

    if (w.length < 3) { return w; }

    // An initial "y" is upper-cased for the duration of the algorithm so the
    // vowel class (which contains lower-case y) does not match it.
    firstch = w.charAt(0);
    if (firstch === "y") {
      w = firstch.toUpperCase() + w.slice(1);
    }

    // Step 1a
    if (re_1a_es.test(w)) { w = w.replace(re_1a_es,"$1$2"); }
    else if (re_1a_s.test(w)) { w = w.replace(re_1a_s,"$1$2"); }

    // Step 1b
    if ((fp = re_1b_eed.exec(w))) {
      if (re_mgr0.test(fp[1])) {
        w = w.replace(re_lastChar,"");
      }
    } else if ((fp = re_1b_ed.exec(w))) {
      stem = fp[1];
      if (re_s_v.test(stem)) {
        w = stem;
        if (re_1b_fix.test(w)) { w = w + "e"; }
        else if (re_double.test(w)) { w = w.replace(re_lastChar,""); }
        else if (re_cvc.test(w)) { w = w + "e"; }
      }
    }

    // Step 1c
    if ((fp = re_1c.exec(w))) {
      stem = fp[1];
      if (re_s_v.test(stem)) { w = stem + "i"; }
    }

    // Step 2
    if ((fp = re_2.exec(w))) {
      stem = fp[1];
      suffix = fp[2];
      if (re_mgr0.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3
    if ((fp = re_3.exec(w))) {
      stem = fp[1];
      suffix = fp[2];
      if (re_mgr0.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4
    if ((fp = re_4.exec(w))) {
      stem = fp[1];
      if (re_mgr1.test(stem)) {
        w = stem;
      }
    } else if ((fp = re_4_ion.exec(w))) {
      // only "ion" is dropped; the preceding s or t stays on the stem
      stem = fp[1] + fp[2];
      if (re_mgr1.test(stem)) {
        w = stem;
      }
    }

    // Step 5
    if ((fp = re_5_e.exec(w))) {
      stem = fp[1];
      if (re_mgr1.test(stem) || (re_meq1.test(stem) && !(re_cvc.test(stem)))) {
        w = stem;
      }
    }

    if (re_5_ll.test(w) && re_mgr1.test(w)) {
      w = w.replace(re_lastChar,"");
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = firstch.toLowerCase() + w.slice(1);
    }

    return w;
  }
})();
|
|
|
|
|
|
|
|
// Register the stemmer with lunr.Pipeline under the label 'stemmer'.
// NOTE(review): registerFunction is defined outside this section; presumably
// the label is what a serialised pipeline uses to refer to this function.
lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
|
|
|
|
/*!
|
|
|
|
* lunr.stopWordFilter
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * lunr.stopWordFilter is an English language stop word filter. Tokens found
 * in lunr.stopWordFilter.stopWords are dropped; every other token passes
 * through untouched.
 *
 * It is intended to be used as a step in the Pipeline. A dropped token is
 * signalled by returning undefined.
 *
 * @module
 * @param {String} token The token to pass through the filter
 * @returns {String} The token itself, or undefined when it is a stop word.
 * @see lunr.Pipeline
 */
lunr.stopWordFilter = function (token) {
  var isStopWord = lunr.stopWordFilter.stopWords.indexOf(token) !== -1

  return isStopWord ? undefined : token
}
|
|
|
|
|
|
|
|
// The English stop words, written straight into a lunr.SortedSet's `elements`
// instead of being added one at a time. The list is kept in ascending
// alphabetical order.
// NOTE(review): lunr.SortedSet is defined outside this section; presumably its
// lookups rely on `elements` staying sorted, so keep new words in order.
lunr.stopWordFilter.stopWords = new lunr.SortedSet
lunr.stopWordFilter.stopWords.elements = [
  "a",
  "able",
  "about",
  "across",
  "after",
  "all",
  "almost",
  "also",
  "am",
  "among",
  "an",
  "and",
  "any",
  "are",
  "as",
  "at",
  "be",
  "because",
  "been",
  "but",
  "by",
  "can",
  "cannot",
  "could",
  "dear",
  "did",
  "do",
  "does",
  "either",
  "else",
  "ever",
  "every",
  "for",
  "from",
  "get",
  "got",
  "had",
  "has",
  "have",
  "he",
  "her",
  "hers",
  "him",
  "his",
  "how",
  "however",
  "i",
  "if",
  "in",
  "into",
  "is",
  "it",
  "its",
  "just",
  "least",
  "let",
  "like",
  "likely",
  "may",
  "me",
  "might",
  "most",
  "must",
  "my",
  "neither",
  "no",
  "nor",
  "not",
  "of",
  "off",
  "often",
  "on",
  "only",
  "or",
  "other",
  "our",
  "own",
  "rather",
  "said",
  "say",
  "says",
  "she",
  "should",
  "since",
  "so",
  "some",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "this",
  "tis",
  "to",
  "too",
  "twas",
  "us",
  "wants",
  "was",
  "we",
  "were",
  "what",
  "when",
  "where",
  "which",
  "while",
  "who",
  "whom",
  "why",
  "will",
  "with",
  "would",
  "yet",
  "you",
  "your"
]

// Take the length from the list itself (119 entries) so that editing the
// list can never leave a stale, hand-maintained count behind.
lunr.stopWordFilter.stopWords.length = lunr.stopWordFilter.stopWords.elements.length

// Register the filter with lunr.Pipeline under the label 'stopWordFilter'.
lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
|
|
|
|
/*!
|
|
|
|
 * lunr.TokenStore
|
|
|
|
* Copyright (C) 2013 Oliver Nightingale
|
|
|
|
* Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * lunr.TokenStore is used for efficient storing and lookup of the reverse
 * index of token to document ref.
 *
 * Tokens are stored one character per level in nested plain objects. Each
 * node holds, under its `docs` key, the documents stored against the token
 * spelled out by the path from the root to that node.
 *
 * @constructor
 */
lunr.TokenStore = function () {
  this.root = { docs: {} }
  this.length = 0
}

/**
 * Loads a previously serialised token store.
 *
 * The serialised root is adopted as is (not copied) and the length is taken
 * from the serialised data.
 *
 * @param {Object} serialisedData The serialised token store to load.
 * @returns {lunr.TokenStore}
 * @memberOf TokenStore
 */
lunr.TokenStore.load = function (serialisedData) {
  var store = new this

  store.root = serialisedData.root
  store.length = serialisedData.length

  return store
}

/**
 * Adds a new token doc pair to the store.
 *
 * By default this function starts at the root of the current store, however
 * it can start at any node of any token store if required.
 *
 * Missing nodes along the token's path are created on the way down. The doc
 * is stored under its `ref` at the final node and the store's length is
 * incremented by one for every call that reaches it.
 *
 * @param {String} token The token to store the doc under
 * @param {Object} doc The doc to store against the token
 * @param {Object} root An optional node at which to start looking for the
 * correct place to enter the doc, by default the root of this lunr.TokenStore
 * is used.
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.add = function (token, doc, root) {
  var node = root || this.root,
      key = token[0],
      rest = token.slice(1)

  if (!(key in node)) node[key] = {docs: {}}

  if (rest.length === 0) {
    node[key].docs[doc.ref] = doc
    this.length += 1
    return
  }

  return this.add(rest, doc, node[key])
}

/**
 * Checks whether this key is contained within this lunr.TokenStore.
 *
 * By default this function starts at the root of the current store, however
 * it can start at any node of any token store if required.
 *
 * Only the existence of the path is checked, so a prefix of a stored token
 * is reported as present even if no doc was stored against the prefix itself.
 *
 * @param {String} token The token to check for
 * @param {Object} root An optional node at which to start
 * @returns {Boolean}
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.has = function (token, root) {
  var node = root || this.root,
      key = token[0],
      rest = token.slice(1)

  if (!(key in node)) return false

  if (rest.length === 0) return true

  return this.has(rest, node[key])
}

/**
 * Retrieve a node from the token store for a given token.
 *
 * By default this function starts at the root of the current store, however
 * it can start at any node of any token store if required.
 *
 * @param {String} token The token to get the node for.
 * @param {Object} root An optional node at which to start.
 * @returns {Object} The node for the token, or a new empty object when the
 * token's path does not exist in the store.
 * @see TokenStore.prototype.get
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.getNode = function (token, root) {
  var node = root || this.root,
      key = token[0],
      rest = token.slice(1)

  if (!(key in node)) return {}

  if (rest.length === 0) return node[key]

  return this.getNode(rest, node[key])
}

/**
 * Retrieve the documents for a node for the given token.
 *
 * By default this function starts at the root of the current store, however
 * it can start at any node of any token store if required.
 *
 * @param {String} token The token to get the documents for.
 * @param {Object} root An optional node at which to start.
 * @returns {Object} The docs stored against the token, keyed by ref, or an
 * empty object when there are none.
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.get = function (token, root) {
  return this.getNode(token, root).docs || {}
}

/**
 * Remove the document identified by ref from the token in the store.
 *
 * By default this function starts at the root of the current store, however
 * it can start at any node of any token store if required.
 *
 * Does nothing when the token's path does not exist. The nodes themselves are
 * left in place and the store's length is not adjusted.
 *
 * @param {String} token The token to remove the document from.
 * @param {String} ref The ref of the document to remove from this token.
 * @param {Object} root An optional node at which to start.
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.remove = function (token, ref, root) {
  var node = root || this.root,
      key = token[0],
      rest = token.slice(1)

  if (!(key in node)) return

  if (rest.length === 0) {
    delete node[key].docs[ref]
  } else {
    return this.remove(rest, ref, node[key])
  }
}

/**
 * Find all the possible suffixes of the passed token using tokens
 * currently in the store.
 *
 * The token itself is included when it has at least one doc stored against
 * it, followed by every longer stored token that starts with it.
 *
 * @param {String} token The token to expand.
 * @param {Array} memo An optional array that matches are appended to; used
 * to accumulate results across the recursive calls.
 * @returns {Array} The memo array holding every matching token.
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.expand = function (token, memo) {
  var node = this.getNode(token),
      docs = node.docs || {},
      matches = memo || []

  if (Object.keys(docs).length) matches.push(token)

  Object.keys(node)
    .forEach(function (key) {
      if (key === 'docs') return

      // The recursive call pushes straight onto the shared array, so its
      // return value needs no merging.
      this.expand(token + key, matches)
    }, this)

  return matches
}

/**
 * Returns a representation of the token store ready for serialisation.
 *
 * The returned object references the live root node rather than a copy.
 *
 * @returns {Object}
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.toJSON = function () {
  return {
    root: this.root,
    length: this.length
  }
}
|
|
|
|
|