LICENSE)

Lightweight Full-Text Indexing and Searching Library.

Features

- Multiple fields full-text indexing and searching.
- Per-field score boosting.
- BM25 ranking function to rank
matching documents.
- Trie based dynamic
Inverted Index.
- Configurable tokenizer and term filter.
- Free text queries with query expansion.
- Small memory footprint.

Example

```js
import { createIndex, indexAdd } from "ndx";
import { indexQuery } from "ndx/query";

const termFilter = (term) => term.toLowerCase();

function createDocumentIndex(fields) {
  // `createIndex()` creates an index data structure.
  // First argument specifies how many different fields we want to index.
  const index = createIndex(
    fields.length,
    // Tokenizer is a function that breaks text into words, phrases, symbols,
    // or other meaningful elements called tokens.
    (s) => s.split(" "),
    // Filter is a function that processes tokens and returns terms, terms are
    // used in Inverted Index to index documents.
    termFilter,
  );
  // `fieldGetters` is an array with functions that will be used to retrieve
  // data from different fields.
  const fieldGetters = fields.map((f) => (doc) => doc[f.name]);
  // `fieldBoostFactors` is an array of boost factors for each field, in this
  // example all fields will have identical weight.
  const fieldBoostFactors = fields.map(() => 1);

  return {
    index,
    // `add()` will add documents to the index.
    add(doc) {
      indexAdd(
        index,
        fieldGetters,
        // Document key, it can be a unique document id or a reference to a
        // document if you want to store all documents in memory.
        doc.id,
        // Document.
        doc,
      );
    },
    // `remove()` will remove documents from the index.
    remove(id) {
      // When document is removed we are just marking document id as being
      // removed. Index data structure still contains references to the removed
      // document.
      indexRemove(index, removed, id);
      if (removed.size > 10) {
        // `indexVacuum()` removes all references to removed documents from the
        // index.
        indexVacuum(index, removed);
      }
    },

    // `search()` will be used to perform queries.
    search(q) {
      return indexQuery(
        index,
        fieldBoostFactors,
        // BM25 ranking function constants:
        // BM25 k1 constant, controls non-linear term frequency normalization
        // (saturation).
        1.2,
        // BM25 b constant, controls to what degree document length normalizes
        // tf values.
        0.75,
        q,
      );
    }
  };
}

// Create a document index that will index the `content` field.
const index = createDocumentIndex([{ name: "content" }]);

const docs = [
  {
    "id": "1",
    "content": "Lorem ipsum dolor",
  },
  {
    "id": "2",
    "content": "Lorem ipsum",
  }
];

// Add documents to the index.
docs.forEach((d) => { index.add(d); });

// Perform a search query.
index.search("Lorem");
// => [{ key: "2" , score: ... }, { key: "1", score: ... } ]
//
// document with an id `"2"` is ranked higher because it has a `"content"`
// field with fewer terms than document with an id `"1"`.

index.search("dolor");
// => [{ key: "1", score: ... }]
```

`$3`

The `ndx` library doesn't provide any tokenizers or filters. There are other
libraries that implement tokenizers; for example,
Natural has a good collection of
tokenizers and stemmers.

License

MIT

Example

js
import { createIndex, indexAdd, indexRemove, indexVacuum } from "ndx";
import { indexQuery } from "ndx/query";

// Lower-cases a token; handed to `createIndex()` as the term filter.
const termFilter = (token) => token.toLowerCase();

/**
 * Wraps the `ndx` index primitives in a small object with `add`, `remove`
 * and `search` methods.
 *
 * @param {Array<{name: string}>} fields - Fields to index; `name` is the
 *   document property each field getter reads.
 * @returns {{index: *, add: function(*): void, remove: function(*): void, search: function(string): *}}
 *   The underlying index plus helpers operating on it.
 */
function createDocumentIndex(fields) {
  // `createIndex()` creates an index data structure.
  // First argument specifies how many different fields we want to index.
  const index = createIndex(
    fields.length,
    // Tokenizer is a function that breaks text into words, phrases, symbols,
    // or other meaningful elements called tokens.
    (s) => s.split(" "),
    // Filter is a function that processes tokens and returns terms, terms are
    // used in Inverted Index to index documents.
    termFilter,
  );
  // `fieldGetters` is an array with functions that will be used to retrieve
  // data from different fields.
  const fieldGetters = fields.map((f) => (doc) => doc[f.name]);
  // `fieldBoostFactors` is an array of boost factors for each field, in this
  // example all fields will have identical weight.
  const fieldBoostFactors = fields.map(() => 1);
  // `removed` is the set handed to `indexRemove()` and `indexVacuum()`; it
  // tracks documents that were marked as removed but not yet vacuumed.
  const removed = new Set();

  return {
    index,
    // `add()` will add documents to the index.
    add(doc) {
      indexAdd(
        index,
        fieldGetters,
        // Document key, it can be a unique document id or a reference to a
        // document if you want to store all documents in memory.
        doc.id,
        // Document.
        doc,
      );
    },
    // `remove()` will remove documents from the index.
    remove(id) {
      // When document is removed we are just marking document id as being
      // removed. Index data structure still contains references to the removed
      // document.
      indexRemove(index, removed, id);
      if (removed.size > 10) {
        // `indexVacuum()` removes all references to removed documents from the
        // index.
        indexVacuum(index, removed);
      }
    },

    // `search()` will be used to perform queries.
    search(q) {
      return indexQuery(
        index,
        fieldBoostFactors,
        // BM25 ranking function constants:
        // BM25 k1 constant, controls non-linear term frequency normalization
        // (saturation).
        1.2,
        // BM25 b constant, controls to what degree document length normalizes
        // tf values.
        0.75,
        q,
      );
    },
  };
}

// Create a document index that will index the `content` field.
const index = createDocumentIndex([{ name: "content" }]);

const docs = [
  {
    "id": "1",
    "content": "Lorem ipsum dolor",
  },
  {
    "id": "2",
    "content": "Lorem ipsum",
  }
];

// Add documents to the index.
docs.forEach((d) => { index.add(d); });

// Perform a search query.
index.search("Lorem");
// => [{ key: "2" , score: ... }, { key: "1", score: ... } ]
//
// document with an id `"2"` is ranked higher because it has a `"content"`
// field with fewer terms than document with an id `"1"`.

index.search("dolor");
// => [{ key: "1", score: ... }]

$3

The `ndx` library doesn't provide any tokenizers or filters. There are other
libraries that implement tokenizers; for example,
Natural has a good collection of
tokenizers and stemmers.

ndx

ndx · ![GitHub license](https://github.com/ndx-search/ndx/blob/master/LICENSE)

Features

Example

$3

License

ndx

ndx · ![GitHub license](https://github.com/ndx-search/ndx/blob/master/LICENSE)

Features

Example

$3

License

Dist Tags

`$3`

`$3`