SimHash implementation for detecting near-duplicate text, using the SipHash-2-4 hash function.
## Installation

```bash
npm install @counterrealis/simhash-wasm
```
## Usage

```typescript
const { SimHash } = require("@counterrealis/simhash-wasm");

// NOTE(review): the meaning of the constructor argument (3) is not shown in
// this example — confirm against the package documentation.
const simhash = new SimHash(3);

// Text to compare
const text1 = "khan academy";
const text2 = "khan academia";

// Compute bigint hashes (the sample values end in `n`, i.e. they are bigint
// literals, so the TypeScript type is the primitive `bigint`)
const bigIntHash1: bigint = simhash.compute(text1); // 182883240033146189889226648883436234289n
const bigIntHash2: bigint = simhash.compute(text2); // 188200070891594117632711953576407656125n

// Calculate similarity between bigint hashes
// (for these sample values, 105 of the 128 hash bits agree: 105 / 128 = 0.8203125)
const bigIntSimilarity: number = simhash.similarity(bigIntHash1, bigIntHash2); // 0.8203125

// Compute hexadecimal hashes (same values as above, rendered as 32 hex digits)
const hexHash1: string = simhash.compute_hex(text1); // "899607e4844c4236a88584c4ca58a631"
const hexHash2: string = simhash.compute_hex(text2); // "8d9603e494480e34e8e7a5edfa58e6bd"

// Calculate similarity between hexadecimal hashes
const hexSimilarity: number = simhash.similarity_from_hex(hexHash1, hexHash2); // 0.8203125

// Free WebAssembly memory when done
simhash.free();
```