Public
Edited
Oct 28, 2023
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
SearchTree = {
// The alphabet whose keys represent entries.
const ALPHABET = "abcdefghijklmnopqrstuvwxyz";
// The maximum number of results for an entry.
const MAX_RESULTS_COUNT = 10;
// The number of bits needed to store an integer `n <= MAX_RESULTS_COUNT`.
const MAX_RESULTS_COUNT_BITS = Math.ceil(Math.log2(MAX_RESULTS_COUNT + 1));
// The number of bits needed to store the alphabet bitmap and the number of results.
const BITS_PER_NODE = ALPHABET.length + MAX_RESULTS_COUNT_BITS;
// The number of bytes needed to store the alphabet bitmap and the number of results.
const BYTES_PER_NODE = Math.ceil(BITS_PER_NODE / 8);

// The number of bytes needed to store the alphabet bitmap.
const BYTES_FOR_ALPHABET = Math.ceil(ALPHABET.length / 8);
// The number of bytes needed to store an integer `n <= MAX_RESULTS_COUNT`.
const BYTES_FOR_RESULTS_COUNT = Math.ceil(MAX_RESULTS_COUNT_BITS / 8);
// The number of bits to skip (using a shift) when reading the number of results.
const BITS_TO_SKIP_FOR_RESULTS_COUNT = ALPHABET.length % 8;

// The number of bytes needed to store one edge, i.e. an offset to another node.
const BYTES_PER_EDGE = 4; // 32 bits
// The number of bytes needed to store one result, i.e. the index of an input entry.
const BYTES_PER_RESULT = 4; // 32 bits

function buildSearchTree(entries) {
// Normalize input.
entries = entries.map(([k, v]) => [k.toLowerCase(), v]).sort((a, b) => a[0] - b[0]);
// Build tree.
const root = { _: [] };
let nodesCount = 1;
let edgesCount = 0;
let resultsCount = 0;
for (const [fullKey, value] of entries) {
let node = root;
for (let i = 0; i < fullKey.length; i++) {
const char = fullKey[i];
if (char in node) {
node = node[char];
} else {
nodesCount++;
edgesCount++;
node = node[char] = { _: [] };
}
}
node._.push(value);
resultsCount++;
}

// Encode tree to buffer.
const treeBuffer = new Uint8Array(
nodesCount * BYTES_PER_NODE + edgesCount * BYTES_PER_EDGE + resultsCount * BYTES_PER_RESULT,
);
const treeBufferView = new DataView(treeBuffer.buffer);
let currentOffset = 0;
const encodeSearchTree = (offsetOffset, node, textSoFar) => {
if (offsetOffset > 0) {
// Do not write offset for root node.
treeBufferView.setUint32(offsetOffset, currentOffset);
}

// Compute and write bits for the node.
let childrenCount = 0;
let nodeBits = 0n;
for (let i = 0; i < ALPHABET.length; i++) {
const char = ALPHABET[i];
if (char in node) {
nodeBits |= 1n << BigInt(i);
childrenCount++;
}
}

const resultsCount = node._.length;

if (resultsCount > MAX_RESULTS_COUNT) {
throw Object.assign(
new Error("number of results exceeds MAX_RESULTS_COUNT"),
{ resultsCount, MAX_RESULTS_COUNT, key: textSoFar },
);
}

nodeBits |= BigInt(resultsCount) << BigInt(ALPHABET.length);
treeBufferView.setBigUint64(currentOffset, nodeBits);
currentOffset += BYTES_PER_NODE;

// Make some room for children; these values will be filled later.
const childrenStartOffset = currentOffset;

currentOffset += childrenCount * BYTES_PER_EDGE;

// Write results.
for (const result of node._) {
treeBufferView.setUint32(currentOffset, result);
currentOffset += BYTES_PER_RESULT;
}

// Write children.
for (let i = 0; i < ALPHABET.length; i++) {
const char = ALPHABET[i];
if (char in node) {
encodeSearchTree(childrenStartOffset + i * BYTES_PER_EDGE, node[char], char);
}
}
};

encodeSearchTree(0, root, "");
return treeBuffer;
}

function search(tree, prefix, treeOffset, prefixOffset) {
if (prefix.length === prefixOffset) {
// Result found, collect results with BFS.
const queue = [treeOffset];
const results = [];

while (queue.length > 0) {
const nodeOffset = queue.pop();

// Add results.
const edgesCount = popcount(tree, nodeOffset, BYTES_FOR_ALPHABET, ALPHABET.length - BYTES_FOR_ALPHABET * 8);
const resultsCount = ALPHABET.length % 8 === 0
? getAnyUint(tree, nodeOffset + BYTES_FOR_ALPHABET, BYTES_FOR_RESULTS_COUNT)
: getAnyUint(tree, nodeOffset + BYTES_FOR_ALPHABET - 1, BYTES_FOR_RESULTS_COUNT) >> (ALPHABET.length % 8);
const resultsOffset = nodeOffset + edgesCount * BYTES_PER_EDGE;

for (let i = 0; i < resultsCount; i++) {
const resultOffset = resultsOffset + i * BYTES_PER_RESULT;
const result = tree.getUint32(resultOffset);

results.push(result);
}

// Enqueue children.
let childOffset = nodeOffset + BYTES_PER_NODE;

for (let i = 0; i < ALPHABET.length; i += 8) {
const iters = Math.min(i + 8, ALPHABET.length) - i;
const byte = tree.getUint8(nodeOffset + i / 8);

for (let j = 0; j < iters; j++) {
if ((1 << j) & byte) {
queue.push(childOffset);
childOffset += BYTES_PER_EDGE;
}
}
}
}

return results;
}

// Compute index of character to find.
const searchIndex = prefix[prefixOffset];
const searchIndexBytesToSkip = Math.floor(searchIndex / 8);
const searchIndexInByte = searchIndex % 8;

const hasSearchIndexChild = (tree.getUint8(treeOffset + searchIndexBytesToSkip) & (1 << searchIndexInByte)) !== 0;

if (!hasSearchIndexChild) {
return [];
}

const bitsToSkipAtEnd = ALPHABET.length - searchIndex;
const searchIndexInTree = popcount(tree, treeOffset, BYTES_FOR_ALPHABET - Math.floor(bitsToSkipAtEnd / 8), bitsToSkipAtEnd % 8);

return search(tree, prefix, searchIndexInTree, prefixOffset + 1);
}

return {
build: buildSearchTree,
search(tree, prefix) {
const prefixIndices = [];

for (const char of prefix) {
const lowerChar = char.toLowerCase();
const alphabetIndex = ALPHABET.indexOf(lowerChar);

if (alphabetIndex === -1) {
throw new Error("input not in alphabet");
}

prefixIndices.push(alphabetIndex);
}

return search(new DataView(tree.buffer), prefixIndices, 0, 0);
},
};
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more