ents = {
let t1 = performance.now();
let data = corpusEN[0];
let span = data.length;
let bits = 4,
divs = 1 / bits;
let calc = (span) => bits * Math.ceil(span * divs);
let refs = {};
let dict = {};
let init = Array.from({ length: span }, () => []);
for (let i = 0; i < span; ++i) {
let line = data[i].split(" ");
let size = line.length;
for (let j = 0; j < size; ++j) {
let word = line[j];
if (word.length < 2) continue;
let list = refs[word];
list ? list.push(i) : (refs[word] = list = [i]);
init[i].push(list)
}
}
let flat = Array.from({ length: span }, () => "");
let keys = Object.keys(refs);
span = keys.length;
for (let i = 0; i < span; ++i) {
let line = keys[i];
line = transliterable(line) ? encodeURIComponent(line) : line;
let copy = line;
for (let j = 0; j < bits; ++j) {
line += " ".repeat(j + 1) + copy;
}
line = line.padEnd(calc(line.length), " ");
flat[i] = line;
}
let pads = bits * 0;
let mark = String.fromCodePoint(0).repeat(pads);
let rope = flat.join(mark) + mark;
let size = rope.length;
let [view, exit] = byteLense(size, bits);
let utf8 = new TextEncoder();
utf8.encodeInto(rope, exit);
let freq = new Map();
let from, till = 0, ceil = Math.ceil;
for (let i = 0; i < span; ++i) {
let word = keys[i];
dict[word] = [];
from = till;
till += ceil((flat[i].length + pads) * divs);
for (let n = from; n < till; ++n) {
let hash = view[n];
let data = freq.get(hash);
data
? data.at(-1) == i ? null : data.push(i)
: freq.set(hash, (data = [i]));
dict[word].push(data);
}
}
return {
t: performance.now() - t1,
dict,
refs,
init,
};
}