Public
Edited
Mar 29
Paused
Insert cell
Insert cell
Insert cell
Insert cell
// original algorithm
function spaceSaving(counters, inputStream) {
const e = Array(counters);
const c = Array(counters).fill(0);
const t0 = performance.now();
for (const x of inputStream) {
let i = e.indexOf(x);
if (i === -1) e[(i = d3.leastIndex(c))] = x;
c[i]++;
}
return { elements: e, counts: c, time: Math.floor(performance.now() - t0) };
}
Insert cell
// A variant I'm playing with — maybe 5% faster in some cases
function spaceSavingSorted(k, inputStream) {
const e = Array();
const t0 = performance.now();
const c = Array(k).fill(0);
for (const x of inputStream) {
let i = e.indexOf(x);
if (i === -1) e[(i = Math.min(k - 1, e.length))] = x;
c[i]++;
while (i > 0 && c[i - 1] < c[i]) {
[e[i], e[i - 1]] = [e[i - 1], e[i]];
[c[i], c[i - 1]] = [c[i - 1], c[i]];
i--;
}
}
return {
elements: e,
counts: c,
time: Math.floor(performance.now() - t0)
};
}
Insert cell
spaced = spaceSavingSorted(counters, dataStream)
Insert cell
// ground truth
truth = d3.sort(
d3.rollup(
dataStream,
(l) => l.length,
(d) => d
),
(d) => -d[1]
)
Insert cell
dataStream = {
const r = d3.randomGamma(10);
return Uint8Array.from({ length: 100_000 }, r);
}
Insert cell
Insert cell
import { DuckDBClient_latest as DuckDBClient } from "@mootari/duckdbclient"
Insert cell
db = DuckDBClient.of({ numbers: Array.from(dataStream, (i) => ({ i })) })
Insert cell
db
SELECT UNNEST (APPROX_TOP_K(i, ${counters}::int)) as topK from numbers
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more