Public
Edited
Mar 29
Paused
Insert cell
Insert cell
Insert cell
Insert cell
// original algorithm
function spaceSaving(counters, inputStream) {
const e = Array(counters);
const c = Array(counters).fill(0);
const t0 = performance.now();
for (const x of inputStream) {
let i = e.indexOf(x);
if (i === -1) e[(i = d3.leastIndex(c))] = x;
c[i]++;
}
return { elements: e, counts: c, time: Math.floor(performance.now() - t0) };
}
Insert cell
// A variant I'm playing with — maybe 5% faster in some cases
function spaceSavingSorted(k, inputStream) {
const e = Array();
const t0 = performance.now();
const c = Array(k).fill(0);
for (const x of inputStream) {
let i = e.indexOf(x);
if (i === -1) e[(i = Math.min(k - 1, e.length))] = x;
c[i]++;
while (i > 0 && c[i - 1] < c[i]) {
[e[i], e[i - 1]] = [e[i - 1], e[i]];
[c[i], c[i - 1]] = [c[i - 1], c[i]];
i--;
}
}
return {
elements: e,
counts: c,
time: Math.floor(performance.now() - t0)
};
}
Insert cell
spaced = spaceSavingSorted(counters, dataStream)
Insert cell
// ground truth
truth = d3.sort(
d3.rollup(
dataStream,
(l) => l.length,
(d) => d
),
(d) => -d[1]
)
Insert cell
dataStream = {
const r = d3.randomGamma(10);
return Uint8Array.from({ length: 100_000 }, r);
}
Insert cell
Insert cell
import { DuckDBClient_latest as DuckDBClient } from "@mootari/duckdbclient"
Insert cell
db = DuckDBClient.of({ numbers: Array.from(dataStream, (i) => ({ i })) })
Insert cell
db
SELECT UNNEST (APPROX_TOP_K(i, ${counters}::int)) as topK from numbers
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more