Public
Edited
Jan 8, 2024
Insert cell
Insert cell
Create some base data to use for the examples.
Insert cell
baseData = {
const fn = (_, i) => ({ id: i, value: i });
return {
million: Array.from({ length: 1_000_000 }).map(fn),
hundredK: Array.from({ length: 100_000 }).map(fn),
tenK: Array.from({ length: 10_000 }).map(fn),
oneK: Array.from({ length: 1_000 }).map(fn),
fiveHundred: Array.from({ length: 500 }).map(fn)
};
}
Insert cell
Insert cell
pipelines = ({
  // Every pipeline only calls `map` / `filter` on `collection`, so it can be
  // handed anything exposing those two methods. In this notebook that is a
  // plain array (vanillaBench) or a Materialite source stream
  // (materialiteBench). Entries are expected to look like { id, value }.

  // Triples each entry's value, keeping its id.
  timesThree(collection) {
    return collection.map((x) => ({
      id: x.id,
      value: x.value * 3
    }));
  },

  // Triples each value, then keeps only the entries whose tripled value is odd.
  timesThreeOdd(collection) {
    return (
      collection
        .map((x) => ({
          id: x.id,
          value: x.value * 3
        }))
        // `%` keeps the sign of the dividend (-3 % 2 === -1), so compare the
        // absolute remainder; otherwise negative odd values are dropped.
        .filter((x) => Math.abs(x.value % 2) === 1)
    );
  },

  // Keeps entries whose value is divisible by 3, 5 and 7. The three separate
  // filter stages are kept deliberately so the benchmark exercises a
  // multi-step pipeline.
  oddlyDivisible(collection) {
    return collection
      .filter((x) => x.value % 3 === 0)
      .filter((x) => x.value % 5 === 0)
      .filter((x) => x.value % 7 === 0);
  }
})
Insert cell
Insert cell
vanillaBench = {
const data = copyData(baseData);
const nextNum = predictableGenerator(1);
const ret = [];
let id = 5_000_000;

function getTask(collectionName, pipelineName) {
return () => {
const collection = data[collectionName];
// push a random value
collection.push({
id: ++id,
value: nextNum()
});
// re-run compute over the updated collection
return pipelines[pipelineName](collection);
};
}

// for each collection and each pipeline, run a micro-benchmark
for (const collectionName of Object.keys(data)) {
for (const pipelineName of Object.keys(pipelines)) {
ret.push([
`${collectionName} ${pipelineName}`,
...microBench(getTask(collectionName, pipelineName))
]);
}
}
return ret;
}
Insert cell
vanillaBench
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
Insert cell
Insert cell
materialite = new (
await import("https://esm.sh/@vlcn.io/materialite@3.0.0")
).Materialite()
Insert cell
Insert cell
Insert cell
materialiteBench = {
const data = convertToSources(materialite, baseData);
const ret = [];
const nextNum = predictableGenerator(1);
let id = 5_000_000;

function getTask(collectionName, pipelineName) {
const source = data[collectionName];
// Materialized views take a comaprator so we know where to place new enetries into the view.
// The default data structure that represents the view is a persistent tree. You can use mutable data structures.
// `limit` and `after` will be available eventually so you can materialize partial views into plain old JS arrays.
const view = pipelines[pipelineName](source.stream).materialize(
(l, r) => l.id - r.id
);

// adding a number to a source automatically runs the downstream
// compute pipeline and returns an updated view.
return () => {
source.add({
id: ++id,
value: nextNum()
});
return view.value;
};
}

// run all the benchmarks
for (const collectionName of Object.keys(data)) {
for (const pipelineName of Object.keys(pipelines)) {
ret.push([
`${collectionName} ${pipelineName}`,
...microBench(getTask(collectionName, pipelineName))
]);
}
}
return ret;
}
Insert cell
materialiteBench
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
Insert cell
comparePerf
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
Insert cell
compareSamples
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
Insert cell
/**
 * Runs `task` several times and summarizes elapsed time and heap-size change
 * per run.
 *
 * @param {Function} task - Zero-argument function to measure. The value
 *   returned by its final run must be iterable, because it is spread into an
 *   array and sampled.
 * @param {number} [iterations] - Number of runs; `null` / `undefined` means 10.
 * @returns {Array} `[minTime, maxTime, avgTime, minMem, maxMem, avgMem, sample]`
 *   where times come from `performance.now()`, the memory figures are absolute
 *   differences of `usedJSHeapSize` around each run, and `sample` holds the
 *   `value` field of up to 10 entries picked from the last result.
 */
function microBench(task, iterations) {
  const runs = iterations ?? 10;

  // `performance.memory` is non-standard and missing in several runtimes.
  // Report 0 there instead of throwing, so the timing numbers stay usable.
  const readHeapSize = () => performance.memory?.usedJSHeapSize ?? 0;

  let lastResult;
  const times = [];
  const mem = [];
  for (let run = 0; run < runs; run++) {
    // Read the heap outside the timed window so the reads do not count
    // towards the task's elapsed time.
    const memBefore = readHeapSize();
    const start = performance.now();
    lastResult = task();
    const elapsed = performance.now() - start;
    const memAfter = readHeapSize();

    times.push(elapsed);
    mem.push(Math.abs(memAfter - memBefore));
  }

  const average = (values) => values.reduce((l, r) => l + r, 0) / runs;

  return [
    Math.min(...times),
    Math.max(...times),
    average(times),
    Math.min(...mem),
    Math.max(...mem),
    average(mem),
    // `x && x.value` lets missing entries in the sample pass through as-is.
    sampleArray([...lastResult], 10).map((x) => x && x.value)
  ];
}
Insert cell
/**
 * Returns a copy of `baseData` in which every collection is a fresh array.
 *
 * The copy is shallow: the arrays are new, so pushing to them leaves the
 * originals untouched, but the row objects inside are shared.
 *
 * @param {Object<string, Array>} baseData - Collections keyed by name.
 * @returns {Object<string, Array>} Same keys, each mapped to a new array
 *   holding the same elements.
 */
function copyData(baseData) {
  // Copy whatever keys are present rather than a hard-coded list, so adding
  // a dataset does not require touching this helper.
  return Object.fromEntries(
    Object.entries(baseData).map(([name, rows]) => [name, [...rows]])
  );
}
Insert cell
/**
 * Loads every collection in `baseData` into its own Materialite source.
 *
 * @param {Object} materialite - Object providing `newUnorderedSet(keyFn)` and
 *   `tx(fn)`, as used below.
 * @param {Object<string, Array>} baseData - Collections keyed by name.
 * @returns {Object<string, Object>} Same keys, each mapped to the source
 *   created for that collection.
 */
function convertToSources(materialite, baseData) {
  const ret = {};
  for (const [key, value] of Object.entries(baseData)) {
    const source = materialite.newUnorderedSet((x) => x.id);
    // materialite currently uses set semantics so if we want duplicate entries we need unique ids.
    // TODO: add concept of array sources.
    // NOTE(review): an earlier TODO here said "this won't work -- our
    // pipelines are expecting numbers!". The pipelines in this notebook read
    // `x.id` / `x.value`, so object entries look compatible; confirm and drop.

    // All rows are added inside one `tx` callback per collection.
    materialite.tx(() => {
      for (const entry of value) {
        source.add(entry);
      }
    });
    ret[key] = source;
  }
  return ret;
}
Insert cell
/**
 * Creates a deterministic number generator: the same seed always yields the
 * same sequence.
 *
 * @param {number} seed - Starting input for the sine function; it advances
 *   by one on every call of the returned function.
 * @returns {Function} Zero-argument function returning
 *   `Math.floor(Math.sin(k) * 10000)` for k = seed, seed + 1, ...
 */
function predictableGenerator(seed) {
  let step = seed;
  return () => {
    const scaled = Math.sin(step) * 10000;
    step++;
    return Math.floor(scaled);
  };
}
Insert cell
/**
 * Picks about `n` entries from `items`: the first, the last, and evenly
 * strided entries in between.
 *
 * @param {Array} items - Array to sample from.
 * @param {number} n - Desired sample size; expected to be at least 2.
 * @returns {Array} A copy of `items` when it has `n` or fewer elements;
 *   otherwise the first element, `n - 2` strided elements and the last one.
 */
function sampleArray(items, n) {
  // Nothing to thin out. Striding over such a short input would only yield
  // repeated or `undefined` entries, so return everything instead.
  if (items.length <= n) {
    return [...items];
  }

  const ret = [items[0]];
  // Stride across the elements between the first and the last one.
  const interval = Math.floor((items.length - 2) / (n - 2));
  for (let i = 1; i < n - 1; i++) {
    ret.push(items[i * interval]);
  }
  ret.push(items[items.length - 1]);
  return ret;
}
Insert cell
/**
 * Alternates the elements of two arrays: a1[0], a2[0], a1[1], a2[1], ...
 *
 * The length of `a1` decides how many pairs are produced; positions that
 * `a2` does not have come out as `undefined`.
 *
 * @param {Array} a1 - Supplies the elements at even output positions.
 * @param {Array} a2 - Supplies the elements at odd output positions.
 * @returns {Array} New array of `2 * a1.length` elements.
 */
function interleave(a1, a2) {
  const pairs = Array.from({ length: a1.length }, (_, index) => [
    a1[index],
    a2[index]
  ]);
  // Flatten exactly one level so elements that are arrays stay intact.
  return pairs.flat();
}
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more