Unlisted
Edited
May 24, 2023
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
N = 20_000 // universe size
Insert cell
1
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
function estimate(N, n) {
// compute stats about the EF representation for n elements from universe size N
const info = stats(N, n);
// compute the size and number of the high bit blocks
const blockSize = 2 ** info.loBits;
const numBlocks = Math.ceil(N / blockSize);
// generate a random sequence with approximately n values in [0, N)
const values = generateValues(N, n);
// group them by high bit block and count the number of elements
const lengthPerBlock = d3.rollup(
values,
(g) => g.length,
(d) => d >>> info.loBits
);
// compute the expected number of elements accessed for linear search
const linear =
d3.sum(lengthPerBlock.values(), (k) => (k + 1) / 2) / numBlocks;
// compute the expected number of elements accessed for log (binary) search
const log =
d3.sum(lengthPerBlock.values(), (k) => Math.log2(k) + 1 / (k + 1)) /
numBlocks;
return {
values,
linear,
log,
lengthPerBlock,
info,
numBlocks,
blockSize,
info
};
}
Insert cell
values = est.values
Insert cell
r = d3.range(N / 100, N + 1, N / 100)
Insert cell
linear = est.linear
Insert cell
log = est.log
Insert cell
data = r.map((n) => estimate(N, n))
Insert cell
info = est.info
Insert cell
est = (regenerate, estimate(N, n))
Insert cell
sim = {
// count the number of elements probed for random arrays of length 2**power
let count = 0;
let iters = 10_000;
let power = 4;
const xs = new Array(2 ** power);
for (let i = 0; i < iters; i++) {
// randomize the array
for (let j = 0; j < xs.length; j++) xs[j] = Math.random();
// search for a random element and accumulate the number of accesses
binarySearchAfterAccess(
(i) => (count++, xs[i]),
Math.random(),
0,
xs.length
);
}
// return the number of accesses per binary search
return count / iters;
}
Insert cell
nReal = values.length
Insert cell
percent = d3.format(".2%")
Insert cell
bpeFormat = d3.format(".2f")
Insert cell
comma = d3.format(",.2f")
Insert cell
icomma = d3.format(",")
Insert cell
lengthPerBlock = est.lengthPerBlock
Insert cell
blockLengthDistribution = {
const m = d3.rollup(
est.lengthPerBlock.values(),
(g) => g.length,
(d) => d
);
m.set(0, est.numBlocks - est.lengthPerBlock.size);
return m;
}
Insert cell
blockSize = est.blockSize
Insert cell
numBlocks = est.numBlocks
Insert cell
generateValues = (N, n) => {
let sample = new Uint32Array(2 * N);
let prob = n / N;
let count = 0;
for (let i = 0; i < N; i++) if (Math.random() < prob) sample[count++] = i;
return sample.slice(0, count);
}
Insert cell
stats(1e6, 1e4)
Insert cell
10000 / 2 ** 14
Insert cell
import { stats } from "70f2503017f350a5"
Insert cell
import { Plot } from "@observablehq/plot-0-6-7"
Insert cell
import { binarySearchAfterAccess } from "@yurivish/bitvectors-with-runs-3"
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more