Public
Edited
May 21, 2024
1 fork
2 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
hamletGenerator = {
const hamlet = (await FileAttachment("hamlet.txt").text())
.split(/\s+/)
.map(stripPunctuation)
.map(porterStemmer)
.filter((x) => x);
return function* hamletWordGenerator() {
for (let word of hamlet) yield word;
};
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Size threshold passed to countDistinctCVM as its `thresh` argument.
// NOTE(review): this appears to be the CVM bound ceil((12 / ε²) · log2(8m / δ)), with ε presumably
// the target relative error, δ the allowed failure probability and m the stream length —
// confirm against the cells that define ε, δ and m.
thresh = Math.ceil((12 / ε ** 2) * Math.log2((8 * m) / δ))
// thresh = 100 // uncomment this and comment the above to hard-code instead
Insert cell
Insert cell
/**
 * Estimate the number of distinct items in a stream by keeping a bounded
 * random sample (CVM-style estimator).
 *
 * Every item is first removed from the sample and then re-inserted with the
 * current probability `p`. Whenever the sample reaches `thresh` entries, each
 * entry is dropped with probability 1/2 and `p` is halved.
 *
 * @param {Iterable<*>} a - Stream of items; compared with `Set` semantics.
 * @param {number} thresh - Sample size at which a halving round is triggered.
 * @returns {number} The estimate `sampleSize / p`.
 */
function countDistinctCVM(a, thresh) {
  let p = 1;
  const X = new Set();
  for (const item of a) {
    // Forget any earlier decision about this item; only its latest occurrence counts.
    X.delete(item);
    if (Math.random() < p) X.add(item);
    // `>=` rather than `===`: if a halving round happens to delete nothing, the
    // next insertion pushes the sample past `thresh`. With strict equality the
    // check would then never fire again and the sample would grow without bound
    // (the same happens for a non-integer `thresh`).
    if (X.size >= thresh) {
      // Deleting the entry currently being visited is safe while iterating a Set.
      for (const kept of X) if (Math.random() < 0.5) X.delete(kept);
      p /= 2;
    }
  }
  return X.size / p;
}
Insert cell
// Run the estimator over a fresh iterator of the Hamlet words, using the threshold cell's value.
approxDistinctCount = countDistinctCVM(hamletGenerator(), thresh)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Show the current value of the `thresh` cell.
thresh
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more