Public
Edited
May 21, 2024
1 fork
2 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
hamletGenerator = {
const hamlet = (await FileAttachment("hamlet.txt").text())
.split(/\s+/)
.map(stripPunctuation)
.map(porterStemmer)
.filter((x) => x);
return function* hamletWordGenerator() {
for (let word of hamlet) yield word;
};
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
thresh = Math.ceil((12 / ε ** 2) * Math.log2((8 * m) / δ))
// thresh = 100 // uncomment this and comment the above to hard-code instead
Insert cell
Insert cell
/**
 * Estimate the number of distinct items in a stream using a bounded-size
 * random sample (the CVM distinct-elements estimator).
 *
 * The function keeps a set of sampled items together with a sampling
 * probability `p`. Each incoming item is first removed from the sample and
 * then re-inserted with probability `p`. Whenever the sample reaches
 * `thresh` entries, every entry is independently discarded with probability
 * 1/2 and `p` is halved. The estimate is the final sample size divided by `p`.
 *
 * If the number of distinct items never reaches `thresh`, `p` stays at 1 and
 * the result is the exact distinct count.
 *
 * @param {Iterable<*>} a - Items to count; consumed exactly once, so a
 *   generator object works. Items are compared with `Set` semantics.
 * @param {number} thresh - Maximum sample size; must be a number >= 1.
 *   `Infinity` disables sampling and yields an exact count.
 * @returns {number} Estimated number of distinct items (0 for an empty input).
 * @throws {RangeError} If `thresh` is NaN or less than 1.
 */
function countDistinctCVM(a, thresh) {
  // A threshold below 1 (or NaN) can never be satisfied by the pruning loop
  // below, so reject it up front instead of silently misbehaving.
  if (!(thresh >= 1)) {
    throw new RangeError(`thresh must be a number >= 1, got ${thresh}`);
  }

  let p = 1;
  const sample = new Set();

  for (const item of a) {
    // Forget any earlier decision about this item so that only its most
    // recent occurrence determines whether it is in the sample.
    sample.delete(item);
    if (Math.random() < p) sample.add(item);

    // Prune until the sample is strictly below the threshold. A single
    // halving round can, with small probability, discard nothing; checking
    // only once with `===` would then let the sample grow past `thresh`
    // and the halving step would never trigger again.
    while (sample.size >= thresh) {
      // Deleting from a Set while iterating over it is well-defined in JS.
      for (const kept of sample) {
        if (Math.random() < 0.5) sample.delete(kept);
      }
      p /= 2;
    }
  }

  return sample.size / p;
}
Insert cell
approxDistinctCount = countDistinctCVM(hamletGenerator(), thresh)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Display the current value of the thresh cell.
thresh
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more