Public
Edited
May 16, 2024
1 star
Insert cell
Insert cell
Insert cell
Insert cell
// Workset identifier; the `ef` cell interpolates it into the EF API request URL.
workset = "640e74be300000d90b5ce31b"
Insert cell
// Text input labelled "Cutoff". The `data` cell keeps only tokens whose
// aggregated count is strictly greater than this value.
viewof tokenCutoff = Inputs.text({label: "Cutoff"})
Insert cell
Insert cell
ef = fetch(`https://tools.htrc.illinois.edu/ef-api/worksets/${workset}/volumes?pos=false`).then(resp => resp.json())
Insert cell
Insert cell
data = {
const pagesWithTokens = ef.data.flatMap(vol => vol.features.pages.map(p => p.body.tokensCount)?.filter(p => p != null));
const punctuation = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/;
const aggregateTokenCounts = Object.entries(pagesWithTokens.reduce((a, b) => {
Object.entries(b).forEach(([k, v]) => {
const _k = k.toLowerCase();
a[_k] = (a[_k] || 0) + v;
});
return a;
}, {}));
return aggregateTokenCounts
.filter(([t,]) => !(stopwords.has(t) || punctuation.test(t)))
.sort(([, a], [, b]) => d3.descending(a, b))
.filter(([_, size]) => size > tokenCutoff)
.map(([token, size]) => ({text: token, size}));
}
Insert cell
// Set of lower-case words excluded from the word list; the `data` cell checks
// membership with stopwords.has(token). The list contains "the" twice — the
// Set de-duplicates it, so the repetition is harmless.
stopwords = new Set("er,many,without,let,put,de,got,like,get,i,me,men,my,myself,we,us,our,ours,ourselves,you,your,yours,yourself,yourselves,he,him,his,himself,she,her,hers,herself,it,its,itself,they,them,their,theirs,themselves,what,which,who,whom,whose,this,that,these,those,am,is,are,was,were,be,been,being,have,has,had,having,do,does,did,doing,will,would,should,can,could,ought,i'm,you're,he's,she's,it's,we're,they're,i've,you've,we've,they've,i'd,you'd,he'd,she'd,we'd,they'd,i'll,you'll,he'll,she'll,we'll,they'll,isn't,aren't,wasn't,weren't,hasn't,haven't,hadn't,doesn't,don't,didn't,won't,wouldn't,shan't,shouldn't,can't,cannot,couldn't,mustn't,let's,that's,who's,what's,here's,there's,when's,where's,why's,how's,a,an,the,and,but,if,or,because,as,until,while,of,at,by,for,with,about,against,between,into,through,during,before,after,above,below,to,from,up,upon,down,in,out,on,off,over,under,again,further,then,once,here,there,when,where,why,how,all,any,both,each,few,more,most,other,some,such,no,nor,not,only,own,same,so,than,too,very,say,says,said,shall,the".split(","))
Insert cell
Insert cell
Insert cell
// Loads the d3-cloud package (pinned to major version 1) via require.
d3Cloud = require("d3-cloud@1")
Insert cell
/**
 * Converts a volume id of the form `<lib>.<libId>` into its "clean" form.
 *
 * The part before the first '.' is kept as-is. In the remainder every ':'
 * becomes '+', every '/' becomes '=', and every '.' becomes ','.
 *
 * @param {string} id - Volume id containing at least one '.'.
 * @returns {string} The cleaned id, `<lib>.<cleaned libId>`.
 * @throws {Error} If `id` contains no '.' separator.
 */
function cleanId(id) {
  const separatorIndex = id.indexOf('.');
  if (separatorIndex === -1) {
    // Throw a real Error (not a bare string) so the failure carries a stack trace.
    throw new Error(`Invalid clean htid: ${id}`);
  }
  const lib = id.substring(0, separatorIndex);
  const libId = id.substring(separatorIndex + 1);
  const cleanedLibId = libId
    .replaceAll(':', '+')
    .replaceAll('/', '=')
    .replaceAll('.', ',');
  return `${lib}.${cleanedLibId}`;
}
Insert cell
// Example: ':' and '/' after the first '.' are rewritten => "gri.ark+=13960=t9575374f"
cleanId('gri.ark:/13960/t9575374f')
Insert cell
// Example: nothing to rewrite after the first '.' => "mdp.39015030736071"
cleanId('mdp.39015030736071')
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more