Public
Edited
May 16, 2024
1 star
Insert cell
Insert cell
Insert cell
Insert cell
// Workset identifier; the `ef` cell interpolates it into the EF API
// `/worksets/{id}/volumes` URL to select which volumes are fetched.
workset = "640e74be300000d90b5ce31b"
Insert cell
// Text input labelled "Cutoff". The `data` cell keeps only tokens whose
// aggregate count is greater than this value (`size > tokenCutoff`).
// NOTE(review): Inputs.text presumably yields a string, so that comparison
// relies on numeric coercion — confirm against the Observable Inputs docs.
viewof tokenCutoff = Inputs.text({label: "Cutoff"})
Insert cell
Insert cell
// Fetches the volumes of `workset` from the EF API (with `pos=false`) and
// resolves to the parsed JSON body. The `data` cell reads `ef.data`.
// Rejects with an Error on a non-2xx response so that an HTTP error payload
// is not silently handed to downstream cells as if it were volume data.
ef = fetch(`https://tools.htrc.illinois.edu/ef-api/worksets/${encodeURIComponent(workset)}/volumes?pos=false`).then((resp) => {
  if (!resp.ok) {
    throw new Error(`EF API request failed: ${resp.status} ${resp.statusText}`);
  }
  return resp.json();
})
Insert cell
Insert cell
data = {
const pagesWithTokens = ef.data.flatMap(vol => vol.features.pages.map(p => p.body.tokensCount)?.filter(p => p != null));
const punctuation = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/;
const aggregateTokenCounts = Object.entries(pagesWithTokens.reduce((a, b) => {
Object.entries(b).forEach(([k, v]) => {
const _k = k.toLowerCase();
a[_k] = (a[_k] || 0) + v;
});
return a;
}, {}));
return aggregateTokenCounts
.filter(([t,]) => !(stopwords.has(t) || punctuation.test(t)))
.sort(([, a], [, b]) => d3.descending(a, b))
.filter(([_, size]) => size > tokenCutoff)
.map(([token, size]) => ({text: token, size}));
}
Insert cell
// Set of lower-case words excluded from the aggregated token counts; the
// `data` cell tests membership with `stopwords.has(token)`.
// Built by splitting one comma-separated string. "the" appears twice in the
// list; the Set collapses the duplicate.
stopwords = new Set("er,many,without,let,put,de,got,like,get,i,me,men,my,myself,we,us,our,ours,ourselves,you,your,yours,yourself,yourselves,he,him,his,himself,she,her,hers,herself,it,its,itself,they,them,their,theirs,themselves,what,which,who,whom,whose,this,that,these,those,am,is,are,was,were,be,been,being,have,has,had,having,do,does,did,doing,will,would,should,can,could,ought,i'm,you're,he's,she's,it's,we're,they're,i've,you've,we've,they've,i'd,you'd,he'd,she'd,we'd,they'd,i'll,you'll,he'll,she'll,we'll,they'll,isn't,aren't,wasn't,weren't,hasn't,haven't,hadn't,doesn't,don't,didn't,won't,wouldn't,shan't,shouldn't,can't,cannot,couldn't,mustn't,let's,that's,who's,what's,here's,there's,when's,where's,why's,how's,a,an,the,and,but,if,or,because,as,until,while,of,at,by,for,with,about,against,between,into,through,during,before,after,above,below,to,from,up,upon,down,in,out,on,off,over,under,again,further,then,once,here,there,when,where,why,how,all,any,both,each,few,more,most,other,some,such,no,nor,not,only,own,same,so,than,too,very,say,says,said,shall,the".split(","))
Insert cell
Insert cell
Insert cell
// Loads the d3-cloud package (major version 1) through `require`.
// NOTE(review): no visible cell references `d3Cloud`; presumably a
// word-cloud rendering cell elsewhere in the notebook uses it — confirm.
d3Cloud = require("d3-cloud@1")
Insert cell
/**
 * Rewrites a volume id (htid) of the form `<lib>.<libId>` so that the part
 * after the first '.' contains no ':', '/' or '.' characters.
 *
 * The prefix up to the first '.' is kept as-is. In the remainder, every ':'
 * becomes '+', every '/' becomes '=', and every '.' becomes ','.
 *
 * @param {string} id - Volume id; must contain at least one '.'.
 * @returns {string} The rewritten id, `<lib>.<cleaned libId>`.
 * @throws {Error} If `id` contains no '.' separator.
 */
function cleanId(id) {
  const separatorIndex = id.indexOf('.');
  if (separatorIndex === -1) {
    // Throw a real Error (not a bare string) so callers get a stack trace and `.message`.
    throw new Error(`Invalid clean htid: ${id}`);
  }
  const lib = id.substring(0, separatorIndex);
  const libId = id.substring(separatorIndex + 1);
  const cleanedLibId = libId
    .replaceAll(':', '+')
    .replaceAll('/', '=')
    .replaceAll('.', ',');
  return `${lib}.${cleanedLibId}`;
}
Insert cell
// Example: an id whose suffix contains ':' and '/'.
// Evaluates to 'gri.ark+=13960=t9575374f'.
cleanId('gri.ark:/13960/t9575374f')
Insert cell
// Example: an id with nothing to replace after the first '.'.
// Evaluates to 'mdp.39015030736071' (unchanged).
cleanId('mdp.39015030736071')
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more