Published
Edited
Aug 2, 2022
Importers
5 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// HathiTrust volume ids (htids) loaded into the `lib` Library cell.
textbooks = ['mdp.39015029925362']
Insert cell
Insert cell
// Total token count per volume: groups `counts` by htid and sums the `count` column.
// NOTE(review): `counts` is not defined in this excerpt — presumably a token table such as `lib.table`; confirm.
counts.groupby("htid").rollup({count: d => op.sum(d.count)}).view()
Insert cell
// Occurrences of the token "first" per volume: filter to that token, then sum `count` within each htid.
// Uses strict equality; tokens are compared as plain strings.
counts.filter(d => d.token === "first").groupby(["htid"]).rollup({count: d => op.sum(d.count)}).view()
Insert cell
// All rows of `counts` whose token is exactly "first" (strict string comparison).
counts.filter(d => d.token === "first").view()
Insert cell
// A Library over `textbooks`. `ready` is a promise that resolves to the Library
// itself once every volume has downloaded; Observable awaits promise-valued
// cells, so dependent cells see the loaded Library.
lib = new Library(textbooks).ready
Insert cell
md`# Library functions

These are the general library functions.

`
Insert cell
Type JavaScript, then Shift-Enter. Ctrl-space for more options. Arrow ↑/↓ to switch modes.

Insert cell
/**
 * A collection of Volume objects keyed by htid.
 *
 * Volumes are created (and therefore start downloading) as soon as their ids
 * are added; await `ready` before reading `meta_table`, `table` or `repr()`.
 */
class Library {

  /**
   * @param {Iterable<string>} [ids] - htids to add immediately; omit to start empty.
   */
  constructor(ids) {
    this.volumes = new Map()
    if (ids) {
      this.add_ids(ids)
    }
  }

  /**
   * Promise that resolves to this Library once every volume's own `ready`
   * promise has resolved; rejects if any of them rejects.
   */
  get ready() {
    return Promise.all(this.vols.map(vol => vol.ready)).then(() => this)
  }

  /** The volumes as an array, in insertion order. */
  get vols() {
    return [...this.volumes.values()]
  }

  /**
   * One-row-per-volume metadata table with columns contributor, title,
   * pubDate, pubPlace and htid. Calls `vol.meta(...)` on each volume.
   */
  get meta_table() {
    const columns = {}
    for (const key of ["contributor", "title", "pubDate", "pubPlace", "htid"]) {
      columns[key] = this.vols.map(vol => (key === "htid" ? vol.htid : vol.meta(key)))
    }
    return aq.table(columns)
  }

  /**
   * Registers a new Volume for each id (replacing any volume already stored
   * under that id) and invalidates the cached combined table.
   *
   * @param {Iterable<string>} ids - htids to add.
   * @returns {Library} this, for chaining.
   */
  add_ids(ids) {
    this._table = undefined
    for (const id of ids) {
      this.volumes.set(id, new Volume(id))
    }
    return this
  }

  /**
   * HTML fragment: a heading followed by an ordered list with one
   * `vol.repr()` entry per volume.
   *
   * @returns {string}
   */
  repr() {
    let output = "<h3>Books plotted</h3><ol>"
    for (const vol of this.volumes.values()) {
      output += `\n<li>${vol.repr()}</li>\n`
    }
    output += "</ol>"
    return output
  }

  /**
   * The token tables of all volumes concatenated into one table.
   *
   * The result is cached in `_table` until `add_ids` is called again, so
   * repeated reads do not re-concatenate every volume.
   *
   * @throws {Error} if the library contains no volumes.
   */
  get table() {
    if (this._table) {
      return this._table
    }
    const tables = this.vols.map(vol => vol.table)
    if (tables.length === 0) {
      throw new Error("Library has no volumes to build a table from")
    }
    const [first, ...rest] = tables
    this._table = first.concat(rest)
    return this._table
  }
}

Insert cell
/**
 * Flattens a metadata value into a display string.
 *
 * - `undefined` / `null` become the empty string.
 * - Arrays are flattened recursively and joined with "; ".
 * - Values with a truthy `name` property yield that name.
 * - Anything else is converted with String().
 *
 * Arrays are detected with Array.isArray rather than by the presence of a
 * `sort` member, so a record that happens to carry a `sort` field is not
 * mistaken for a list.
 *
 * @param {*} entry - a metadata value (scalar, record, or array of either).
 * @returns {string}
 */
function extract_name_fields(entry) {
  if (entry === undefined || entry === null) {
    return ""
  }
  if (Array.isArray(entry)) {
    return entry.map(item => extract_name_fields(item)).join("; ")
  }
  if (entry.name) {
    return String(entry.name)
  }
  return String(entry)
}
Insert cell
// Example volume. `ready` resolves to the Volume itself once its data has
// downloaded; Observable awaits promise-valued cells, so cells that read
// `v.table` only run after the data is loaded.
v = new Volume("mdp.39015082968309").ready
Insert cell
// Renders the token table of `v`. `Volume.table` throws if the volume's data has not been downloaded yet.
v.table.view()
Insert cell
/**
 * One HathiTrust volume, backed by its features file from
 * data.analytics.hathitrust.org (bz2-compressed JSON).
 *
 * The download starts in the constructor; await `ready` (or `download()`)
 * before calling `meta`, `repr` or reading `table`.
 */
class Volume {
  /**
   * @param {string} htid - HathiTrust volume id, e.g. "mdp.39015029925362".
   * @param {string} [cors_prefix] - string prepended to the data URL
   *   (presumably a CORS proxy — confirm the default host is still live).
   */
  constructor(htid, cors_prefix = `https://bmschmidt-cors-observable.herokuapp.com/`) {
    this.htid = htid
    this.cors_prefix = cors_prefix
    // Start downloading eagerly. A failure is reported to whoever awaits
    // `ready` / `download()` (they share this promise); the no-op handler only
    // prevents an unhandled-rejection report for this fire-and-forget call.
    this.download().catch(() => {})
  }

  /** Promise that resolves to this Volume once its JSON has been loaded. */
  get ready() {
    return this.download().then(() => this)
  }

  /**
   * Fetches, bz2-decompresses and parses the volume's JSON.
   * The parsed result is kept in `_json`, and the in-flight promise in
   * `_underway`, so repeated calls never start a second request.
   *
   * @returns {Promise<object>} the parsed JSON document.
   */
  download() {
    if (this._json) {
      return Promise.resolve(this._json)
    }
    if (this._underway) {
      return this._underway
    }
    const url = this.cors_prefix + `https://data.analytics.hathitrust.org/features-2020.03/${this.id_to_stubbytree(this.htid)}`
    this._underway = fetch(url)
      .then(response => {
        // Without this check an error page would be handed to the bz2
        // decoder and fail with an unrelated message.
        if (!response.ok) {
          throw new Error(`Failed to download ${this.htid}: HTTP ${response.status}`)
        }
        return response.arrayBuffer()
      })
      .then(buffer => bz2.decompress(new Uint8Array(buffer)))
      .then(bytes => {
        this._json = JSON.parse(new TextDecoder().decode(bytes))
        return this._json
      })
    return this._underway
  }

  /**
   * A metadata field of the loaded volume, flattened to a string by
   * `extract_name_fields`.
   *
   * @param {string} k - key inside the JSON's `metadata` object.
   * @returns {string}
   */
  meta(k) {
    return extract_name_fields(this._json.metadata[k])
  }

  /**
   * HTML representation of the book suitable for linking back to Hathi.
   *
   * @returns {string}
   */
  repr() {
    return `${this.meta("contributor")} <em>${this.meta("title")}</em>. (${this.meta("pubPlace")}, ${this.meta("pubDate")}) <code><a href=https://babel.hathitrust.org/cgi/pt?id=${this.htid}>${this.htid}</a></code>`
  }

  /**
   * Converts an htid into its "stubbytree" path:
   * `<libid>/<every third character of the cleaned volume id>/<libid>.<cleaned volume id>.json.bz2`.
   *
   * The volume id is everything after the FIRST "." and is cleaned with the
   * pairtree character substitutions ("/" -> "=", ":" -> "+", "." -> ","),
   * applied to every occurrence.
   *
   * @param {string} htid - e.g. "uc2.ark:/13960/t4mk66f1d".
   * @returns {string} relative path of the compressed JSON file.
   * @throws {Error} if `htid` has no "." separating library and volume id.
   */
  id_to_stubbytree(htid) {
    const dot = htid.indexOf(".")
    if (dot < 0) {
      throw new Error(`Invalid htid (missing library prefix): ${htid}`)
    }
    const libid = htid.slice(0, dot)
    const volid = htid.slice(dot + 1)
    const substitutions = { "/": "=", ":": "+", ".": "," }
    const volid_clean = volid.replace(/[\/:.]/g, ch => substitutions[ch])
    const stub = volid_clean.split("").filter((ch, i) => i % 3 === 0).join("")
    return [libid, stub, `${libid}.${volid_clean}.json.bz2`].join("/")
  }

  /**
   * Long-format token table with one row per (page, section, token, part of
   * speech): columns token, sequence, count, section, pos and htid.
   * Built once from the loaded JSON and cached in `_table`.
   *
   * @throws {Error} if the data has not been downloaded yet.
   */
  get table() {
    if (this._table) {
      return this._table
    }
    if (!this._json) {
      throw new Error("Table called before data loaded")
    }

    const tokens = []
    const pages = []
    const counts = []
    const sections = []
    const poses = []
    for (const page of this._json.features.pages) {
      const seq = +page.seq
      for (const section of ['header', 'footer', 'body']) {
        const sect = page[section]
        // Skip sections that are absent or carry no token counts.
        if (sect == null || sect.tokenPosCount == null) {
          continue
        }
        for (const [word, posCounts] of Object.entries(sect.tokenPosCount)) {
          for (const [pos, count] of Object.entries(posCounts)) {
            tokens.push(word)
            pages.push(seq)
            counts.push(count)
            sections.push(section)
            poses.push(pos)
          }
        }
      }
    }
    this._table = aq.table({token: tokens, sequence: pages, count: counts, section: sections, pos: poses, htid: Array(tokens.length).fill(this.htid)})
    return this._table
  }
}
Insert cell
import {aq, op} from '@uwdata/arquero'
Insert cell
// bz2 decompressor (version 1.0.1) used by Volume.download; if require rejects, falls back to window["bz2"].
bz2 = require('bz2@1.0.1/index.js').catch(() => window["bz2"])
Insert cell
md`# Tests

Here are some unit tests.

`
Insert cell
// d3 bundle combining d3, d3-fetch and d3-array. Not referenced by any code visible in this excerpt.
d3 = require('d3', 'd3-fetch', 'd3-array')
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more