/**
 * A single HathiTrust volume whose Extracted Features file is fetched,
 * bz2-decompressed and parsed in the browser.
 *
 * Relies on names that are not defined in this class and must be in scope
 * where it runs: `fetch`, `bz2` (with a `decompress` function),
 * `extract_name_fields`, and `aq` (with a `table` function; presumably
 * Arquero -- confirm against the surrounding file).
 */
class Volume {
  /**
   * @param {string} htid - HathiTrust volume id, e.g. `mdp.39015012345678`.
   * @param {string} [cors_prefix] - URL prefix prepended verbatim to the data URL.
   */
  constructor(htid, cors_prefix = `https://bmschmidt-cors-observable.herokuapp.com/`) {
    this.htid = htid;
    this.cors_prefix = cors_prefix;
    // Start the download eagerly. A failure is deliberately not reported
    // here: it would otherwise be an unhandled rejection, and the error
    // resurfaces to anyone who awaits `download()` or `ready`.
    this.download().catch(() => {});
  }

  /**
   * @returns {Promise<Volume>} Resolves to this volume once its JSON is loaded.
   */
  get ready() {
    return this.download().then(() => this);
  }

  /**
   * Fetch, decompress and parse the volume's feature file.
   *
   * The parsed JSON is cached on `_json`; concurrent callers share the
   * in-flight promise stored on `_underway`. A failed attempt clears
   * `_underway`, so a later call starts a fresh request.
   *
   * @returns {Promise<object>} The parsed feature JSON.
   */
  download() {
    if (this._json) {
      return Promise.resolve(this._json);
    }
    if (this._underway) {
      return this._underway;
    }
    const url = `${this.cors_prefix}https://data.analytics.hathitrust.org/features-2020.03/${this.id_to_stubbytree(this.htid)}`;
    this._underway = fetch(url)
      .then((response) => {
        // Without this check an HTTP error page would be handed to the bz2
        // decoder and fail with an unrelated-looking message.
        if (!response.ok) {
          throw new Error(`Failed to download ${this.htid}: HTTP ${response.status}`);
        }
        return response.arrayBuffer();
      })
      .then((data) => bz2.decompress(new Uint8Array(data)))
      .then((array) => {
        this._json = JSON.parse(new TextDecoder().decode(array));
        return this._json;
      })
      .catch((error) => {
        // Forget the failed attempt so the next call can retry.
        this._underway = undefined;
        throw error;
      });
    return this._underway;
  }

  /**
   * Look up one metadata field and pass it through `extract_name_fields`.
   * Requires the JSON to be loaded already.
   *
   * @param {string} k - Key into the volume's `metadata` object.
   * @returns {*} Whatever `extract_name_fields` returns for that field.
   */
  meta(k) {
    return extract_name_fields(this._json.metadata[k]);
  }

  /**
   * Build an HTML citation snippet for the volume, ending in a link to the
   * HathiTrust page-turner. Requires the JSON to be loaded already.
   *
   * @returns {string} An HTML fragment.
   */
  repr() {
    return `${this.meta("contributor")} <em>${this.meta("title")}</em>. (${this.meta("pubPlace")}, ${this.meta("pubDate")}) <code><a href=https://babel.hathitrust.org/cgi/pt?id=${this.htid}>${this.htid}</a></code>`;
  }

  /**
   * Convert a HathiTrust id to its "stubbytree" relative path:
   * `<library>/<every third character of the cleaned volume id>/<library>.<cleaned volume id>.json.bz2`.
   *
   * The volume id is everything after the FIRST dot and is cleaned with the
   * Pairtree character mapping (`:` -> `+`, `/` -> `=`, `.` -> `,`), applied
   * to every occurrence.
   *
   * @param {string} htid - HathiTrust volume id.
   * @returns {string} Relative path of the compressed feature file.
   * @throws {TypeError} If `htid` contains no `.` separator.
   */
  id_to_stubbytree(htid) {
    const dot = htid.indexOf(".");
    if (dot === -1) {
      throw new TypeError(`Invalid HathiTrust id (no library prefix): ${htid}`);
    }
    const libid = htid.slice(0, dot);
    const volid_clean = htid
      .slice(dot + 1)
      .replace(/:/g, "+")
      .replace(/\//g, "=")
      .replace(/\./g, ",");
    const stub = [...volid_clean].filter((_, i) => i % 3 === 0).join("");
    return [libid, stub, `${libid}.${volid_clean}.json.bz2`].join("/");
  }

  /**
   * Long-format table with one row per (page, section, token, part of speech).
   * Columns: `token`, `sequence`, `count`, `section`, `pos`, `htid`.
   * Built on first access and cached on `_table`.
   *
   * @returns {*} The result of `aq.table(...)`.
   * @throws {Error} If accessed before the JSON has been loaded.
   */
  get table() {
    if (this._table) {
      return this._table;
    }
    if (!this._json) {
      throw new Error("Table called before data loaded");
    }
    const tokens = [];
    const pages = [];
    const counts = [];
    const sections = [];
    const poses = [];
    for (const page of this._json.features.pages) {
      const seq = +page.seq;
      for (const section of ["header", "footer", "body"]) {
        const sect = page[section];
        // A page may lack a section entirely (null or undefined).
        if (sect == null) {
          continue;
        }
        for (const [word, posCounts] of Object.entries(sect.tokenPosCount)) {
          for (const [pos, count] of Object.entries(posCounts)) {
            tokens.push(word);
            pages.push(seq);
            counts.push(count);
            sections.push(section);
            poses.push(pos);
          }
        }
      }
    }
    this._table = aq.table({
      token: tokens,
      sequence: pages,
      count: counts,
      section: sections,
      pos: poses,
      htid: Array(tokens.length).fill(this.htid),
    });
    return this._table;
  }
}