Published unlisted
Jun 11, 2019
// Ordinal colour scale whose range is d3's built-in Accent scheme.
colors = d3.scaleOrdinal(d3.schemeAccent)
// Slider input initialised to 0.2.
// NOTE(review): the other slider in this notebook uses `min`/`max`; confirm
// that the imported `slider` actually honours a `range` option.
viewof CLOSENESS_THRESHOLD = slider({'range': [0, 1], 'value': 0.2})
// Slider input spanning 1 to 50, initialised to 10.
viewof STEP = slider({'min': 1, 'max': 50, 'value': 10})
// Padding reserved around the plotting area, one entry per side.
margin = ({top: 30, right: 80, bottom: 30, left: 30});
// Inner width: a 700-unit outer width minus the left and right margins.
width = 700 - margin.left - margin.right
// Inner height: a 1000-unit outer height minus the top and bottom margins.
// The expression previously read `1000 - - margin.bottom`, which *added* the
// bottom margin; it now mirrors the width computation.
height = 1000 - margin.top - margin.bottom
// Faceted line chart of "windowed similarity": one panel per (k1, k2) pair,
// plotting `sim` against its rank within that pair.
// NOTE(review): assumes each row of sim_mat carries `k1`, `k2` and `sim`
// fields — confirm against what similarity_matrix returns.
embed({
  "data": {"values": sim_mat},
  // "mark" used to be declared twice ("rect", then "line"). A duplicate key
  // resolves to its last value, so only the effective one is kept.
  "mark": "line",
  "encoding": {
    "y": {
      "field": "sim",
      "type": "quantitative"
    },
    "x": {
      "field": "rank",
      "type": "quantitative"
    },
    "row": {"field": "k2"},
    "column": {"field": "k1"},
    "detail": {"field": "interaction"},
    "color": {"field": "k1"}
  },
  "transform": [
    // Drop rows where both sides are the same key.
    {"filter": "datum.k1 != datum.k2"},
    // One id per (k1, k2) pair, used by the "detail" channel above.
    {"as": "interaction", "calculate": "'' + datum.k1 + datum.k2"},
    // Rank rows by descending similarity inside each (k1, k2) group. In a
    // Vega-Lite window transform, `sort` is a sibling of `window`, not a
    // property of the individual window operation.
    {
      "groupby": ["k1", "k2"],
      "window": [{
        "op": "rank",
        "as": "rank"
      }],
      "sort": [{ "field": "sim", "order": "descending" }]
    },
    // Keep only the top-ranked rows; the cutoff is derived from the number of
    // rows in sim_mat and the number of ids.
    {"filter": `datum.rank <= ${Math.sqrt(sim_mat.length) * 4 / Math.sqrt(ids.length)}`}
  ],
  // Each facet panel shrinks as the number of ids grows.
  "width": 400 / ids.length,
  "height": 400 / ids.length,
  "title": `Windowed similarity`
})
// Lookup table from metric name to the function that implements it.
// A plain object literal replaces the redundant `new Object({...})` wrapper;
// the resulting object has the same keys and values.
metrics = ({jaccard, cosine})
/**
 * Cosine similarity between two sparse vectors stored as key -> number objects.
 *
 * The result is the dot product over the keys of `a` (keys missing from `b`
 * contribute 0) divided by the Euclidean norm of each vector. Despite the
 * wording of the earlier comment this is a similarity, not a distance:
 * identical directions give 1.
 *
 * @param {Object<string, number>} a - First vector.
 * @param {Object<string, number>} b - Second vector.
 * @returns {number} The cosine similarity, or 0 when either vector has zero
 *   norm (including empty objects), where the ratio would otherwise be NaN.
 */
function cosine(a, b) {
  // Own-property check so keys such as "constructor" never pick up values
  // inherited from Object.prototype.
  const hasOwn = Object.prototype.hasOwnProperty;
  const dot = d3.sum(
    Object.keys(a).map((k) => (hasOwn.call(b, k) ? a[k] * b[k] : 0))
  );
  const normA = Math.sqrt(d3.sum(Object.values(a).map((d) => d * d)));
  const normB = Math.sqrt(d3.sum(Object.values(b).map((d) => d * d)));
  if (normA === 0 || normB === 0) {
    return 0;
  }
  return dot / normA / normB;
}
import { Library } from '@bmschmidt/javascript-bindings-to-the-hathi-features-data'
import { jaccard, similarity_matrix } from '@bmschmidt/book-visualizations-sandbox'
// Create a Library and ask it to fetch every volume listed in `ids`
// (`ids` is defined outside this view).
// NOTE(review): fetch_all presumably yields the populated library, or a
// promise of it — full_similarity_matrix reads its `m` property; confirm.
library = new Library().fetch_all(ids)
/**
 * Build similarity rows for every unordered pair of books in a library,
 * including each book paired with itself.
 *
 * Books are ordered by label, then each pair (i, j) with i <= j is passed to
 * `similarity_matrix` along with both labels; all results are concatenated.
 * `chunk_size` and `similarity_matrix` are defined outside this function.
 *
 * @param {{m: Map}} library - Object whose `m` map holds the books as values.
 *   Each book must expose `data.metadata.pubDate`, `data.metadata.title` and
 *   a `count(prefs)` method.
 * @returns {Array} Concatenation of the `similarity_matrix` results, in
 *   sorted-label pair order.
 */
function full_similarity_matrix(library) {
  const { m } = library;

  // Human-readable key for a book.
  // NOTE(review): the third component was an empty `${}` in the source as
  // received (a syntax error); `data.id` is presumed — confirm.
  function label(book) {
    const { data } = book;
    return `${data.metadata.pubDate}-${data.metadata.title}-${data.id}`;
  }

  // Compute each label once and sort by it. The comparator returns -1/0/1:
  // the previous boolean comparator (`a > b`) could never report "less than",
  // so the resulting order was not reliable.
  const books = Array.from(m.values(), (book) => ({ book, name: label(book) }));
  books.sort((a, b) => {
    if (a.name < b.name) return -1;
    if (a.name > b.name) return 1;
    return 0;
  });

  // Same options for every count() call, so build them once.
  const prefs = { doc: 'chunk', size: chunk_size };

  // Collect per-pair results and concatenate once at the end instead of
  // re-copying the growing array on every pair.
  const chunks = [];
  books.forEach((first, i) => {
    // NOTE(review): the logged expression was also an empty `${}`; the
    // first book's label is presumed.
    console.log(`Building sims for ${first.name}`);
    // Start at i so each unordered pair (and each self-pair) is visited once.
    for (let j = i; j < books.length; j += 1) {
      const second = books[j];
      // Random subsampling of these counts by `drop_share` was commented out
      // in the original and stays disabled.
      const aa = first.book.count(prefs);
      const bb = second.book.count(prefs);
      chunks.push(similarity_matrix(aa, bb, first.name, second.name));
    }
  });

  return [].concat(...chunks);
}

// NOTE(review): no live code in view reads this value; it looks like a
// subsampling rate — confirm whether it is still needed.
drop_share = .97
// Similarity rows for the whole library; this is the data the chart plots.
sim_mat = full_similarity_matrix(library)

vegalite = require("@observablehq/vega-lite@0.1")

embed = require('vega-embed')
d3Fetch = require('d3-fetch')
d3 = require('d3@5')
import {slider, select} from "@jashkenas/inputs"
