Published unlisted
Edited
Jun 11, 2019
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Ordinal color scale whose output range is d3's Accent scheme; no domain is set here.
colors = d3.scaleOrdinal(d3.schemeAccent)
Insert cell
// Inspection cell: show the domain currently held by the color scale.
colors.domain()
Insert cell
// Slider-backed input for the closeness threshold, starting at 0.2.
// NOTE(review): this passes 'range' while the STEP slider passes 'min'/'max' —
// confirm that the inputs library actually honors a 'range' option.
viewof CLOSENESS_THRESHOLD = slider({'range': [0, 1], 'value': 0.2})
Insert cell
// Slider-backed input for STEP, bounded to 1..50 and starting at 10.
// NOTE(review): no 'step' option is passed — confirm whether whole-number values are expected.
viewof STEP = slider({'min': 1, 'max': 50, 'value': 10})
Insert cell
// NOTE(review): FIXED_ITEM is not referenced by any cell visible in this section —
// confirm it is still needed.
FIXED_ITEM = 1
Insert cell
Insert cell
Insert cell
Insert cell
// Chart margins; the width and height cells subtract these from the outer size.
margin = ({top: 30, right: 80, bottom: 30, left: 30});
Insert cell
// Inner drawing width: 700 minus the left and right margins.
// NOTE(review): Observable's standard library also exposes a `width` value; this cell
// presumably overrides it for the notebook — confirm that is intended.
width = 700 - margin.left - margin.right
Insert cell
// Inner drawing height: 1000 minus the top and bottom margins.
height = 1000 - margin.top - margin.bottom
Insert cell
// Inspection cell: display the rows produced by full_similarity_matrix.
sim_mat
Insert cell
Insert cell
// Grid of small line charts (one row per k2, one column per k1) showing, for each
// pair of books, the highest similarity scores ordered by rank.
// The spec reads the fields `sim`, `k1` and `k2` from each sim_mat row.
// The original literal declared "mark" twice ("rect", then "line"); only the last
// duplicate key takes effect, so the dead "rect" entry has been removed.
embed({
  data: {values: sim_mat},
  "mark": "line",
  "encoding": {
    "y": {
      "field": "sim",
      "type": "quantitative"
    },
    "x": {
      "field": "rank",
      "type": "quantitative"
    },
    "row": {"field": "k2"},
    "column": {"field": "k1"},
    "detail": {"field": "interaction"},
    "color": {"field": "k1"}
  },
  "transform": [
    {
      // Drop rows that compare a book with itself.
      "filter": "datum.k1 != datum.k2"
    },
    {
      // One identifier per (k1, k2) pair, used by the "detail" channel.
      "as": "interaction", "calculate": "'' + datum.k1 + datum.k2"
    },
    {
      // Rank rows inside each (k1, k2) pair from most to least similar.
      "groupby": ["k1", "k2"],
      "window": [{
        "op": "rank",
        "as": "rank",
      }],
      "sort": [{ "field": "sim", "order": "descending" }]
    },
    {
      // Keep only the top-ranked rows; the cutoff is computed in JavaScript when
      // this cell runs and is embedded in the filter expression as a number.
      "filter": `datum.rank <= ${Math.sqrt(sim_mat.length) * 4 / Math.sqrt(ids.length)}`
    }
  ],
  // Each facet shrinks as more books are compared.
  "width": 400/ids.length, "height": 400/ids.length, "title": `Windowed similarity`})
Insert cell
Insert cell
Insert cell
// Lookup table of the available similarity metrics, keyed by metric name.
// A plain object literal replaces the redundant `new Object(...)` wrapper.
metrics = ({jaccard, cosine})
Insert cell
/**
 * Cosine similarity between two sparse vectors stored as key -> number objects.
 *
 * The result is the dot product over the keys both objects own, divided by the
 * Euclidean norm of each object's values. Note this is a similarity (1 means
 * identical direction), not a distance.
 *
 * @param {Object<string, number>} a - First key/value vector.
 * @param {Object<string, number>} b - Second key/value vector.
 * @returns {number} The cosine similarity, or 0 when either vector has zero
 *   norm (including empty objects), where the ratio would otherwise be NaN.
 */
function cosine(a, b) {
  const hasOwn = Object.prototype.hasOwnProperty;

  // Only keys that b itself owns contribute; a plain `b[key]` lookup would also
  // find inherited members such as "constructor" or "toString".
  let dot = 0;
  for (const key of Object.keys(a)) {
    if (hasOwn.call(b, key)) {
      dot += a[key] * b[key];
    }
  }

  const norm = (vec) =>
    Math.sqrt(Object.values(vec).reduce((acc, x) => acc + x * x, 0));
  const normA = norm(a);
  const normB = norm(b);

  // Avoid 0/0 for empty or all-zero vectors.
  if (normA === 0 || normB === 0) {
    return 0;
  }
  return dot / normA / normB;
}
Insert cell
import { Library } from '@bmschmidt/javascript-bindings-to-the-hathi-features-data'
Insert cell
import { jaccard, similarity_matrix } from '@bmschmidt/book-visualizations-sandbox'
Insert cell
// Create a Library and ask it to fetch every volume listed in `ids`.
// NOTE(review): `ids` is not defined in the cells visible here, and fetch_all
// presumably returns a promise that Observable resolves before dependent cells run — confirm.
library = new Library().fetch_all(ids)
Insert cell
/**
 * Build the pairwise chunk-similarity rows for every book in a library.
 *
 * Books are ordered by a "pubDate-title-id" label; each book is then compared
 * with itself and with every book that follows it in that order.
 *
 * @param {Object} library - Object whose `m` member exposes `values()` yielding
 *   the loaded books. Each book is read for `id`, `data.metadata.pubDate`,
 *   `data.metadata.title` and a `count(prefs)` method.
 * @returns {Array} Every row returned by `similarity_matrix`, across all
 *   compared pairs, in comparison order.
 */
function full_similarity_matrix(library) {
  const { m } = library;

  function label(book) {
    const { data } = book;
    return `${data.metadata.pubDate}-${data.metadata.title}-${book.id}`;
  }

  // Compute each label once rather than on every comparison.
  const books = Array.from(m.values(), (book) => ({ book, name: label(book) }));

  // A comparator must return a negative, zero or positive number. The previous
  // boolean result could never say "a sorts before b", so the order was unreliable.
  books.sort((a, b) => {
    if (a.name < b.name) return -1;
    if (a.name > b.name) return 1;
    return 0;
  });

  const data = [];
  // Compare every book to itself and to every later book.
  books.forEach((first, i) => {
    console.log(`Building sims for ${first.book.id}`);
    books.forEach((second, j) => {
      if (i > j) {
        return;
      }
      const prefs = {'doc': 'chunk', 'size': chunk_size};
      const aa = first.book.count(prefs);
      const bb = second.book.count(prefs);
      // Append in place instead of re-copying the whole accumulator per pair.
      // Assumes similarity_matrix returns an array of rows, as the previous
      // concat-based code implied — TODO confirm.
      for (const row of similarity_matrix(aa, bb, first.name, second.name)) {
        data.push(row);
      }
    });
  });

  return data;
}

Insert cell
// NOTE(review): no live code in the visible cells reads drop_share — confirm whether
// it is still needed.
drop_share = .97
Insert cell
// Similarity rows for every compared pair of books in the fetched library;
// this is the dataset handed to the chart cell.
sim_mat = full_similarity_matrix(library)

Insert cell
vegalite = require("@observablehq/vega-lite@0.1")

Insert cell
embed = require('vega-embed')
Insert cell
d3Fetch = require('d3-fetch')
Insert cell
d3 = require('d3@5')
Insert cell
import {slider, select} from "@jashkenas/inputs"
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more