Published
Edited
Jun 7, 2022
Importers
7 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
chart = {
const body = html`
<head></head>
<body>
<div class="background chart" style="padding: 25px">${Array.from(words, d => html`
<span class="word">${formatTerm(d)}</span>
`)}
</div>
</body>`

return body
}
Insert cell
Insert cell
words = {
const data = nlp(input).terms().json()
//word.lower is the token word from the target text with case and punctuation removed. word.lower is then compared to the corpus word frequency list (wikiFreq below) which will then be used to set the formatting

//example object:
//0: Object {text: "ALICE", terms: Array(1), lower: "alice"}
//terms: Array(1) [0: Object {text: "ALICE", tags: Array(0), pre: "", post: " "}
data.forEach(d => d.lower = d.terms[0].text.toLowerCase())
dataParse(data)
return data
}
Insert cell
// Annotates every term record in `arr` (in place) with three properties:
//   count - how many records in `arr` share the same `lower` value
//   freq  - the `freq` of the matching wikiFreq row, or 1 when the word is not listed
//   rank  - the `rank` of the matching wikiFreq row, or wikiFreq.length + 1 when not listed
// Frequency and count are not used by this visualization (only rank is); they are kept
// because they are interesting properties that could be useful.
//
// arr: array of objects that each carry a `lower` string. Returns nothing.
dataParse = function(arr){
  const counts = d3.rollup(arr, v => v.length, d => d.lower);

  // Index the word list once instead of scanning it twice per term. When a word is
  // listed more than once the first row wins, matching what Array.prototype.find returned.
  const entriesByWord = new Map();
  for (const entry of wikiFreq) {
    if (!entriesByWord.has(entry.word)) {
      entriesByWord.set(entry.word, entry);
    }
  }

  // Words outside the list get the lowest frequency and a rank one past the end of the list.
  const fallbackFreq = 1;
  const fallbackRank = wikiFreq.length + 1;

  arr.forEach(function(d){
    const entry = entriesByWord.get(d.lower);

    d.count = counts.get(d.lower); // sets the count of each word
    d.freq = entry === undefined ? fallbackFreq : entry.freq; // frequency from the word list
    d.rank = entry === undefined ? fallbackRank : entry.rank; // rank from the word list
  });
}
Insert cell
//Via en.wiktionary.org/wiki/Wiktionary:Frequency_lists/PG/2006/04/1-10000
//20,000 most frequently used words on Wikipedia! Frequency is occurences per billion words while Rank is the relative usage of the word (ie. rank 1 - the most used english word, rank 2 - 2nd most used, ect.)

//There *may* be some issues with this list, lemmatization is assumbed to be captured in this list (ie. the list is based on pure word frequencies, run, ran and running have different frequencies.)

wikiFreq = Object.assign(d3.csvParse(await FileAttachment("wordFrequencyWikipedia.csv").text(),
({Word, Frequency_per_billion, Rank}) =>
({word: Word, freq: +Frequency_per_billion, rank: +Rank})
))
Insert cell
// Formats one term record as an HTML string for the chart: re-attaches the characters
// before/after the word, renders _underscored_ words in italics (dropping the
// underscores) and adds a paragraph break after a blank line.
//
// d: term record; only d.terms[0] is read, and it must carry string `pre`, `text`
//    and `post` properties.
// Returns the markup as a string.
formatTerm = function(d){
  const {pre, text, post} = d.terms[0];

  // The returned string is used as markup (this function emits <i> and <br> itself),
  // so characters coming from the source text must be escaped or a stray "<" or "&"
  // would be parsed as HTML instead of being displayed.
  const escapeHtml = s => String(s)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

  let term = escapeHtml(pre.concat(text, post));
  if (text.includes("_")) {term = "<i>" + term.replace(/_/g,"") + "</i>";}
  if (post.includes("\n\n")) {term = term + "<br><br>";}
  return term;
}
Insert cell
//script that changes the font properties based on the dropdown
update = {
const elements = document.getElementsByClassName("word")
words.forEach(function(d, i){
const rank = d.rank;
(textAxis == "weight" || textAxis == "weightWidth" || textAxis == "weightXhigh")?
elements[i].style.setProperty("--text-wght", rankWeight(d.rank))
:elements[i].style.setProperty("--text-wght", 80);
(textAxis == "width" || textAxis == "weightWidth")?
elements[i].style.setProperty("--text-wdth", rankWidth(d.rank))
:elements[i].style.setProperty("--text-wdth", 400);
(textAxis == "xhigh" || textAxis == "weightXhigh" )?
elements[i].style.setProperty("--text-YTLC", rankYTLC(d.rank))
:elements[i].style.setProperty("--text-YTLC", 500);
})
}
Insert cell
Insert cell
rankWeight = d3.scaleQuantile()
.domain(words.map(d => d.rank)) // Alice in Wonderland: [1, 20000]
.range([50, 100, 100, 150, 150, 150, 200, 200, 250]) // Amstelvar weight range (0, 250)
Insert cell
rankWidth = d3.scaleQuantile()
.domain(words.map(d => d.rank)) // Alice in Wonderland: [1, 20000]
.range([100, 175, 175, 250, 250, 250, 325, 325, 400]) // Amstelvar width range (50, 400)
Insert cell
rankYTLC = d3.scaleQuantile()
.domain(words.map(d => d.rank)) // Alice in Wonderland: [1, 20000]
.range([450, 487.5, 487.5, 525, 525, 525, 562.5, 562.5, 600]) // Amstelvar width range (50, 400)
Insert cell
Insert cell
// Loads d3 v5 together with d3-array v2 (d3.rollup, used by dataParse, comes from d3-array).
// The cell's value is the promise returned by require; cells that reference d3 receive
// the resolved module.
d3 = require("d3@5", "d3-array@2")
Insert cell
// nlp = {
// let nlp = await require('compromise/builds/compromise-tokenize.js')
// let plugin = await require('compromise-paragraphs')
// nlp.extend(plugin)
// return nlp
// }
Insert cell
// Loads the compromise NLP library; the words cell uses it to split the input into terms.
// No version is pinned here, so whichever version require resolves is used.
nlp = require('compromise')
Insert cell
// Displays the version of the compromise library that was loaded.
nlp.version
Insert cell
import {select} from "@jashkenas/inputs"
Insert cell
import {textarea} from "@jashkenas/inputs"
Insert cell
placeholder = (await FileAttachment("Alice in Wonderland.txt").text()).slice(0, 1510)
Insert cell
//VARIABLE FONTS! If this visualization isn't working you might need to update or switch browsers (tested on Firefox & Chrome). I was able to get this running thanks to some other example experiments with variable fonts done by @jmahabal (@jmahabal/variable-fonts-playground).

//Amstelvar (https://github.com/TypeNetwork/Amstelvar) is a variable font with many low-level controls: 15+ variable axes to use. The axis names are based on this proposal (https://variationsguide.typenetwork.com/) for standardizing variable fonts. The quick rundown is that any "Opaque" variable controls how thick the black of a single glyph is, while any "Transparent" variable controls how much white negative space there is. X-height is a common element discussed in font design, but it is fixed in most font families. With YTLC (Y Transparent Lower Case) we can actually set the x-height on Amstelvar.

//Stylesheet cell: loads Lato, declares the Amstelvar @font-face, and defines the classes used by the chart. The --text-wght / --text-wdth / --text-YTLC custom properties declared on .chart are the ones the update cell overrides per word; .word feeds them into font-variation-settings.
html`
<link href="https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap" rel="stylesheet">
<style>

@font-face {
/* https://github.com/TypeNetwork/Amstelvar */
/* using the gstatic uploads created by observablehq.com/@jmahabal/variable-fonts-playground */
/* easier than uploading my own but might be the source of some discrepancies with dev docs? */
/* also possibly the reason I can't use "font-variation-settings: normal" and need to define each individually */
font-family: 'Amstelvar';
src: url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.eot);
src: url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.eot?#iefix) format('embedded-opentype'),
url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.woff2) format('woff2'),
url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.woff) format('woff'),
url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.ttf) format('truetype');
font-weight: 300 900;
font-stretch: 35% 100%;
font-style: normal;
font-display: swap;
}

.background {fill: #fffbeb; background-color: #fffbeb;}

.title {
font: 24px "Lato", sans-serif;
fill: #263c54;
font-weight: 700;
}

.other {
font: 20px "Lato", sans-serif;
fill: #263c54;
font-weight: 400;
}

.chart {
font-family: "Amstelvar", serif;
--text-wght: 80;
--text-wdth: 400;
--text-YTLC: 400;
}

.word {
fill: #263c54;
font-variation-settings: 'wght' var(--text-wght),
'wdth' var(--text-wdth),
'YTLC' var(--text-YTLC);
}

</style>`
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more