Word Skimming / abebrath

abebrath

Workspace

Published

Visualizing with Text

Edited

Jun 7, 2022

Importers

7 stars

Visualizing with Text

Word Skimming

Quantitative Headlines Word Associations Stem and Leaf Multivariate Labelled Cartogram Pokémon and Sets Microtext Line Chart Scatterplot of Text Labels

// Renders the text as a padded container holding one <span class="word"> per entry of `words`.
// The content of each span is the string returned by formatTerm(d).
// The spans carry the "word" class, which the `update` cell uses to find them and set their font-variation custom properties.
chart = {

const body = html`

<body>

<div class="background chart" style="padding: 25px">${Array.from(words, d => html`

<span class="word">${formatTerm(d)}</span>

`)}

</div>

</body>`

return body

}

words = {

const data = nlp(input).terms().json()

//word.lower is the token word from the target text with case and punctuation removed. word.lower is then compared to the corpus word frequency list (wikiFreq below) which will then be used to set the formatting

//example object:

//0: Object {text: "ALICE", terms: Array(1), lower: "alice"}

//terms: Array(1) [0: Object {text: "ALICE", tags: Array(0), pre: "", post: " "}

data.forEach(d => d.lower = d.terms[0].text.toLowerCase())

dataParse(data)

return data

}

// Word frequency and count never got used for this visualization; we mostly care about rank. They are left in because they are interesting properties that could be useful.

dataParse = function(arr){
  // Annotates every token record in `arr` in place with:
  //   count - how many times the token's `lower` value occurs in `arr`
  //   freq  - the `freq` of the matching wikiFreq entry, or 1 when the word is not listed
  //   rank  - the `rank` of the matching wikiFreq entry, or wikiFreq.length + 1 when not listed
  // Returns nothing; callers read the new properties off the records they passed in.

  // Occurrences of each lower-cased word within the text being analysed.
  const counts = d3.rollup(arr, v => v.length, d => d.lower);

  // Index the corpus list by word once, instead of scanning the whole list twice per
  // token. Only the first entry for a word is kept, which matches what Array.find
  // would have returned.
  const corpus = new Map();
  for (const entry of wikiFreq) {
    if (!corpus.has(entry.word)) {
      corpus.set(entry.word, entry);
    }
  }

  // Words missing from the corpus list are treated as rarer than every listed word.
  const unknownFreq = 1;
  const unknownRank = wikiFreq.length + 1;

  arr.forEach(function(d){
    const hit = corpus.get(d.lower);
    d.count = counts.get(d.lower); // occurrences in the analysed text
    d.freq = hit ? hit.freq : unknownFreq; // frequency of use in the corpus list
    d.rank = hit ? hit.rank : unknownRank; // rank in the corpus list
  });
}

//Via en.wiktionary.org/wiki/Wiktionary:Frequency_lists/PG/2006/04/1-10000

//20,000 most frequently used words on Wikipedia! Frequency is occurrences per billion words, while Rank is the relative usage of the word (i.e. rank 1 is the most used English word, rank 2 the 2nd most used, etc.)

//There *may* be some issues with this list: lemmatization is assumed to be already accounted for by the list (i.e. the list is based on pure word frequencies, so run, ran and running each have their own frequency).

// Parsed corpus frequency list: one {word, freq, rank} record per CSV row, with the
// Frequency_per_billion and Rank columns coerced to numbers.
wikiFreq = d3.csvParse(
  await FileAttachment("wordFrequencyWikipedia.csv").text(),
  (row) => ({
    word: row.Word,
    freq: +row.Frequency_per_billion,
    rank: +row.Rank
  })
)

//formats the text before entering html to remove some unwanted pre/post word characters and add some line breaks/italics

formatTerm = function(d){
  // Builds the markup string for one token record.
  //   d.terms[0] supplies `pre`, `text` and `post` (leading characters, the word itself,
  //   trailing characters).
  // Returns pre + text + post with:
  //   - HTML-special characters from the source text escaped,
  //   - underscores stripped and the term wrapped in <i>…</i> when the word contains "_",
  //   - "<br><br>" appended when the trailing characters contain a blank line ("\n\n").

  // The result is used as markup (it carries the <i>/<br> tags added below), so text coming
  // from the input must not be able to open tags or entities of its own.
  const escapeHtml = (s) =>
    s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

  const first = d.terms[0];
  let term = escapeHtml(first.pre.concat(first.text, first.post));

  if (first.text.includes("_")) {
    term = "<i>" + term.replace(/_/g, "") + "</i>";
  }
  if (first.post.includes("\n\n")) {
    term = term + "<br><br>";
  }
  return term;
}

//script that changes the font properties based on the dropdown

update = {

const elements = document.getElementsByClassName("word")

words.forEach(function(d, i){

const rank = d.rank;

(textAxis == "weight" || textAxis == "weightWidth" || textAxis == "weightXhigh")?

elements[i].style.setProperty("--text-wght", rankWeight(d.rank))

:elements[i].style.setProperty("--text-wght", 80);

(textAxis == "width" || textAxis == "weightWidth")?

elements[i].style.setProperty("--text-wdth", rankWidth(d.rank))

:elements[i].style.setProperty("--text-wdth", 400);

(textAxis == "xhigh" || textAxis == "weightXhigh" )?

elements[i].style.setProperty("--text-YTLC", rankYTLC(d.rank))

:elements[i].style.setProperty("--text-YTLC", 500);

})

}

// Quantile scale from word rank to a value for the font's "wght" axis.
// Domain: the rank of every token in `words` (original note: Alice in Wonderland spans [1, 20000]).
// Range: nine entries, i.e. nine quantile bins; repeated entries make neighbouring bins share a value.
// Original note on the range: Amstelvar weight range (0, 250).
rankWeight = d3
  .scaleQuantile()
  .range([50, 100, 100, 150, 150, 150, 200, 200, 250])
  .domain(words.map(({rank}) => rank))

// Quantile scale from word rank to a value for the font's "wdth" axis.
// Domain: the rank of every token in `words` (original note: Alice in Wonderland spans [1, 20000]).
// Range: nine entries, i.e. nine quantile bins; repeated entries make neighbouring bins share a value.
// Original note on the range: Amstelvar width range (50, 400).
rankWidth = d3
  .scaleQuantile()
  .range([100, 175, 175, 250, 250, 250, 325, 325, 400])
  .domain(words.map(({rank}) => rank))

// Quantile scale from word rank to a value for the font's "YTLC" (x-height) axis.
// Domain: the rank of every token in `words` (original note: Alice in Wonderland spans [1, 20000]).
// Range: nine entries, i.e. nine quantile bins; repeated entries make neighbouring bins share a value.
// NOTE(review): the previous comment here described the width axis (50, 400); the values below are YTLC settings.
rankYTLC = d3
  .scaleQuantile()
  .range([450, 487.5, 487.5, 525, 525, 525, 562.5, 562.5, 600])
  .domain(words.map(({rank}) => rank))

// d3 v5 bundled together with d3-array v2 (dataParse uses d3.rollup).
// The cell's value is the promise returned by require; cells that reference `d3` receive the resolved module.
d3 = require("d3@5", "d3-array@2")

// nlp = {

// let nlp = await require('compromise/builds/compromise-tokenize.js')

// let plugin = await require('compromise-paragraphs')

// nlp.extend(plugin)

// return nlp

// }

// compromise: the `words` cell calls nlp(input).terms().json() to split the input text into terms.
// NOTE(review): no version is pinned in this require — presumably the latest published build is loaded; confirm that is intended.
nlp = require('compromise')

// Shows the version of the compromise build that was loaded.
nlp.version

import {select} from "@jashkenas/inputs"

import {textarea} from "@jashkenas/inputs"

// The first 1510 characters of the attached "Alice in Wonderland.txt".
// NOTE(review): presumably the default text offered to the reader — the consuming cell is not visible here.
placeholder = FileAttachment("Alice in Wonderland.txt")
  .text()
  .then((fullText) => fullText.slice(0, 1510))

//VARIABLE FONTS! If this visualization isn't working you might need to update or switch browsers (tested on Firefox & Chrome). I was able to get this running thanks to some other example experiments with v-fonts done by (@jmahabal/variable-fonts-playground).

//Amstelvar (https://github.com/TypeNetwork/Amstelvar) is a variable font with many low-level controls: 15+ variable axes to use. The axis names are based on this (https://variationsguide.typenetwork.com/) proposal for standardizing v-fonts. The quick rundown is that any "Opaque" variable controls how thick the black of a single glyph is, while any "Transparent" variable controls how much white negative space there is. X-height is a common element discussed in font design, but it is almost always fixed in most font families. With YTLC (Y Transparent Lower Case) we can actually set the x-height on Amstelvar.

// Stylesheet cell: registers the Amstelvar variable font and defines the classes used by the chart.
// .chart sets defaults for the --text-wght / --text-wdth / --text-YTLC custom properties, and .word
// feeds those properties into font-variation-settings, so overriding a custom property on a single
// span (as the `update` cell does) changes that word's weight, width or x-height.
// NOTE(review): .chart defaults --text-YTLC to 400 while `update` uses 500 for a disabled x-height axis — confirm which default is intended.
html`

<style>

@font-face {

/* https://github.com/TypeNetwork/Amstelvar */

/* using the gstatic uploads created by observablehq.com/@jmahabal/variable-fonts-playground */

/* easier than uploading my own but might be the source of some discrepancies with dev docs? */

/* also possibly the reason I can't use "font-variation-settings: normal" and need to define each individually */

font-family: 'Amstelvar';

src: url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.eot);

src: url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.eot?#iefix) format('embedded-opentype'),

url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.woff2) format('woff2'),

url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.woff) format('woff'),

url(//fonts.gstatic.com/ea/amstelvaralpha/v2/AmstelvarAlpha-VF.ttf) format('truetype');

font-weight: 300 900;

font-stretch: 35% 100%;

font-style: normal;

font-display: swap;

}

.background {fill: #fffbeb; background-color: #fffbeb;}

.title {

font: 24px "Lato", sans-serif;

fill: #263c54;

font-weight: 700;

}

.other {

font: 20px "Lato", sans-serif;

fill: #263c54;

font-weight: 400;

}

.chart {

font-family: "Amstelvar", serif;

--text-wght: 80;

--text-wdth: 400;

--text-YTLC: 400;

}

.word {

fill: #263c54;

font-variation-settings: 'wght' var(--text-wght),

'wdth' var(--text-wdth),

'YTLC' var(--text-YTLC);

}

</style>`

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more