Tidy Data in JavaScript / UW Interactive Data Lab

UW Interactive Data Lab

Data visualization and analysis at the University of Washington, Seattle.

Workspace

Published

Edited

May 12, 2021

2 forks

Importers

76 stars

table1.view()

table2.view({ height: 400 })

table3.view()

table4a.view()

table4b.view()

// Compute rate per 10,000

table1

.derive({ rate: d => d.cases / d.population * 10000 })

.view()

// Compute cases per year

table1

.groupby('year')

.rollup({ cases: op.sum('cases') })

.view()

// Visualize changes over time

vl.markLine({ point: true })

.data(table1)

.encode(

vl.x().fieldQ('year').axis({ tickMinStep: 1, format: 'd' }),

vl.y().fieldQ('cases'),

vl.color().fieldN('country')

)

.render()

table4a.view()

viewof fold4a = table4a

.fold(['1999', '2000'], { as: ['year', 'cases'] })

.view()

viewof fold4b = table4b

.fold(['1999', '2000'], { as: ['year', 'population'] })

.view()

fold4a

.join_left(fold4b)

.view()

table2.view(10)

table2

.groupby('country', 'year')

.pivot('type', 'count')

.view()

table3.view()

table3

.spread({ rate: d => op.split(d.rate, '/') }, { as: ['cases', 'population' ] })

.view()

// this time coercing split values to numbers

table3

.spread({ rate: d => op.split(d.rate, '/') }, { as: ['cases', 'population' ] })

.derive({ cases: d => +d.cases, population: d => +d.population })

.view()

viewof table5 = table3

.spread({ year: d => op.match(d.year, /.{2}/g) }, { as: ['century', 'year'] })

.view()

table5

.derive({ new: d => `${d.century}_${d.year}` }, { before: 'year' })

.select(aq.not('year', 'century'))

.view();

table5

.derive({ new: d => +`${d.century}${d.year}` }, { before: 'year' })

.select(aq.not('year', 'century'))

.view();

stocks = aq.table({

year: [2015, 2015, 2015, 2015, 2016, 2016, 2016],

qtr: [ 1, 2, 3, 4, 2, 3, 4],

return: [1.88, 0.59, 0.35, null, 0.92, 0.17, 2.66]

})

stocks.view()

stocks

.groupby('qtr')

.pivot('year', 'return')

.view()

stocks

.groupby('qtr')

.pivot('year', 'return')

.fold(['2015', '2016'], { as: ['year', 'return'] })

.view()

stocks

.groupby('qtr')

.pivot('year', 'return')

.fold(['2015', '2016'], { as: ['year', 'return'] })

.filter(d => d.return != null)

.view()

stocks

.groupby('qtr')

.impute({ return: () => 0 })

.view()

// expand rows to include all year x qtr combinations

stocks

.impute({ return: () => null }, { expand: ['year', 'qtr'] })

.orderby('year', 'qtr')

.view()

viewof treatment = aq.table({

person: ['Derrick Whitmore', null, null, 'Katherine Burke'],

treatment: [1, 2, 3, 1],

response: [7, 10, 9, 4]

}).view()

// fill missing values in a downward direction

treatment

.derive({ person: op.fill_down('person') })

.view()

// Note: NA values from R show up here as 'NA' strings

who.view(10)

viewof who1 = who

.fold(aq.range('new_sp_m014', 'newrel_f65'), { as: ['key', 'cases' ] })

.filter(d => d.cases !== 'NA')

.view(10)

who1

.groupby('key')

.count()

.orderby('key')

.view(10)

viewof who2 = who1

.derive({ key: d => op.replace(d.key, 'newrel', 'new_rel') })

.view(10)

viewof who3 = who2

.spread({ key: d => op.split(d.key, '_') }, { as: ['new', 'type', 'sexage'] })

.view(10)

// check count of unique new values -- it matches the total row count!

who3

.groupby('new')

.count()

.view()

// drop the new, iso2, and iso3 columns

who4 = who3

.select(aq.not('new', 'iso2', 'iso3'))

viewof who5 = who4

.derive({

sex: d => d.sexage[0],

age: d => op.slice(d.sexage, 1)

})

.select(aq.not('sexage'))

.view(10)

// collapse operations above into a single pipeline

who

.fold(aq.range('new_sp_m014', 'newrel_f65'), { as: ['key', 'cases' ] })

.filter(d => d.cases !== 'NA')

.derive({ key: d => op.replace(d.key, 'newrel', 'new_rel') })

.spread({ key: d => op.split(d.key, '_') }, { as: ['new', 'type', 'sexage'] })

.derive({

sex: d => d.sexage[0],

age: d => op.slice(d.sexage, 1)

})

.select(aq.not('new', 'iso2', 'iso3', 'sexage'))

.view(10)

// further compress using regular expression matching

who

.fold(aq.range('new_sp_m014', 'newrel_f65'), { as: ['key', 'cases' ] })

.filter(d => d.cases !== 'NA')

.reify() // reallocates new table without filtered rows

.derive({ key: d => op.replace(d.key, 'newrel', 'new_rel') })

.spread({ key: d => op.match(d.key, /new_([a-z]{2})_(m|f)(\d+)/) }, { as: ['new', 'type', 'sex', 'age'] })

.select(aq.not('new', 'iso2', 'iso3'))

.view(10)

aq.fromCSV(crime_csv(), { header: false }) // <- parse CSV data, provided by the crime_csv method

.view(10)

aq.fromCSV(crime_csv(), { header: false, names: ['year', 'rate'] }) // <- provide column names

.view(10)

aq.fromCSV(crime_csv(), { header: false, names: ['year', 'rate'] })

.filter(d => d.year != null || d.rate != null) // <-- remove if both columns are empty

.view(10)

aq.fromCSV(crime_csv(), { header: false, names: ['year', 'rate'] })

.filter(d => d.year != null || d.rate != null)

.derive({

state: d => op.match(d.year, /Reported crime in (.*)/, 1) // <- extract state name

}, { before: 0 }) // <- make new column the first column in the output table

.view(10)

aq.fromCSV(crime_csv(), { header: false, names: ['year', 'rate'] })

.filter(d => d.year != null || d.rate != null)

.derive({

state: d => op.fill_down(op.match(d.year, /Reported crime in (.*)/, 1)) // <- also fill in state names

}, { before: 0 })

.view(10)

aq.fromCSV(crime_csv(), { header: false, names: ['year', 'rate'] })

.filter(d => d.year != null || d.rate != null)

.derive({

state: d => op.fill_down(op.match(d.year, /Reported crime in (.*)/, 1)) // <- extract state name

}, { before: 0 })

.filter(d => d.rate != null) // <-- or, we could delete when year column starts with "Reported crime in"

.view(100)

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more