tidy.js – Intro & Demo / Peter Beshai

Peter Beshai

Workspace

Published

tidy.js

Edited

Feb 24, 2021

4 forks

Importers

123 stars

output = tidy(

cars,

groupBy(

['cyl', 'gear'],

[

summarize({

n: n(),

mpg: mean('mpg'),

hp: mean('hp'),

wt: mean('wt')

})

]

select(['cyl', 'gear', everything()]),

arrange([desc('n'), desc('mpg')])

)

table(

tidy(

table1,

mutate({

rate: d => (d.cases / d.population) * 10000,

'|': '|', // visual spacer for table()

// alternatively, use the `rate()` function for safety around division by 0 and nully values

// rate2: rate('cases', 'population'),

rate2: rate(d => d.cases * 10000, 'population'),

// or the TMath.rate function directly:

rate3: d => TMath.rate(d.cases, d.population) * 10000

})

)

table(tidy(table1, count('year', { wt: 'cases' })))

table(table4a)

table(

tidy(

table4a,

pivotLonger({

cols: ['1999', '2000'],

namesTo: 'year',

valuesTo: 'cases'

})

)

table(

tidy(

table4b,

pivotLonger({

cols: ['1999', '2000'],

namesTo: 'year',

valuesTo: 'population'

})

)

{

let tidy4a = tidy(

table4a,

pivotLonger({

cols: ['1999', '2000'],

namesTo: 'year',

valuesTo: 'cases'

})

);

let tidy4b = tidy(

table4b,

pivotLonger({

cols: ['1999', '2000'],

namesTo: 'year',

valuesTo: 'population'

})

);

return table(tidy(tidy4a, leftJoin(tidy4b)));

}

table(table2)

table(tidy(table2, pivotWider({ namesFrom: 'type', valuesFrom: 'count' })))

// or re-arrange

table(

tidy(

table2,

pivotWider({ namesFrom: 'type', valuesFrom: 'count' }),

select(['country', 'year', everything()])

)

table(table3)

// tidy doesn't provide a separate command (yet?), but you could use mutate

table(

tidy(

table3,

mutate({

rate: d => d.rate.split('/'),

cases: d => +d.rate[0],

population: d => +d.rate[1]

}),

select('-rate')

)

// or you could use map

table(

tidy(

table3,

map(d => {

const [cases, population] = d.rate.split('/').map(x => +x);

return { ...d, cases, population };

}),

select('-rate')

)

// there's no unite (yet), but a mutate + select will do the trick

table(

tidy(

table5,

mutate({ new: d => `${d.century}_${d.year}` }),

select(['-century', '-year'])

)

table(

tidy(

table5,

mutate({ new: d => +`${d.century}${d.year}` }),

select(['-century', '-year'])

)

table(stocks)

table(tidy(stocks, pivotWider({ namesFrom: 'year', valuesFrom: 'return' })))

// drop out missing values

table(

tidy(

stocks,

pivotWider({ namesFrom: 'year', valuesFrom: 'return' }),

pivotLonger({

cols: ['2015', '2016'],

namesTo: 'year',

valuesTo: 'return'

}),

// pivotLonger does not have values_drop_na at the moment

filter(d => d.return != null)

)

// use complete to make missing values explicit

table(tidy(stocks, complete(['year', 'qtr'])))

treatment = [

{ person: 'Derrick Whitmore', treatment: 1, response: 7 },

{ treatment: 2, response: 10 },

{ person: undefined, treatment: 3, response: 9 },

{ person: 'Katherine Burke', treatment: 1, response: 4 }

]

table(treatment)

// fill in columns with last seen values

table(tidy(treatment, fill('person')))

table(who.slice(0, 20))

who1 = tidy(

who,

pivotLonger({ cols: startsWith('new'), namesTo: 'key', valuesTo: 'cases' }),

filter(d => d.cases !== 'NA')

)

table(who1.slice(0, 20))

table(tidy(who1, count('key')))

who2 = tidy(who1, mutate({ key: d => d.key.replace('newrel', 'new_rel') }))

table(who2.filter(d => d.key.includes('new_rel')).slice(0, 15))

who3 = tidy(

who2,

mutate({

key: d => d.key.split('_'),

new: d => d.key[0],

type: d => d.key[1],

sexage: d => d.key[2]

}),

select(['-key'])

)

table(who3.slice(0, 6))

table(tidy(who3, count('new')))

who4 = tidy(who3, select(['-new', '-iso2', '-iso3']))

who5 = tidy(

who4,

mutate({

sexage: d => [d.sexage[0], d.sexage.substring(1)],

sex: d => d.sexage[0],

age: d => +d.sexage[1]

}),

select('-sexage')

)

table(who5.slice(0, 10))

// all together now

table(

tidy(

who,

pivotLonger({ cols: startsWith('new'), namesTo: 'key', valuesTo: 'cases' }),

filter(d => d.cases !== 'NA'),

mutate({ key: d => d.key.replace('newrel', 'new_rel') }),

// separate equivalent:

mutate({

key: d => d.key.split('_'),

new: d => d.key[0],

type: d => d.key[1],

sexage: d => d.key[2]

}),

// separate equivalent:

mutate({

sexage: d => [d.sexage[0], d.sexage.substring(1)],

sex: d => d.sexage[0],

age: d => +d.sexage[1]

}),

select(['-key', '-new', '-iso2', '-iso3', '-sexage'])

).slice(0, 20)

)

viewof tidyjsfile = html`<input type=file accept=".js">`

d3 = require('d3-array', 'd3-dsv')

module = require(URL.createObjectURL(tidyjsfile))

import {

tidy,

startsWith,

groupBy,

summarize,

sum,

mutate,

select,

mean,

everything,

arrange,

desc,

filter,

rate,

TMath,

pivotWider,

pivotLonger,

leftJoin,

count,

map,

complete,

fill

} from '@pbeshai/tidyjs' // observable import

module_attachment = require(await FileAttachment("tidy.js").url())

import {Table} from "@observablehq/inputs"

// switch from tmcw table to observablehq and just use same name

table = Table

table1csv = `country,year,cases,population

Afghanistan,1999,745,19987071

Afghanistan,2000,2666,20595360

Brazil,1999,37737,172006362

Brazil,2000,80488,174504898

China,1999,212258,1272915272

China,2000,213766,1280428583`

table1 = d3.csvParse(table1csv, d => ({ ...d, year: +d.year, cases: +d.cases, population: +d.population }))

table2csv = `country,year,type,count

Afghanistan,1999,cases,745

Afghanistan,1999,population,19987071

Afghanistan,2000,cases,2666

Afghanistan,2000,population,20595360

Brazil,1999,cases,37737

Brazil,1999,population,172006362

Brazil,2000,cases,80488

Brazil,2000,population,174504898

China,1999,cases,212258

China,1999,population,1272915272

China,2000,cases,213766

China,2000,population,1280428583`

table2 = d3.csvParse(table2csv, d => ({ ...d, year: +d.year, count: +d.count }))

table3csv=`country,year,rate

Afghanistan,1999,745/19987071

Afghanistan,2000,2666/20595360

Brazil,1999,37737/172006362

Brazil,2000,80488/174504898

China,1999,212258/1272915272

China,2000,213766/1280428583

table3 = d3.csvParse(table3csv, d => ({ ...d, year: +d.year }))

table4acsv = `country,1999,2000

Afghanistan,745,2666

Brazil,37737,80488

China,212258,213766`

table4a = d3.csvParse(table4acsv, d => ({ ...d, '1999': +d['1999'], '2000': +d['2000'] }))

table4bcsv = `country,1999,2000

Afghanistan,19987071,20595360

Brazil,172006362,174504898

China,1272915272,1280428583`

table4b = d3.csvParse(table4bcsv, d => ({ ...d, '1999': +d['1999'], '2000': +d['2000'] }))

table5 = tidy(

table3,

mutate({

century: d => `${Math.floor(d.year / 100)}`,

year: d => `${d.year % 100}`.padStart(2, '0')

}),

select(['country', 'century', 'year', 'rate'])

)

whocsv = await FileAttachment("who.csv").text()

who = d3.csvParse(whocsv, d => ({ ...d, year: +d.year,}))

stocks = [

{ year: 2015, qtr: 1, return: 1.88 },

{ year: 2015, qtr: 2, return: 0.59 },

{ year: 2015, qtr: 3, return: 0.35 },

{ year: 2015, qtr: 4, return: null },

{ year: 2016, qtr: 2, return: 0.92 },

{ year: 2016, qtr: 3, return: 0.17 },

{ year: 2016, qtr: 4, return: 2.66 }

]

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more