Public
Edited
Dec 12, 2023
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
aq.table({
country: ['USA', 'USA', 'Canada', 'Canada'],
medal: ['gold', 'silver', 'gold', 'silver'],
count: [10, 20, 7, 26]
})
.groupby('country') // ensures country is retained as a column
.pivot('medal', 'count') // medal provides keys (column names), count provides values
.view()
Insert cell
Insert cell
aq.table({
country: ['USA', 'USA', 'Canada', 'Canada'],
medal: ['gold', 'silver', 'gold', 'silver'],
count: [10, 20, 7, 26]
})
// note there is no groupby, we will collapse across countries!
.pivot('medal', { total: d => op.sum(d.count) }) // sum counts within a cell
.view()
Insert cell
Insert cell
Insert cell
aq.table({
country: ['USA', 'Canada'],
gold: [10, 7],
silver: [20, 26]
})
.fold(aq.not('country'), { as: ['medal', 'count'] })
.view()
Insert cell
Insert cell
Insert cell
aq.table({ w: ["if I've said it once", "I've said it a thousand times"] })
.derive({ w: d => op.split(d.w, ' ') }) // split on spaces into arrays, replace previous column
.unroll('w') // unroll arrays, with one row per value
.groupby('w') // group by words
.count() // count word occurrences
.view()
Insert cell
Insert cell
Insert cell
{
// import d3-time-format
const d3 = await require('d3-time-format');
return aq.fromCSV(
'date\n1/1/2000\n06/01/2010\n12/10/2020',
{ parse: { date: d3.timeParse('%m/%d/%Y') } }
)
.view()
}
Insert cell
Insert cell
{
// import d3-time-format
const d3 = await require('d3-time-format');
// create and register a date parser under the name 'parse_mdy'
// use { override: true } to suppress errors if we re-evaluate this cell
aq.addFunction('parse_mdy', d3.timeParse('%m/%d/%Y'), { override: true });
return aq
.table({ date: ['1/1/2000', '06/01/2010', '12/10/2020'] })
.derive({ date: d => op.parse_mdy(d.date) })
.view()
}
Insert cell
Insert cell
{
// import d3-time-format
const d3 = await require('d3-time-format');
const parseMDY = d3.timeParse('%m/%d/%Y');
// escape our derive expression to call the function as-is, including closures
return aq
.table({ date: ['1/1/2000', '06/01/2010', '12/10/2020'] })
.derive({ date: aq.escape(d => parseMDY(d.date)) })
.view()
}
Insert cell
Insert cell
function normalize_column(name) {
return name.toLowerCase() // map to lower case
.replace(/[%#$£()\'\"]/g, '') // remove unwanted characters
.replace(/[ /,+.*:\-\r\n@]/g, '_') // replace spacing and punctuation with an underscore
.replace(/_+/g, '_') // collapse repeated underscores
.normalize('NFD') // perform unicode normalization
.replace(/[\u0300-\u036f]/g, ''); // strip accents from characters
}
Insert cell
Insert cell
function normalize(table) {
const name = table.columnNames();
return aq.table({ name, norm: name.map(normalize_column) }) // create table of names & normalized names
.groupby('norm') // group by normalized name
.derive({ index: op.row_number(), count: op.count() }) // count duplicates, generate index for each
.objects() // generate an array of { name, norm } objects
.map(o => ({ [o.name]: o.norm + (o.count > 1 ? `_${o.index}` : '') })); // rename, adding index as needed
}
Insert cell
Insert cell
aq.table({ 'FOO%+BAr': [1], 'FOO++BAr': [2], 'COL.1': [3], 'käse': [4] })
.select(normalize)
.columnNames()
Insert cell
Insert cell
Insert cell
aq.table({
region: ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
quarter: ['Q1', 'Q2', 'Q3', 'Q4', 'Q1', 'Q2', 'Q3', 'Q4'],
sales: [6, 5, 3, 2, 4, 8, 2, 6]
})
.groupby('quarter') // we want to compute percentage of sales for that quarter
.derive({ percent: d => d.sales / op.sum(d.sales) || 0 }) // include `|| 0` in case sum is zero
.orderby('quarter') // sort by quarter to ease comparison
.view()
Insert cell
Insert cell
aq.table({ x: [1, 1, 2, 2, 3, 3, 4, 4, 5, 5] })
.orderby('x')
.derive({ ra: aq.rolling(d => op.sum(d.x)) })
.view()
Insert cell
Insert cell
aq.table({ x: [1.1, 2.2, 3.2, 4.3, 6.5, 8.6, 9.7, 10.1] })
.orderby('x')
.derive({ ra: aq.rolling(d => op.average(d.x), [-2, 0]) })
.view()
Insert cell
Insert cell
Insert cell
aq.bin('x', { maxbins: 10 })
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 4, 5, 5, 5, 6, 6, 8, 9, 10] })
.groupby({
bin_0: aq.bin('x', {maxbins: 10}),
bin_1: aq.bin('x', {maxbins: 10, offset: 1})
})
.count()
.view()
Insert cell
Insert cell
// Note: T\the aq.bin and op.bins calls should have the same parameters!
aq.table({ x: [1.1, 3.2, 4.1, 4.2, 5.1, 5.3, 5.4, 6.1, 6.6, 8.8, 9.3, 10] })
.groupby({ bin: aq.bin('x') })
.count()
.impute(
{ count: () => 0 }, // set imputed counts to zero
{ expand: { bin: d => op.sequence(...op.bins(d.bin)) } } // include rows for all bin values
// Note: op.bins(d.bin) -> [start, stop, step]
// so op.sequence(...op.bins(d.bin)) -> op.sequence(start, stop, step)
)
.orderby('bin')
.view();
Insert cell
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.rollup({
average: d => op.average(d.x),
mean: d => op.mean(d.x), // mean and average are synonyms
geom_mean: d => op.exp(op.mean(op.log(d.x))),
harm_mean: d => 1 / op.mean(1 / d.x)
})
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, NaN, 4, 6, 8, 9, 10] })
.rollup({
samples: d => op.valid(d.x), // invalid values are excluded!
stdev: d => op.stdev(d.x),
stderr: d => op.stdev(d.x) / op.sqrt(op.valid(d.x))
})
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.rollup({
lo: d => op.mean(d.x) - 1.96 * op.stdev(d.x) / op.sqrt(op.valid(d.x)),
hi: d => op.mean(d.x) + 1.96 * op.stdev(d.x) / op.sqrt(op.valid(d.x))
})
.view()
Insert cell
Insert cell
Insert cell
// compute 1000 bootstrapped mean estimates
// then find quantile boundaries for 95% CI
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.sample(aq.frac(1000), { replace: true })
.derive({ id: d => op.row_number() % 1000 })
.groupby('id')
.rollup({ mu: op.mean('x') })
.rollup({
lo: op.quantile('mu', 0.025), // exclude the bottom 2.5%
hi: op.quantile('mu', 0.975) // exclude the top 2.5%
})
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.derive({ z: ({ x }) => (x - op.mean(x)) / op.stdev(x) || 0 })
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.derive({ dev: d => d.x - op.median(d.x) })
.rollup({ mad: d => op.median(op.abs(d.dev)) })
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.derive({ dev: d => d.x - op.median(d.x) })
.derive({ dev: d => d.dev / op.median(op.abs(d.dev)) || 0 })
.view()
Insert cell
Insert cell
aq.table({
a: [1, 1, 2, 2, 3, 4, 5],
b: [1, 2, 2, 1, 3, 4, 10]
})
.derive({
za: ({ a: x }) => (x - op.mean(x)) / op.stdev(x) || 0,
zb: ({ b: x }) => (x - op.mean(x)) / op.stdev(x) || 0,
})
.derive({
dm: d => {
const s = op.covariance(d.za, d.zb) || 0; // internally calculated once and reused
const di = 1 / (1 - s * s); // inverse determinant of covariance matrix
return op.sqrt(di * (d.za * d.za - 2 * s * d.za * d.zb + d.zb * d.zb));
}
})
.select(aq.not('za', 'zb'))
.view(10)
Insert cell
Insert cell
aq.table({ x: [1, 1, 1, 2, 2, 3] })
.groupby('x').count({ as: 'n' }) // get counts
.derive({ p: d => d.n / op.sum(d.n) }) // from counts to probabilities
.rollup({ H: d => -op.sum(d.p * op.log2(d.p)) }) // entropy
.view()
Insert cell
Insert cell
// first compute histograms for (a, b), a, and b
// then compute mutual information I(a, b) and entropy H(a, b)
// using both we can also compute a normalized distance
aq.table({
a: [1, 1, 2, 2, 3, 4, 5],
b: [9, 8, 8, 8, 7, 6, 5]
})
.groupby('a', 'b').count({as: 'n'})
.groupby('a').derive({ nx: d => op.sum(d.n) })
.groupby('b').derive({ ny: d => op.sum(d.n) })
.ungroup().derive({ s: d => op.sum(d.n) })
.rollup({
I: d => op.sum((d.n / d.s) * op.log2((d.n / d.nx) * (d.s / d.ny))),
H: d => -op.sum((d.n / d.s) * op.log2(d.n / d.s))
})
.derive({ Distance: d => 1 - d.I / d.H })
.view()
Insert cell
Insert cell
aq.table({
a: [1, 2, 2, 3, 4, 5],
b: [9, 8, 8, 7, 6, 5]
})
.rollup({ corr: d => op.corr(d.a, d.b) })
.view()
Insert cell
Insert cell
aq.table({
a: [1, 2, 2, 3, 4, 5],
b: [9, 8, 8, 7, 6, 5]
})
.orderby('a').derive({ ra: op.avg_rank() })
.orderby('b').derive({ rb: op.avg_rank() })
.rollup({ rho: d => op.corr(d.ra, d.rb) })
.view()
Insert cell
Insert cell
aq.table({
a: [3, 4, 5, 6, 7],
b: [1, 2, 3, 4, 5]
})
.rollup({
cohens_d: ({ a, b }) => {
const va = (op.valid(a) - 1) * op.variance(a);
const vb = (op.valid(b) - 1) * op.variance(b);
const sd = op.sqrt((va + vb) / (op.valid(a) + op.valid(b) - 2));
return sd ? (op.mean(a) - op.mean(b)) / sd : 0;
}
})
.view()
Insert cell
Insert cell
Insert cell
aq.table({ x: [1, 2, 3], y: [4, 5, 6] })
.rollup({ dot: d => op.sum(d.x * d.y) })
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 5] })
.rollup({
'L1 norm': d => op.sum(op.abs(d.x)),
'L2 norm': d => op.sqrt(op.sum(d.x * d.x)),
'L3 norm': d => op.cbrt(op.sum(op.pow(op.abs(d.x), 3))),
'L∞ norm': d => op.max(op.abs(d.x))
})
.view()
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more