Public
Edited
Jul 17, 2024
6 forks
Importers
85 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
aq.table({
country: ['USA', 'USA', 'Canada', 'Canada'],
medal: ['gold', 'silver', 'gold', 'silver'],
count: [10, 20, 7, 26]
})
.groupby('country') // ensures country is retained as a column
.pivot('medal', 'count') // medal provides keys (column names), count provides values
.view()
Insert cell
Insert cell
aq.table({
country: ['USA', 'USA', 'Canada', 'Canada'],
medal: ['gold', 'silver', 'gold', 'silver'],
count: [10, 20, 7, 26]
})
// note there is no groupby, we will collapse across countries!
.pivot('medal', { total: d => op.sum(d.count) }) // sum counts within a cell
.view()
Insert cell
Insert cell
Insert cell
aq.table({
country: ['USA', 'Canada'],
gold: [10, 7],
silver: [20, 26]
})
.fold(aq.not('country'), { as: ['medal', 'count'] })
.view()
Insert cell
Insert cell
Insert cell
aq.table({ w: ["if I've said it once", "I've said it a thousand times"] })
.derive({ w: d => op.split(d.w, ' ') }) // split on spaces into arrays, replace previous column
.unroll('w') // unroll arrays, with one row per value
.groupby('w') // group by words
.count() // count word occurrences
.view()
Insert cell
Insert cell
Insert cell
{
// import d3-time-format
const d3 = await require('d3-time-format');
return aq.fromCSV(
'date\n1/1/2000\n06/01/2010\n12/10/2020',
{ parse: { date: d3.timeParse('%m/%d/%Y') } }
)
.view()
}
Insert cell
Insert cell
{
// import d3-time-format
const d3 = await require('d3-time-format');
// create and register a date parser under the name 'parse_mdy'
// use { override: true } to suppress errors if we re-evaluate this cell
aq.addFunction('parse_mdy', d3.timeParse('%m/%d/%Y'), { override: true });
return aq
.table({ date: ['1/1/2000', '06/01/2010', '12/10/2020'] })
.derive({ date: d => op.parse_mdy(d.date) })
.view()
}
Insert cell
Insert cell
{
// import d3-time-format
const d3 = await require('d3-time-format');
const parseMDY = d3.timeParse('%m/%d/%Y');
// escape our derive expression to call the function as-is, including closures
return aq
.table({ date: ['1/1/2000', '06/01/2010', '12/10/2020'] })
.derive({ date: aq.escape(d => parseMDY(d.date)) })
.view()
}
Insert cell
Insert cell
function normalize_column(name) {
return name.toLowerCase() // map to lower case
.replace(/[%#$£()\'\"]/g, '') // remove unwanted characters
.replace(/[ /,+.*:\-\r\n@]/g, '_') // replace spacing and punctuation with an underscore
.replace(/_+/g, '_') // collapse repeated underscores
.normalize('NFD') // perform unicode normalization
.replace(/[\u0300-\u036f]/g, ''); // strip accents from characters
}
Insert cell
Insert cell
function normalize(table) {
const name = table.columnNames();
return aq.table({ name, norm: name.map(normalize_column) }) // create table of names & normalized names
.groupby('norm') // group by normalized name
.derive({ index: op.row_number(), count: op.count() }) // count duplicates, generate index for each
.objects() // generate an array of { name, norm } objects
.map(o => ({ [o.name]: o.norm + (o.count > 1 ? `_${o.index}` : '') })); // rename, adding index as needed
}
Insert cell
Insert cell
aq.table({ 'FOO%+BAr': [1], 'FOO++BAr': [2], 'COL.1': [3], 'käse': [4] })
.select(normalize)
.columnNames()
Insert cell
Insert cell
Insert cell
aq.table({
region: ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
quarter: ['Q1', 'Q2', 'Q3', 'Q4', 'Q1', 'Q2', 'Q3', 'Q4'],
sales: [6, 5, 3, 2, 4, 8, 2, 6]
})
.groupby('quarter') // we want to compute percentage of sales for that quarter
.derive({ percent: d => d.sales / op.sum(d.sales) || 0 }) // include `|| 0` in case sum is zero
.orderby('quarter') // sort by quarter to ease comparison
.view()
Insert cell
Insert cell
aq.table({ x: [1, 1, 2, 2, 3, 3, 4, 4, 5, 5] })
.orderby('x')
.derive({ ra: aq.rolling(d => op.sum(d.x)) })
.view()
Insert cell
Insert cell
aq.table({ x: [1.1, 2.2, 3.2, 4.3, 6.5, 8.6, 9.7, 10.1] })
.orderby('x')
.derive({ ra: aq.rolling(d => op.average(d.x), [-2, 0]) })
.view()
Insert cell
Insert cell
Insert cell
aq.bin('x', { maxbins: 10 })
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 4, 5, 5, 5, 6, 6, 8, 9, 10] })
.groupby({
bin_0: aq.bin('x', {maxbins: 10}),
bin_1: aq.bin('x', {maxbins: 10, offset: 1})
})
.count()
.view()
Insert cell
Insert cell
// Note: T\the aq.bin and op.bins calls should have the same parameters!
aq.table({ x: [1.1, 3.2, 4.1, 4.2, 5.1, 5.3, 5.4, 6.1, 6.6, 8.8, 9.3, 10] })
.groupby({ bin: aq.bin('x') })
.count()
.impute(
{ count: () => 0 }, // set imputed counts to zero
{ expand: { bin: d => op.sequence(...op.bins(d.bin)) } } // include rows for all bin values
// Note: op.bins(d.bin) -> [start, stop, step]
// so op.sequence(...op.bins(d.bin)) -> op.sequence(start, stop, step)
)
.orderby('bin')
.view();
Insert cell
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.rollup({
average: d => op.average(d.x),
mean: d => op.mean(d.x), // mean and average are synonyms
geom_mean: d => op.exp(op.mean(op.log(d.x))),
harm_mean: d => 1 / op.mean(1 / d.x)
})
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, NaN, 4, 6, 8, 9, 10] })
.rollup({
samples: d => op.valid(d.x), // invalid values are excluded!
stdev: d => op.stdev(d.x),
stderr: d => op.stdev(d.x) / op.sqrt(op.valid(d.x))
})
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.rollup({
lo: d => op.mean(d.x) - 1.96 * op.stdev(d.x) / op.sqrt(op.valid(d.x)),
hi: d => op.mean(d.x) + 1.96 * op.stdev(d.x) / op.sqrt(op.valid(d.x))
})
.view()
Insert cell
Insert cell
Insert cell
// compute 1000 bootstrapped mean estimates
// then find quantile boundaries for 95% CI
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.sample(aq.frac(1000), { replace: true })
.derive({ id: d => op.row_number() % 1000 })
.groupby('id')
.rollup({ mu: op.mean('x') })
.rollup({
lo: op.quantile('mu', 0.025), // exclude the bottom 2.5%
hi: op.quantile('mu', 0.975) // exclude the top 2.5%
})
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.derive({ z: ({ x }) => (x - op.mean(x)) / op.stdev(x) || 0 })
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.derive({ dev: d => d.x - op.median(d.x) })
.rollup({ mad: d => op.median(op.abs(d.dev)) })
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 6, 8, 9, 10] })
.derive({ dev: d => d.x - op.median(d.x) })
.derive({ dev: d => d.dev / op.median(op.abs(d.dev)) || 0 })
.view()
Insert cell
Insert cell
aq.table({
a: [1, 1, 2, 2, 3, 4, 5],
b: [1, 2, 2, 1, 3, 4, 10]
})
.derive({
za: ({ a: x }) => (x - op.mean(x)) / op.stdev(x) || 0,
zb: ({ b: x }) => (x - op.mean(x)) / op.stdev(x) || 0,
})
.derive({
dm: ({za, zb}) => {
const s = op.covariance(za, zb) || 0; // internally calculated once and reused
const di = 1 / (1 - s * s); // inverse determinant of covariance matrix
return op.sqrt(di * (za * za - 2 * s * za * zb + zb * zb));
}
})
.select(aq.not('za', 'zb'))
.view(10)
Insert cell
Insert cell
aq.table({ x: [1, 1, 1, 2, 2, 3] })
.groupby('x').count({ as: 'n' }) // get counts
.derive({ p: d => d.n / op.sum(d.n) }) // from counts to probabilities
.rollup({ H: d => -op.sum(d.p * op.log2(d.p)) }) // entropy
.view()
Insert cell
Insert cell
// first compute histograms for (a, b), a, and b
// then compute mutual information I(a, b) and entropy H(a, b)
// using both we can also compute a normalized distance
aq.table({
a: [1, 1, 2, 2, 3, 4, 5],
b: [9, 8, 8, 8, 7, 6, 5]
})
.groupby('a', 'b').count({as: 'n'})
.groupby('a').derive({ nx: d => op.sum(d.n) })
.groupby('b').derive({ ny: d => op.sum(d.n) })
.ungroup().derive({ s: d => op.sum(d.n) })
.rollup({
I: d => op.sum((d.n / d.s) * op.log2((d.n / d.nx) * (d.s / d.ny))),
H: d => -op.sum((d.n / d.s) * op.log2(d.n / d.s))
})
.derive({ Distance: d => 1 - d.I / d.H })
.view()
Insert cell
Insert cell
aq.table({
a: [1, 2, 2, 3, 4, 5],
b: [9, 8, 8, 7, 6, 5]
})
.rollup({ corr: d => op.corr(d.a, d.b) })
.view()
Insert cell
Insert cell
aq.table({
a: [1, 2, 2, 3, 4, 5],
b: [9, 8, 8, 7, 6, 5]
})
.orderby('a').derive({ ra: op.avg_rank() })
.orderby('b').derive({ rb: op.avg_rank() })
.rollup({ rho: d => op.corr(d.ra, d.rb) })
.view()
Insert cell
Insert cell
aq.table({
a: [3, 4, 5, 6, 7],
b: [1, 2, 3, 4, 5]
})
.rollup({
cohens_d: ({ a, b }) => {
const va = (op.valid(a) - 1) * op.variance(a);
const vb = (op.valid(b) - 1) * op.variance(b);
const sd = op.sqrt((va + vb) / (op.valid(a) + op.valid(b) - 2));
return sd ? (op.mean(a) - op.mean(b)) / sd : 0;
}
})
.view()
Insert cell
Insert cell
Insert cell
aq.table({ x: [1, 2, 3], y: [4, 5, 6] })
.rollup({ dot: d => op.sum(d.x * d.y) })
.view()
Insert cell
Insert cell
aq.table({ x: [1, 2, 3, 4, 5] })
.rollup({
'L1 norm': d => op.sum(op.abs(d.x)),
'L2 norm': d => op.sqrt(op.sum(d.x * d.x)),
'L3 norm': d => op.cbrt(op.sum(op.pow(op.abs(d.x), 3))),
'L∞ norm': d => op.max(op.abs(d.x))
})
.view()
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more