Public
Edited
Dec 21, 2022
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
dfd = require('danfojs@0.0.15/dist/index.min.js').catch(() => window.dfd)
Insert cell
Insert cell
import {db, duckdb, DuckDBClient} from '@cmudig/duckdb'
Insert cell
raw = {
const letters = "abcdefghijklmnopqrstuvwxyz"
const len = 10_000
let data = []
let aqTable = {
IDX:[],
LTR:[],
VAL: []
}
let sqlVals = []
for (let i=0; i<len;i++){
const rand = Math.random()
const letter = letters[Math.floor(letters.length * rand)]
data.push({
index: i,
letter: letter,
value: rand
})
aqTable["IDX"].push(i)
aqTable["LTR"].push(letter)
aqTable["VAL"].push(rand)
sqlVals.push(`(${i}, '${letter}', ${rand})`)
}
return {
data,
sqlVals,
df: aq.table(aqTable)
}
}
Insert cell
client= {
const duck = new DuckDBClient();
await duck.query(`CREATE TABLE dt(IDX INTEGER, LTR STRING, val FLOAT)`);
await duck.query(
`INSERT INTO dt VALUES ${raw.sqlVals.join(', ')};`
);
return duck
}
Insert cell
client.describeTables()
Insert cell
client.table(`SELECT SUM(val) FROM dt`)
Insert cell
df = raw.df
Insert cell
raw.data
Insert cell
JSON.parse(JSON.stringify(raw.data))
Insert cell
col = raw.data
Insert cell
UNIQUE_VALS = {
const iters = 10
let then = performance.now()
for (let i=0;i<iters;i++){
const unique = df.rollup({x: op.array_agg_distinct("LTR")})
}
let now = performance.now()
const aqPerf = now - then
then = performance.now()
for (let i=0; i<iters;i++){
const vals = new Set()
for (const d of col){
vals.add(d.LTR)
}
}
now = performance.now()
const vanillaPerf = now - then
return {
aqPerf,
vanillaPerf
}
}
Insert cell
SUM = {
// arquero
const iters = 1_000
let then = performance.now()
for (let i=0;i<iters;i++){
const unique = df.rollup({x: op.sum("VAL")})
}
let now = performance.now()
const aqPerf = now - then

// vanilla
then = performance.now()
for (let i=0; i<iters;i++){
let sum = 0;
for (let i=0;i<col.length;i++){
sum += col[i].VAL
}
}
now = performance.now()
const vanillaPerf = now - then

// duckdb
then = performance.now()
for (let i=0; i<iters;i++){
const sum = await client.table(`SELECT SUM(val) FROM dt`)
}
now = performance.now()
const duckPerf = now - then
return {
aqPerf,
duckPerf,
vanillaPerf
}
}
Insert cell
FILTER = {
const iters = 10
let then = performance.now()
for (let i=0;i<iters;i++){
const a = df.filter(f => f.LTR === "a")
}
let now = performance.now()
const aqPerf = now - then
then = performance.now()
for (let i=0; i<iters;i++){
let newData = []
for (let i=0; i<col.length;i++){
if (col[i].LTR === "a") newData.push(col[i])
}
}
now = performance.now()
const vanillaPerf = now - then
return {
aqPerf,
vanillaPerf
}
}
Insert cell
Insert cell
dt.view()
Insert cell
Insert cell
[dt.numCols(), dt.numRows()]
Insert cell
dt.columnNames()
Insert cell
dt.get('y', 2)
Insert cell
dt.array('y')
Insert cell
Insert cell
dt.objects()
Insert cell
Insert cell
[...dt] // or, Array.from(dt)
Insert cell
Insert cell
Insert cell
aq.from([{a: 1, b: 0.2}, {a: 4, b: 0.3}])
.view()
Insert cell
Insert cell
aq.from({a: 1, b: 2, c: 3})
.view()
Insert cell
aq.from(new Map([['d', 4], ['e', 5], ['f', 6]]))
.view()
Insert cell
Insert cell
flights = aq.loadArrow('https://vega.github.io/vega-datasets/data/flights-200k.arrow')
Insert cell
flights.view({ limit: 5 }) // view(5) also works as a shorthand
Insert cell
Insert cell
aq.fromJSON('{"id":[1,2],"date":["2020-06-01","2020-09-01"]}')
.view()
Insert cell
Insert cell
aq.table(JSON.parse('{"id":[1,2],"date":["2020-06-01","2020-09-01"]}'))
.view()
Insert cell
Insert cell
Insert cell
viewof beers = aq // viewof shows the table view, but assigns the table value
.fromCSV(await FileAttachment('beer.csv').text())
.view({ height: 240 }) // set maximum height of table viewer in pixels
Insert cell
Insert cell
Insert cell
Insert cell
beers
.orderby('abv')
.view(5) // shorthand for view({ limit: 5 })
Insert cell
Insert cell
beers
.orderby(aq.desc('abv'))
.view(5)
Insert cell
Insert cell
beers
.orderby(aq.desc('ibu'))
.select('name', 'style', 'ibu') // ['name', 'style', 'ibu'] also works
.view(5)
Insert cell
Insert cell
beers
.orderby(aq.desc('ibu'))
.select(0, 1, 4) // [0, 1, 4] also works
.view(5)
Insert cell
Insert cell
beers
.orderby(aq.desc('ibu'))
.select(aq.not('brewery_id')) // 'not' can accept multiple columns, too...
.view(5)
Insert cell
Insert cell
Insert cell
beers
.filter(d => op.includes(op.lower(d.name), 'hop'))
.view()
Insert cell
Insert cell
Insert cell
Insert cell
beers
.derive({ intensity: d => 3 * d.abv + op.log10(d.ibu) / 3 })
.orderby(aq.desc('intensity'))
.view(10)
Insert cell
Insert cell
beers
.rollup({
mean_abv: d => op.mean(d.abv), // op.average is available as a synonym
mean_ibu: d => op.mean(d.ibu),
mean_intensity: d => op.mean(3 * d.abv + op.log10(d.ibu) / 3),
corr_abv_ibu: d => op.corr(d.abv, d.ibu),
count: d => op.count()
})
.view()
Insert cell
Insert cell
beers
.groupby('style')
.rollup({
mean_abv: d => op.mean(d.abv),
mean_ibu: d => op.mean(d.ibu),
mean_intensity: d => op.mean(3 * d.abv + op.log10(d.ibu) / 3),
count: op.count()
})
.filter(d => d.count > 20) // let's not trust results with little support...
.orderby(aq.desc('mean_intensity'))
.view(10)
Insert cell
Insert cell
Insert cell
beers
.groupby('style')
.count()
.view(10)
Insert cell
Insert cell
// group by lower-case values, rename to 'type'
beers.groupby({ type: d => op.lower(d.style) })
Insert cell
Insert cell
Insert cell
beers
.groupby('style')
.derive({ intensity: d => 3 * d.abv + op.log10(d.ibu) / 3 })
.derive({ zscore: ({ intensity: x }) => (x - op.mean(x)) / op.stdev(x) || 0 })
.orderby(aq.desc('zscore'))
.select(aq.not('brewery_id', 'intensity'))
.view()
Insert cell
Insert cell
beers
.groupby('style')
.derive({ intensity: d => 3 * d.abv + op.log10(d.ibu) / 3 })
.derive({ zscore: ({ intensity: x }) => (x - op.mean(x)) / op.stdev(x) || 0 })
.orderby(aq.desc('zscore'))
.filter(d => op.rank() === 1 && op.count() > 20) // filter by rank and count
.select(aq.not('brewery_id', 'intensity'))
.view({ height: 400 }) // set max height to show full table without scroll
Insert cell
Insert cell
Insert cell
viewof breweries = aq
.fromCSV(await FileAttachment('breweries.csv').text())
.view(10)
Insert cell
Insert cell
beers
.join(breweries)
.view(5)
Insert cell
Insert cell
beers
.join(breweries, ['brewery_id', 'brewery_id']) // <- just 'brewery_id' also works!
.view(5)
Insert cell
Insert cell
beers
.join(breweries, 'brewery_id')
.groupby('style')
.rollup({ breweries: op.distinct('brewery_id') } )
.orderby(aq.desc('breweries'))
.view(10)
Insert cell
Insert cell
Insert cell
beers
.join(breweries,
(a, b) => op.equal(a.brewery_id, b.brewery_id),
[aq.all(), aq.not('brewery_id')]
)
.view(5)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
breweries
.semijoin(beers) // implicit comparison on 'brewery_id'
.count()
.view()
Insert cell
Insert cell
breweries
.antijoin(beers) // implicit comparison on 'brewery_id'
.count()
.view()
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more