Public
Edited
Dec 21, 2022
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
dfd = require('danfojs@0.0.15/dist/index.min.js').catch(() => window.dfd)
Insert cell
Insert cell
import {db, duckdb, DuckDBClient} from '@cmudig/duckdb'
Insert cell
raw = {
const letters = "abcdefghijklmnopqrstuvwxyz"
const len = 10_000
let data = []
let aqTable = {
IDX:[],
LTR:[],
VAL: []
}
let sqlVals = []
for (let i=0; i<len;i++){
const rand = Math.random()
const letter = letters[Math.floor(letters.length * rand)]
data.push({
index: i,
letter: letter,
value: rand
})
aqTable["IDX"].push(i)
aqTable["LTR"].push(letter)
aqTable["VAL"].push(rand)
sqlVals.push(`(${i}, '${letter}', ${rand})`)
}
return {
data,
sqlVals,
df: aq.table(aqTable)
}
}
Insert cell
client= {
const duck = new DuckDBClient();
await duck.query(`CREATE TABLE dt(IDX INTEGER, LTR STRING, val FLOAT)`);
await duck.query(
`INSERT INTO dt VALUES ${raw.sqlVals.join(', ')};`
);
return duck
}
Insert cell
client.describeTables()
Insert cell
client.table(`SELECT SUM(val) FROM dt`)
Insert cell
df = raw.df
Insert cell
raw.data
Insert cell
JSON.parse(JSON.stringify(raw.data))
Insert cell
col = raw.data
Insert cell
UNIQUE_VALS = {
const iters = 10
let then = performance.now()
for (let i=0;i<iters;i++){
const unique = df.rollup({x: op.array_agg_distinct("LTR")})
}
let now = performance.now()
const aqPerf = now - then
then = performance.now()
for (let i=0; i<iters;i++){
const vals = new Set()
for (const d of col){
vals.add(d.LTR)
}
}
now = performance.now()
const vanillaPerf = now - then
return {
aqPerf,
vanillaPerf
}
}
Insert cell
SUM = {
// arquero
const iters = 1_000
let then = performance.now()
for (let i=0;i<iters;i++){
const unique = df.rollup({x: op.sum("VAL")})
}
let now = performance.now()
const aqPerf = now - then

// vanilla
then = performance.now()
for (let i=0; i<iters;i++){
let sum = 0;
for (let i=0;i<col.length;i++){
sum += col[i].VAL
}
}
now = performance.now()
const vanillaPerf = now - then

// duckdb
then = performance.now()
for (let i=0; i<iters;i++){
const sum = await client.table(`SELECT SUM(val) FROM dt`)
}
now = performance.now()
const duckPerf = now - then
return {
aqPerf,
duckPerf,
vanillaPerf
}
}
Insert cell
FILTER = {
const iters = 10
let then = performance.now()
for (let i=0;i<iters;i++){
const a = df.filter(f => f.LTR === "a")
}
let now = performance.now()
const aqPerf = now - then
then = performance.now()
for (let i=0; i<iters;i++){
let newData = []
for (let i=0; i<col.length;i++){
if (col[i].LTR === "a") newData.push(col[i])
}
}
now = performance.now()
const vanillaPerf = now - then
return {
aqPerf,
vanillaPerf
}
}
Insert cell
Insert cell
dt.view()
Insert cell
Insert cell
[dt.numCols(), dt.numRows()]
Insert cell
dt.columnNames()
Insert cell
dt.get('y', 2)
Insert cell
dt.array('y')
Insert cell
Insert cell
dt.objects()
Insert cell
Insert cell
[...dt] // or, Array.from(dt)
Insert cell
Insert cell
Insert cell
aq.from([{a: 1, b: 0.2}, {a: 4, b: 0.3}])
.view()
Insert cell
Insert cell
aq.from({a: 1, b: 2, c: 3})
.view()
Insert cell
aq.from(new Map([['d', 4], ['e', 5], ['f', 6]]))
.view()
Insert cell
Insert cell
flights = aq.loadArrow('https://vega.github.io/vega-datasets/data/flights-200k.arrow')
Insert cell
flights.view({ limit: 5 }) // view(5) also works as a shorthand
Insert cell
Insert cell
aq.fromJSON('{"id":[1,2],"date":["2020-06-01","2020-09-01"]}')
.view()
Insert cell
Insert cell
aq.table(JSON.parse('{"id":[1,2],"date":["2020-06-01","2020-09-01"]}'))
.view()
Insert cell
Insert cell
Insert cell
viewof beers = aq // viewof shows the table view, but assigns the table value
.fromCSV(await FileAttachment('beer.csv').text())
.view({ height: 240 }) // set maximum height of table viewer in pixels
Insert cell
Insert cell
Insert cell
Insert cell
beers
.orderby('abv')
.view(5) // shorthand for view({ limit: 5 })
Insert cell
Insert cell
beers
.orderby(aq.desc('abv'))
.view(5)
Insert cell
Insert cell
beers
.orderby(aq.desc('ibu'))
.select('name', 'style', 'ibu') // ['name', 'style', 'ibu'] also works
.view(5)
Insert cell
Insert cell
beers
.orderby(aq.desc('ibu'))
.select(0, 1, 4) // [0, 1, 4] also works
.view(5)
Insert cell
Insert cell
beers
.orderby(aq.desc('ibu'))
.select(aq.not('brewery_id')) // 'not' can accept multiple columns, too...
.view(5)
Insert cell
Insert cell
Insert cell
beers
.filter(d => op.includes(op.lower(d.name), 'hop'))
.view()
Insert cell
Insert cell
Insert cell
Insert cell
beers
.derive({ intensity: d => 3 * d.abv + op.log10(d.ibu) / 3 })
.orderby(aq.desc('intensity'))
.view(10)
Insert cell
Insert cell
beers
.rollup({
mean_abv: d => op.mean(d.abv), // op.average is available as a synonym
mean_ibu: d => op.mean(d.ibu),
mean_intensity: d => op.mean(3 * d.abv + op.log10(d.ibu) / 3),
corr_abv_ibu: d => op.corr(d.abv, d.ibu),
count: d => op.count()
})
.view()
Insert cell
Insert cell
beers
.groupby('style')
.rollup({
mean_abv: d => op.mean(d.abv),
mean_ibu: d => op.mean(d.ibu),
mean_intensity: d => op.mean(3 * d.abv + op.log10(d.ibu) / 3),
count: op.count()
})
.filter(d => d.count > 20) // let's not trust results with little support...
.orderby(aq.desc('mean_intensity'))
.view(10)
Insert cell
Insert cell
Insert cell
beers
.groupby('style')
.count()
.view(10)
Insert cell
Insert cell
// group by lower-case values, rename to 'type'
beers.groupby({ type: d => op.lower(d.style) })
Insert cell
Insert cell
Insert cell
beers
.groupby('style')
.derive({ intensity: d => 3 * d.abv + op.log10(d.ibu) / 3 })
.derive({ zscore: ({ intensity: x }) => (x - op.mean(x)) / op.stdev(x) || 0 })
.orderby(aq.desc('zscore'))
.select(aq.not('brewery_id', 'intensity'))
.view()
Insert cell
Insert cell
beers
.groupby('style')
.derive({ intensity: d => 3 * d.abv + op.log10(d.ibu) / 3 })
.derive({ zscore: ({ intensity: x }) => (x - op.mean(x)) / op.stdev(x) || 0 })
.orderby(aq.desc('zscore'))
.filter(d => op.rank() === 1 && op.count() > 20) // filter by rank and count
.select(aq.not('brewery_id', 'intensity'))
.view({ height: 400 }) // set max height to show full table without scroll
Insert cell
Insert cell
Insert cell
viewof breweries = aq
.fromCSV(await FileAttachment('breweries.csv').text())
.view(10)
Insert cell
Insert cell
beers
.join(breweries)
.view(5)
Insert cell
Insert cell
beers
.join(breweries, ['brewery_id', 'brewery_id']) // <- just 'brewery_id' also works!
.view(5)
Insert cell
Insert cell
beers
.join(breweries, 'brewery_id')
.groupby('style')
.rollup({ breweries: op.distinct('brewery_id') } )
.orderby(aq.desc('breweries'))
.view(10)
Insert cell
Insert cell
Insert cell
beers
.join(breweries,
(a, b) => op.equal(a.brewery_id, b.brewery_id),
[aq.all(), aq.not('brewery_id')]
)
.view(5)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
breweries
.semijoin(beers) // implicit comparison on 'brewery_id'
.count()
.view()
Insert cell
Insert cell
breweries
.antijoin(beers) // implicit comparison on 'brewery_id'
.count()
.view()
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more