Public
Edited
Jul 14, 2023
Insert cell
Insert cell
import {vl} from '@vega/vega-lite-api-v5'
Insert cell
Insert cell
Insert cell
Insert cell
aq.from(exp1).groupby('interface')
.pivot('size',{total: d => op.average(d.duration)})
.view()
Insert cell
aq.from(exp1).groupby('dataset','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('interface', 'dataset')
.view()
Insert cell
Insert cell
aq.from(exp1).groupby('dataset','workflow','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('interface','dataset')
.view()
Insert cell
Insert cell
vl.markBoxplot({clip: true}).data(exp1.filter(d => d.interface === "duck" || d.interface === "monetdb"))
.width(1000)
.title("Average Query Duration Across All Dashboards")
.encode(
vl.row().fieldN('dataset'),
vl.column().fieldN('size').sort(["100k","1M","10M"]),
vl.y().fieldN('interface'),
vl.x().mean('duration').scale({domain: [0,200]})
).render()
Insert cell
vl.markBoxplot({clip: true}).data(exp1.filter(d => d.interface === "duck" || d.interface === "monetdb"))
.width(1000)
.title("Average Query Duration Across All Dashboards")
.encode(
vl.row().fieldN('size').sort(["100k","1M","10M"]),
vl.y().fieldN('interface'),
vl.x().mean('duration').scale({domain: [0,400]})
).render()
Insert cell
Insert cell
Insert cell
aq.from(exp1.filter(d => d.interface === "duck")).groupby('dataset','workflow','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('workflow','interface', 'dataset')
.view()
Insert cell
vl.markBoxplot({clip:true}).data(exp1.filter(d => d.interface === "duck" && d.size === "10M" && d.workflow === "shneiderman"))
.width(400)
.encode(
vl.y().fieldN('dataset').title("Dashboard"),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 350]}).title("Query Duration (ms)"),
vl.color().fieldN('dataset')
).render()
Insert cell
vl.markBoxplot({clip:true}).data(exp1.filter(d => d.interface === "monetdb" && d.size === "10M"))
.width(1000)
.encode(
vl.y().fieldN('dataset'),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 300]}),
vl.color().fieldN('dataset')
).render()
Insert cell
vl.markBoxplot({clip:true}).data(exp1.filter(d => d.interface === "postgres" && d.size === "10M"))
.width(1000)
.encode(
vl.y().fieldN('dataset'),
vl.x().mean('duration').scale({zero: false}).scale({domain: [300, 3000]}),
vl.color().fieldN('dataset')
).render()
Insert cell
vl.markBoxplot({clip:true}).data(exp1.filter(d => d.interface === "sqlite" && d.size === "10M"))
.width(1000)
.encode(
vl.y().fieldN('dataset'),
vl.x().mean('duration').scale({zero: false}).scale({domain: [300, 5000]}),
vl.color().fieldN('dataset')
).render()
Insert cell
vl.markBoxplot({clip:true}).data(exp1.filter(d => d.interface === "duck" && d.size === "10M"))
.width(350)
.encode(
vl.row().fieldN('dataset').title("Dashboard"),
vl.y().fieldN('workflow').title("Workflow"),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 200]}).title("Query Duration (ms)"),
vl.color().fieldN('workflow').legend(null)
).render()
Insert cell
Insert cell
myRideMixed50 = d3.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vRvDi2_jPtaCWw_nZg80OTEAcMJpAR1dMEHGRdqSJvy7eLrrGSjA7mRG1jOxVD8Yevu1oR273__HhAL/pub?gid=67040716&single=true&output=csv")
Insert cell
manifest = d3.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vS3D9D1spgQc7pPap0vGyofDJpAkl0spmpE1kCvPRb1vGApRQ_9LBXmjdpO_2-g2FLEzkC_c8n27dol/pub?gid=581401638&single=true&output=csv")
Insert cell
myRideMixed50
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
vl.markBoxplot().data(myRideMixed50)
.encode(
vl.row().fieldN('size'),
vl.y().fieldN('interface'),
vl.x().mean('duration').scale({zero: false})
).render()
Insert cell
Insert cell
vl.markBoxplot().data(myRideMixed50)
.transform([
vl.filter("datum.interface !== 'sqlite'")
])
.encode(
vl.row().fieldN('size'),
vl.y().fieldN('interface'),
vl.x().mean('duration').scale({zero: false})
).render()
Insert cell
Insert cell
vl.markBoxplot().data(myRideMixed50)
.transform([
vl.filter("datum.interface === 'duck' || datum.interface === 'monetdb'")
])
.encode(
vl.row().fieldN('size'),
vl.y().fieldN('interface'),
vl.x().mean('duration').scale({zero: false})
).render()
Insert cell
Insert cell
Insert cell
Insert cell
myRideMixed50_all = vl.vconcat(myRideMixed50_100k,myRideMixed50_1M,myRideMixed50_10M)
.title("Query Response Time Distributions -- My Ride Dashboard (Mixed 50 Workload)")
.render()
Insert cell
Insert cell
Insert cell
Insert cell
example = {
var e = {"a": null, "b": null, "c": null};
e.a = 1;
e.b = 2;
e.c = 3;
return e;
}
Insert cell
import {aq, op} from '@uwdata/arquero'
Insert cell
exp1 = aq.loadCSV("https://docs.google.com/spreadsheets/d/e/2PACX-1vSHdYgQM0_ED1K2nUmeYNJEP0oWlyNlo2o2ZgaDwCRZVsg-DxDZa6KpCP9XpO4x4cAcXivXZv8e6qqM/pub?gid=1449390137&single=true&output=csv")
Insert cell
tab= Inputs.table(myRideMixed50,{
columns: [
"interface",
"size",
"dataset",
"workflow",
"duration"
],
header: {
interface: "DBMS",
size: "size",
dataset: "dashboard",
workflow: "workflow",
duration: "average duration"
}
})
Insert cell
Insert cell
// For each dashboard, how many queries are associated with each interaction? Median and average
aq.from(exp1).filter(d => d.interface === "duck" && d.size === "1M").groupby('dataset','interaction_id', 'workflow').rollup({queries: op.count()}).groupby('dataset', 'workflow').rollup({avg_queries: d => op.average(d.queries), med_queries: d => op.median(d.queries)}).orderby('dataset', 'workflow').view()
Insert cell
// performance numbers for DuckDB only
aq.from(exp1).filter(d => d.interface === "duck").groupby('dataset','workflow','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('dataset')
.view()
Insert cell
vl.markBoxplot({clip: true}).data(exp1)
.title("Average Query Duration Across All Dashboards")
.encode(
vl.row().fieldN('size').sort(["100k","1M","10M"]),
vl.column().fieldN('workflow'),
vl.y().fieldN('interface'),
vl.x().mean('duration').scale({domain: [0,15000]})
).render()
Insert cell
Insert cell
aq.from(exp1).groupby('dataset','workflow','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('dataset')
.view()
Insert cell
aq.from(exp1).view({ limit: 5 })
Insert cell
Insert cell
exp2b = aq.loadCSV("https://docs.google.com/spreadsheets/d/e/2PACX-1vRDz-IubupT3q6A8cXAolxHgb3IyTrsAePEmXfFVPlEKDWg2jpI2pMA04qbh5ZHMQtGhCT6yRceEc2W/pub?gid=843033891&single=true&output=csv")
Insert cell
aq.from(exp2b).groupby('dataset','workflow','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('dataset')
.view()
Insert cell
Insert cell
aq.from(exp2b).groupby('dataset','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('dataset')
.view()
Insert cell
exp2 = aq.loadCSV("https://docs.google.com/spreadsheets/d/e/2PACX-1vSHdYgQM0_ED1K2nUmeYNJEP0oWlyNlo2o2ZgaDwCRZVsg-DxDZa6KpCP9XpO4x4cAcXivXZv8e6qqM/pub?gid=815158640&single=true&output=csv")
Insert cell
aq.from(exp2).groupby('dataset','workflow','interface')
.pivot('size',{total: d => op.average(d.duration)}).orderby('dataset')
.view()
Insert cell
exp2b_breakout = aq.loadCSV("https://docs.google.com/spreadsheets/d/e/2PACX-1vSHdYgQM0_ED1K2nUmeYNJEP0oWlyNlo2o2ZgaDwCRZVsg-DxDZa6KpCP9XpO4x4cAcXivXZv8e6qqM/pub?gid=1352742567&single=true&output=csv")
Insert cell
vl.markBoxplot({clip:true}).data(exp2b_breakout.filter(d => (d.interface === "monetdb" || d.interface === "duck" || d.interface === "shneiderman")))
.width(1000).encode(
vl.row().fieldN('interface'),
vl.y().fieldN('workflow'),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 50]}),
vl.color().fieldN('workflow')
).render()
Insert cell
exp1_edit = exp1.derive({ exp: d => 'SIMBA' })
Insert cell
exp2_edit = exp2.derive({ exp: d => 'IDEBench' })

Insert cell
cross_exp = exp1_edit.concat(exp2_edit)
Insert cell
vl.markBoxplot({clip:true}).data(cross_exp.filter(d => d.interface === "duck" && d.size === "10M"))
.width(900)
.encode(
vl.row('exp').title("System"),
vl.y().fieldN('dataset').title("Dashboard"),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 2000]}).title("Query Duration (ms)"),
vl.color().fieldN('dataset')
).render()
Insert cell
aq.from(cross_exp.filter(d => d.size === "10M" && d.interface === "duck")).groupby(['exp','interface','dataset'])
.pivot('exp',{total: d => op.max(d.duration)})
.view()
Insert cell
vl.markBoxplot({clip:true}).data(cross_exp.filter(d => d.interface === "duck" && d.size === "10M"))
.width(900)
.encode(
vl.row('exp').title("System"),
vl.y().fieldN('dataset').title("Dashboard"),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 500]}).title("Query Duration (ms)"),
vl.color().fieldN('dataset')
).render()
Insert cell
vl.markBoxplot({clip:true}).data(cross_exp.filter(d => d.size === "10M"))
.width(900)
.encode(
vl.row('exp').title("System"),
vl.y().fieldN('interface').title("Interface"),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 5000]}).title("Query Duration (ms)"),
vl.color().fieldN('interface')
).render()
Insert cell
vl.markBoxplot({clip:true}).data(exp2.filter(d => d.interface === "duck" && d.size === "10M"))
.width(400)
.encode(
vl.y().fieldN('dataset').title("Dashboard"),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 350]}).title("Query Duration (ms)"),
vl.color().fieldN('dataset')
).render()
Insert cell
aq.from(cross_exp.filter(d => d.size === "10M" && d.interface === "duck")).groupby(['viz_name','workflow'])
.pivot('exp',{total: d => op.max(d.duration)})
.view()
Insert cell
Insert cell
vl.markBoxplot({clip:true}).data(exp1.filter(d => d.size === "100k"))
.width(900)
.encode(
vl.row('interface').title("System"),
vl.y().fieldN('dataset').title("Dashboard"),
vl.x().mean('duration').scale({zero: false}).scale({domain: [0, 2000]}).title("Query Duration (ms)"),
vl.color().fieldN('dataset')
).render()
Insert cell
aq.from(exp1.filter(d => d.size === "100k" && (d.interface === "monetdb" || d.interface === "duck") && d.dataset ==="ITMonitor")).groupby(['workflow','viz_name'])
.pivot('interface',{total: d => op.average(d.duration)})
.view()
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more