Public
Edited
Mar 14, 2023
1 star
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Abbreviate each commit SHA listed in `shas` (one per line) to its first 7 characters.
// Every line is trimmed before use so that indentation or whitespace-only lines in the
// pasted text cannot turn into bogus SHAs; empty lines are skipped.
short_shas = shas
  .split("\n")
  .map(line => line.trim())
  .filter(line => line !== '')
  .map(commit => commit.slice(0, 7))
Insert cell
// Map a (short) commit SHA to the URL of its results file in the
// snakebench-public S3 bucket.
// To read from a local server instead, swap in:
//   sha_to_url = (sha) => `http://localhost:8080/results-${sha}.json`;
sha_to_url = (sha) => {
  const bucket = "https://snakebench-public.s3.us-east-2.amazonaws.com";
  return `${bucket}/results-${sha}.json`;
};
Insert cell
// One results-file URL per abbreviated SHA, in the same order as `short_shas`.
urls = short_shas.map((sha) => sha_to_url(sha))
Insert cell
// Download every URL in `urls` and parse each body as newline-delimited JSON.
// Resolves to an array with one entry per URL (same order as `urls`); each entry
// is the array of records parsed from that file, each tagged with `_order`.
// Rejects if any download fails or any line is not valid JSON.
results = await Promise.all(
  urls.map(async (url, index) => {
    const resp = await fetch(url);
    // Report the offending URL and HTTP status up front, rather than letting an
    // error-page body fail later inside JSON.parse with an unrelated message.
    if (!resp.ok) {
      throw new Error(`Failed to fetch ${url}: ${resp.status} ${resp.statusText}`);
    }
    const text = await resp.text();
    // Parse newline-delimited JSON, skipping blank lines so an empty file or
    // stray empty line yields no record instead of crashing JSON.parse("")
    return text
      .split("\n")
      .map(line => line.trim())
      .filter(line => line !== '')
      .map(line => {
        // Add `_order` to sort by order commits are listed
        const datum = JSON.parse(line);
        datum._order = index;
        return datum;
      });
  })
)
Insert cell
Insert cell
Insert cell
Insert cell
filterExpr = {
const filters = filter.trim().split("\n").filter(s => s !== '')
// Generating the arquero expression string feels a bit hacky.
// Is there a pure ops way to do this?
let expr = filters.map(f => `includes(d.name, "${f}")`).join(' || ');
if (filterExclude && filters.length) {
expr = '!(' + expr + ')';
}
return expr
}
Insert cell
fullData = {
// TODO split function into multiple variables, so whole thing doesn't rerun when you change any input
let data = aq.from(results.flat(1))

// Apply test name filters
if (filterExpr !== "") {
data = data.filter(filterExpr);
}
// Add cluster costs
if (data.columnIndex('worker_vm_type') !== -1) {
aq.addFunction('bestCluster', bestCluster, {override: true})
data = data
.lookup(instanceCosts, ['worker_vm_type', 'name'], 'hourlyPrice')
.derive({
workers_cost: d => d.hourlyPrice * d.n_workers / (3600 / d.duration)
})
.select(aq.not('hourlyPrice'))
.derive({
best_cluster: d => op.bestCluster({
minClusterMem: (d.peak_memory * 1.2) / 1024 ** 3,
minClusterCpus: d.n_threads,
minInstanceMem: 2,
minInstanceCpus: 2,
})
})
.derive({
ideal_worker_vm: d => d.best_cluster.name,
ideal_n_workers: d => d.best_cluster.count,
ideal_cluster_memory: d => d.best_cluster.totalMemoryGib * 1024 ** 3,
ideal_n_threads: d => d.best_cluster.totalVcpus,
ideal_workers_cost: d => d.best_cluster.totalHourlyPrice / (3600 / d.duration),
})
.select(aq.not('best_cluster'))
}

if (data.columnIndex('total_transfer_per_worker') !== -1) {
aq.addFunction('arraySum', arr => Array.isArray(arr) ? arr.reduce((a, b) => a + b, 0) : undefined, {override: true})
data = data
.derive({
'bytes_transferred': d => op.arraySum(d.total_transfer_per_worker)
})
}
data = data
.params({measure: measure})
.derive({
'value': (d, $) => d[$.measure]
})
.derive({
branch: d => op.replace(d.branch, 'bench/', ''),
passed: d => d.setup_outcome === 'passed' && d.call_outcome === 'passed'
})
.derive({
group: (
groupby === "commit"
? d => d.branch + '@' + d.commit
: d => d.branch
)
}, {
after: 'name'
})

if (show !== "all") {
data = data
.filter(show === 'passed' ? d => d.passed : d => !d.passed)
}

if (everygroup) {
// Restrict to tests that ran in every group
const ngroups = data.rollup({ngroups: d => op.distinct(d.group)}).get('ngroups')
data = data
.params({ngroups: ngroups})
.groupby('name')
.filter(d => op.distinct(d.group) == ngroups)
}
return data
}
Insert cell
// remove memory sampler data in most cases
data = fullData.select(aq.not('memory_samples', 'memory_times'))
Insert cell
// Show the `data` table by calling its view() method.
data.view()
Insert cell
Insert cell
Insert cell
Insert cell
// Show the `group_order` table (defined in another cell) by calling its view() method.
group_order.view()
Insert cell
Insert cell
Insert cell
// Bootstrap resampling of `value` per (name, group).
// Rows are drawn with replacement within each (name, group), assigned an `id`
// of row_number modulo `sample_size`, and then averaged per (name, group, id)
// into `bootstrap_run`.
// NOTE(review): aq.frac(sample_size) presumably draws sample_size times each
// group's row count, giving every id about one group's worth of rows -- confirm
// against the arquero docs.
bootstrapped = data
  .params({ sample_size })
  .groupby('name', 'group')
  .sample(aq.frac(sample_size), { replace: true })
  .derive({
    id: (row, $) => op.row_number() % $.sample_size
  })
  .groupby('name', 'group', 'id')
  .rollup({
    bootstrap_run: row => op.mean(row.value)
  })
Insert cell
// Show the `bootstrapped` table by calling its view() method.
bootstrapped.view()
Insert cell
relative_diff = bootstrapped
.groupby(['name', 'id'])
// pull out just baseline group into separate column
.pivot('group', 'bootstrap_run')
.relocate({[baseline_group]: 'baseline'}, {after: 'id'})
// turn other groups back into separate rows
.fold(aq.not('name', 'id', 'baseline'), {as: ['group', 'value']})
// calculate difference from baseline
.derive({diff: d => d.value / d.baseline - 1})
.filter(d => op.is_finite(d.diff))
.lookup(group_order, 'group', '_order')
Insert cell
// Show the `relative_diff` table by calling its view() method.
relative_diff.view()
Insert cell
Insert cell
// Per-test (by `name`) mean and standard deviation of `value` in `baseline_data`.
zbaseline = baseline_data
  .groupby('name')
  .rollup({
    base_mean: row => op.mean(row.value),
    base_stdev: row => op.stdev(row.value),
  })
Insert cell
zdata = data
.lookup(zbaseline, 'name', 'base_mean', 'base_stdev')
.groupby('name')
.derive(
{
zvalue: d => (d.value - d.base_mean) / d.base_stdev || 0,
diff: d => (d.value / d.base_mean) - 1
},
{after: 'value'}
)
.lookup(group_order, 'group', '_order')
Insert cell
// Show the `zdata` table by calling its view() method.
zdata.view()
Insert cell
// Mean of the `diff` column of `zdata`, shown via view().
zdata
  // .ungroup()
  .select('diff')
  .rollup({ diff: op.mean('diff') })
  .view()
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more