Published
Edited
Aug 16, 2022
1 star
Insert cell
Insert cell
Insert cell
Insert cell
import { aq, op } from "@uwdata/arquero";
Insert cell
// Short alias for Arquero's `desc` helper; a later cell passes desc("mean_intensity")
// to the orderby verb to request a descending sort.
desc = aq.desc;
Insert cell
// Load the minified Vega build; the version (5.22.1) is pinned in the module specifier.
vega = require('vega@5.22.1/build/vega.min.js');
Insert cell
import { pyxis } from '@leibatt/pyxis';
Insert cell
Insert cell
Insert cell
beers = pyxis.jsonObjectToDataset(await FileAttachment("beers@1.json").json(),"beers");
Insert cell
breweries = pyxis.jsonObjectToDataset(await FileAttachment("breweries.json").json(), "breweries");
Insert cell
penguinsJson = (await fetch("https://raw.githubusercontent.com/vega/vega-datasets/next/data/penguins.json")).json();
Insert cell
penguins = pyxis.jsonObjectToDataset(penguinsJson, "penguins");
Insert cell
Insert cell
Insert cell
// Arquero transformation spec: keep only the beers whose lowercased name contains 'hop'.
filterTransformationArquero = ({
  // every Dataset object the transformation reads from
  sources: [beers],
  // names of all Arquero verbs used to process the sources
  ops: ["filter"],
  // Arquero verbs to execute, in order
  transforms: [
    {
      // verb to run; must be the actual function name in Arquero
      op: "filter",
      // exact parameters handed to the verb (the predicate is left untouched)
      args: [(d) => op.includes(op.lower(d.name), 'hop',0)]
    }
  ]
});
Insert cell
Insert cell
hoppyBeerNames = pyxis.transformation.arquero.executeDataTransformation(filterTransformationArquero);
Insert cell
Insert cell
aggregationTransformationArquero = {
let t = {
sources: [beers],
ops: ["groupby","rollup","filter", "orderby"],
transforms: [
{ // groupby verb, grouping by beer style
op: "groupby",
args: ["style"]
},
{ // rollup verb, calculating aggregate statistics per beer style
op: "rollup",
args: [{
mean_abv: (d) => op.mean(d.abv),
mean_ibu: (d) => op.mean(d.ibu),
mean_intensity: (d) => op.mean(3 * d.abv + op.log10(d.ibu) / 3),
count: op.count()
}]
},
{
op: "filter", // filter verb, removing beer styles with less than 20 beers
args: [(d) => d.count > 20]
},
{
op: "orderby", // orderby verb, sorting in descending order by the mean_intensity aggregate statistic
args: [desc("mean_intensity")]
}
]
};
return t;
};
Insert cell
Insert cell
beersGroupRollup = pyxis.transformation.arquero.executeDataTransformation(aggregationTransformationArquero);
Insert cell
Insert cell
joinTransformationArquero = {
let t = {
sources: [beers,breweries], // list all sources for our records. The first source (beers) is assumed to be the lefthand source for the join
ops: ["join"], // list all verbs for our records
transforms: [
{
op: "join", // specify the join verb in Arquero
args: [['brewery_id', 'brewery_id']], // specify the attributes to join by for the lefthand (beers) and righthand (breweries) data sources
toJoin: breweries // specify the righthand data source to join with (this is a BaseDataset object)
}
]
};
return t;
};
Insert cell
Insert cell
beersByBrewery = pyxis.transformation.arquero.executeDataTransformation(joinTransformationArquero);
Insert cell
Insert cell
Insert cell
Insert cell
// Vega transformation spec: keep only the beers whose lowercased name contains 'hop'.
filterTransformationVega = ({
  // every Dataset object the transformation reads from
  sources: [beers],
  // names of all Vega transform operations used to process the sources
  ops: ["filter"],
  // Vega transform operations to execute, in order
  transforms: [
    {
      // the type of transform
      type: "filter",
      // Vega expression evaluated per datum to decide whether it is kept
      expr: "indexof(lower(datum.name),'hop') >= 0"
    }
  ]
});
Insert cell
Insert cell
hoppyBeerNamesVega = pyxis.transformation.vega.executeDataTransformation(filterTransformationVega);
Insert cell
Insert cell
// In this example, we show how to chain Vega transform operations together to
// perform more complex transformations.
aggregationTransformationVega = {
let t = {
sources: [beers],
ops: ["formula","aggregate","filter", "collect"],
transforms: [
{ // calculate beer intensity
"type": "formula",
"as": "intensity",
"expr": "3 * datum.abv + log(datum.ibu) / 3" // NOTE: should be log10 but not supported in Vega
},
{ // calculate aggregate statistics per beer style
"type": "aggregate",
"groupby": ["style"],
"fields": ["abv","ibu","intensity","abv"],
"ops": ["mean","mean","mean","count"],
"as": ["mean_abv","mean_ibu","mean_intensity","count"]
},
{
"type": "filter", // filter out beer styles with less than 20 beers
"expr": "datum.count > 20"
},
{
"type": "collect", // sort in descending order by the mean_intensity aggregate statistic
"sort": {
"field": ["mean_intensity"],
"order": ["descending"]
}
}
]
};
return t;
};

Insert cell
Insert cell
beersGroupRollupVega = pyxis.transformation.vega.executeDataTransformation(aggregationTransformationVega);
Insert cell
Insert cell
joinTransformationVega = {
let t = {
sources: [beers,breweries], // list all sources for our records. The first source (beers) is assumed to be the primary (i.e., lefthand) source for the lookup
ops: ["lookup"], // list all transform operators for our records
transforms: [
{
"type": "lookup", // specify the lookup transform operator in Vega
"from": "breweries", // specify the secondary stream
"key": "brewery_id", // key to join on in the secondary stream (breweries)
"fields": ["brewery_id"], // key to join on in the primary stream (beers)
"values": ["brewery-name","city","state"] // what attributes to include from the secondary stream (breweries)
}
]
};
return t;
};
Insert cell
beersByBreweryVega = pyxis.transformation.vega.executeDataTransformation(joinTransformationVega);
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more