Published
Edited
Aug 18, 2022
Importers
2 stars
Insert cell
# Pyxis: Analytic Knowledge
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
import { aq, op } from "@uwdata/arquero";
Insert cell
// Shorthand for Arquero's `desc` helper; the orderby steps in this notebook
// use it as desc("count") to sort by the count column in descending order.
desc = aq.desc;
Insert cell
Insert cell
Insert cell
Insert cell
// Download the Baltimore victim-based crime data (JSON) from the Pyxis GitHub repository.
// The cell value is a promise for the parsed JSON. A non-2xx HTTP response is reported
// as an explicit error rather than as a confusing JSON parse failure on the error body.
baltimoreCrimeJson = (async () => {
  const response = await fetch("https://raw.githubusercontent.com/leibatt/pyxis/main/datasets/BPD_Part_1_Victim_Based_Crime_Data2.json");
  if (!response.ok) {
    throw new Error(`Failed to fetch Baltimore crime data: ${response.status} ${response.statusText}`);
  }
  return response.json();
})();
Insert cell
// Convert the fetched JSON into a Pyxis dataset object.
// NOTE(review): the second argument is presumably the dataset's name — confirm against the Pyxis API.
baltimoreCrime = pyxis.jsonObjectToDataset(baltimoreCrimeJson,"baltimoreCrime");
Insert cell
baltimoreCrime.records[190000];
Insert cell
Insert cell
Insert cell
Insert cell
// Data transformation describing the "peak crime dates" finding:
// count records per CrimeDate, sort the counts in descending order,
// then keep only the rows whose rank is at most 2.
peakCrimesTransformation = ({
  sources: [baltimoreCrime],
  // every verb that appears in `transforms`
  ops: ["groupby", "rollup", "orderby", "filter"],
  transforms: [
    { op: "groupby", args: ["CrimeDate"] },
    { op: "rollup", args: [{ count: op.count() }] },
    { op: "orderby", args: [desc("count")] },
    { op: "filter", args: [() => op.rank() <= 2] }
  ]
});
Insert cell
Insert cell
// Run the peak-crimes transformation through Pyxis's Arquero executor and keep the output.
peakCrimesResults = pyxis.transformation.arquero.executeDataTransformation(peakCrimesTransformation);
Insert cell
Insert cell
// Knowledge node recording the peak-crime-dates finding.
// Arguments are positional: name, timestamp, transformation, relationshipModel, results.
// `results` is passed as a function; it re-runs the transformation each time it is invoked.
peakCrimesNode = new pyxis.AnalyticKnowledgeNode(
"peakCrimes", // node name
Date.now(), // timestamp
peakCrimesTransformation, // transformation
null, // relationshipModel
() => pyxis.transformation.arquero.executeDataTransformation(peakCrimesTransformation) // results
);
Insert cell
Insert cell
peakCrimesNode.name;
Insert cell
The second parameter is a timestamp representing when the knowledge was recorded.
Insert cell
peakCrimesNode.timestamp;
Insert cell
The third parameter is the Pyxis `DataTransformation` object associated with the knowledge node. This parameter is arguably the most important one, since it captures the analysis findings we want to record.
Insert cell
peakCrimesNode.transformation;
Insert cell
Insert cell
peakCrimesNode.relationshipModel;
Insert cell
Insert cell
peakCrimesNode.results();
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Decision tree classification model that predicts the crime "Description"
// from two nominal inputs: "Inside/Outside" and "Premise".
// NOTE(review): the name passed here is "baltimoreCrimes" (plural), while the dataset is
// created as "baltimoreCrime" elsewhere in this notebook — confirm whether they must match.
crimeLocationModel = new pyxis.DecisionTreeClassificationRelationshipModel(
"baltimoreCrimes", // give the dataset a name, we can just call it baltimoreCrimes
[ // input attributes, the attributes used to predict a certain outcome
{
name: "Inside/Outside", // Attribute name from the baltimoreCrimes dataset
attributeType: pyxis.AttributeType.nominal // the type of attribute (quantitative, ordinal, or nominal)
},
{
name: "Premise",
attributeType: pyxis.AttributeType.nominal
}
],
{ // output attribute, the outcome to be predicted
name: "Description",
attributeType: pyxis.AttributeType.nominal
}
);

Insert cell
Insert cell
Insert cell
// Train the decision tree model on the baltimoreCrime dataset.
crimeLocationModel.train(baltimoreCrime);
Insert cell
Insert cell
// Knowledge node backed by a relationship model instead of a data transformation:
// the transformation slot is null and `results` simply returns the baltimoreCrime dataset.
crimeLocationNode = new pyxis.AnalyticKnowledgeNode(
"crimeLocation", // node name
Date.now(), // timestamp
null, // transformation
crimeLocationModel, // relationshipModel
() => baltimoreCrime // results
);
Insert cell
Insert cell
crimeLocationNode.relationshipModel;
Insert cell
Insert cell
crimeLocationNode.results();
Insert cell
Insert cell
Insert cell
crimeDistributionTransformation = {
let t = {
sources: [baltimoreCrime],
ops: ["filter","groupby","rollup"], // list all verbs for our records
transforms: [
{
op: "filter",
args: [(d) => d["CrimeDate"] === "04/27/2015"]
// op.year(d["CrimeDate"]) === 2015 && op.month(d["CrimeDate"]) === 3 && op.date(d["CrimeDate"]) === 27
},
{
op: "groupby",
args: ["Description"]
},
{
op: "rollup",
args: [{
count: op.count()
}]
},
{
op: "orderby",
args: [desc("count")]
}
]
};
return t;
};
Insert cell
Insert cell
// Knowledge node recording the crime-type distribution for the April 27, 2015 peak.
// Unlike the other nodes in this notebook, it passes a sixth argument: a free-text description.
// `results` re-runs the transformation each time it is invoked.
crimeDistributionNode = new pyxis.AnalyticKnowledgeNode(
"crimeDistribution", // name
Date.now(), // timestamp
crimeDistributionTransformation, // transformation
null, // relationshipModel
() => pyxis.transformation.arquero.executeDataTransformation(crimeDistributionTransformation), // results
"Distribution of crimes observed for the April 27, 2015 peak. Burglaries are the most frequent crime type on this day (210 crimes)." // description
);

Insert cell
crimeDistributionNode.results();
Insert cell
Insert cell
Insert cell
// Register peakCrimesNode as a source of crimeDistributionNode.
crimeDistributionNode.addSource(peakCrimesNode);
Insert cell
The `addSource` function adds the specified node to the target node's list of sources and to its list of related nodes:
Insert cell
crimeDistributionNode.source;
Insert cell
crimeDistributionNode.related;
Insert cell
Insert cell
peakCrimesNode.target;
Insert cell
peakCrimesNode.related;
Insert cell
Insert cell
Insert cell
Insert cell
// Mark crimeDistributionNode as related to crimeLocationNode.
crimeLocationNode.addRelated(crimeDistributionNode);
Insert cell
Insert cell
crimeLocationNode.related;
Insert cell
Insert cell
crimeDistributionNode.related;
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more