Published
Edited
Jul 29, 2021
Importers
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Load the attached diamonds CSV as an array of row objects (no `typed` option is passed here).
data = FileAttachment("diamonds@2.csv").csv()
Insert cell
Insert cell
// Load the same attachment again, this time with the `typed: true` option.
testData = FileAttachment("diamonds@2.csv").csv({typed: true})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Load the d3-fetch module; other cells call its json() and csv() helpers.
d3Fetch = require('d3-fetch')
Insert cell
// Fetch JSON from `url` (which is defined outside this excerpt).
data3 = d3Fetch.json(url)
Insert cell
// Fetch CSV from `url2` (which is defined outside this excerpt).
data4 = d3Fetch.csv(url2)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// First five rows of `data`, used as a small working sample.
newData = data.slice(0,5)
Insert cell
Insert cell
Insert cell
// Build a new object for every sampled row, copying each field explicitly,
// so that later edits to data2 do not touch the row objects held by newData.
// The "" key is the CSV's unnamed first column.
data2 = newData.map((d) => ({
  "": d[""],
  carat: d.carat,
  cut: d.cut,
  color: d.color,
  clarity: d.clarity,
  depth: d.depth,
  table: d.table,
  price: d.price,
  x: d.x,
  y: d.y,
  z: d.z
}))
Insert cell
Insert cell
Insert cell
Insert cell
// Deep copy of newData via a JSON round trip, so copyData can be edited without changing newData.
copyData = JSON.parse(JSON.stringify(newData))
Insert cell
Insert cell
// Overwrite the carat of the second copied row (index 1) with 0.
copyData[1].carat = 0
Insert cell
Insert cell
// is_newData_updated is defined outside this excerpt; presumably it reports whether
// newData changed together with copyData — TODO confirm against its definition.
is_newData_updated(copyData, newData)
Insert cell
Insert cell
// Render the five-row sample as a table.
printTable(newData)
// Equivalent without the intermediate cell: printTable(data.slice(0, 5))
Insert cell
Insert cell
import {printTable} from '@uwdata/data-utilities'
Insert cell
Insert cell
// Report the row count of data and the number of keys on its first row.
md`>Number of rows = ${data.length}<br>
Number of columns = ${Object.keys(data[0]).length}`
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
{
  // Reformat every row's depth to two decimals, in place.
  // toFixed returns a string, so depth is a string afterwards.
  // for...of visits the array's elements; for...in would walk its enumerable keys.
  for (const row of data2) {
    row.depth = parseFloat(row.depth).toFixed(2);
  }
}
Insert cell
Insert cell
// Render data2 as a table.
printTable(data2)
Insert cell
Insert cell
// First six rows; the second argument lists field/title pairs for cut, color and clarity
// (presumably the columns printTable should show — confirm against @uwdata/data-utilities).
printTable(data.slice(0,6), [{field: 'cut', title: 'cut'}, {field: 'color', title: 'color'}, {field: 'clarity', title: 'clarity'}])
Insert cell
Insert cell
// First five rows whose cut is 'Ideal'.
printTable(data.filter(d => d.cut === 'Ideal').slice(0,5)) // the arrow function takes a row d and returns d.cut === 'Ideal'
Insert cell
Insert cell
Insert cell
{
  // Add a price_per_carat field (a two-decimal string) to every copied row, in place.
  // The division already yields a number, so no extra parseFloat is needed.
  // A carat of 0 produces "Infinity" here.
  for (const row of copyData) {
    row.price_per_carat = (row.price / row.carat).toFixed(2);
  }
}
Insert cell
// Render copyData as a table.
printTable(copyData)
Insert cell
Insert cell
Insert cell
Insert cell
// Sort a six-row slice with a numeric comparator and render it.
// NOTE(review): the comparator reads an `Age` field, which is not among the diamond fields
// copied in data2 ("", carat, cut, color, clarity, depth, table, price, x, y, z) — confirm the intended column.
printTable(data.slice(0,6).sort((a,b) => (a.Age-b.Age))) // equivalent: printTable(data.slice(0, 6).sort(function(a,b) {return a.Age-b.Age}))
Insert cell
Insert cell
Insert cell
// Field names taken from the first row of data.
keys = Object.keys(data[0])
Insert cell
Insert cell
// Pass two equal-length column arrays (day, state) to fromColumns from @uwdata/data-utilities;
// presumably it turns them into row objects — confirm against that notebook.
test = fromColumns(
{
day: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
state: ['reading', 'playing', 'writing', 'interview', 'chilling']
}
)
Insert cell
import {fromColumns} from '@uwdata/data-utilities'
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
import { vl } from "@vega/vega-lite-api"
Insert cell
Insert cell
Insert cell
// Load vega-embed (major version 6); the chart cells call it with a Vega-Lite spec object.
vegalite = require("vega-embed@6")
Insert cell
Insert cell
Insert cell
Insert cell
// Bar chart: how many diamonds fall into each cut quality.
vegalite({
  title: "The number of diamonds for cut quality",
  width: 400,
  height: 300,
  data: {values: data}, // the diamonds rows loaded in the `data` cell
  // semi-transparent bars (opacity 0.6) outlined with a 1.5 stroke width
  mark: {type: "bar", opacity: 0.6, strokeWidth: 1.5},
  encoding: {
    x: {
      field: "cut",
      type: "ordinal",
      axis: {labelAngle: 0} // keep the category labels horizontal
    },
    // bar height = number of rows for each cut
    y: {aggregate: "count", field: "cut", type: "quantitative"},
    color: {value: "rgb(158,158,225)"}, // fill color of the bars
    stroke: {value: "rgb(8,48,107)"} // outline color of the bars
  }
})
// Note: Vega-Lite has many defaults, so the chart also renders when "type" is left out of
// the x and y encodings; stating it explicitly is still good practice.
Insert cell
Insert cell
// Bar chart: mean diamond price for each cut quality, bars ordered by that mean.
vegalite({
  // The y channel is the mean of price, so the title names that measure.
  title: "Mean price of diamonds for cut quality",
  width: 400,
  height: 300,
  data: {values: data},
  mark: {type: "bar", opacity: 0.6, strokeWidth: 1.5},
  encoding: {
    x: {
      field: "cut",
      type: "ordinal",
      axis: {labelAngle: 0},
      // ascending by the y value; use "-y" for descending.
      // Alternative: sort: {op: "mean", field: "price", order: "descending"}
      sort: "y"
    },
    // bar height = mean price of the rows for each cut
    y: {aggregate: "mean", field: "price", type: "quantitative"},
    color: {value: "rgb(158,158,225)"},
    stroke: {value: "rgb(8,48,107)"}
  }
})

Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Small multiples: one column panel per diamond color, counting diamonds per cut.
vegalite({
  height: 300,
  data: {values: data},
  mark: "bar",
  encoding: {
    // one panel per value of the color field, laid out column-wise
    column: {field: "color", type: "nominal", spacing: 10, title: false},
    // no axis elements are drawn on x; bars are ordered by descending y
    x: {field: "cut", axis: false, sort: "-y"},
    // bar height = number of rows for each cut
    y: {aggregate: "count", field: "cut", title: "Cut of diamonds", type: "quantitative"},
    color: {
      field: "cut",
      scale: {
        range: [
          "rgb(203, 203, 227)",
          "rgb(160, 182, 215)",
          "rgb(114, 162, 202)",
          "rgb(65, 137, 186)",
          "rgb(35, 89, 135)"
        ]
      },
      legend: {orient: "right", title: "Cut Quality"} // legend on the right-hand side
    }
  }
})
Insert cell
Insert cell
// Faceted bar chart: one panel per diamond color, counting diamonds per cut.
// The color range repeats one light blue with a single darker entry in third position.
vegalite({
  data: {values: data},
  facet: {field: "color", type: "ordinal", title: false}, // one panel per value of the color field
  spacing: 10,
  // "spec" describes how each individual panel is drawn
  spec: {
    height: 300,
    mark: {type: "bar"},
    encoding: {
      x: {field: "cut", type: "nominal", axis: false},
      y: {aggregate: "count", field: "cut", title: "Cut of diamonds", type: "quantitative"},
      color: {
        field: "cut",
        scale: {
          range: ["#aec7e8", "#aec7e8", "#1f77b4", "#aec7e8", "#aec7e8", "#aec7e8", "#aec7e8"]
        },
        legend: {orient: "right", title: "Cut Quality"} // legend on the right-hand side
      }
    }
  }
})
Insert cell
Insert cell
Insert cell
Insert cell
// Faceted bars: one panel per clarity grade, diamond price by cut inside each panel.
// In a facet specification the mark belongs inside "spec", so none is set at the top level.
vegalite({
  data: {values: data},
  facet: {field: "clarity", type: "ordinal"},
  spec: {
    width: 80, // width of each individual panel
    mark: "bar",
    encoding: {
      x: {field: "cut", type: "ordinal", axis: false, title: false},
      y: {field: "price", type: "quantitative", title: "Price of diamonds"},
      color: {field: "cut", title: "cut-quality"}
    }
  }
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Heatmap of the passengers field by month (rows) and year (columns).
vegalite({
  data: {values: flights}, // flights is defined outside this excerpt
  mark: "rect", // one rectangle per month/year cell
  encoding: {
    y: {field: "month", type: "ordinal", sort: null}, // sort: null keeps the order found in the data
    x: {field: "year", type: "ordinal"},
    color: {
      field: "passengers",
      type: "quantitative",
      // "linear" is a valid continuous scale type ("quantitative" is a field type, not a scale type).
      // importedColor is defined outside this excerpt. Add reverse: true to flip the ramp.
      scale: {type: "linear", range: importedColor},
      legend: {type: "gradient"} // "gradient" suits continuous data, "symbol" suits discrete data
    }
  },
  config: {
    // grid: true draws grid lines between the rectangles (set it to false to compare);
    // tickBand: "extent" puts ticks and grid lines at the band edges ("center" to compare).
    axis: {grid: true, tickBand: "extent"}
  }
})
Insert cell
Insert cell
Insert cell
Insert cell
// Histogram of carat using Vega-Lite's default binning.
vegalite({
  data: {values: data},
  // "width" here is the thickness of each bar; change it to see the effect
  mark: {type: "bar", width: 18},
  encoding: {
    // bin: true applies the default binning parameters to carat
    x: {bin: true, field: "carat", title: "Carat"},
    // number of diamonds that fall into each carat bin
    y: {aggregate: "count", field: "carat"}
  }
})
Insert cell
Insert cell
// Histogram of carat with a caller-chosen upper bound on the number of bins.
vegalite({
  height: 300,
  width: 400,
  data: {values: data},
  mark: "bar",
  encoding: {
    // bin accepts true or a bin-parameters object; a bare number is not a valid bin definition.
    // maxbins caps the number of bins Vega-Lite may create.
    x: {bin: {maxbins: 50}, field: "carat", title: "Carat"},
    y: {aggregate: "count"}
  }
})
Insert cell
Insert cell
Insert cell
// Density estimate of carat, drawn as a filled line.
vegalite({
  width: 400,
  data: {values: data},
  transform: [
    {
      density: "carat", // field whose density is estimated
      extent: [0, 5], // [min, max] of the domain values to display (needed in most cases)
      as: ["value", "density"] // output fields for the sample value and its density; these are the defaults
    }
  ],
  mark: {type: "line", fill: "#aec7e8"}, // mark: "area" also works; try it to compare
  encoding: {
    x: {field: "value", type: "quantitative"},
    y: {field: "density", type: "quantitative"}
  }
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Normal Q-Q plot of carat with a regression line layered on top.
vegalite({
  width: 500,
  data: {values: data},
  transform: [
    {
      // quantile estimation on carat; replace "carat" with "price" for the Q-Q plot of price
      quantile: "carat",
      // output field names [probability, quantile value]; these are the defaults
      as: ["prob", "value"]
    },
    {
      // normal quantile for each probability, stored in "norm"
      calculate: "quantileNormal(datum.prob)",
      as: "norm"
    }
  ],
  // both layers share one plot: the Q-Q points and the fitted line
  layer: [
    {
      mark: {type: "circle", size: 80},
      encoding: {
        x: {field: "norm", type: "quantitative", title: "Theoretical Quantiles"},
        y: {field: "value", type: "quantitative", title: "Ordered Values"}
      }
    },
    {
      mark: {type: "line", color: "red"},
      transform: [
        {
          regression: "value", // dependent variable to predict
          on: "norm" // independent variable used as predictor
          // no "as" needed: the output defaults to the input field names "norm" and "value"
        }
      ],
      encoding: {
        x: {field: "norm", type: "quantitative"},
        y: {field: "value", type: "quantitative"}
      }
    }
  ]
})
Insert cell
Insert cell
Insert cell
// The 100 rows with the largest x value.
// sort() reorders its array in place, so sort a copy and leave the shared `data` cell untouched.
xData = data.slice().sort((a, b) => b.x - a.x).slice(0, 100)
Insert cell
// The 100 rows with the largest y value.
// sort() reorders its array in place, so sort a copy and leave the shared `data` cell untouched.
yData = data.slice().sort((a, b) => b.y - a.y).slice(0, 100)
Insert cell
Insert cell
Insert cell
Insert cell
// Fetch the mpg CSV from the seaborn-data repository on GitHub.
mpgDataset = d3Fetch.csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mpg.csv')
Insert cell
Insert cell
md`<p class = "class-1"> ISSUE with box plot:<br>
There seems to be a bug in vegalite itself. Tooltips are enabled, so you can hover over the graph (this one and the others below); on boxes that have no whisker line you will see some weird numbers, such as a maximum that is lower than Q3 or a minimum that is greater than Q1. I also found these links, where the issues with drawing box plots in vegalite are discussed:<br>
https://github.com/vega/vega-lite/issues/4925<br>
https://github.com/vega/vega-lite/pull/6755<br>
Could you have a look at it?</p>
`
Insert cell
// Box plot of mpg for each model year.
vegalite({
  width: 500, // wider canvas
  data: {values: mpgDataset},
  mark: {
    type: "boxplot",
    tooltip: true // show values on hover; similar to tooltip: {content: "encoding"}
  },
  // To avoid drawing outliers as points, use: mark: {type: "boxplot", extent: "min-max"}
  encoding: {
    x: {field: "model_year", type: "ordinal", axis: {labelAngle: 0}},
    // quantitative scales start at zero by default; zero: false lifts that requirement
    y: {field: "mpg", type: "quantitative", scale: {zero: false}},
    size: {value: 20}, // width of each box
    color: {field: "model_year", legend: false}
  }
})
Insert cell
Insert cell
// Same box plot through the vega-lite-api builder.
// markBoxplot() sets the mark type directly, rather than creating a point mark
// and overriding its type.
vl
  .markBoxplot()
  .data(mpgDataset)
  .encode(
    vl.x().fieldO("model_year"),
    vl.y().fieldQ("mpg")
  )
  .render()

Insert cell
Insert cell
Insert cell
// Deep copy of mpgDataset via a JSON round trip, so it can be edited without changing the original rows.
mpgCopy = JSON.parse(JSON.stringify(mpgDataset))
Insert cell
Insert cell
{
  // Add model_decade to every copied row, in place: model_year rounded down to a multiple of 10.
  // for...of visits the array's elements; for...in would walk its enumerable keys.
  for (const row of mpgCopy) {
    row.model_decade = Math.floor(row.model_year / 10) * 10;
  }
}
Insert cell
Insert cell
// Render the last five rows of mpgCopy.
// A negative start counts from the end and also copes with arrays shorter than five rows.
printTable(mpgCopy.slice(-5))
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Deep copy of data via a JSON round trip, so it can be edited without changing the original rows.
newCopy = JSON.parse(JSON.stringify(data))
Insert cell
{
  // Replace every copied row's carat with its value rounded down to an integer, in place.
  // for...of visits the array's elements; for...in would walk its enumerable keys.
  for (const row of newCopy) {
    row.carat = Math.floor(row.carat);
  }
}
Insert cell
// Display newCopy.
newCopy
Insert cell
Insert cell
Insert cell
Insert cell
// Violin plot of mpg: a centered density area, a rule from Q1 to Q3, and a short median tick.
// All three layers read the single top-level data source.
vegalite({
  data: {values: mpgDataset},
  width: 400,
  // Stated explicitly because the median tick below is placed in pixels around the
  // vertical middle (100); 200 is Vega-Lite's default view height.
  height: 200,
  layer: [
    {
      mark: {type: "area", tooltip: true},
      transform: [
        {density: "mpg", extent: [0, 60]}
      ],
      encoding: {
        // grid: false removes the vertical grid lines
        x: {field: "value", type: "quantitative", axis: {grid: false}, title: "Violin plot of the mpg"},
        y: {
          field: "density",
          type: "quantitative",
          stack: "center", // center baseline, which gives the mirrored violin shape
          axis: null // no y-axis elements
        }
      }
    },
    {
      // line segment between the first and third quartile
      mark: {type: "rule", strokeWidth: 1.5},
      encoding: {
        x: {aggregate: "q1", field: "mpg"},
        x2: {aggregate: "q3", field: "mpg"}
      }
    },
    {
      // short vertical tick at the median
      mark: {type: "rule", strokeWidth: 1.5},
      encoding: {
        x: {aggregate: "median", field: "mpg"},
        y: {value: 110},
        y2: {value: 90}
      }
    }
  ],
  config: {view: {stroke: null}} // no border around the view
})
Insert cell
Insert cell
// Markdown note linking to a Stack Overflow question about ridgeline plots in Vega-Lite.
md`<p class = "class-1">Issue with ridgeline plots: https://stackoverflow.com/questions/58311926/are-ridgeline-plots-possible-in-vega-lite</p>`
Insert cell
// Ridgeline plot: one carat density curve per diamond color, stacked as row facets that overlap.
vegalite(
{
data: { values: data },
"mark": {
"type": "line", "fillOpacity": 0.5, "strokeOpacity": 0.8,"strokeWidth":1},
// Height of each facet row; change it to control how much neighbouring curves overlap.
"width": 700,
"height": 40,
"transform": [
// One density estimate of carat per color, sampled over [0, 4].
{"density": "carat", "groupby": ["color"], "extent": [0, 4]},
// Optional: add a filter on the color field (oneOf ['D', 'E', 'F', 'G', 'H', 'I', 'J']) to show only chosen colors.
],
encoding: {
"x": {"field": "value", "type": "quantitative", "axis": {"grid": false, "title": "Carat of diamonds"}},
"y": {"field": "density", "type": "quantitative",
"scale": {"range": [40,-80]}, // range is [step, -overlap * step]; the negative end lets a curve rise above its own row
"axis": { "title": false, "domain": false, "labels": false, "ticks": false, "grid": false}, // every y-axis element is switched off individually; per the author, turning the axis off wholesale cut off the x-axis title
},
"row": { "field": "color", "type": "nominal", "title": "Colors",
"spacing": 0, // no gap between facet rows
"header": {"labelAngle": 0, "labelOrient": "left", "labelAlign":"left", "labelPadding":0},
"axis": false,
"sort": { "field": "color", "op": "max", "order": "ascending"}
},
"fill": { "field": "color", "type": "nominal", "legend": null},
"stroke": { "field": "color"}
},
"bounds": "flush", // important: lay out by facet cell height, not by the full mark bounds
"padding": 0,
"config": {
"view": {
"stroke": "transparent"
}
}
}
)
Insert cell
Insert cell
Insert cell
// Load the attached athlete events CSV with the `typed: true` option.
athlete = FileAttachment("athlete_events.csv").csv({typed: true})
Insert cell
Insert cell
// Keep only the rows that carry a medal: rows whose Medal is the string "NA" are dropped.
updatedAthlete = athlete.filter(d => d.Medal !== "NA")
Insert cell
// Medal-row count per NOC: one {NOC, count} entry for each distinct NOC,
// in order of first appearance in updatedAthlete.
// A single pass fills a Map from NOC to its running count, which avoids
// re-filtering the whole array for every row and yields each NOC exactly once.
newAthelete = Array.from(
  updatedAthlete.reduce(
    (counts, d) => counts.set(d.NOC, (counts.get(d.NOC) || 0) + 1),
    new Map()
  ),
  ([NOC, count]) => ({NOC, count})
)
Insert cell
/**
 * Report whether any row of data2 has the given country.
 *
 * @param {*} a - value compared with strict equality against each row's `country` field
 * @returns {boolean} true if at least one row matches, false otherwise
 */
function find(a){
  // some() hands the match back to the caller; a `return true` inside a
  // forEach callback only ends that callback and is discarded.
  return data2.some((d) => d.country === a);
}
Insert cell
// Sort a six-row slice of data with a numeric comparator and render it.
// NOTE(review): `data` is the diamonds table, and `Age` is not among the diamond fields copied in data2;
// this cell sits among the athlete cells, so `athlete` may have been intended — confirm.
printTable(data.slice(0,6).sort((a,b) => (a.Age-b.Age)))
Insert cell
// Horizontal bar chart of the five NOCs with the most rows in updatedAthlete.
vegalite({
  title: "Top five Countries",
  data: {values: updatedAthlete},
  mark: "bar",
  transform: [
    // one output row per NOC, carrying its row count as aggregateCount
    {
      aggregate: [{op: "count", field: "NOC", as: "aggregateCount"}],
      groupby: ["NOC"]
    },
    // number the aggregated rows from 1, largest count first
    {
      window: [{op: "row_number", as: "rank"}],
      sort: [{field: "aggregateCount", order: "descending"}]
    },
    // keep only the first five
    {filter: "datum.rank <= 5"}
  ],
  encoding: {
    // the axis needs its own sort so the bars are ordered too
    y: {field: "NOC", type: "nominal", sort: "-x"},
    x: {field: "aggregateCount", type: "quantitative"}
  }
})
Insert cell
// Medal rows whose Year is exactly the number 2008.
atheleteOlympics = updatedAthlete.filter(d=>d.Year === 2008)
Insert cell
// Horizontal bar chart of the five sports with the most rows in atheleteOlympics.
vegalite({
  title: "Top five Sports",
  data: {values: atheleteOlympics},
  mark: "bar",
  transform: [
    // one output row per Sport, carrying its row count as medalCount
    {
      aggregate: [{op: "count", field: "Medal", as: "medalCount"}],
      groupby: ["Sport"]
    },
    // number the aggregated rows from 1, largest count first
    {
      window: [{op: "row_number", as: "rank"}],
      sort: [{field: "medalCount", order: "descending"}]
    },
    // keep only the first five
    {filter: "datum.rank <= 5"}
  ],
  encoding: {
    // the axis needs its own sort so the bars are ordered too
    y: {field: "Sport", type: "nominal", sort: "-x"},
    x: {field: "medalCount", type: "quantitative"}
  }
})
Insert cell
Insert cell
// Display updatedAthlete.
updatedAthlete
Insert cell
// Medal breakdown for five hard-coded NOCs, one row panel per NOC.
vegalite({
  title: {text: "Top five Countries", anchor: "middle"},
  data: {values: updatedAthlete},
  mark: {type: "bar", tooltip: true},
  transform: [
    // keep only the rows of the listed NOCs
    {filter: {field: "NOC", oneOf: ["USA", "URS", "GER", "GBR", "FRA"]}}
  ],
  encoding: {
    row: {field: "NOC", type: "nominal", spacing: 10, title: false},
    y: {field: "Medal", type: "ordinal", sort: "-x", axis: false},
    // bar length = number of rows for each medal value
    x: {aggregate: "count", field: "Medal", type: "quantitative"},
    color: {field: "Medal", scale: {range: ["#cd7f32", "#FFD700", "#c0c0c0"]}}
  }
})
Insert cell
// Medal breakdown for five hard-coded sports, one row panel per sport.
vegalite({
  title: {text: "Top five Sports", anchor: "middle"},
  data: {values: atheleteOlympics},
  mark: {type: "bar", tooltip: true},
  transform: [
    // keep only the rows of the listed sports
    {filter: {field: "Sport", oneOf: ["Swimming", "Athletics", "Rowing", "Football", "Hockey"]}}
  ],
  encoding: {
    row: {field: "Sport", type: "nominal", spacing: 10, title: false},
    y: {field: "Medal", type: "ordinal", sort: "-x", axis: false},
    // bar length = number of rows for each medal value
    x: {aggregate: "count", field: "Medal", type: "quantitative"},
    color: {field: "Medal", scale: {range: ["#cd7f32", "#FFD700", "#c0c0c0"]}}
  }
})
Insert cell
Insert cell
// Display updatedAthlete.
updatedAthlete
Insert cell
{
  // Add a copyMedal field mirroring Medal to every row of updatedAthlete, in place.
  // for...of visits the array's elements; for...in would walk its enumerable keys.
  for (const row of updatedAthlete) {
    row.copyMedal = row.Medal;
  }
}
Insert cell
// Display updatedAthlete.
updatedAthlete
Insert cell
// Medal breakdown for the five NOCs with the most medal rows, one row panel per NOC.
vegalite({
  title: "Top five Countries",
  data: {values: updatedAthlete},
  mark: {type: "bar", tooltip: true},
  transform: [
    // joinaggregate attaches each NOC's row count to every one of its rows and keeps
    // the original fields; a plain aggregate would leave only NOC and the count,
    // and the Medal field used by the encoding below would be gone.
    {
      joinaggregate: [{op: "count", as: "aggregateCount"}],
      groupby: ["NOC"]
    },
    // dense_rank gives rows with equal aggregateCount the same rank, so all rows
    // of the NOC with the largest count get rank 1, the next NOC rank 2, and so on.
    {
      window: [{op: "dense_rank", as: "rank"}],
      sort: [{field: "aggregateCount", order: "descending"}]
    },
    // keep the rows of the five highest-ranked NOCs
    {filter: "datum.rank <= 5"}
  ],
  encoding: {
    row: {field: "NOC", type: "nominal", spacing: 10, title: false},
    y: {field: "Medal", type: "ordinal", sort: "-x"},
    // bar length = number of rows for each medal value
    x: {aggregate: "count", field: "Medal", type: "quantitative"},
    color: {field: "Medal"}
  }
})
Insert cell
Insert cell
import {tb} from "@mariodelgadosr/target-blank"
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more