Published
Edited
Mar 14, 2019
Insert cell
Insert cell
Insert cell
Insert cell
//requires d3
d3 = require("d3-fetch@1")
Insert cell
//requires vega-lite
vegalite = require("@observablehq/vega-lite@0.1")
Insert cell
//requires zebras
z = require('https://bundle.run/zebras@0.0.11')
Insert cell
//migration data set, loaded as CSV from a pinned gist revision
//downstream cells index it as an array of row objects (e.g. data_mig[0]) and read its CO2, Variable, Year and Value columns
data_mig = d3.csv('https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/6dff0d45ed26352a7a3c7afb4ace0bad1ce8ba20/MIG_18022019163824069.csv')
Insert cell
//country metadata data set, loaded as CSV from a pinned gist revision
//it is later joined to the migration sums on its 'Country Code' column, and its IncomeGroup column drives the income grouping
data_country_metadata = d3.csv('https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/90951b60444376eebfcbbee9beca00c083f489f6/Metadata_Country_API_SM.POP.NETM_DS2_en_csv_v2_10473747.csv')
Insert cell
//column names of the migration data set, read from the keys of its first row
cols_mig = Object.keys(data_mig[0])
Insert cell
//column names of the country metadata data set, read from the keys of its first row
//NOTE(review): despite the "co2" in the name, the value comes from data_country_metadata
cols_co2 = Object.keys(data_country_metadata[0])
Insert cell
//bucket the migration rows by their CO2 (country code) value
group_country = z.groupBy(row => row.CO2, data_mig)
Insert cell
//the unique country codes: the keys of group_country
//(author's note: 210 unique countries — the count depends on the fetched data)
unique_CO2 = Object.keys(group_country)
Insert cell
//the distinct values of the "Variable" column in the migration data set
//(author's note: 8 variables — the count depends on the fetched data)
unique_vars = z.unique(z.getCol("Variable", data_mig))
Insert cell
//aggregate and sum by variable and by country:
//one row per country code, holding CO2 plus one column per Variable with that Variable's summed "Value"
sum_agg_df = unique_CO2.map(countryCode => {
  //per-Variable totals of "Value" for this country, as {group, sum} entries
  const totalsByVariable = z.gbSum("Value", z.groupBy(rec => rec.Variable, group_country[countryCode]))
  //the row starts with the country code, then gains one column per Variable
  const row = {}
  row.CO2 = countryCode
  for (const entry of totalsByVariable) {
    row[`${entry.group}`] = entry.sum
  }
  return row
})
Insert cell
//preview sum_agg_df via z.head with n = 5
z.head(5,sum_agg_df)
Insert cell
Insert cell
//join the per-country sums to the country metadata:
//sum_agg_df's 'CO2' column is matched against the metadata's 'Country Code' column
//'_df1' / '_df2' are passed as the last two arguments — presumably suffixes for clashing column names; confirm against the zebras docs
join_df = z.merge(sum_agg_df, data_country_metadata,'CO2','Country Code','_df1','_df2')
Insert cell
Insert cell
Insert cell
//bucket the joined rows by their IncomeGroup value
groupByIncome_df = z.groupBy(row => row.IncomeGroup, join_df)
Insert cell
//the distinct values of the IncomeGroup column in the joined table
income_types = z.unique(z.getCol("IncomeGroup", join_df))
Insert cell
//aggregate all variables in each income group
income_agg = {
let group = []
//loop through each income type
for(let i = 0; i<income_types.length; i++){
//grouped_income = low income, then upper middle, then high, etc.
let grouped_income = income_types[i]
let currentRow = {}
//make currentRow = that income level
currentRow.groupByIncome_df = grouped_income
//from there, loop through all 8 unique Variables
for(let j = 0; j<unique_vars.length; j++){
//make the current row of unique Variables = the sum of all unique Variables in that income group
currentRow[unique_vars[j]] = z.getCol(unique_vars[j], z.groupBy(a => a.IncomeGroup, join_df)[grouped_income])
z.gbSum(currentRow[unique_vars[j]])
}
group.push(currentRow)
}
return group
}
Insert cell
//inspection cell: display groupByIncome_df to check that grouping by income worked
groupByIncome_df
Insert cell
//inspection cell: display the list of unique variables
unique_vars
Insert cell
//inspection cell: display groupByIncome_df again, next to the charts that read from it
groupByIncome_df
Insert cell
//grid of histogram plots: each column is one entry of unique_vars (inflow, outflow, etc.),
//each row is one income group, and values are binned with maxbins = 12
//every panel has the same shape, so the panels are generated from [title, groupByIncome_df key] pairs
vegalite({
  vconcat: [
    ["Low Income", "Low income"],
    ["Lower Middle Income", "Lower middle income"],
    ["Upper Middle Income", "Upper middle income"],
    ["High Income", "High income"],
    //presumably the bucket for rows that have no IncomeGroup value
    ["Undefined", "undefined"]
  ].map(([panelTitle, incomeKey]) => ({
    title: panelTitle,
    repeat: {column: unique_vars},
    spec: {
      data: {values: groupByIncome_df[incomeKey]},
      mark: "bar",
      encoding: {
        x: {
          field: {repeat: "column"},
          bin: {maxbins: 12}
        },
        y: {aggregate: "count", type: "quantitative"}
      }
    }
  }))
})
Insert cell
Insert cell
Insert cell
viewof stacked = vegalite({
title: "Normalized Stacked Histograms by Variable and Income Group",
repeat:{column: unique_vars},
data: {values: z.filter(a => a["CO2"]!="TOT", join_df)},
spec: {
mark: "bar",
encoding: {
y: {
aggregate: "count", type: "quantitative", stack: "normalize"
},
x: {
field: {"repeat":"column"}, type: "quantitative", bin: {"maxbins":12}
},
color: {
field: "IncomeGroup", type: "nominal",
scale: {"range": ["#800000","#469990","#911eb4","#f58231","#fabebe","#9a6324","#71bf00"]},
}
}
}
})
Insert cell
Insert cell
//GDP data set, loaded as CSV from a pinned gist revision
//it is joined to the migration table on its "CountryCode" column
gdp2017 = d3.csv('https://gist.githubusercontent.com/jk979/5cdbb39023dcb8cf39fa361e8ddb93a4/raw/bcaf5df8fec8fe09244a18cc06e88efaa0c068d6/gdp2017.csv')
Insert cell
//join the GDP table to the joined migration/metadata table by country code:
//gdp2017's "CountryCode" column is matched against join_df's "CO2" column
merged4 = z.merge(gdp2017,join_df,"CountryCode","CO2","_df1","_df2")
Insert cell
//scatter plot of merged4, one point per row:
//  x-axis: the "Stock of foreign-born population by country of birth" column
//  y-axis: the "2017" column, labelled as 2017 GDP per capita
//  color:  the row's Region
vegalite({
  width: 400,
  height: 400,
  title: "Immigration Levels by Country of Birth vs. Country GDP",
  data: {values: merged4},
  mark: "point",
  encoding: {
    x: {
      //alternative field noted by the author: "Inflows of foreign population by nationality"
      field: "Stock of foreign-born population by country of birth",
      type: "quantitative",
      axis: {title: "Migrant Population Stock per Country of Origin", offset: 10},
      //linear scale in use; "sqrt" is the alternative the author left commented out
      scale: {type: "linear"}
    },
    y: {
      field: "2017",
      type: "quantitative",
      //no explicit scale here; a "log" scale is the alternative the author left commented out
      axis: {title: "2017 GDP per Capita (USD)", offset: 10}
    },
    color: {field: "Region", type: "nominal"}
  }
})
Insert cell
Insert cell
//inspection cell: display the raw migration data set
data_mig
Insert cell
//migration rows that carry a truthy CO2 value, bucketed by their Year value
dataByYear = z.groupBy(row => row.Year, z.filter(row => row['CO2'], data_mig))
Insert cell
sumByYear = { // Creating a new df that contains all countries, all inflows per year
//make new group
let group = []
//go through all object keys of dataByYear
for (let k = 0; k < Object.keys(dataByYear).length; k++){
//for each object key, group by country, and sum the values for each
let sum_value = z.gbSum("Value", z.groupBy(d=>d.CO2, dataByYear[Object.keys(dataByYear)[k]]))
//go through all object keys of sum_value
for (let m=0;m<Object.keys(sum_value).length;m++){
//populate currentRow with the object keys, country (from group), andvalue (from sum)
let currentRow = {};
currentRow.year = Object.keys(dataByYear)[m]
currentRow.country = sum_value[m].group
currentRow.value = sum_value[m].sum
group.push(currentRow)
}
}
return group
}
Insert cell
//show immigration level per country of birth for 2017 (y-axis)
//show years (x-axis)
//each dot is a different country

viewof composite_dot = vegalite({
width:600,
height:300,
title:"Total Inflows of Foreign Population by Country by Year",
data: {values: z.filter(r=>r['Country']!="TOT", sumByYear)},
"layer":[{
//build points
mark: {type: "point", "tooltip": {"content": "data"}, "filled": true},
encoding: {
x: {field: "year", type: "temporal", title:"Year"},
y: {field: "value", type: "quantitative", title: "Inflows","scale": {"domain": [0,3000000]}}
}
},{
//add layer to show mean values per year across all years
mark: "line",
encoding: {
x: {field: "year", type:"temporal"},
y: {field: "value", type: "quantitative", aggregate: "mean"},
size: {"value": 3},
color: {"value":"green"}
}
}
]})
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more