Published
Edited
Mar 14, 2019
Insert cell
Insert cell
Insert cell
Insert cell
//requires d3
d3 = require("d3-fetch@1")
Insert cell
//requires vega-lite
vegalite = require("@observablehq/vega-lite@0.1")
Insert cell
//requires zebras
z = require('https://bundle.run/zebras@0.0.11')
Insert cell
// Migration data set: fetched as CSV from a GitHub gist via d3.csv (d3-fetch).
// Without a row-conversion function the parsed fields are strings.
data_mig = d3.csv('https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/6dff0d45ed26352a7a3c7afb4ace0bad1ce8ba20/MIG_18022019163824069.csv')
Insert cell
// Country metadata data set: fetched as CSV from a GitHub gist via d3.csv (d3-fetch).
// Later cells read its 'Country Code', IncomeGroup and Region columns.
data_country_metadata = d3.csv('https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/90951b60444376eebfcbbee9beca00c083f489f6/Metadata_Country_API_SM.POP.NETM_DS2_en_csv_v2_10473747.csv')
Insert cell
// Column names of the migration data set, taken from the keys of its first row.
cols_mig = Object.keys(data_mig[0])
Insert cell
// Column names of the country metadata data set, taken from the keys of its first row.
// NOTE(review): the cell is called cols_co2 but it describes data_country_metadata,
// not the CO2 column — confirm no other notebook depends on the name before renaming.
cols_co2 = Object.keys(data_country_metadata[0])
Insert cell
// Group the migration rows by their CO2 field, which this notebook uses as the country code.
// The result is indexed by that code (see unique_CO2 and sum_agg_df).
group_country = z.groupBy(d=>d.CO2, data_mig)
Insert cell
// Unique country codes: the keys of the per-country grouping.
// The original note states there are 210 of them — not verifiable from the notebook text alone.
unique_CO2 = Object.keys(group_country)
Insert cell
// Distinct values of the "Variable" column of the migration data set
// (the original note says there are 8). Reused as the column list for the repeated charts.
unique_vars = z.unique(z.getCol("Variable", data_mig))
Insert cell
//aggregate and sum by variable and by country
sum_agg_df = {
var group = []
for (let i=0;i<unique_CO2.length; i++) {
//country variable
let CO2 = unique_CO2[i]
//get current row
let currentRow = {}
//make current row's country variable = the unique country code variable
currentRow.CO2 = CO2
//sum data, grouped by variable from the Country data set
let data_sum = z.gbSum("Value", z.groupBy(d=>d.Variable, group_country[CO2]))
//append this data_sum value to currentRow
for (let j=0;j<data_sum.length; j++) {
currentRow[`${data_sum[j].group}`] = data_sum[j].sum
}
//push into data frame
group.push(currentRow)
}
return group
}
Insert cell
// Preview of the aggregated per-country table (z.head with a count of 5).
z.head(5,sum_agg_df)
Insert cell
Insert cell
// Join the per-country sums with the country metadata, matching sum_agg_df's CO2
// against the metadata's 'Country Code'.
// NOTE(review): '_df1' / '_df2' are presumably the suffixes applied to clashing
// column names from each side — confirm against the zebras merge documentation.
join_df = z.merge(sum_agg_df, data_country_metadata,'CO2','Country Code','_df1','_df2')
Insert cell
Insert cell
Insert cell
// Group the joined table by IncomeGroup; the histogram grid below indexes the result
// by income label (e.g. 'Low income', 'High income', 'undefined').
groupByIncome_df = z.groupBy(r => r.IncomeGroup,join_df)
Insert cell
// Distinct values found in the IncomeGroup column of the joined table.
income_types = z.unique(z.getCol("IncomeGroup", join_df))
Insert cell
//aggregate all variables in each income group
income_agg = {
let group = []
//loop through each income type
for(let i = 0; i<income_types.length; i++){
//grouped_income = low income, then upper middle, then high, etc.
let grouped_income = income_types[i]
let currentRow = {}
//make currentRow = that income level
currentRow.groupByIncome_df = grouped_income
//from there, loop through all 8 unique Variables
for(let j = 0; j<unique_vars.length; j++){
//make the current row of unique Variables = the sum of all unique Variables in that income group
currentRow[unique_vars[j]] = z.getCol(unique_vars[j], z.groupBy(a => a.IncomeGroup, join_df)[grouped_income])
z.gbSum(currentRow[unique_vars[j]])
}
group.push(currentRow)
}
return group
}
Insert cell
// Display cell: inspect the income grouping to confirm the groupBy produced the expected keys.
groupByIncome_df
Insert cell
// Display cell: inspect the list of variable names used as chart columns.
unique_vars
Insert cell
// Display cell: second look at the income grouping (repeats the earlier inspection cell).
groupByIncome_df
Insert cell
// Grid of histograms: one row of charts per income group, one column per variable
// in unique_vars. Each chart bins the variable into at most 12 bins (maxbins) and
// counts the rows of that income group falling in each bin.
//
// The rows are generated from a [chart title, groupByIncome_df key] table instead of
// five copy-pasted specs, and the x channel now declares type "quantitative"
// explicitly, matching the stacked histogram cell.
vegalite({
  vconcat: [
    ["Low Income", "Low income"],
    ["Lower Middle Income", "Lower middle income"],
    ["Upper Middle Income", "Upper middle income"],
    ["High Income", "High income"],
    // Rows whose IncomeGroup is undefined end up under the literal key 'undefined'.
    ["Undefined", "undefined"]
  ].map(([title, incomeKey]) => ({
    title,
    repeat: {column: unique_vars},
    spec: {
      data: {values: groupByIncome_df[incomeKey]},
      mark: "bar",
      encoding: {
        x: {
          field: {repeat: "column"},
          type: "quantitative",
          bin: {maxbins: 12}
        },
        y: {aggregate: "count", type: "quantitative"}
      }
    }
  }))
})
Insert cell
Insert cell
Insert cell
viewof stacked = vegalite({
title: "Normalized Stacked Histograms by Variable and Income Group",
repeat:{column: unique_vars},
data: {values: z.filter(a => a["CO2"]!="TOT", join_df)},
spec: {
mark: "bar",
encoding: {
y: {
aggregate: "count", type: "quantitative", stack: "normalize"
},
x: {
field: {"repeat":"column"}, type: "quantitative", bin: {"maxbins":12}
},
color: {
field: "IncomeGroup", type: "nominal",
scale: {"range": ["#800000","#469990","#911eb4","#f58231","#fabebe","#9a6324","#71bf00"]},
}
}
}
})
Insert cell
Insert cell
// GDP data set: fetched as CSV from a GitHub gist via d3.csv (d3-fetch).
// Later cells use its CountryCode column as the join key and its "2017" column
// as the y value of the scatter plot.
gdp2017 = d3.csv('https://gist.githubusercontent.com/jk979/5cdbb39023dcb8cf39fa361e8ddb93a4/raw/bcaf5df8fec8fe09244a18cc06e88efaa0c068d6/gdp2017.csv')
Insert cell
// Join the GDP table to the joined migration/metadata table by country code:
// gdp2017.CountryCode is matched against join_df.CO2.
// NOTE(review): '_df1' / '_df2' are presumably suffixes for clashing column names — confirm.
merged4 = z.merge(gdp2017,join_df,"CountryCode","CO2","_df1","_df2")
Insert cell
//show immigration level per country of birth for 2017 (x-axis)
//show GDP per capita per country for 2017 (y-axis)
//color circles and fill based on region
vegalite({
"width": 400,
"height": 400,
title: "Immigration Levels by Country of Birth vs. Country GDP",
data: {values: merged4},
mark: "point",
encoding: {
x: {field: "Stock of foreign-born population by country of birth", type: "quantitative", //Inflows of foreign population by nationality
"axis": {"title": "Migrant Population Stock per Country of Origin",
"offset": 10},
//"scale": {"type": "sqrt"}
"scale": {"type": "linear"}
},
y: {field: "2017", type: "quantitative",
"axis": {"title": "2017 GDP per Capita (USD)",
"offset": 10},
//"scale": {"type": "log"}
},
color: {
field: "Region",
type: "nominal"
}
}
})
Insert cell
Insert cell
// Display cell: inspect the raw migration rows before regrouping them by year.
data_mig
Insert cell
// Filter the migration rows on a truthy CO2 value (predicate e=>e['CO2']),
// then group the remaining rows by their Year field.
dataByYear = z.groupBy(d=>d.Year,z.filter(e=>e['CO2'],data_mig))
Insert cell
sumByYear = { // Creating a new df that contains all countries, all inflows per year
//make new group
let group = []
//go through all object keys of dataByYear
for (let k = 0; k < Object.keys(dataByYear).length; k++){
//for each object key, group by country, and sum the values for each
let sum_value = z.gbSum("Value", z.groupBy(d=>d.CO2, dataByYear[Object.keys(dataByYear)[k]]))
//go through all object keys of sum_value
for (let m=0;m<Object.keys(sum_value).length;m++){
//populate currentRow with the object keys, country (from group), andvalue (from sum)
let currentRow = {};
currentRow.year = Object.keys(dataByYear)[m]
currentRow.country = sum_value[m].group
currentRow.value = sum_value[m].sum
group.push(currentRow)
}
}
return group
}
Insert cell
//show immigration level per country of birth for 2017 (y-axis)
//show years (x-axis)
//each dot is a different country

viewof composite_dot = vegalite({
width:600,
height:300,
title:"Total Inflows of Foreign Population by Country by Year",
data: {values: z.filter(r=>r['Country']!="TOT", sumByYear)},
"layer":[{
//build points
mark: {type: "point", "tooltip": {"content": "data"}, "filled": true},
encoding: {
x: {field: "year", type: "temporal", title:"Year"},
y: {field: "value", type: "quantitative", title: "Inflows","scale": {"domain": [0,3000000]}}
}
},{
//add layer to show mean values per year across all years
mark: "line",
encoding: {
x: {field: "year", type:"temporal"},
y: {field: "value", type: "quantitative", aggregate: "mean"},
size: {"value": 3},
color: {"value":"green"}
}
}
]})
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more