// PSET 4 / jk979 | Observable

//requires d3

d3 = require("d3-fetch@1")

//requires vega-lite

vegalite = require("@observablehq/vega-lite@0.1")

//requires zebras

z = require('https://bundle.run/zebras@0.0.11')

//migration data set: fetched as CSV from the gist URL below.
//Later cells in this file read its CO2, Variable, Value and Year columns.

data_mig = d3.csv('https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/6dff0d45ed26352a7a3c7afb4ace0bad1ce8ba20/MIG_18022019163824069.csv')

//country metadata data set: fetched as CSV from the gist URL below.
//Later cells in this file read its 'Country Code', IncomeGroup and Region columns.

data_country_metadata = d3.csv('https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/90951b60444376eebfcbbee9beca00c083f489f6/Metadata_Country_API_SM.POP.NETM_DS2_en_csv_v2_10473747.csv')

//get unique columns for migration data set

cols_mig = Object.keys(data_mig[0])

//get unique columns for country metadata data set

cols_co2 = Object.keys(data_country_metadata[0])

//group by country code

group_country = z.groupBy(d=>d.CO2, data_mig)

//display the 210 unique countries

unique_CO2 = Object.keys(group_country)

//the 8 variables

unique_vars = z.unique(z.getCol("Variable", data_mig))

//aggregate and sum by variable and by country

sum_agg_df = {

var group = []

for (let i=0;i<unique_CO2.length; i++) {

//country variable

let CO2 = unique_CO2[i]

//get current row

let currentRow = {}

//make current row's country variable = the unique country code variable

currentRow.CO2 = CO2

//sum data, grouped by variable from the Country data set

let data_sum = z.gbSum("Value", z.groupBy(d=>d.Variable, group_country[CO2]))

//append this data_sum value to currentRow

for (let j=0;j<data_sum.length; j++) {

currentRow[`${data_sum[j].group}`] = data_sum[j].sum

}

//push into data frame

group.push(currentRow)

}

return group

}

z.head(5,sum_agg_df)

join_df = z.merge(sum_agg_df, data_country_metadata,'CO2','Country Code','_df1','_df2')

//group table by IncomeGroup

groupByIncome_df = z.groupBy(r => r.IncomeGroup,join_df)

//get unique income types

income_types = z.unique(z.getCol("IncomeGroup", join_df))

//aggregate all variables in each income group

income_agg = {

let group = []

//loop through each income type

for(let i = 0; i<income_types.length; i++){

//grouped_income = low income, then upper middle, then high, etc.

let grouped_income = income_types[i]

let currentRow = {}

//make currentRow = that income level

currentRow.groupByIncome_df = grouped_income

//from there, loop through all 8 unique Variables

for(let j = 0; j<unique_vars.length; j++){

//make the current row of unique Variables = the sum of all unique Variables in that income group

currentRow[unique_vars[j]] = z.getCol(unique_vars[j], z.groupBy(a => a.IncomeGroup, join_df)[grouped_income])

z.gbSum(currentRow[unique_vars[j]])

}

group.push(currentRow)

}

return group

}

//sanity check: display join_df grouped by IncomeGroup to confirm the groupBy worked

groupByIncome_df

//display the list of distinct "Variable" values

unique_vars

//groupByIncome_df displayed a second time (duplicate of the display cell above)

groupByIncome_df

//create grid of histogram plots. Each column represents one entry of unique_vars
//(inflow, outflow, etc.) and each row represents a different income group;
//values are binned with maxbins = 12.
//Every row uses the same spec, so the rows are generated from a list of
//[row title, key into groupByIncome_df] pairs instead of five hand-copied objects.
//The hand-copied version never closed each row's object.
vegalite({
  vconcat: [
    ["Low Income", "Low income"],
    ["Lower Middle Income", "Lower middle income"],
    ["Upper Middle Income", "Upper middle income"],
    ["High Income", "High income"],
    // rows whose IncomeGroup is missing are grouped under the key 'undefined'
    ["Undefined", "undefined"]
  ].map(([title, incomeKey]) => ({
    title,
    repeat: { column: unique_vars },
    spec: {
      data: { values: groupByIncome_df[incomeKey] },
      mark: "bar",
      encoding: {
        x: {
          field: { repeat: "column" },
          // explicit type, matching the stacked histogram cell in this file
          type: "quantitative",
          bin: { maxbins: 12 }
        },
        y: { aggregate: "count", type: "quantitative" }
      }
    }
  }))
})

viewof stacked = vegalite({

title: "Normalized Stacked Histograms by Variable and Income Group",

repeat:{column: unique_vars},

data: {values: z.filter(a => a["CO2"]!="TOT", join_df)},

spec: {

mark: "bar",

encoding: {

y: {

aggregate: "count", type: "quantitative", stack: "normalize"

},

x: {

field: {"repeat":"column"}, type: "quantitative", bin: {"maxbins":12}

},

color: {

field: "IncomeGroup", type: "nominal",

scale: {"range": ["#800000","#469990","#911eb4","#f58231","#fabebe","#9a6324","#71bf00"]},

}

})

//GDP table, fetched as CSV from the gist URL below
gdp2017 = d3.csv('https://gist.githubusercontent.com/jk979/5cdbb39023dcb8cf39fa361e8ddb93a4/raw/bcaf5df8fec8fe09244a18cc06e88efaa0c068d6/gdp2017.csv')

//join the GDP rows to the migration + metadata table by country code
//(GDP key "CountryCode", join_df key "CO2")
merged4 = z.merge(
  gdp2017,
  join_df,
  "CountryCode",
  "CO2",
  "_df1",
  "_df2"
)

//scatter plot, one point per row of merged4:
//  x = "Stock of foreign-born population by country of birth" from sum_agg_df
//      (summed over all of that country's rows; sum_agg_df does not filter by year)
//  y = the "2017" column of the GDP table
//  colour = Region
//Alternatives the author tried: the "Inflows of foreign population by nationality"
//field for x, a sqrt scale for x, and a log scale for y.
//The `encoding` object is now closed; the original left it open.
vegalite({
  width: 400,
  height: 400,
  title: "Immigration Levels by Country of Birth vs. Country GDP",
  data: { values: merged4 },
  mark: "point",
  encoding: {
    x: {
      field: "Stock of foreign-born population by country of birth",
      type: "quantitative",
      axis: { title: "Migrant Population Stock per Country of Origin", offset: 10 },
      scale: { type: "linear" }
    },
    y: {
      field: "2017",
      type: "quantitative",
      axis: { title: "2017 GDP per Capita (USD)", offset: 10 }
    },
    color: {
      field: "Region",
      type: "nominal"
    }
  }
})

data_mig

dataByYear = z.groupBy(d=>d.Year,z.filter(e=>e['CO2'],data_mig))

sumByYear = { // Creating a new df that contains all countries, all inflows per year

//make new group

let group = []

//go through all object keys of dataByYear

for (let k = 0; k < Object.keys(dataByYear).length; k++){

//for each object key, group by country, and sum the values for each

let sum_value = z.gbSum("Value", z.groupBy(d=>d.CO2, dataByYear[Object.keys(dataByYear)[k]]))

//go through all object keys of sum_value

for (let m=0;m<Object.keys(sum_value).length;m++){

//populate currentRow with the object keys, country (from group), andvalue (from sum)

let currentRow = {};

currentRow.year = Object.keys(dataByYear)[m]

currentRow.country = sum_value[m].group

currentRow.value = sum_value[m].sum

group.push(currentRow)

}

return group

}

//show immigration level per country of birth for 2017 (y-axis)

//show years (x-axis)

//each dot is a different country

viewof composite_dot = vegalite({

width:600,

height:300,

title:"Total Inflows of Foreign Population by Country by Year",

data: {values: z.filter(r=>r['Country']!="TOT", sumByYear)},

"layer":[{

//build points

mark: {type: "point", "tooltip": {"content": "data"}, "filled": true},

encoding: {

x: {field: "year", type: "temporal", title:"Year"},

y: {field: "value", type: "quantitative", title: "Inflows","scale": {"domain": [0,3000000]}}

}

},{

//add layer to show mean values per year across all years

mark: "line",

encoding: {

x: {field: "year", type:"temporal"},

y: {field: "value", type: "quantitative", aggregate: "mean"},

size: {"value": 3},

color: {"value":"green"}

}

]})

// Purpose-built for displays of data