Published
Edited
Mar 3, 2019
Insert cell
Insert cell
Insert cell
Insert cell
// Load the zebras library, pinned to version 0.0.11 and served through bundle.run.
// The other cells call it through `z` for table helpers such as head, getCol, filter and groupBy.
z = require('https://bundle.run/zebras@0.0.11')

Insert cell
// Load D3; it is used below (d3.csv) to fetch and parse the dataset.
// NOTE(review): no version is pinned here, unlike the zebras import — confirm that is intended.
d3 = require("d3")
Insert cell
// URL of the migration dataset: a raw CSV file in a GitHub gist, addressed by a fixed revision hash.
migrationURL = 'https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/6dff0d45ed26352a7a3c7afb4ace0bad1ce8ba20/MIG_18022019163824069.csv'
Insert cell
// Fetch and parse the CSV at migrationURL into the `migration` table used by every cell below.
// NOTE(review): fields are presumably left as strings here; 'Value' is converted later with z.parseNums.
migration = d3.csv(migrationURL)
Insert cell
// Preview the dataset: show the first 20 rows of the migration table.
z.head(20, migration)
Insert cell
// Dimensions of the dataset as {rows, columns}.
// Rows are counted straight from the parsed array rather than by extracting the 'CO2'
// column, so the count no longer depends on that column existing.
// Columns come from the header list that d3.csv attaches to its result (`migration.columns`),
// so an empty dataset reports its size instead of throwing on `migration[0]`.
shape = ({rows: migration.length, columns: migration.columns.length})
Insert cell
Insert cell
// Specify the target column, "Country of birth/nationality," then get the unique values within it
z.unique(z.getCol('Country of birth/nationality', migration))
Insert cell
Insert cell
//overall process:
// create groupby key based on country_variable
// reduce df to the 3 variables we want to look at and parse values into numbers
// group the reduced df by country_variable and sum over those groups (this aggregates over the years)
// put those sums into a wide table, from the long format

// Build the group-by key for every row: the row's Variable immediately followed by its Country.
groupLabel = z.deriveCol(({Variable, Country}) => `${Variable}${Country}`, migration)
Insert cell
// Attach the group-by keys in groupLabel to the migration table as a new column named "key".
migrationKeyed = z.addCol("key", groupLabel, migration)
Insert cell
// Produce a table where each row is a country listed in the migration table's "Country" column (NOT 'Country of birth/nationality' column)
// 3 additional data columns contain sum of inflows, outflows, or stock for that country, aggregated across years.

countryStat = z.pipe([
//subset df to just the relevant variables
z.filter(r => r['Variable'] == "Inflows of foreign population by nationality" ||
r['Variable'] == "Outflows of foreign population by nationality" ||
r['Variable'] == "Stock of foreign-born population by country of birth"),
// parse values into numbers
z.parseNums(['Value']),
// collapse dataframe by country and variable
//z.groupBy(x => x.key),
])(migrationKeyed)
Insert cell
// group by and sum over country and variable
migrationSums = z.gbSum("Value", z.groupBy(d => d.key, countryStat))
Insert cell
// fill new dataframe with the sums
// drop "TOTAL" CO2 from sums?
df_summed = {
let countryList = z.unique(z.getCol("Country", countryStat))
let varList = z.unique(z.getCol("Variable", countryStat))
let columnNames = ["country", "inflow", "outflow", "stock"]
let sums = []
for (var i = 0; i < countryList.length; i++)
{
let currRow = {country: countryList[i],
inflow: 0,
outflow: 0,
stock: 0}
let tag = ""
for (var j = 0; j < varList.length; j++)
{
tag = varList[j] + countryList[i]
currRow[columnNames[j+1]] = z.getCol("sum", (z.filter(r => r["group"] == tag, migrationSums)))[0]
}
sums.push(currRow)
}
return sums
}
Insert cell
{
let a = {country: "A",
inflow: 10,
outflow: 20
}
let columnNames = ["country", "inflow","outflow","stock"]
return a[columnNames[0]]
}
Insert cell
Insert cell
// Your code here

Insert cell
// Your code here

Insert cell
// Your code here

Insert cell
Insert cell
// Your code here

Insert cell
// Your code here

Insert cell
// Your code here

Insert cell
Insert cell
// Your code here

Insert cell
// Your code here

Insert cell
// Your code here

Insert cell
Insert cell
Insert cell
//Questions
// What should we do with the TOT CO2 rows? Drop them? There are sometimes multiple totals per year: see Austria, inflows, TOT
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more