PSET 3 DRAFT DO NOT TURN IN / rlluo1

rlluo1

Workspace

Published

Edited

Mar 3, 2019

// Load zebras, the data-frame helper library used as `z` below

z = require('https://bundle.run/zebras@0.0.11')

// Load D3, used below to fetch and parse the CSV

d3 = require("d3")

// Raw-gist URL of the migration CSV that the notebook loads
migrationURL = "https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/6dff0d45ed26352a7a3c7afb4ace0bad1ce8ba20/MIG_18022019163824069.csv"

// Load the CSV found at migrationURL with d3.csv.
// No row-conversion function is passed here; the 'Value' column is turned into
// numbers later with z.parseNums.
// NOTE(review): d3.csv presumably returns a promise that the notebook runtime
// resolves before dependent cells run — confirm for the d3 version loaded.

migration = d3.csv(migrationURL)

// Preview the dataset: z.head with n = 20 (presumably the first 20 rows of migration)

z.head(20, migration)

// get dimensions of dataset

shape = ({rows: z.getCol('CO2', migration).length, columns:Object.keys(migration[0]).length})

// Specify the target column, "Country of birth/nationality," then get the unique values within it

z.unique(z.getCol('Country of birth/nationality', migration))

//overall process:

// create groupby key based on country_variable

// reduce df to the 3 variables we want to look at and parse values into numbers

// group the reduced df by country_variable and sum over those groups (this aggregates over the years)

// put those sums into a wide table, converting from the long format

// Group-by key for every row of migration: the row's Variable text
// immediately followed by its Country text (no separator)
groupLabel = z.deriveCol(({Variable, Country}) => Variable + Country, migration)

// Add the group-by key (groupLabel) to the migration table as a new "key" column

migrationKeyed = z.addCol("key", groupLabel, migration)

// Produce a table where each row is a country listed in the migration table's "Country" column (NOT 'Country of birth/nationality' column)

// 3 additional data columns contain sum of inflows, outflows, or stock for that country, aggregated across years.

countryStat = z.pipe([

//subset df to just the relevant variables

z.filter(r => r['Variable'] == "Inflows of foreign population by nationality" ||

r['Variable'] == "Outflows of foreign population by nationality" ||

r['Variable'] == "Stock of foreign-born population by country of birth"),

// parse values into numbers

z.parseNums(['Value']),

// collapse dataframe by country and variable

//z.groupBy(x => x.key),

])(migrationKeyed)

// group by and sum over country and variable

migrationSums = z.gbSum("Value", z.groupBy(d => d.key, countryStat))

// fill new dataframe with the sums

// drop "TOTAL" CO2 from sums?

df_summed = {

let countryList = z.unique(z.getCol("Country", countryStat))

let varList = z.unique(z.getCol("Variable", countryStat))

let columnNames = ["country", "inflow", "outflow", "stock"]

let sums = []

for (var i = 0; i < countryList.length; i++)

{

let currRow = {country: countryList[i],

inflow: 0,

outflow: 0,

stock: 0}

let tag = ""

for (var j = 0; j < varList.length; j++)

{

tag = varList[j] + countryList[i]

currRow[columnNames[j+1]] = z.getCol("sum", (z.filter(r => r["group"] == tag, migrationSums)))[0]

}

sums.push(currRow)

}

return sums

}

{

let a = {country: "A",

inflow: 10,

outflow: 20

}

let columnNames = ["country", "inflow","outflow","stock"]

return a[columnNames[0]]

}

// Your code here

//Questions

// What to do with TOT CO2? Drop? sometimes multiple totals per year: see Austria, inflows, TOT

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more