Published
Edited
Mar 4, 2019
1 fork
1 star
Insert cell
Insert cell
Insert cell
Insert cell
// Location of the raw migration CSV (hosted as a GitHub gist) that the cells below load and summarize.
migrationUrl = "https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/6dff0d45ed26352a7a3c7afb4ace0bad1ce8ba20/MIG_18022019163824069.csv"
Insert cell
Insert cell
// Load the migration table from migrationUrl via d3.csv; every later cell reads its rows from `migration`.
migration = d3.csv(migrationUrl)
Insert cell
Insert cell
// Load the zebras data-frame helpers (pinned to 0.0.11, served by bundle.run); used as `z` throughout.
z = require('https://bundle.run/zebras@0.0.11')
Insert cell
// Distinct values of the 'Country of birth/nationality' column of the migration table.
z.unique(z.getCol('Country of birth/nationality', migration))
Insert cell
Insert cell
//pre-treatment
migrationCountry={
//only keep the varaibles we are interested in summarizing
var migrationFilter=z.filter(r=> r.VAR=="B11"|r.VAR=="B12"||r.VAR=="B14", migration)
//turn values into numbers
var migrationParse=z.parseNums(["Value"], migrationFilter)
//group by countries
return z.groupBy(x => x.CO2, migrationParse)
}
Insert cell
// Distinct values of the CO2 column; the cells below use them as the country codes, one output row per code.
countryBirth=z.unique(z.getCol('CO2', migration))
Insert cell
df_summed={
var sumTable=[]
var migrationB={}
var migrationValB={}
for (var i=0; i<countryBirth.length; i++){
migrationB[countryBirth[i][0]]=z.filter(r=> r.VAR=="B11", migrationCountry[countryBirth[i]])
migrationB[countryBirth[i][1]]=z.filter(r=> r.VAR=="B12", migrationCountry[countryBirth[i]])
migrationB[countryBirth[i][2]]=z.filter(r=> r.VAR=="B14", migrationCountry[countryBirth[i]])
migrationValB[countryBirth[i][0]]=z.getCol("Value", migrationB[countryBirth[i][0]])
migrationValB[countryBirth[i][1]]=z.getCol("Value", migrationB[countryBirth[i][1]])
migrationValB[countryBirth[i][2]]=z.getCol("Value", migrationB[countryBirth[i][2]])
//calculate the sum of the value column for each variables within a particulat country array, and push the results in the summary table as a new array.
//since it is not reasonable to have non-integral number of people. I use "parseInt" function to turn all the values into integral before logging out.
sumTable.push({"Country Code":countryBirth[i], "Inflows of foreign population by nationality":parseInt(migrationValB[countryBirth[i][0]].reduce(function(a, b) { return a + b; }, 0)) ,"Outflows of foreign population by nationality": parseInt(migrationValB[countryBirth[i][1]].reduce(function(a, b) { return a + b; }, 0)), "Stock of foreign-born population by country of birth":parseInt(migrationValB[countryBirth[i][2]].reduce(function(a, b) { return a + b; }, 0))})
}
return sumTable
}
Insert cell
Insert cell
Insert cell
Insert cell
// Load the second data set via d3.csv. `Data2` is defined outside the visible cells
// (presumably the URL of the country/continent table used below; confirm in the notebook).
migration2 = d3.csv(Data2)
Insert cell
// Create a data frame that keeps only the country code and continent code columns of migration2.
migration2Subset=z.pickCols(["Three_Letter_Country_Code", "Continent_Code"], migration2)
Insert cell
// Join the per-country sums with the continent table, matching df_summed's "Country Code"
// against migration2Subset's "Three_Letter_Country_Code".
migrationJoined=z.merge(df_summed, migration2Subset, "Country Code", "Three_Letter_Country_Code")
// Question: why do we join on the country code only? Some countries belong to more than one continent, so country codes are not unique in migration2Subset (the newly created data set).
Insert cell
// Keep only the joined rows that carry a continent code, i.e. drop the
// observations that could not be merged with the second data set.
migration2Filter=z.filter(function (row) { return row.Continent_Code != null }, migrationJoined)
Insert cell
newAggregate={
var groupContinent={}
var newSum=[]
//first group by continents to calculate the sum of different variables of interest.
groupContinent[[0]]=z.gbSum("Inflows of foreign population by nationality", z.groupBy(d => d.Continent_Code, migration2Filter))
groupContinent[[1]]=z.gbSum("Outflows of foreign population by nationality", z.groupBy(d => d.Continent_Code, migration2Filter))
groupContinent[[2]]=z.gbSum("Stock of foreign-born population by country of birth", z.groupBy(d => d.Continent_Code, migration2Filter))
//for each continent, push the information needed and organized in an array
for (var i=0; i<6; i++){
newSum.push({"Continent Code":groupContinent[[0]][i].group, "Inflows of foreign population by nationality":parseInt(groupContinent[[0]][i].sum),"Outflows of foreign population by nationality":parseInt(groupContinent[[1]][i].sum) ,"Stock of foreign-born population by country of birth":parseInt(groupContinent[[2]][i].sum)} )
}
return newSum
}
Insert cell
// Preview the first 6 rows of the continent-level table.
z.head(6, newAggregate)
Insert cell
Insert cell
// aggregates the countries for birth by Variable
df_sum2={
var migrationParse=z.parseNums(["Value"], migration)
var migrationV=z.groupBy(x=>x.Variable,migrationParse)
var variables=z.unique(z.getCol("Variable", migration))
var sumObject={}
var aggregate=[]
for (var i=0; i<variables.length; i++){
sumObject[variables[i]]=z.gbSum("Value", z.groupBy(d => d.CO2, migrationV[variables[i]]))
}
//organize each country's information as an object
for (var i=0; i<countryBirth.length; i++){
var obj={}
obj["Country Code"]=countryBirth[i]
for(var j=0;j<variables.length;j++)
{
//get the object of interests identified by country codes (countryBirth is the key of countries)
var countrySrc=z.filter(x=>x.group==countryBirth[i],sumObject[variables[j]])
//first loop the values of variables into an object, then pushed into the array
obj[variables[j]]=parseInt(countrySrc[0]["sum"])
}
//push that object into an array (data frame)
aggregate.push(obj)
}
return aggregate
}
Insert cell
Insert cell
sumStock16={
// filter the data frame for both variable and year
var migrationParse=z.parseNums(["Value"], migration)
var migration16Filter=z.filter(r => r.VAR == "B14" && r.Year == "2016", migrationParse)
var migration16Sum=z.gbSum("Value", z.groupBy(d => d.CO2, migration16Filter))
var Sum16=[]
for (var i=0; i<countryBirth.length; i++){
//get the object of interests identified by country codes (countryBirth is the key of countries)
var countrySrc=z.filter(x=>x.group==countryBirth[i],migration16Sum)
//push the sum of each country into the final array
Sum16.push({"Country Code":countryBirth[i], "StockSum2016": parseInt(countrySrc[0]["sum"])})
}
return Sum16
}
Insert cell
// Preview the first 5 rows of the 2016 stock table.
z.head(5,sumStock16)
Insert cell
{
// first generate three arrays by filter
//generate object keys so that they can be used for filter
var newData1=z.filter(x=>x.StockSum2016> 100000, sumStock16)
var newData2=z.filter(x=>x.StockSum2016> 50000 && x.StockSum2016< 100000, sumStock16)
var newData3=z.filter(x=>x.StockSum2016< 50000, sumStock16)
//put all three arrays into an object, which includes three new data frames (optional)
var newDataCombined={"High Immigration":newData1, "medium Immigration": newData2, "low Immigration":newData3}
return newDataCombined
}
Insert cell
Insert cell
// Your code here
compareArray={
for(var i=0;i<countryBirth.length; i++){
df_summed[i]["Country"]=df_summed[i]["Country Code"]
}
var compareArray=[]
for(var i=0;i<countryBirth.length; i++){
var countrySrc=z.filter(x=>x.Country==countryBirth[i],df_summed)
compareArray.push({"Country Code":countryBirth[i], "Inflow-Outflow":parseInt(countrySrc[0]["Inflows of foreign population by nationality"]- countrySrc[0]["Outflows of foreign population by nationality"])})
}
return compareArray
}
Insert cell
Insert cell
Insert cell
Insert cell
// Your code here

Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more