Published
Edited
Feb 27, 2019
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
z = require('https://bundle.run/zebras@0.0.11')
//for more background on zebras, see this Observable notebook: https://observablehq.com/@nickslevine/introduction-to-zebras-a-data-analysis-library-for-javascr
//API reference covering all of the zebras functions: https://zebrasjs.com/Z.html#.sortByCol
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
rainAmounts = [
18.3,
19,
14.1
]
Insert cell
rainDates = Array.from({length: 3}, (_, i) => Date.now() - 1000 * 60 * 60 * 24 * i)
Insert cell
Insert cell
// Two parallel arrays of three elements each: walk them by index and zip them
// into one array of row objects.
newRainfall = Array.from({length: rainAmounts.length}, (_, idx) => ({
  // One object per observation; an array of such objects is what zebras
  // treats as a dataframe.
  rainAmount: rainAmounts[idx],
  date: rainDates[idx]
}))
// Editor tip: type "rai" and press Tab to autocomplete the names defined above.
Insert cell
rainfall = {
let rainDF = []
for (var i=0; i<rainAmounts.length; i++) {
rainDF.push({precipitation: rainAmounts[i], date: rainDates[i]})
}
return rainDF
}
//object, like the observations; arrays like the columns
Insert cell
Insert cell
daySeries = ["Monday", "Tuesday", "Wednesday"]
Insert cell
myNewDF = z.addCol("day", daySeries, rainfall)
//add a column called "day" holding the values of daySeries in the dataframe rainfall
//now add a column to all objects in the df
Insert cell
Insert cell
columns = Object.keys(rainfall[0])
//the key of object is the name of columns
Insert cell
Insert cell
shape = ({rows: rainfall.length, columns:Object.keys(rainfall[0]).length})
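//rainfall is a df holding many objects, so rainfall.length returns the number of observations; the number of columns has to be derived from the number of keys (each key can be seen as a variable); note that the keys must be extracted from a single object, not from the df as a whole.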
Insert cell
Insert cell
Insert cell
migrationUrl = "https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/90951b60444376eebfcbbee9beca00c083f489f6/API_SM.POP.NETM_DS2_en_csv_v2_10473747.csv"
//input csv file from github
Insert cell
migration = d3.csv(migrationUrl)
// knowledge about d3: https://github.com/d3/d3
Insert cell
Insert cell
Insert cell
Insert cell
z.head(5, migration)
Insert cell
Insert cell
Insert cell
migration.length
Insert cell
Insert cell
({rows: migration.length, columns:Object.keys(migration[0]).length})
//way to get rows and columns
Insert cell
Insert cell
migrationColumns = Object.keys(migration[0])
Insert cell
Insert cell
Insert cell
firstRow = Object.values(migration[0])
Insert cell
lastRow = Object.values(migration[migration.length-1])
Insert cell
randomRow = Object.values(migration[Math.floor(Math.random() * migration.length)])
Insert cell
Insert cell
Insert cell
numericalColumns = migrationColumns.slice(0, migrationColumns.length-4)
//use "slice" to get everything except the last 4; get the subset from 0 to the last-4.
Insert cell
migrationParsed = z.parseNums(numericalColumns, migration)
//turn the values into numbers using "z.parseNums"
Insert cell
Insert cell
z.head(5, migrationParsed)
//z.head is determining the number of rows displayed
Insert cell
Insert cell
countryName = z.getCol('Country Name', migrationParsed)
Insert cell
Insert cell
// 10th row of our Dataframe:
countryName[9]
Insert cell
Insert cell
z.describe(z.getCol('1962', migrationParsed))
//summary statistics of that column
Insert cell
Insert cell
z.valueCounts(z.getCol('Indicator Code', migrationParsed))
//"z.valueCounts" returns the frequency of each value within 1 column; i.e. how many times each distinct value occurs
Insert cell
Insert cell
Insert cell
z.unique(z.getCol('Indicator Code', migrationParsed))
//give an array of unique values
Insert cell
Insert cell
Insert cell
z.filter(r => r['1962'] > 0, migrationParsed)
Insert cell
migrationParsed
//the previous calculation will not change the original df
Insert cell
// A row predicate written with the `function` keyword instead of an arrow
// function: it returns true when the row's '1962' value is greater than 500.
function(row){
return row['1962']>500
}
//similarly, this form will work the same way as the arrow-function form (note that the threshold here is 500).
Insert cell
Insert cell
// z.filter takes the condition (a row predicate) first and the df to work with second.
// Strict equality is used so the country name is matched without type coercion.
z.filter(r => r['Country Name'] === "China", migrationParsed)
Insert cell
Insert cell
z.filter(r => r['1962'] > 0 && r['2012'] < 0, migrationParsed)
Insert cell
Insert cell
//We can use argument 'asc' to reverse the sorting
migrationSorted = z.sortByCol('2017', 'des', migrationParsed)
//sort by a given column; 'des' sorts from larger to smaller
Insert cell
z.filter(r => r['1962'] > r['2012'] , migrationParsed)
Insert cell
Insert cell
z.head(5, z.pickCols(['Country Name', '2017'], migrationSorted.slice(1)))
Insert cell
Insert cell
Insert cell
// Rows for the Cayman Islands only; strict equality avoids type coercion.
z.filter(r => r['Country Name'] === "Cayman Islands", migrationParsed)
Insert cell
Insert cell
// Is the second entry of the '1960' column NaN? Number.isNaN is used (as in the
// '1962' filter elsewhere in this notebook) because it does not coerce its argument.
// NOTE(review): assumes '1960' was converted by z.parseNums — confirm it is in numericalColumns.
Number.isNaN(z.getCol('1960', migrationParsed)[1])
Insert cell
Insert cell
// Keep only the rows whose '1962' value is not NaN.
z.filter(r => !Number.isNaN(r['1962']), migrationParsed)
Insert cell
Insert cell
z.getCol('1960', migrationParsed)
Insert cell
Insert cell
z.dropCol(["1960"], migrationParsed)
Insert cell
Insert cell
Insert cell
migrationMetaUrl = "https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/90951b60444376eebfcbbee9beca00c083f489f6/Metadata_Country_API_SM.POP.NETM_DS2_en_csv_v2_10473747.csv"
Insert cell
migrationMeta = d3.csv(migrationMetaUrl)
//d3.csv - get a comma-separated values (CSV) file.
Insert cell
Insert cell
migrationMeta.length
Insert cell
Insert cell
migrationMetaColumns = Object.keys(migrationMeta[0])
Insert cell
z.head(5, migrationMeta)
Insert cell
Insert cell
md`
> \`\`\`javascript
(static) merge(dfLeft, dfRight, leftOn, rightOn, leftSuffix, rightSuffix) → {df}
\`\`\`
`
//the join is a left join, so the order in which dfLeft and dfRight are passed matters
Insert cell
Insert cell
migration_join = z.merge(migrationParsed, migrationMeta, "Country Code", "Country Code", "_df1", "_df2")
// migration_join = migrationParsed.map(x => Object.assign(x, migrationMeta.find(y => y['Country Code'] == x['Country Code'])));
Insert cell
Insert cell
Insert cell
migration_2017 = z.getCol('2017', migration_join)
Insert cell
z.print([
{
minMigration: z.min(migration_2017).toFixed(5),
maxMigration: z.max(migration_2017).toFixed(5),
meanMigration: z.mean(migration_2017).toFixed(5),
medianMigration: z.median(migration_2017).toFixed(5),
stdMigration: z.std(migration_2017).toFixed(5)
}
])
Insert cell
Insert cell
migration_2017_Piped = z.pipe(
[
z.parseNums(['1992','1997','2002','2007', '2012', '2017']),
z.getCol('2017'),
z.mean()
]
)(migration_join)
//chaining: z.pipe applies the functions one by one, each to the result of the previous one, so there is no need to pass a new df name for every operation.
Insert cell
Insert cell
migrationByRegion = z.groupBy(x=>x.Region_df1, migration_join)
Insert cell
Insert cell
// NOTE(review): migrationByRegion is grouped on `Region_df1`, while this cell
// counts the `Region` column — confirm which name z.merge actually gives the
// region column and make the two cells agree.
migrationCountRegion = z.gbCount("Region", migrationByRegion)
Insert cell
Insert cell
Insert cell
Insert cell
d3 = require("d3")
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more