Published
Edited
Feb 27, 2019
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
z = require('https://bundle.run/zebras@0.0.11')
//get more information from an observable notebook: https://observablehq.com/@nickslevine/introduction-to-zebras-a-data-analysis-library-for-javascr
//get how to use all the functions: https://zebrasjs.com/Z.html#.sortByCol
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
rainAmounts = [
18.3,
19,
14.1
]
Insert cell
rainDates = Array.from({length: 3}, (_, i) => Date.now() - 1000 * 60 * 60 * 24 * i)
Insert cell
Insert cell
// Two parallel arrays with three elements each: walk through rainAmounts and
// pair every amount with the date stored at the same index in rainDates.
// The result is an array of row objects, which is the shape zebras treats as a
// dataframe.
// Tip: typing "rai" and pressing Tab autocompletes the available names.
newRainfall = rainAmounts.map((amount, index) => ({
  rainAmount: amount,
  date: rainDates[index]
}))
Insert cell
rainfall = {
let rainDF = []
for (var i=0; i<rainAmounts.length; i++) {
rainDF.push({precipitation: rainAmounts[i], date: rainDates[i]})
}
return rainDF
}
//object, like the observations; arrays like the columns
Insert cell
Insert cell
daySeries = ["Monday", "Tuesday", "Wednesday"]
Insert cell
myNewDF = z.addCol("day", daySeries, rainfall)
//add a column called "day" holding the values of daySeries in the dataframe rainfall
//now add a column to all objects in the df
Insert cell
Insert cell
columns = Object.keys(rainfall[0])
//the key of object is the name of columns
Insert cell
Insert cell
shape = ({rows: rainfall.length, columns:Object.keys(rainfall[0]).length})
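//rainfall is a dataframe that stores many objects, so rainfall.length returns the number of observations; the number of columns has to be derived from the number of keys (each key can be viewed as a variable). Note that the keys must be extracted from a single object, not from the whole dataframe.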
Insert cell
Insert cell
Insert cell
migrationUrl = "https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/90951b60444376eebfcbbee9beca00c083f489f6/API_SM.POP.NETM_DS2_en_csv_v2_10473747.csv"
//input csv file from github
Insert cell
migration = d3.csv(migrationUrl)
// knowledge about d3: https://github.com/d3/d3
Insert cell
Insert cell
Insert cell
Insert cell
z.head(5, migration)
Insert cell
Insert cell
Insert cell
migration.length
Insert cell
Insert cell
({rows: migration.length, columns:Object.keys(migration[0]).length})
//way to get rows and columns
Insert cell
Insert cell
migrationColumns = Object.keys(migration[0])
Insert cell
Insert cell
Insert cell
firstRow = Object.values(migration[0])
Insert cell
lastRow = Object.values(migration[migration.length-1])
Insert cell
randomRow = Object.values(migration[Math.floor(Math.random() * migration.length)])
Insert cell
Insert cell
Insert cell
numericalColumns = migrationColumns.slice(0, migrationColumns.length-4)
//use "slice" to get everything except the last 4; get the subset from 0 to the last-4.
Insert cell
migrationParsed = z.parseNums(numericalColumns, migration)
//turn the values into numbers using "z.parseNums"
Insert cell
Insert cell
z.head(5, migrationParsed)
//the first argument of z.head sets the number of rows displayed
Insert cell
Insert cell
countryName = z.getCol('Country Name', migrationParsed)
Insert cell
Insert cell
// 10th row of our Dataframe:
countryName[9]
Insert cell
Insert cell
z.describe(z.getCol('1962', migrationParsed))
//summary statistics of that column
Insert cell
Insert cell
z.valueCounts(z.getCol('Indicator Code', migrationParsed))
//"z.valueCounts" returns the frequency of each value within one column, i.e. how many times each unique value occurs
Insert cell
Insert cell
Insert cell
z.unique(z.getCol('Indicator Code', migrationParsed))
//give an array of unique values
Insert cell
Insert cell
Insert cell
z.filter(r => r['1962'] > 0, migrationParsed)
Insert cell
migrationParsed
//the previous calculation will not change the original df
Insert cell
function(row){
return row['1962']>500
}
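//similarly, a regular function like this one can serve as the filter condition in place of an arrow function (note that this one uses a threshold of 500 rather than 0).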
Insert cell
Insert cell
// Keep only the rows whose 'Country Name' is exactly "China".
// Strict equality (===) compares without type coercion.
z.filter(r => r['Country Name'] === "China", migrationParsed)
//condition + df to work with
Insert cell
Insert cell
z.filter(r => r['1962'] > 0 && r['2012'] < 0, migrationParsed)
Insert cell
Insert cell
//We can use argument 'asc' to reverse the sorting
migrationSorted = z.sortByCol('2017', 'des', migrationParsed)
//sort by a given column; 'des': larger to smaller
Insert cell
z.filter(r => r['1962'] > r['2012'] , migrationParsed)
Insert cell
Insert cell
z.head(5, z.pickCols(['Country Name', '2017'], migrationSorted.slice(1)))
Insert cell
Insert cell
Insert cell
// Keep only the rows whose 'Country Name' is exactly "Cayman Islands".
// Strict equality (===) compares without type coercion.
z.filter(r => r['Country Name'] === "Cayman Islands", migrationParsed)
Insert cell
Insert cell
// Check whether the 1960 value of the second row (index 1) is NaN.
// Number.isNaN does not coerce its argument, unlike the global isNaN.
// NOTE(review): assumes the '1960' column already holds numbers after z.parseNums — confirm.
Number.isNaN(z.getCol('1960', migrationParsed)[1])
Insert cell
Insert cell
// Keep only the rows whose value for the year 1962 is not NaN.
z.filter(r => !Number.isNaN(r['1962']), migrationParsed)
Insert cell
Insert cell
z.getCol('1960', migrationParsed)
Insert cell
Insert cell
z.dropCol(["1960"], migrationParsed)
Insert cell
Insert cell
Insert cell
migrationMetaUrl = "https://gist.githubusercontent.com/cesandoval/b834ac93c07e03ec5205843b97f68017/raw/90951b60444376eebfcbbee9beca00c083f489f6/Metadata_Country_API_SM.POP.NETM_DS2_en_csv_v2_10473747.csv"
Insert cell
migrationMeta = d3.csv(migrationMetaUrl)
//d3.csv - get a comma-separated values (CSV) file.
Insert cell
Insert cell
migrationMeta.length
Insert cell
Insert cell
migrationMetaColumns = Object.keys(migrationMeta[0])
Insert cell
z.head(5, migrationMeta)
Insert cell
Insert cell
md`
> \`\`\`javascript
(static) merge(dfLeft, dfRight, leftOn, rightOn, leftSuffix, rightSuffix) → {df}
\`\`\`
`
//the join is on the left, thus right/ left order matters
Insert cell
Insert cell
migration_join = z.merge(migrationParsed, migrationMeta, "Country Code", "Country Code", "_df1", "_df2")
// migration_join = migrationParsed.map(x => Object.assign(x, migrationMeta.find(y => y['Country Code'] == x['Country Code'])));
Insert cell
Insert cell
Insert cell
migration_2017 = z.getCol('2017', migration_join)
Insert cell
z.print([
{
minMigration: z.min(migration_2017).toFixed(5),
maxMigration: z.max(migration_2017).toFixed(5),
meanMigration: z.mean(migration_2017).toFixed(5),
medianMigration: z.median(migration_2017).toFixed(5),
stdMigration: z.std(migration_2017).toFixed(5)
}
])
Insert cell
Insert cell
migration_2017_Piped = z.pipe(
[
z.parseNums(['1992','1997','2002','2007', '2012', '2017']),
z.getCol('2017'),
z.mean()
]
)(migration_join)
//chaining: z.pipe applies the functions one after another, so there is no need to pass a new df name to every operation.
Insert cell
Insert cell
// Group the merged rows by the value of each row's `Region_df1` property.
// NOTE(review): the gbCount cell that consumes this result counts a column named "Region".
// Confirm that z.merge really produced a `Region_df1` key; if the column is still called
// `Region`, x.Region_df1 is undefined for every row and all rows land in a single group.
migrationByRegion = z.groupBy(x=>x.Region_df1, migration_join)
Insert cell
Insert cell
migrationCountRegion = z.gbCount("Region", migrationByRegion)
Insert cell
Insert cell
Insert cell
Insert cell
d3 = require("d3")
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more