Published
Edited
Jan 8, 2021
4 forks
139 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Require the d3 library
d3 = require("d3")
Insert cell
Insert cell
// Load the Seattle building permit data using `d3.csv()`
// No row-conversion function is passed here; later cells coerce numeric fields with unary `+`
// and parse `IssuedDate` with a d3 time parser before using them.
data = d3.csv("https://data.seattle.gov/api/views/76t5-zqzr/rows.csv")
Insert cell
Insert cell
// Display a table showing a subset of the data
render_data_table(data)
Insert cell
Insert cell
Insert cell
// Inspect the data
data
Insert cell
Insert cell
Insert cell
// Get the first (zero-ith) element
data[0]
Insert cell
Insert cell
// Access the PermitNum key of the first element
data[0].PermitNum
Insert cell
Insert cell
Insert cell
Insert cell
// How many observations (building permits) are in the dataset?
data.length
Insert cell
Insert cell
// Get a list of features in our dataset from the first observation
Object.keys(data[0])
Insert cell
Insert cell
Insert cell
// Require the lodash package
_ = require('lodash')
Insert cell
Insert cell
// Get one observation for each unique value of the `PermitClass` feature
// Note: `_.uniqBy()` returns whole row objects (the first row seen for each class), not the class names
permit_classes = _.uniqBy(data, d => d.PermitClass)
Insert cell
Insert cell
// Get only the unique `PermitClass` strings: pluck the class from every row, then de-duplicate
// (values are kept in the order in which they first appear in the data)
permit_class_list = _.uniq(data.map(permit => permit.PermitClass))
Insert cell
Insert cell
// What is the total number of housing units added across the dataset?
// Each value is coerced to a number with unary `+` and added to the running total (starting at 0)
data.reduce((total, permit) => total + +permit.HousingUnitsAdded, 0)
Insert cell
Insert cell
// Get the sum of three values (an example of d3.sum _without_ an accessor function)
d3.sum([1, 2, 3])
Insert cell
// Get the sum of the total number of housing units added across the dataset
d3.sum(data, d => +d.HousingUnitsAdded) // seems like a lot!
Insert cell
Insert cell
// What is the average estimated project cost?
d3.mean(data, d => +d.EstProjectCost)
Insert cell
// What is the standard deviation across project cost?
d3.deviation(data, d => +d.EstProjectCost)
Insert cell
// What is the most expensive project?
d3.max(data, d => +d.EstProjectCost)
Insert cell
Insert cell
// Issue date of an example permit (the row at index 1000)
data[1000].IssuedDate
Insert cell
Insert cell
// Create a time parsing function based on the structure of the date-strings in our data
// ("%Y-%m-%d" = four-digit year, month, day of month, separated by dashes)
// Note: a function made by d3.timeParse() returns null when a string doesn't match this format
parser = d3.timeParse("%Y-%m-%d")
Insert cell
parser(data[1000].IssuedDate).getFullYear()
Insert cell
Insert cell
// Parse the issue date of the first permit (row) in the dataset
parser(data[0].IssuedDate)
Insert cell
Insert cell
// Get the extent ([earliest, latest]) of the parsed issue dates in the dataset
date_range = d3.extent(data, permit => parser(permit.IssuedDate))
Insert cell
Insert cell
// Define a formatting function for *displaying* date objects
// ("%B %d, %Y" = full month name, zero-padded day of month, four-digit year)
formatter = d3.timeFormat("%B %d, %Y")
Insert cell
Insert cell
// Render a markdown sentence describing the first and last issue dates in the data
md`The permit data ranges from ${formatter(date_range[0])} to ${formatter(date_range[1])}`
Insert cell
Insert cell
Insert cell
// How many projects have been completed?
// Walk the permits once, adding 1 to the tally whenever StatusCurrent is exactly "Completed"
num_completed_projects = data.reduce(
  (tally, permit) => (permit.StatusCurrent === "Completed" ? tally + 1 : tally),
  0
)
Insert cell
// How many permits have been issued in 2020? Only keep the objects where the issue year is 2020
// Note, we need to parse the date of each object, then get the *year* to perform the filter
num_permits_2020 = data.filter(d => {
  // `parser` (made with d3.timeParse) returns null for any string that doesn't match the
  // expected format. That covers the empty IssuedDate strings in this dataset as well as
  // any other malformed value, so we parse once and skip rows that yield null instead of
  // throwing on `null.getFullYear()`.
  const issued = parser(d.IssuedDate);
  return issued !== null && issued.getFullYear() === 2020;
}).length
Insert cell
// What is the description of the most expensive project?
// Writing this out in a few different lines for clarity
most_expensive_description = {
// Determine the *value* of most expensive project in the dataset
const highest_cost = d3.max(data, d => +d.EstProjectCost);

// Filter down the dataset to the observations where the estimated cost is equal to the max
// This will return an array with 1+ objects (more than one if multiple projects have the same cost)
const most_expensive = data.filter(d => +d.EstProjectCost === highest_cost);

// Extract the description from the first element in the array
const description = most_expensive[0].Description;
return description;
}
Insert cell
Insert cell
// Calculate permits per year using plain old JavaScript
permits_per_year = {
let per_year = []; // this is the variable that we'll return

// Iterate through the permits, adding new objects to the `per_year` array
data.forEach(permit => {
// Skip rows where the IssuedDate is not present
if (permit.IssuedDate === "") return;

// Store the year of this permit in a variable (for readability)
const year = parser(permit.IssuedDate).getFullYear();

// See if this year is present in the dataset
let this_year = per_year.find(d => d.year === year);

// If this year hasn't been added, add it -- otherwise, increment the number of permits by 1
if (this_year === undefined) {
per_year.push({ year: year, permits: 1 });
} else {
this_year.permits += 1;
}
});
return per_year;
}
Insert cell
Insert cell
// Calculate permits per year using lodash -- `_.countBy()` tallies rows by the key we return
// Rows with an empty IssuedDate are tallied under the "No date present" key; all others under their year
permits_per_year_lodash = _.countBy(data, permit =>
  permit.IssuedDate === ""
    ? "No date present"
    : parser(permit.IssuedDate).getFullYear()
)
Insert cell
Insert cell
// Use the Object.entries() method to convert our object into an array of { key, value } objects
permits_per_year_lodash_array = Object.entries(permits_per_year_lodash).map(
  ([key, value]) => ({ key, value })
)
Insert cell
Insert cell
// Count the number of permits by PermitClass (the result is a Map of PermitClass -> count)
by_class = d3.rollup(
  data, // our array of objects
  permits => permits.length, // the reducer function: calculates a summary statistic for each group
  permit => permit.PermitClass // the key function: groups the data by this key
)
Insert cell
Insert cell
// Retrieve the keys (the permit classes) as a plain array
Array.from(by_class.keys())
Insert cell
// Retrieve the values (the permit counts) as a plain array
Array.from(by_class.values())
Insert cell
// Get a value of interest
by_class.get("Multifamily")
Insert cell
Insert cell
// Count the number of permits by PermitClass again, this time with d3.rollups(),
// which returns an array of [key, value] pairs instead of a Map
by_class_array = d3.rollups(data, v => v.length, d => d.PermitClass)
Insert cell
Insert cell
// Calculate summary statistics by permit class
summary_by_class = d3.rollup(
data,
v => ({
count: v.length, // number of elements with this PermitClass
mean_cost: d3.mean(v, d => +d.EstProjectCost), // get the average estimated project cost!
max_cost: d3.max(v, d => +d.EstProjectCost) // get the max estimated project cost!
}),
d => d.PermitClass
)
Insert cell
// Calculate summary statistics by permit class (using .rollups() to return an array instead of a map)
summary_by_class_array = d3.rollups(
data,
v => ({
count: v.length,
mean_cost: d3.mean(v, d => +d.EstProjectCost), // get the average estimated project cost!
max_cost: d3.max(v, d => +d.EstProjectCost) // get the max estimated project cost!
}),
d => d.PermitClass
)
Insert cell
Insert cell
summary_by_class.get("Commercial").mean_cost
Insert cell
Insert cell
// Calculate summary statistics by permit class and year
summary_by_class_and_year = d3.rollup(
data,
v => ({
count: v.length,
mean_cost: d3.mean(v, d => +d.EstProjectCost), // get the average estimated project cost!
max_cost: d3.max(v, d => +d.EstProjectCost) // get the max estimated project cost!
}),
d => d.PermitClass,
d => (d.IssuedDate ? parser(d.IssuedDate).getFullYear() : "")
)
Insert cell
Insert cell
summary_by_class_and_year.get("Industrial").get(2015).count
Insert cell
Insert cell
// Reformat each [PermitClass, statistics] pair into a { key, value } object for vega-lite,
// where `value` is the number of permits in that class
vl_data = summary_by_class_array.map(([key, stats]) => ({
  key: key,
  value: stats.count
}))
Insert cell
// Load the vega-lite package
import { vl } from "@vega/vega-lite-api"
Insert cell
// Create a bar chart of the number of permits of each class
simpleBar = vl
.markBar() // Make a bar chart
.data(vl_data) // Using the summary data by Permit Class
.encode(
vl
.x()
.fieldQ("value") // Encode the number of permits on the x axis
.axis({ title: "Number of Permits" }), // Set the axis title
vl
.y()
.fieldO("key") // Encode our Key on the y axis (Permit Class),
.sort(null) // Show the order that they appear in the data (for demonstration)
.axis({ title: "Permit Class" }) // Set the axis title
)
.render() // display the chart
Insert cell
Insert cell
Insert cell
// Clone the vl_data array (the per-class permit counts) so we can sort the copy (for demonstration only)
data_to_sort = _.clone(vl_data)
Insert cell
Insert cell
// Comparator for Array.prototype.sort(): orders { key, value } objects by their numeric
// `value` (the number of permits), largest first
sort_by_num_permits = (a, b) => {
  const permits_b = +b.value;
  const permits_a = +a.value;
  return permits_b - permits_a;
}
Insert cell
// Sort the data using the function described above (which could have been written inline)
// Note: Array.prototype.sort() sorts in place, so this reorders `data_to_sort` itself --
// which is why a clone, rather than the original array, is being sorted here
data_to_sort.sort(sort_by_num_permits)
Insert cell
Insert cell
// Make a bar chart of the number of permits -- same code as above, but using the sorted_data
sorted_bar = vl
.markBar() // Make a bar chart
.data(data_to_sort) // Using the *sorted* data
.encode(
vl
.x()
.fieldQ("value") // Encode the number of permits on the x axis
.axis({ title: "Number of Permits" }), // Set the axis title
vl
.y()
.fieldO("key") // Encode our Key on the y axis,
.sort(null) // Show the order that they appear in the data (for demonstration)
.axis({ title: "Permit Class" }) // Set the axis title
)
.render()
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more