Published
Edited
Sep 15, 2022
4 stars
Insert cell
Insert cell
Insert cell
Insert cell
// Fetch the MTA ridership CSV from data.ny.gov, letting d3.autoType infer column types.
mtaData = d3.csv(
  "https://data.ny.gov/api/views/vxuj-8kew/rows.csv?accessType=DOWNLOAD&sorting=true",
  d3.autoType
)
Insert cell
Insert cell
Insert cell
import {DuckDBClient} from "@cmudig/duckdb"
Insert cell
// DuckDB client that exposes the loaded CSV rows as a table named "mtaData".
db = DuckDBClient.of({mtaData})
Insert cell
db
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
db
-- Project the date and subway ridership columns from the mtaData table.
SELECT
  "Date",
  "Subways: Total Estimated Ridership"
FROM mtaData
Insert cell
Insert cell
// Line chart of subway ridership against Date, drawn from mtaSQL, with a baseline at y = 0.
Plot.plot({
  width,
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(mtaSQL, {
      x: "Date",
      y: "Subways: Total Estimated Ridership"
    })
  ]
})
Insert cell
Insert cell
// Second DuckDB client: the same rows, with each Date passed through parseDate before loading.
mtaDb = DuckDBClient.of({
  mtaData: mtaData.map((row) => ({...row, Date: parseDate(row.Date)}))
})
Insert cell
mtaDb
-- Project the date and subway ridership columns from the mtaData table.
-- ORDER BY makes the row order explicit: without it SQL guarantees no particular order,
-- while cells in this notebook pair rows by index and bisect on Date.
SELECT "Date"
, "Subways: Total Estimated Ridership"
FROM mtaData
ORDER BY "Date"
Insert cell
// Line chart of subway ridership against Date, drawn from mtaSQLData, with a wider left margin.
Plot.plot({
  width,
  marginLeft: 60,
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(mtaSQLData, {
      x: "Date",
      y: "Subways: Total Estimated Ridership"
    })
  ]
})
Insert cell
Insert cell
/**
 * Trailing simple moving average.
 *
 * Entry i of the result is the mean of values[i - N + 1 .. i]. The first
 * N - 1 entries (where no full window exists yet) are NaN; if N exceeds
 * values.length the whole result is NaN.
 *
 * @param {ArrayLike<number>} values - Numeric series, already in the desired order.
 * @param {number} N - Window size; must be a positive integer.
 * @returns {Float64Array} Array of the same length as `values`.
 * @throws {RangeError} If N is not a positive integer (a zero, negative or
 *   fractional window would divide by zero or index outside the window).
 */
function movingAverage(values, N) {
  if (!Number.isInteger(N) || N < 1) {
    throw new RangeError(`movingAverage: N must be a positive integer, got ${N}`);
  }
  const means = new Float64Array(values.length).fill(NaN);
  let sum = 0;
  for (let i = 0; i < values.length; ++i) {
    sum += values[i];
    if (i >= N - 1) {
      means[i] = sum / N;
      // Drop the oldest value so `sum` covers the next window's first N - 1 entries.
      sum -= values[i - N + 1];
    }
  }
  return means;
}
Insert cell
Insert cell
subwayRidershipValues = mtaData.sort((a,b) => parseDate(a.Date) - parseDate(b.Date)).map(d => d["Subways: Total Estimated Ridership"])
Insert cell
Insert cell
viewof movingAvgN = Inputs.range([1, 100], {label: "N for Moving Average", step: 1, value: 30})
Insert cell
// Moving average of the date-ordered ridership values, using the window size from the slider.
movingAverages = movingAverage(
  subwayRidershipValues,
  movingAvgN
)
Insert cell
Insert cell
// Attach the moving average to each row as `movingAvg`.
// movingAverages is computed from values sorted by ascending date, and rows are paired with
// it by index, so put a copy of the query result in the same order first instead of
// assuming the query happened to return rows chronologically.
enhancedMTA = [...mtaSQLData]
  .sort((a, b) => a.Date - b.Date)
  .map((d, i) => ({
    ...d,
    movingAvg: movingAverages[i]
  }))
Insert cell
// Line chart of the hand-computed moving average (movingAvg) against Date.
Plot.plot({
  width,
  marginLeft: 100,
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(enhancedMTA, {
      x: "Date",
      y: "movingAvg"
    })
  ]
})
Insert cell
Insert cell
Insert cell
// Same moving average, this time computed by Plot's window transform:
// a mean over k = movingAvgN consecutive rows.
Plot.plot({
  width,
  marginLeft: 100,
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(
      mtaSQLData,
      Plot.windowY({
        x: "Date",
        y: "Subways: Total Estimated Ridership",
        k: movingAvgN,
        reduce: "mean"
      })
    )
  ]
})
Insert cell
Insert cell
// Overlay of three lines: the raw series (grey), Plot's windowed mean anchored at the
// middle of the window (steelblue), and the hand-computed moving average (coral).
Plot.plot({
  width,
  marginLeft: 100,
  marks: [
    Plot.ruleY([0]),
    // Original series
    Plot.lineY(mtaSQLData, {
      x: "Date",
      y: "Subways: Total Estimated Ridership",
      stroke: "#ccc",
      strokeWidth: 0.75
    }),
    // Plot.windowY mean
    Plot.lineY(
      mtaSQLData,
      Plot.windowY({
        x: "Date",
        y: "Subways: Total Estimated Ridership",
        k: movingAvgN,
        reduce: "mean",
        stroke: "steelblue",
        anchor: "middle",
        mixBlendMode: "multiply"
      })
    ),
    // movingAverage() result
    Plot.lineY(enhancedMTA, {
      x: "Date",
      y: "movingAvg",
      stroke: "coral",
      mixBlendMode: "multiply"
    })
  ]
})
Insert cell
Insert cell
Insert cell
Insert cell
Type HTML, then Shift-Enter. Arrow ↑/↓ to switch modes.

Insert cell
Insert cell
/**
 * Rolling summary statistics over a trailing window of N values.
 *
 * For every index i with a full window (i >= N - 1) the result holds an object
 * with the median, min, max, first quartile (`first`) and third quartile
 * (`third`) of values[i - N + 1 .. i], computed with d3. Indexes before the
 * first full window are left as NaN.
 *
 * @param {Array<number>} values - Numeric series, already in the desired order.
 * @param {number} N - Window size.
 * @returns {Array<number|{median:number,min:number,max:number,first:number,third:number}>}
 *   Array of the same length as `values`.
 */
function movingIQR(values, N) {
  const stats = new Array(values.length).fill(NaN);
  for (let i = 0; i < values.length; ++i) {
    // No full window yet: keep the NaN placeholder.
    if (!(i >= N - 1)) continue;
    // Slice the window once rather than re-filtering the whole array for each statistic.
    const windowValues = values.slice(i - N + 1, i + 1);
    stats[i] = {
      median: d3.median(windowValues),
      min: d3.min(windowValues),
      max: d3.max(windowValues),
      first: d3.quantile(windowValues, 0.25),
      third: d3.quantile(windowValues, 0.75)
    };
  }
  return stats;
}
Insert cell
// Rolling statistics over a 20-row window of the ridership values in ascending date order.
// Sort a copy: Array.prototype.sort works in place and mtaSQLData is read by other cells.
movingIQRs = movingIQR(
  [...mtaSQLData]
    .sort((a, b) => a.Date - b.Date)
    .map(d => d["Subways: Total Estimated Ridership"]),
  20
)
Insert cell
// Attach the rolling statistics to each row as `movingIQR`, pairing by index after putting
// the rows in ascending date order (the order movingIQRs was computed in).
// Sort a copy: Array.prototype.sort works in place and mtaSQLData is read by other cells.
superEnhancedMTA = [...mtaSQLData]
  .sort((a, b) => a.Date - b.Date)
  .map((d, i) => ({
    ...d,
    movingIQR: movingIQRs[i]
  }))
Insert cell
// Rolling min–max band (grey), rolling first–third quartile band (blue, basis curve)
// and rolling median line. Rows without a full window have no movingIQR object, hence `?.`.
Plot.plot({
  width,
  marginLeft: 100,
  marks: [
    Plot.ruleY([0]),
    Plot.areaY(superEnhancedMTA, {
      x: "Date",
      y1: (row) => row.movingIQR?.min,
      y2: (row) => row.movingIQR?.max,
      fill: "#ccc",
      fillOpacity: 0.25
    }),
    Plot.areaY(superEnhancedMTA, {
      x: "Date",
      y1: (row) => row.movingIQR?.first,
      y2: (row) => row.movingIQR?.third,
      curve: "basis",
      fill: "#4B74D5",
      fillOpacity: 0.25
    }),
    Plot.lineY(superEnhancedMTA, {
      x: "Date",
      y: (row) => row.movingIQR?.median
    })
  ]
})
Insert cell
Insert cell
// Toy two-week series for demonstrating seasonal adjustment: rows labelled Monday–Friday
// have value 1, rows labelled Saturday and Sunday have value 10.
// Dates are MM/DD/YYYY strings; the seasonal grouping uses the dayOfWeek label, not the date.
simpleSeasonalData = [
{date: '01/01/2020', dayOfWeek: 'Monday', value: 1},
{date: '01/02/2020', dayOfWeek: 'Tuesday', value: 1},
{date: '01/03/2020', dayOfWeek: 'Wednesday', value: 1},
{date: '01/04/2020', dayOfWeek: 'Thursday', value: 1},
{date: '01/05/2020', dayOfWeek: 'Friday', value: 1},
{date: '01/06/2020', dayOfWeek: 'Saturday', value: 10},
{date: '01/07/2020', dayOfWeek: 'Sunday', value: 10},
{date: '01/08/2020', dayOfWeek: 'Monday', value: 1},
{date: '01/09/2020', dayOfWeek: 'Tuesday', value: 1},
{date: '01/10/2020', dayOfWeek: 'Wednesday', value: 1},
{date: '01/11/2020', dayOfWeek: 'Thursday', value: 1},
{date: '01/12/2020', dayOfWeek: 'Friday', value: 1},
{date: '01/13/2020', dayOfWeek: 'Saturday', value: 10},
{date: '01/14/2020', dayOfWeek: 'Sunday', value: 10},
]
Insert cell
Insert cell
// Map from day-of-week label to the mean `value` of the rows carrying that label.
simpleDayOfWeekAverages = d3.rollup(
  simpleSeasonalData,
  (rows) => d3.mean(rows, (row) => row.value),
  (row) => row.dayOfWeek
)
Insert cell
Insert cell
// Mean of the per-day means (each Map entry is a [day, mean] pair).
simpleAvgOfAvgs = d3.mean(
  simpleDayOfWeekAverages,
  ([, dayMean]) => dayMean
)
Insert cell
Insert cell
// Toy rows with a parsed date and a `seasonality` field: the value divided by that day's
// seasonal index (the day's mean relative to the mean of all day means).
seasonedSimple = simpleSeasonalData.map((row) => {
  const seasonalIndex = simpleDayOfWeekAverages.get(row.dayOfWeek) / simpleAvgOfAvgs;
  return {
    ...row,
    date: parseDate(row.date),
    seasonality: row.value / seasonalIndex
  };
})
Insert cell
// Tabular view of the adjusted toy rows.
Inputs.table(seasonedSimple)
Insert cell
// Line chart of the adjusted toy values against date.
Plot.plot({
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(seasonedSimple, {
      x: "date",
      y: "seasonality"
    })
  ]
})
Insert cell
Insert cell
// Full weekday name for a date.
// Dates in this notebook are parsed with d3.utcParse (UTC midnight), so they must be
// formatted in UTC as well; a local-time formatter reports the previous day in any
// time zone west of UTC.
formatDayOfWeek = d3.utcFormat('%A')
Insert cell
// Distinct weekday names, in the order they first occur when the dates are sorted ascending.
days = [
  ...new Set(
    mtaSQLData
      .map((row) => row.Date)
      .sort(d3.ascending)
      .map((date) => formatDayOfWeek(date))
  )
]
Insert cell
// Manual grouping: one array of rows per weekday name.
days.map((day) =>
  mtaSQLData.filter((row) => formatDayOfWeek(row.Date) === day)
)
Insert cell
// The same grouping via d3.group: a Map from weekday name to its rows.
d3.group(
  mtaSQLData,
  (row) => formatDayOfWeek(row.Date)
)
Insert cell
// Map from weekday name to mean subway ridership on that weekday.
dayOfWeekMean = d3.rollup(
  mtaSQLData,
  (rows) => d3.mean(rows, (row) => row["Subways: Total Estimated Ridership"]),
  (row) => formatDayOfWeek(row.Date)
)
Insert cell
// Mean of the per-weekday means.
overallAverage = d3.mean(
  [...dayOfWeekMean],
  ([, weekdayMean]) => weekdayMean
)
Insert cell
// Map from weekday name to its seasonal index: the weekday's mean divided by the overall mean.
// Each rollup group holds exactly one [day, mean] entry, since Map keys are unique.
daySeasonalityIndex = d3.rollup(
  dayOfWeekMean,
  ([[, weekdayMean]]) => weekdayMean / overallAverage,
  ([day]) => day
)
Insert cell
// Rows with an added `seasonality` field: ridership divided by the row's weekday index.
seasonallyAdjustedMTA = mtaSQLData.map((row) => {
  const weekdayIndex = daySeasonalityIndex.get(formatDayOfWeek(row.Date));
  return {
    ...row,
    seasonality: row["Subways: Total Estimated Ridership"] / weekdayIndex
  };
})
Insert cell
// Raw ridership (thin grey) with the weekday-adjusted series drawn on top.
Plot.plot({
  width,
  marginLeft: 60,
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(seasonallyAdjustedMTA, {
      x: "Date",
      y: "Subways: Total Estimated Ridership",
      stroke: "#ccc",
      strokeWidth: 0.5
    }),
    Plot.lineY(seasonallyAdjustedMTA, {
      x: "Date",
      y: "seasonality"
    })
  ]
})
Insert cell
Insert cell
// Week-of-year string for a date (d3's %W directive).
// Dates in this notebook are parsed with d3.utcParse (UTC midnight), so they must be
// formatted in UTC as well; a local-time formatter can place a date in the previous
// day, and therefore the previous week, in time zones west of UTC.
formatWeeks = d3.utcFormat('%W')
Insert cell
// Distinct week strings, in the order they first occur when the dates are sorted ascending.
weeks = [
  ...new Set(
    mtaSQLData
      .map((row) => row.Date)
      .sort(d3.ascending)
      .map((date) => formatWeeks(date))
  )
]
Insert cell
weekIndexes = {
const returnObject = {}
const weekMeans = [];
weeks.forEach(week => {
const weekValues = mtaSQLData.filter(d => formatWeeks(d.Date) === week);
const weekMean = d3.mean(weekValues.map(d => d['Subways: Total Estimated Ridership']));
weekMeans.push(weekMean)
})

const overallAverage = d3.mean(weekMeans)

weeks.forEach((day, i) => {
returnObject[day] = weekMeans[i] / overallAverage;
})

return returnObject
}
Insert cell
// Rows with an added `superSeasonality` field: the weekday-adjusted value further divided
// by the index of the row's week.
superSeasonallyAdjustedMTA = seasonallyAdjustedMTA.map((row) => {
  const weekIndex = weekIndexes[formatWeeks(row.Date)];
  return {
    ...row,
    superSeasonality: row["seasonality"] / weekIndex
  };
})
Insert cell
// Raw ridership (thin grey), the weekday-and-week adjusted series (steelblue) and the
// weekday-only adjusted series (thin coral) on one chart.
// The stray double comma after the first line mark, which left an empty slot in the
// marks array, has been removed.
Plot.plot({
  width,
  marks: [
    Plot.ruleY([0]),
    Plot.lineY(superSeasonallyAdjustedMTA, {
      x: "Date",
      y: "Subways: Total Estimated Ridership",
      stroke: "#ccc",
      strokeWidth: .5
    }),
    Plot.lineY(superSeasonallyAdjustedMTA, {
      x: "Date",
      y: "superSeasonality",
      stroke: 'steelblue',
      mixBlendMode: 'multiply'
    }),
    Plot.lineY(superSeasonallyAdjustedMTA, {
      x: "Date",
      y: "seasonality",
      stroke: 'coral',
      mixBlendMode: 'multiply',
      strokeWidth: .5
    })
  ],
  marginLeft: 60
})
Insert cell
Insert cell
Insert cell
// Parser for MM/DD/YYYY strings; produces Dates in UTC.
parseDate = d3.utcParse("%m/%d/%Y")
Insert cell
// Storm events to annotate on the ridership charts.
// Dates are MM/DD/YYYY strings and are converted with parseDate where they are plotted.
nycHurricanes = [
{name: 'Tropical Storm Fay', date: '07/10/2020'},
{name: 'Tropical Storm Isaias', date: '08/04/2020'},
{name: 'Hurricane Laura', date: '08/31/2020'},
{name: 'Hurricane Delta', date: '10/12/2020'},
{name: 'Hurricane Zeta', date: '10/28/2020'},
{name: 'Hurricane Eta', date: '11/13/2020'},
{name: 'Groundhog Day noreaster', date: '02/01/2021'},
{name: 'Hurricane Elsa', date: '07/09/2021'},
{name: 'Tropical Storm Fred', date: '08/18/2021'},
{name: 'Hurricane Henri', date: '08/22/2021'},
{name: 'Hurricane Ida', date: '09/01/2021'},
{name: 'Hurricane Larry', date: '09/06/2021'},
{name: 'Tropical Storm Wanda', date: '10/26/2021'},
]
Insert cell
// US holidays to annotate on the ridership charts.
// Dates are MM/DD/YYYY strings and are converted with parseDate where they are plotted.
// Two dates were corrected: MLK Day 2022 is the third Monday of January (01/17/2022),
// and Presidents Day 2021 is the third Monday of February (02/15/2021).
USholidays = [
{name: 'Thanksgiving', date: '11/26/2020'},
{name: 'Thanksgiving', date: '11/25/2021'},
{name: 'Halloween', date: '10/31/2021'},
{name: 'Christmas', date: '12/25/2020'},
{name: 'Christmas Eve', date: '12/24/2020'},
{name: 'Christmas', date: '12/25/2021'},
{name: 'Christmas Eve', date: '12/24/2021'},
{name: 'Labor Day', date: '09/06/2021'},
{name: 'Labor Day', date: '09/07/2020'},
{name: 'New Years Eve', date: '12/31/2020'},
{name: 'New Years Day', date: '01/01/2021'},
{name: 'New Years Eve', date: '12/31/2021'},
{name: 'New Years Day', date: '01/01/2022'},
{name: 'MLK Day', date: '01/18/2021'},
{name: 'MLK Day', date: '01/17/2022'},
{name: 'Presidents Day', date: '02/15/2021'},
{name: 'Presidents Day', date: '02/21/2022'},
{name: 'St. Patricks Day', date: '03/17/2021'},
{name: 'St. Patricks Day', date: '03/17/2022'},
{name: 'Memorial Day', date: '05/25/2020'},
{name: 'Memorial Day', date: '05/31/2021'},
{name: 'Memorial Day', date: '05/30/2022'},
{name: 'July 4th', date: '07/04/2020'},
{name: 'July 4th', date: '07/04/2021'},
{name: 'July 4th', date: '07/04/2022'},
]
Insert cell
// Storm (yellow) and holiday (red) markers along the y = 0 baseline, over the raw ridership
// (thin grey) and the weekday-adjusted series (blue).
// The grey line previously read a "metric" field, which the seasonallyAdjustedMTA rows do
// not have; it now reads the ridership column those rows carry.
Plot.plot({
  width,
  marks: [
    Plot.ruleY([0]),
    Plot.dotX(nycHurricanes, {
      x: d => parseDate(d.date),
      y: 0,
      r: 5,
      stroke: "#E8C655",
      title: d => `${d.date} ${d.name}`
    }),
    Plot.dotX(USholidays, {
      x: d => parseDate(d.date),
      y: 0,
      r: 5,
      stroke: "#BC2E2E",
      title: d => `${d.date} ${d.name}`
    }),
    Plot.lineY(seasonallyAdjustedMTA, {
      x: "Date",
      y: "Subways: Total Estimated Ridership",
      stroke: '#ccc',
      strokeWidth: .75
    }),
    Plot.lineY(seasonallyAdjustedMTA, {x: "Date", y: "seasonality", stroke: '#4B74D5'})
  ],
  marginLeft: 60
})
Insert cell
Insert cell
// Bisector keyed on each row's Date, used to look up rows by date.
bisect = d3.bisector((row) => row.Date)
Insert cell
Plot.plot({
width,
marks: [
Plot.ruleY([0]),
Plot.lineY(seasonallyAdjustedMTA, {x: "Date", y: "seasonality", stroke: '#4B74D5'}),
Plot.dot(nycHurricanes, {
x: d => parseDate(d.date),
y: d => {
const bisectIndex = bisect.center(
seasonallyAdjustedMTA,
parseDate(d.date)
);
d = seasonallyAdjustedMTA[bisectIndex];
return d['seasonality'];
},
r: 3,
stroke: "#E8C655",
title: d => `${d.date} ${d.name}`
}),
Plot.dot(USholidays, {
x: d => parseDate(d.date),
y: d => {
const bisectIndex = bisect.center(
seasonallyAdjustedMTA,
parseDate(d.date)
);
d = seasonallyAdjustedMTA[bisectIndex];
return d['seasonality'];
},
r: 3,
stroke: "#BC2E2E",
title: d => `${d.date} ${d.name}`
}),
],
marginLeft: 60
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more