Published
Edited
Dec 22, 2019
Insert cell
md`# Park Data Wrangling`
Insert cell
d3 = require('d3@5')
Insert cell
d3array = require("d3-array@^2.2")
Insert cell
VegaLite = require('vega-embed@5')
Insert cell
// Gas price table loaded from gas_price.txt, one { year, price } object per CSV row.
// `year` is kept exactly as it appears in the file (a string); `price` is the
// `gas_constant` column (presumably a constant-dollar price — confirm against the file).
gasPrice = d3.csv("https://www.sfu.ca/~ridgez/ParkData/gas_price.txt", d => {
  return {
    year: d.year,
    // Coerce to a number, like the income cell does, so the gasPrice field in
    // allDataUnion is numeric for every year (its fallback for missing years is 0).
    price: +d.gas_constant,
  }
})
Insert cell
// Income table loaded from MEHOINUSA672N.txt, one { year, income } object per CSV row.
income = d3.csv("https://www.sfu.ca/~ridgez/ParkData/MEHOINUSA672N.txt", row => {
  // The year is the third "/"-separated piece of DATE
  // (presumably M/D/YYYY — confirm against the file).
  const dateParts = row.DATE.split("/");
  // Numeric income taken from the "Income (2018 Adjusted Dollars)" column.
  const adjustedIncome = Number(row["Income (2018 Adjusted Dollars)"]);
  return { year: dateParts[2], income: adjustedIncome };
})
Insert cell
// Unprocessed rows of the park visitation CSV: no row converter is passed, so the
// objects keep the file's own column names. Same URL as the `park` cell.
parkRaw = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/All%20National%20Parks%20Visitation%201904-2016.txt")
Insert cell
// Park visitation rows with shortened field names.
// Rows whose YearRaw contains "Total" are dropped: returning undefined from the
// row converter makes d3.csv skip that row.
// All values are passed through as read from the CSV (no numeric coercion here).
park = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/All%20National%20Parks%20Visitation%201904-2016.txt", row => {
  const isTotalRow = row["YearRaw"].includes("Total");

  return isTotalRow
    ? undefined
    : {
        year: row.YearRaw,
        state: row.State,
        region: row.Region,
        type: row.Unit_Type,
        parkname: row.Unit_Name,
        visitors: row.Visitors,
        count: row.Number_of_Records,
      };
})
Insert cell
// Unprocessed rows of the park unit centroid CSV: no row converter is passed, so
// the objects keep the file's own column names. Same URL as the `centroid` cell.
centroidRaw = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/National_Park_Service__Park_Unit_Centroids.txt")
Insert cell
// Park unit centroid rows with shortened field names.
// Each CSV column is copied as-is into the renamed property (no numeric coercion).
centroid = d3.csv(
  "https://www.sfu.ca/~ridgez/IAT355FinalData/National_Park_Service__Park_Unit_Centroids.txt",
  ({ OBJECTID, X, Y, PARKNAME, UNIT_TYPE, STATE, REGION, GlobalID }) => ({
    id: OBJECTID,
    x: X,
    y: Y,
    parkname: PARKNAME,
    type: UNIT_TYPE,
    state: STATE,
    region: REGION,
    UUID: GlobalID,
  })
)
Insert cell
// Inbound tourism rows whose VARIABLE code does NOT contain "INB_".
// (The rows that do contain it are handled by the tourismAccommodation cell.)
tourismIn = d3.csv("https://www.sfu.ca/~ridgez/ParkData/TOURISM_INBOUND_09112019011654690.txt", row => {
  const hasInboundCode = row["VARIABLE"].includes("INB_");

  if (!hasInboundCode) {
    return {
      year: row.YEAR,
      country: row.Variable,
      // numeric value of the Value column
      population: +row.Value,
    };
  }

  // undefined tells d3.csv to skip this row
  return undefined;
})
Insert cell
// Inbound tourism rows whose VARIABLE code contains "INB_" — the complement of
// the rows kept by the tourismIn cell, read from the same file.
tourismAccommodation = d3.csv("https://www.sfu.ca/~ridgez/ParkData/TOURISM_INBOUND_09112019011654690.txt", d => {
  // returning undefined makes d3.csv skip the row
  if (!d["VARIABLE"].includes("INB_")) return undefined;
  return {
    year: d.YEAR,
    accommodation: d.Variable,
    // Coerce to a number so this field has the same type as tourismIn.population,
    // which is built from the same Value column.
    population: +d.Value,
  }
})
Insert cell
// State population table loaded from state_pop.txt, one { year, state, pop } object per row.
population = d3.csv("https://www.sfu.ca/~ridgez/ParkData/state_pop.txt", d => {
  return {
    year: d.year,
    state: d.state,
    // "NA" entries count as 0; every other value is coerced to a number so the
    // field is always numeric instead of a mix of 0 and CSV strings.
    pop: d.pop === "NA" ? 0 : +d.pop,
  }
})
Insert cell
// Loads centroid.geojson via d3.json.
// NOTE(review): no other cell visible in this notebook references this value — confirm it is still needed.
centroidGeoJson2 = d3.json("https://www.sfu.ca/~ridgez/IAT355FinalData/centroid.geojson")
Insert cell
// Collapses a detailed park unit type into one of four broad categories
// ("Memorial", "Waters", "Historic" or "Park") that are easier to work with.
// Matching is a case-sensitive substring test on `oldType`.
filter = function(oldType) {
  // Rules are checked top to bottom and the first category with a matching
  // keyword wins, so the order of this table matters.
  const rules = [
    ["Memorial", ["Battlefield", "Memorial", "Military"]],
    ["Waters", ["River", "Seashore", "Lakeshore"]],
    ["Historic", ["Historic", "Monument"]],
  ];

  for (const [category, keywords] of rules) {
    if (keywords.some(keyword => oldType.includes(keyword))) {
      return category;
    }
  }

  // anything that matched no keyword is treated as a plain park
  return "Park";
}
Insert cell
// Despite the name, this is the centroid CSV loaded with d3.csv (same URL as
// centroidRaw), not GeoJSON. centroidGeoJsonFiltered turns these rows into GeoJSON.
centroidGeoJson = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/National_Park_Service__Park_Unit_Centroids.txt");
Insert cell
centroidGeoJsonFiltered = {

// filter points so we only have the points that we have in the all data union data set, since this may have more parks than that.
let filtered = centroidGeoJson.filter(d => allDataUnion.filter(v => v.parkName === d.UNIT_NAME).length > 0);

// create json object manually
let geoJson = {
type: "FeatureCollection",

// create each feature with nicely named and new properties
features: filtered.map(d => {
let currentParkData = allDataUnion.filter(v => v.parkName === d.UNIT_NAME);

return {
type: "Feature",
geometry: {
type: "Point",
coordinates: [d.X, d.Y]
},
properties: {
name: d.UNIT_NAME,
state: d.STATE,
id: d.GlobalID,
yearMin: d3.min(currentParkData.map(d => d.year)),
yearMax: d3.max(currentParkData.map(d => d.year)),
type: currentParkData[0].parkType,
}
}
})
}
return geoJson;
}
Insert cell
// Groups the allDataUnion rows by parkName using d3array.group.
// It was meant to speed up per-park lookups, but that was never fully implemented;
// in the visible notebook it is only exposed through the `data` cell.
hashMapByParkName = d3array.group(allDataUnion, d => d.parkName);
Insert cell
allDataUnion = {

// start off with an empty array
let array = [];

// color mapping for failed attempt at scale = null for color channel
let colours = {
Historic: "#4B79A8",
Memorial: "#F48520",
Park: "#E35656",
Waters: "#72B7B2",
}

// only go through these dates
for (let year = 1900; year < 2020; year++) {

// fetch from other data sets for information
let gasPriceThisYear = gasPrice.filter(d => d.year == year);
if (gasPriceThisYear.length > 0) gasPriceThisYear = gasPriceThisYear[0].price;
else gasPriceThisYear = 0;

let incomeThisYear = income.filter(d => d.year == year);
if (incomeThisYear.length > 0) incomeThisYear = incomeThisYear[0].income;
else incomeThisYear = 0;

let populationThisYear = population.filter(d => d.year == year);
if (populationThisYear.length > 0) populationThisYear = d3.sum(populationThisYear.map(d => d.pop));
else populationThisYear = 0;

let tourismThisYear = tourismIn.filter(d => d.year == year);
if (tourismThisYear.length > 0) tourismThisYear = d3.sum(tourismThisYear.map(d => d.population));
else tourismThisYear = 0;

let numberOfParksThisYear = park.filter(d => d.year == year);
if (numberOfParksThisYear.length > 0) numberOfParksThisYear = d3.sum(numberOfParksThisYear.map(d => d.count));
else numberOfParksThisYear = 0;

// parks for that year
let parkVisits = park.filter(d => d.year == year);
// if (parkVisits.length > 0) parkVisits = d3.sum(parkVisits.map(d => d.visitors))
// else parkVisits = 0;

// list for per year aggregation and calculation
let tempList = [];

for (let c = 0; c < parkVisits.length; c++) {

// custom type filter. if it does not match our filter, skip to next part
let type = filter(parkVisits[c].type);
if (type === null) {
continue;
}

// set color to color of type
let color = colours[type];

// set up data for entry
let d = {
year: year,
parkVisits: parseFloat(parkVisits[c].visitors),
parkState: parkVisits[c].state,
parkType: type,
parkName: parkVisits[c].parkname,
gasPrice: gasPriceThisYear,
income: incomeThisYear,
population: populationThisYear,
tourist: tourismThisYear,
color: color,
numberOfParks: numberOfParksThisYear
}

// add to list
tempList.push(d);
}

// count how many parks are there per type
let typeCount = d3.nest()
.key(d => d.parkType)
.rollup(v => v.length)
.entries(tempList);

// total part visits per type
let typeVisitorsCount = d3.nest()
.key(d => d.parkType)
.rollup(v => {
return v.reduce((acc, d) => acc += d.parkVisits, 0)
})
.entries(tempList);


for (let c = 0; c < tempList.length; c++) {

// get the park
let parkTemp = tempList[c];

// get how many parks were there for the same part type
let typeCountTemp = typeCount.filter(d => d.key == parkTemp.parkType)[0].value;

// get how may park visits for that park type in total
let typeVisitorsCountTemp = typeVisitorsCount.filter(d => d.key == parkTemp.parkType)[0].value;

// type's percentage
parkTemp["percentPerTypePerYear"] = typeCountTemp / tempList.length * 100;

// ratio between how many parks there are and its total visits number.
parkTemp["perTypePerYearRatioWithVisitor"] = typeVisitorsCountTemp / tempList.filter(
d => d.parkType == parkTemp.parkType
).length;

}

// add temp list into final array
array = array.concat(tempList);

}
array.filter(d => d.parkVisits > 0);
return array;
}
Insert cell
// Bundle of the notebook's final data sets under the names consumers use.
data = ({
  unionByYear: allDataUnion,
  unionCentroid: centroidGeoJsonFiltered,
  unionInbound: tourismIn,
  hashMapByParkName: hashMapByParkName,
})
Insert cell
// NOTE(review): not referenced by any other cell visible in this notebook;
// looks like leftover scratch data — confirm before removing.
bob = [1,2,3,4]
Insert cell
md`sources`
Insert cell
md`
visitor amount
https://data.world/inform8n/us-national-parks-visitation-1904-2016-with-boundaries

centroids
https://public-nps.opendata.arcgis.com/datasets/national-park-service-park-unit-centroids/data?geometry=-140.367%2C39.851%2C-84.864%2C50.632

GAS prices
https://www.energy.gov/eere/vehicles/fact-915-march-7-2016-average-historical-annual-gasoline-pump-price-1929-2015

USA state population
https://fred.stlouisfed.org/release/tables?rid=118&eid=259194&od=1900-01-01#

inbound tourism
https://stats.oecd.org/Index.aspx?DataSetCode=TOURISM_INBOUND

income
https://fred.stlouisfed.org/series/MEHOINUSA672N
`
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more