Published
Edited
Dec 22, 2019
Insert cell
md`# Park Data Wrangling`
Insert cell
d3 = require('d3@5')
Insert cell
d3array = require("d3-array@^2.2")
Insert cell
VegaLite = require('vega-embed@5')
Insert cell
// Gasoline price table, one entry per CSV row: { year, price }.
// `year` is passed through exactly as read from the file; `price` comes from
// the `gas_constant` column.
gasPrice = d3.csv("https://www.sfu.ca/~ridgez/ParkData/gas_price.txt", row => ({
  year: row.year,
  // CSV fields arrive as strings. Coerce the price so consumers get a number,
  // matching the numeric `0` that allDataUnion substitutes for missing years
  // and the numeric `income` values produced by the income cell.
  price: +row.gas_constant,
}))
Insert cell
// Income table, one entry per CSV row: { year, income }.
income = d3.csv("https://www.sfu.ca/~ridgez/ParkData/MEHOINUSA672N.txt", row => {
  // DATE is split on "/" and the third piece is used as the year
  // (presumably a M/D/YYYY date — confirm against the file).
  const [, , year] = row.DATE.split("/");
  const income = Number(row["Income (2018 Adjusted Dollars)"]);
  return { year, income };
})
Insert cell
// Park visitation CSV loaded without a row accessor, so every column keeps its
// original header name. The `park` cell loads this same URL with renamed fields.
parkRaw = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/All%20National%20Parks%20Visitation%201904-2016.txt")
Insert cell
// Park visitation rows with short field names. Rows whose YearRaw contains
// "Total" are left out (the accessor returns undefined for them).
park = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/All%20National%20Parks%20Visitation%201904-2016.txt", row => {
  const isTotalRow = row["YearRaw"].includes("Total");

  return isTotalRow
    ? undefined
    : {
        year: row.YearRaw,
        state: row.State,
        region: row.Region,
        type: row.Unit_Type,
        parkname: row.Unit_Name,
        visitors: row.Visitors,
        count: row.Number_of_Records,
      };
})
Insert cell
// Park-unit centroid CSV loaded without a row accessor (original column names).
// The `centroid` and `centroidGeoJson` cells load this same URL.
centroidRaw = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/National_Park_Service__Park_Unit_Centroids.txt")
Insert cell
// Park-unit centroids with short field names; values are passed through
// unchanged from the CSV columns named on the right-hand side.
centroid = d3.csv(
  "https://www.sfu.ca/~ridgez/IAT355FinalData/National_Park_Service__Park_Unit_Centroids.txt",
  ({ OBJECTID, X, Y, PARKNAME, UNIT_TYPE, STATE, REGION, GlobalID }) => ({
    id: OBJECTID,
    x: X,
    y: Y,
    parkname: PARKNAME,
    type: UNIT_TYPE,
    state: STATE,
    region: REGION,
    UUID: GlobalID,
  })
)
Insert cell
// Inbound-tourism rows: { year, country, population }.
// Keeps only rows whose VARIABLE value does NOT contain "INB_"; the
// tourismAccommodation cell keeps the complementary rows of the same file.
tourismIn = d3.csv("https://www.sfu.ca/~ridgez/ParkData/TOURISM_INBOUND_09112019011654690.txt", row => {
  const isInbRow = row["VARIABLE"].includes("INB_");
  if (!isInbRow) {
    return {
      year: row.YEAR,
      country: row.Variable,
      population: Number(row.Value),
    };
  }
  return undefined;
})
Insert cell
// Rows of the inbound-tourism file whose VARIABLE value contains "INB_":
// { year, accommodation, population }. The tourismIn cell keeps the
// complementary rows of the same file.
tourismAccommodation = d3.csv("https://www.sfu.ca/~ridgez/ParkData/TOURISM_INBOUND_09112019011654690.txt", d => {
  if (!d["VARIABLE"].includes("INB_")) return undefined;
  return {
    year: d.YEAR,
    accommodation: d.Variable,
    // Coerce to a number, as tourismIn already does for this same column, so
    // both cells expose `population` with the same type.
    population: +d.Value,
  };
})
Insert cell
// State population rows: { year, state, pop }.
population = d3.csv("https://www.sfu.ca/~ridgez/ParkData/state_pop.txt", d => {
  return {
    year: d.year,
    state: d.state,
    // "NA" marks a missing value and is counted as 0. Every other value is
    // coerced to a number so `pop` is never a mix of numbers and strings.
    pop: d.pop === "NA" ? 0 : +d.pop,
  };
})
Insert cell
// Centroids loaded from a ready-made GeoJSON file. No other cell visible in
// this notebook references this value.
centroidGeoJson2 = d3.json("https://www.sfu.ca/~ridgez/IAT355FinalData/centroid.geojson")
Insert cell
/**
 * Collapse a detailed park unit type into one of four broad categories that
 * are easier to work with.
 *
 * Rules are checked in order and the first match wins, so a type containing
 * both "Memorial" and "River" is classified as "Memorial".
 *
 * @param {string} oldType - unit type label, e.g. "National Historic Site".
 * @returns {?string} "Memorial", "Waters", "Historic" or "Park"; `null` when
 *   `oldType` is not a string, which lets callers skip rows that have no
 *   usable type instead of crashing on `.includes`.
 */
function filter(oldType) {
  if (typeof oldType !== "string") {
    return null;
  }

  // [category, keywords that select it], in priority order
  const rules = [
    ["Memorial", ["Battlefield", "Memorial", "Military"]],
    ["Waters", ["River", "Seashore", "Lakeshore"]],
    ["Historic", ["Historic", "Monument"]],
  ];

  for (const [category, keywords] of rules) {
    if (keywords.some(keyword => oldType.includes(keyword))) {
      return category;
    }
  }

  // anything that matches no keyword is treated as a plain park
  return "Park";
}
Insert cell
// NOTE: despite the name, this holds the centroid CSV rows as loaded by d3.csv,
// not GeoJSON; centroidGeoJsonFiltered turns them into GeoJSON features.
// Same URL as centroidRaw.
centroidGeoJson = d3.csv("https://www.sfu.ca/~ridgez/IAT355FinalData/National_Park_Service__Park_Unit_Centroids.txt");
Insert cell
centroidGeoJsonFiltered = {

// filter points so we only have the points that we have in the all data union data set, since this may have more parks than that.
let filtered = centroidGeoJson.filter(d => allDataUnion.filter(v => v.parkName === d.UNIT_NAME).length > 0);

// create json object manually
let geoJson = {
type: "FeatureCollection",

// create each feature with nicely named and new properties
features: filtered.map(d => {
let currentParkData = allDataUnion.filter(v => v.parkName === d.UNIT_NAME);

return {
type: "Feature",
geometry: {
type: "Point",
coordinates: [d.X, d.Y]
},
properties: {
name: d.UNIT_NAME,
state: d.STATE,
id: d.GlobalID,
yearMin: d3.min(currentParkData.map(d => d.year)),
yearMax: d3.max(currentParkData.map(d => d.year)),
type: currentParkData[0].parkType,
}
}
})
}
return geoJson;
}
Insert cell
// allDataUnion rows grouped by parkName via d3array.group.
// Intended as a lookup to speed up processing, but never fully wired in due to
// lack of time; in the cells visible here it is only exposed through
// `data.hashMapByParkName`.
hashMapByParkName = d3array.group(allDataUnion, d => d.parkName);
Insert cell
allDataUnion = {

// start off with an empty array
let array = [];

// color mapping for failed attempt at scale = null for color channel
let colours = {
Historic: "#4B79A8",
Memorial: "#F48520",
Park: "#E35656",
Waters: "#72B7B2",
}

// only go through these dates
for (let year = 1900; year < 2020; year++) {

// fetch from other data sets for information
let gasPriceThisYear = gasPrice.filter(d => d.year == year);
if (gasPriceThisYear.length > 0) gasPriceThisYear = gasPriceThisYear[0].price;
else gasPriceThisYear = 0;

let incomeThisYear = income.filter(d => d.year == year);
if (incomeThisYear.length > 0) incomeThisYear = incomeThisYear[0].income;
else incomeThisYear = 0;

let populationThisYear = population.filter(d => d.year == year);
if (populationThisYear.length > 0) populationThisYear = d3.sum(populationThisYear.map(d => d.pop));
else populationThisYear = 0;

let tourismThisYear = tourismIn.filter(d => d.year == year);
if (tourismThisYear.length > 0) tourismThisYear = d3.sum(tourismThisYear.map(d => d.population));
else tourismThisYear = 0;

let numberOfParksThisYear = park.filter(d => d.year == year);
if (numberOfParksThisYear.length > 0) numberOfParksThisYear = d3.sum(numberOfParksThisYear.map(d => d.count));
else numberOfParksThisYear = 0;

// parks for that year
let parkVisits = park.filter(d => d.year == year);
// if (parkVisits.length > 0) parkVisits = d3.sum(parkVisits.map(d => d.visitors))
// else parkVisits = 0;

// list for per year aggregation and calculation
let tempList = [];

for (let c = 0; c < parkVisits.length; c++) {

// custom type filter. if it does not match our filter, skip to next part
let type = filter(parkVisits[c].type);
if (type === null) {
continue;
}

// set color to color of type
let color = colours[type];

// set up data for entry
let d = {
year: year,
parkVisits: parseFloat(parkVisits[c].visitors),
parkState: parkVisits[c].state,
parkType: type,
parkName: parkVisits[c].parkname,
gasPrice: gasPriceThisYear,
income: incomeThisYear,
population: populationThisYear,
tourist: tourismThisYear,
color: color,
numberOfParks: numberOfParksThisYear
}

// add to list
tempList.push(d);
}

// count how many parks are there per type
let typeCount = d3.nest()
.key(d => d.parkType)
.rollup(v => v.length)
.entries(tempList);

// total part visits per type
let typeVisitorsCount = d3.nest()
.key(d => d.parkType)
.rollup(v => {
return v.reduce((acc, d) => acc += d.parkVisits, 0)
})
.entries(tempList);


for (let c = 0; c < tempList.length; c++) {

// get the park
let parkTemp = tempList[c];

// get how many parks were there for the same part type
let typeCountTemp = typeCount.filter(d => d.key == parkTemp.parkType)[0].value;

// get how may park visits for that park type in total
let typeVisitorsCountTemp = typeVisitorsCount.filter(d => d.key == parkTemp.parkType)[0].value;

// type's percentage
parkTemp["percentPerTypePerYear"] = typeCountTemp / tempList.length * 100;

// ratio between how many parks there are and its total visits number.
parkTemp["perTypePerYearRatioWithVisitor"] = typeVisitorsCountTemp / tempList.filter(
d => d.parkType == parkTemp.parkType
).length;

}

// add temp list into final array
array = array.concat(tempList);

}
array.filter(d => d.parkVisits > 0);
return array;
}
Insert cell
// Bundle of the derived data sets exposed by this notebook under one object.
data = ({
  unionByYear: allDataUnion,
  unionCentroid: centroidGeoJsonFiltered,
  unionInbound: tourismIn,
  hashMapByParkName,
})
Insert cell
// Scratch cell; no other cell visible in this notebook references it.
bob = [1,2,3,4]
Insert cell
md`sources`
Insert cell
md`
visitor amount
https://data.world/inform8n/us-national-parks-visitation-1904-2016-with-boundaries

centroids
https://public-nps.opendata.arcgis.com/datasets/national-park-service-park-unit-centroids/data?geometry=-140.367%2C39.851%2C-84.864%2C50.632

GAS prices
https://www.energy.gov/eere/vehicles/fact-915-march-7-2016-average-historical-annual-gasoline-pump-price-1929-2015

USA state population
https://fred.stlouisfed.org/release/tables?rid=118&eid=259194&od=1900-01-01#

inbound tourism
https://stats.oecd.org/Index.aspx?DataSetCode=TOURISM_INBOUND

income
https://fred.stlouisfed.org/series/MEHOINUSA672N
`
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more