Published
Edited
Feb 22, 2021
Insert cell
md`# Manipulating Covid-19 data`
Insert cell
Insert cell
Insert cell
d3 = require("d3@6")
Insert cell
md`
## Step 0: Import data
`
Insert cell
dataRaw = d3.json("https://covid.ourworldindata.org/data/owid-covid-data.json")
Insert cell
Insert cell
countryCode = Object.keys(dataRaw)
Insert cell
/**
 * Collect the per-country records of a code-keyed object into an array.
 *
 * @param {Object} a - Object whose keys are country codes (the notebook passes
 *   the parsed OWID JSON, `dataRaw`).
 * @returns {Array} The values of `a`, in the same order as `Object.keys(a)`.
 */
function getData(a) {
  // Read from the argument itself instead of the notebook-level `dataRaw` and
  // `countryCode` cells, so the function works on whatever object it is given.
  // For example, a['ABW'] retrieves the record stored under the key 'ABW'.
  return Object.keys(a).map((code) => a[code]);
}
Insert cell
DataWithoutCountryCode = getData(dataRaw)
Insert cell
DataWithoutCountryCode[2]
Insert cell
Insert cell
/**
 * Tag each record with its country code, in place.
 *
 * @param {Array<Object>} a - Records, positionally aligned with `codes`.
 * @param {Array<string>} [codes=countryCode] - Code for each position of `a`.
 *   Defaults to the notebook's `countryCode` cell, which is what the function
 *   always used before this parameter existed.
 * @returns {Array<Object>} The same array `a`; each element now has a
 *   `country_code` property.
 */
function AddCode2Object (a, codes = countryCode) {
  for (const [position, code] of codes.entries()) {
    a[position]['country_code'] = code;
  }
  return a;
}
Insert cell
codeAddedforEachObject = AddCode2Object(DataWithoutCountryCode)
Insert cell
Insert cell
[codeAddedforEachObject[1].data[1].date, codeAddedforEachObject[1].data[1].total_cases]
Insert cell
/**
 * Derive two [date, value] series from a record's daily entries, in place.
 *
 * @param {Object} a - Record with a `data` array whose entries have `date`,
 *   `total_cases` and `total_deaths` properties.
 * @returns {Object} The same record, with `confirmed` ([date, total_cases]
 *   pairs) and `deaths` ([date, total_deaths] pairs) added.
 */
function Add_confirmed_and_deaths (a) {
  const confirmedSeries = [];
  const deathSeries = [];
  a.confirmed = confirmedSeries;
  a.deaths = deathSeries;
  for (const day of a.data) {
    confirmedSeries.push([day.date, day.total_cases]);
    deathSeries.push([day.date, day.total_deaths]);
  }
  return a;
}
Insert cell
DataBeforeMapping = {
for (let i = 0; i < countryCode.length; i++) {
Add_confirmed_and_deaths(codeAddedforEachObject[i])
}
return codeAddedforEachObject
}
Insert cell
Insert cell
DataAfterMapping = DataBeforeMapping.map((record) => {
  // Keep only the fields used downstream, renaming `location` to
  // `country_name` and `continent` to `region`.
  const { country_code, location, population, continent, confirmed, deaths } = record;
  return {
    country_code,
    country_name: location,
    population,
    region: continent,
    confirmed,
    deaths,
  };
})
Insert cell
m = DataAfterMapping[1]['confirmed'][66][1]
Insert cell
/**
 * Substitute the placeholder 99 for a missing value.
 *
 * @param {*} a - Any value.
 * @returns {*} 99 when `a` is `undefined` or `null`; otherwise `a` unchanged.
 */
function clearChange (a) {
  const isMissing = a === undefined || a === null;
  return isMissing ? 99 : a;
}
Insert cell
md` ## Step 4: Deal with missing data in \`confirmed\` and \`deaths\`.

In the original dataset, quite a few values are missing in \`confirmed\` and \`deaths\`. Here is how I handled them:

1. If the value for \`confirmed\` or \`deaths\` is missing on a given day (**Day a**), I filled it with the value from the previous day (**Day a - 1**). If the previous day's value was also missing, it has already been filled from **Day a - 2**, and so on.
2. If the first day's data is missing, I filled it with \`0\`.
`
Insert cell
/**
 * Forward-fill missing values in a [date, value] series, in place.
 *
 * A missing first value becomes 0; any later missing value takes the value of
 * the entry before it (which has already been filled by then).
 *
 * @param {Array<Array>} a - Series of [date, value] pairs.
 * @returns {Array<Array>} The same array `a`, with no `undefined`/`null`
 *   values left in position 1 of any pair.
 */
function fillUndefined (a) {
  // An empty series has nothing to fill. Without this guard the read of
  // a[0][1] below throws a TypeError.
  if (a.length === 0) {
    return a;
  }
  // `== null` is true for both undefined and null, like the `== undefined`
  // test it replaces.
  if (a[0][1] == null) {
    a[0][1] = 0;
  }
  for (let t = 1; t < a.length; t++) {
    if (a[t][1] == null) {
      a[t][1] = a[t - 1][1];
    }
  }
  return a;
}
Insert cell
blankFilled = {
for (let i = 0; i < countryCode.length; i++) {
fillUndefined(DataAfterMapping[i]['confirmed']);
fillUndefined(DataAfterMapping[i]['deaths'])
}
return DataAfterMapping
}
Insert cell
md`
## Step 5: Normalize each country/area by using the same date range.

In the raw data, data for some countries/areas started from Dec. 31, 2019, whereas that for other places did not. In this step, I'll normalize each country/area by letting them have the same date range, i.e., from Dec. 31, 2019 to the current day.
`
Insert cell
Insert cell
bottomDay = parseTime(blankFilled[0]['confirmed'][(blankFilled[0]['confirmed'].length)-1][0])
// The date string of the last [date, value] pair in the first country's
// `confirmed` series, passed through parseTime. Only the first country
// (index 0) is consulted; other countries' last dates are not considered here.
Insert cell
bottomDayPlusOne = d3.timeDay.offset(bottomDay, 1)
// The day after bottomDay, as a new Date. The earlier Date#setDate call
// advanced the `bottomDay` cell's own Date in place (so other cells reading
// bottomDay could see it shifted, and a re-run shifted it again) and
// evaluated to a numeric timestamp rather than a Date.
Insert cell
formatTime = d3.timeFormat("%Y-%m-%d")
Insert cell
formatTime(bottomDay) == formatTime(new Date(Date.now()))
Insert cell
parseTime = d3.timeParse("%Y-%m-%d")
Insert cell
parseTime("2015-1-01")
Insert cell
Insert cell
dateRange = d3.timeDay.range(new Date(2019, 11, 31), bottomDayPlusOne)
// Daily dates starting at Dec. 31, 2019 (Date months are 0-indexed, so 11 is
// December). d3's interval.range excludes its stop argument, which is why the
// stop is bottomDayPlusOne rather than bottomDay.
Insert cell
formatTime(dateRange[dateRange.length - 1])
Insert cell
blankFilled[0]['confirmed'][(blankFilled[0]['confirmed'].length)-1][0]
Insert cell
Insert cell
eachCodeLastDate_confirmed = {
let falsebank = [];
for (let i = 0; i < blankFilled.length; i++) {
if (blankFilled[i]['confirmed'][(blankFilled[i]['confirmed'].length)-1][0] != blankFilled[0]['confirmed'][(blankFilled[0]['confirmed'].length)-1][0]) {
let falsecountry = blankFilled[i]['country_code'];
let falsecountry_lastdate = blankFilled[i]['confirmed'][(blankFilled[i]['confirmed'].length)-1][0];
falsebank.push([falsecountry, falsecountry_lastdate])
}
}
return falsebank;
}
Insert cell
eachCodeLastDate_deaths = {
let falsebank = [];
for (let i = 0; i < blankFilled.length; i++) {
if (blankFilled[i]['deaths'][(blankFilled[i]['deaths'].length)-1][0] != blankFilled[0]['deaths'][(blankFilled[0]['deaths'].length)-1][0]) {
let falsecountry = blankFilled[i]['country_code'];
let falsecountry_lastdate = blankFilled[i]['deaths'][(blankFilled[i]['deaths'].length)-1][0];
falsebank.push([falsecountry, falsecountry_lastdate])
}
}
return falsebank;
}
Insert cell
/**
 * Build a zero-valued time series from a list of dates.
 *
 * @param {Array} a - Dates, each passed to the notebook's `formatTime`.
 * @returns {Array<Array>} One [formatted date, 0] pair per element of `a`,
 *   in the same order.
 */
function generateTS(a) {
  return Array.from(a, (day) => [formatTime(day), 0]);
}
Insert cell
datesAndZeros = generateTS(dateRange)
Insert cell
blankFilled[1]['confirmed'][0][0] == formatTime(dateRange[0])
Insert cell
Insert cell
firstDateAdded = {
for (let i = 0; i < blankFilled.length; i++) {
if (blankFilled[i]['confirmed'][0][0] != formatTime(dateRange[0])){
blankFilled[i]['confirmed'].unshift([formatTime(dateRange[0]), 0])
}
if (blankFilled[i]['deaths'][0][0] != formatTime(dateRange[0])){
blankFilled[i]['deaths'].unshift([formatTime(dateRange[0]), 0])
}
}
return blankFilled
}
Insert cell
benchMark = firstDateAdded[0]['confirmed']
Insert cell
md`### 5.4 Normalize each country/area by using the same date range:
`
Insert cell
/**
 * Fill calendar gaps inside a [date, value] series, in place.
 *
 * Walks the series alongside the notebook's `dateRange`: wherever the date at
 * position i is not the i-th day of the range, a pair for that day is inserted
 * carrying the previous entry's value.
 *
 * Pairs are only inserted before existing entries: a series that ends before
 * the last day of `dateRange` is not extended, and entries left over once
 * `dateRange` is exhausted stay at the tail untouched.
 *
 * @param {Array<Array>} a - Series of [date string, value] pairs whose first
 *   entry is expected to be the first day of `dateRange`.
 * @returns {Array<Array>} The same array `a`.
 */
function insertIt (a) {
  // Stop at the end of dateRange as well as the end of the series. The length
  // of `a` grows with every insert, and past the end of dateRange there is no
  // real date left to compare against, so a bound on a.length alone lets the
  // loop keep inserting rows without ever finishing.
  for (let i = 1; i < a.length && i < dateRange.length; i++) {
    const expectedDate = formatTime(dateRange[i]);
    if (a[i][0] !== expectedDate) {
      a.splice(i, 0, [expectedDate, a[i - 1][1]]);
    }
  }
  return a;
}
Insert cell
confirmedAndDeathInserted = {
for (let i = 0; i < firstDateAdded.length; i++) {
insertIt(firstDateAdded[i]['confirmed']);
insertIt(firstDateAdded[i]['deaths'])
}
return firstDateAdded;
}
Insert cell
md`
## Step 6: Sort by the alphabetical order of country codes
`
Insert cell
confirmedAndDeathInserted_sorted = [...confirmedAndDeathInserted].sort((a, b) => a.country_code.localeCompare(b.country_code))
// Array#sort reorders in place, and confirmedAndDeathInserted is the very
// array that blankFilled and firstDateAdded also return. Sorting a shallow
// copy keeps positional lookups in those cells (e.g. blankFilled[0]) stable.
// The country records themselves are still shared, not cloned.
// Comparator from https://stackoverflow.com/a/35092754
Insert cell
md`## Step 7: Finding country codes that are different from those in Professor YY's raw data
`
Insert cell
/**
 * Filter predicate that keeps only the first occurrence of each value.
 *
 * @param {*} value - Element being tested.
 * @param {number} index - Position of `value` in `self`.
 * @param {Array} self - The array being filtered.
 * @returns {boolean} True when `index` is the first position at which
 *   `value` occurs in `self`.
 */
function onlyUnique(value, index, self) {
  const firstPosition = self.indexOf(value);
  return firstPosition === index;
}
// From https://stackoverflow.com/questions/1960473/get-all-unique-values-in-a-javascript-array-remove-duplicates?page=1&tab=votes#tab-top
Insert cell
originalData = d3.json("https://raw.githubusercontent.com/covid19-data/covid19-data/master/output/cntry_stat_owid.json")
Insert cell
originalCountryCodeList = {
let original_list = []
originalData.forEach((element) => {
original_list.push(element.country_code)
})
return original_list;
}
Insert cell
currentCountryCodeList = {
let current_list = []
confirmedAndDeathInserted_sorted.forEach(element =>
current_list.push(element.country_code)
)
return current_list
}
Insert cell
Insert cell
unique_to_original = originalCountryCodeList.filter(x => !currentCountryCodeList.includes(x))
//https://stackoverflow.com/questions/1187518/how-to-get-the-difference-between-two-arrays-in-javascript
Insert cell
unique_to_current = currentCountryCodeList.filter(x => !originalCountryCodeList.includes(x))
Insert cell
dataBeforeEditingCode = confirmedAndDeathInserted_sorted
Insert cell
md`### 7.1. I changed \`OWID_KOS\` to \`XKX\`, and \`OWID_WRL\` to \`WLD\` to be consistent with Professor YY's codes.
`
Insert cell
dataAfterEditingCode = {
dataBeforeEditingCode.find(element => element.country_code == 'OWID_KOS').country_code = 'XKX';
dataBeforeEditingCode.find(element => element.country_code == 'OWID_WRL').country_code = 'WLD';
return dataBeforeEditingCode;
}
Insert cell
md`
## Step 8: Add the \`region\` object for each country/area with data from the World Bank's country metadata.
`
Insert cell
countryMetadata = FileAttachment("metadata_country.csv").csv()
Insert cell
countryMetadata[0]["Country Code"]
Insert cell
WBCountryList = {
let WB_List = []
countryMetadata.forEach((element) => {
WB_List.push(element["Country Code"])
})
return WB_List;
}
Insert cell
updatedCurrCountryList = {
let updated_country_list = [];
dataAfterEditingCode.forEach((element) => {
updated_country_list.push(element.country_code)
})
return updated_country_list;
}
Insert cell
md`
### 8.1. Find out whether there are country codes in the current data that are not found in the World Bank data.
`
Insert cell
unique_to_updated = updatedCurrCountryList.filter(x => !WBCountryList.includes(x))
// The list of countries that were not found in the WB country metadata list.
Insert cell
md`
### 8.2 Fill \`region\` with data from the World Bank.
`
Insert cell
datawithWBCode = {
for (let i = 0; i < dataAfterEditingCode.length; i ++) {
countryMetadata.forEach((element) => {
if (dataAfterEditingCode[i].country_code == element["Country Code"]) {
dataAfterEditingCode[i].WB_Region = element.Region}
})
}
return dataAfterEditingCode;
}
Insert cell
dataWBCodeAfterMapping = datawithWBCode.map((entry) => {
  // The OWID continent (held in `region` so far) moves to `continent`, and the
  // World Bank region (WB_Region) takes over the `region` key.
  const { country_code, country_name, population, region, WB_Region, confirmed, deaths } = entry;
  return {
    country_code,
    country_name,
    population,
    continent: region,
    region: WB_Region,
    confirmed,
    deaths,
  };
})
Insert cell
Insert cell
dataWBCodeAfterMapping.filter(element => element.region == undefined)
Insert cell
dataNineNewRegion = dataWBCodeAfterMapping
Insert cell
dataNineNewRegion2 = {
//dataNineNewRegion.find(element => element.country_code == 'AIA').region = 'Latin America & Caribbean';
//dataNineNewRegion.find(element => element.country_code == 'BES').region = 'Latin America & Caribbean';
// dataNineNewRegion.find(element => element.country_code == 'ESH').region = 'Middle East & North Africa';
//dataNineNewRegion.find(element => element.country_code == 'FLK').region = 'Latin America & Caribbean';
//dataNineNewRegion.find(element => element.country_code == 'GGY').region = 'Europe & Central Asia';
//dataNineNewRegion.find(element => element.country_code == 'JEY').region = 'Europe & Central Asia';
//dataNineNewRegion.find(element => element.country_code == 'MSR').region = 'Latin America & Caribbean';
dataNineNewRegion.find(element => element.country_code == 'TWN').region = 'East Asia & Pacific';
dataNineNewRegion.find(element => element.country_code == 'VAT').region = 'Europe & Central Asia';
//dataNineNewRegion.find(element => element.country_code == 'WLF').region = 'East Asia & Pacific';
dataNineNewRegion.find(element => element.country_code == 'WLD').region = 'World';
return dataNineNewRegion;
}
// From https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more