Published
Edited
Jul 7, 2022
Insert cell
# Data Analysis in JavaScript
Insert cell
SimpleData = {
const sda = await require("simple-data-analysis@0.9.18");
return sda.SimpleData;
}
Insert cell
/**
 * Display the content of a SimpleData-like object as a table.
 *
 * @param sd - Object exposing a getData() method; its result is passed to Inputs.table.
 * @returns Whatever Inputs.table returns for that data.
 */
function showTable(sd) {
  const rows = sd.getData();
  return Inputs.table(rows);
}
Insert cell
/**
 * Pass a piece of markup through the `html` tagged template so the cell can display it.
 *
 * @param htmlOrSvg - Markup to display (in this notebook, the value produced by getChart).
 * @returns The result of the `html` tagged template.
 */
function showChart(htmlOrSvg) {
  const rendered = html`${htmlOrSvg}`;
  return rendered;
}
Insert cell
{
let someData = [
{firstName: "Nael", lastName: "Shiab", job: "Computational journalist"},
{firstName: "Isabelle", lastName: "Bouchard"}
];

let simpleData = new SimpleData({
data: someData,
fillMissingKeys: true
});

return showTable(simpleData);
}

Insert cell
### Data from URL
Insert cell
// Load the employees CSV file hosted in the simple-data-analysis GitHub repository.
// In Node.js, data can also be loaded from disk with loadDataFromLocalFile.
simpleData = new SimpleData().loadDataFromUrl({
  url: "https://raw.githubusercontent.com/nshiab/simple-data-analysis/main/data/employees.csv"
});
Insert cell
Insert cell
// Display the data loaded from the CSV file as a table.
showTable(simpleData)
Insert cell
Insert cell
Insert cell
// Normalise every key on a copy of the loaded data.
// For example, "Department or Unit" becomes departmentOrUnit and "End-of_year-Bonus" becomes endOfYearBonus.
// NOTE(review): the exact header spellings above are not visible here — confirm them against the CSV.
simpleDataCleanKeys = simpleData
  .clone()
  .formatAllKeys();
Insert cell
// Display the data after formatAllKeys() as a table.
showTable(simpleDataCleanKeys);
Insert cell
### renameKey()

You can also rename keys individually.
Insert cell
// Give two of the formatted keys shorter names, working on a clone().
// NOTE(review): oldKey is spelled "departementOrUnit" here; it must match the key
// produced by formatAllKeys() from the CSV header exactly. Check the actual key
// before changing this spelling.
simpleDataShortKeys = simpleDataCleanKeys
  .clone()
  .renameKey({
    oldKey: "departementOrUnit",
    newKey: "unit"
  })
  .renameKey({
    oldKey: "endOfYearBonus",
    newKey: "bonus"
  });
Insert cell
// Display the data with the renamed keys ("unit" and "bonus") as a table.
showTable(simpleDataShortKeys);
Insert cell
### checkValues()

We must check what our values contain.
**checkValues** tells us how many and what percentage (count | percentage) of the values are of each specific type.
Insert cell
// Run checkValues() on a copy so simpleDataShortKeys keeps its original items.
valuesChecked = simpleDataShortKeys
  .clone()
  .checkValues();
Insert cell
// Display the result of checkValues() as a table.
showTable(valuesChecked);
Insert cell
Insert cell
// Notice the typo for the vice-president Neena Kocchar: instead of a number, her salary
// is the string "&6%". We add it to the missingValues option so the VP is excluded too.
// NOTE(review): the fourth entry is a single space (" "), not an empty string — confirm this is intended.
//
// To exclude items based on the values of one specific key only, you would write
// .excludeMissingValues({key: "nameOfTheKey"}).
simpleDataNoMissingValues = simpleDataShortKeys
  .clone()
  .excludeMissingValues({
    missingValues: [null, NaN, undefined, " ", "&6%"]
  });
Insert cell
// Display the data left after excludeMissingValues() as a table.
showTable(simpleDataNoMissingValues);
Insert cell
Insert cell
// Convert the values stored under "unit" to strings, on a copy of the data.
simpleDataString = simpleDataNoMissingValues
  .clone()
  .valuesToString({
    key: "unit"
  });
Insert cell
// Display the data after valuesToString() on "unit" as a table.
showTable(simpleDataString);
Insert cell
Insert cell
Insert cell
Insert cell
// Convert the "hireDate" values to dates, using the format "%d-%b-%y".
simpleDataDates = simpleDataString
  .clone()
  .valuesToDate({
    key: "hireDate",
    format: "%d-%b-%y"
  });
Insert cell
// Display the data after valuesToDate() on "hireDate" as a table.
showTable(simpleDataDates);
Insert cell
### datesToString()

You can convert a date into a string with a specific format. Let's rewrite our dates in hireDate to match a more familiar syntax.
Insert cell
// Write the "hireDate" dates back as strings, using the format "%A, %B %e, %Y".
simpleDataDatesString = simpleDataDates
  .clone()
  .datesToString({
    key: "hireDate",
    format: "%A, %B %e, %Y"
  });
Insert cell
// Display the data after datesToString() on "hireDate" as a table.
showTable(simpleDataDatesString);
Insert cell
### replaceValues()
Sometimes, we want to replace specific things in the text values of our data.

By default, **replaceValues** will make the replacement only if the entire string matches the **oldValue** provided. You can change the method option to partialString to replace only a part of the string. That's what we do here.

**The values must be strings to use this method**
Insert cell
// Clean up the "bonus" strings, then turn them into numbers.
simpleDataReplacedValues = simpleDataDatesString
  .clone()
  // Step 1: bonuses end with a "%"; strip it.
  .replaceValues({
    key: "bonus",
    oldValue: "%",
    newValue: "",
    method: "partialString"
  })
  // Step 2: decimals are written with a comma; use a point instead.
  .replaceValues({
    key: "bonus",
    oldValue: ",",
    newValue: ".",
    method: "partialString"
  })
  // Step 3: bonus should be a number rather than a string, so convert it to float.
  // This was not possible before removing the "%" and replacing the ",".
  .valuesToFloat({key: "bonus"});
Insert cell
Insert cell
Insert cell
### modifyValues()

When you need to do more complex operations on a value, **modifyValues** is the method to use. You can provide a function to **valueGenerator** that will be applied to values.

**modifyValues gives you access to a specified value only.** If you need values stored in multiple keys, check **modifyItems**.
Insert cell
// Swap the order of the last and first names: the text after the first comma
// is moved in front of the text before it.
// NOTE(review): simpleDataRounded is not defined in the cells visible here — confirm it exists upstream.
simpleDataValuesModified = simpleDataRounded
  .clone()
  .modifyValues({
    key: "name",
    valueGenerator: (name) => {
      const [lastName, firstName] = name.split(",");
      return `${firstName.trim()} ${lastName.trim()}`;
    }
  });
Insert cell
// Display the data with the reordered names as a table.
showTable(simpleDataValuesModified);
Insert cell
### modifyItems()

**modifyItems** lets us generate new values for a specific key by using any values from any keys. All you need to do is provide the **itemGenerator** option.
Insert cell
// Replace each bonus, treated here as a percentage of the salary,
// with the corresponding rounded amount.
simpleDataItemsModified = simpleDataValuesModified
  .clone()
  .modifyItems({
    key: "bonus",
    itemGenerator: ({bonus, salary}) => Math.round((bonus / 100) * salary)
  });
Insert cell
// Display the data with the recomputed "bonus" values as a table.
showTable(simpleDataItemsModified);
Insert cell
Insert cell
Insert cell
// Add a "firstName" key holding the part of "name" that precedes the first space.
simpleDataKeyAdded = simpleDataItemsModified
  .clone()
  .addKey({
    key: "firstName",
    itemGenerator: ({name}) => {
      const [firstWord] = name.split(" ");
      return firstWord;
    }
  });
Insert cell
// Display the data with the added "firstName" key as a table.
showTable(simpleDataKeyAdded);
Insert cell
Insert cell
// Remove the "firstName" key again, on a copy of the data.
simpleDataKeyRemoved = simpleDataKeyAdded
  .clone()
  .removeKey({
    key: "firstName"
  });
Insert cell
// Display the data after removeKey() on "firstName" as a table.
showTable(simpleDataKeyRemoved);
Insert cell
Insert cell
simpleDataItemsAdded = {
//our array of objects
const newEmployees = [{
hireDate:"Thursday, April 6, 2006",
name: "kaketo Paulo",
job: "Data Analyst",
salary: 1000000,
unit: "60",
bonus: 50.0,
}
];
return simpleDataKeyRemoved.clone().addItems({
dataToBeAdded: newEmployees
});
};
Insert cell
// Display the data including the added employee as a table.
showTable(simpleDataItemsAdded);
Insert cell
Insert cell
simpleDataMerged = {
// Heres the name of each unit as an array of objects.

const unitsNames = [
{unit: "20", unitName: "Marketing"},
{unit: "30", unitName: "Administration"},
{unit: "40", unitName: "Sales"},
{unit: "50", unitName: "Human Resources"},
{unit: "60", unitName: "Engineering"},
{unit: "100", unitName: "Accounting"},
{unit: "110", unitName: "R&N"},
];

return simpleDataItemsAdded
.clone()
.mergeItems({dataToBeMerged: unitsNames, commonKey: "unit"});
};
Insert cell
// Display the data after mergeItems() as a table.
showTable(simpleDataMerged);
Insert cell
## Selecting Data
While looking for answers in your data, you often need to focus on only a part of your data sets.
Insert cell
Insert cell
// Select the "job", "salary" and "unit" keys on a copy of the merged data.
simpleDataSelected = simpleDataMerged
  .clone()
  .selectKeys({
    keys: ["job", "salary", "unit"]
  });
Insert cell
// Display the data after selectKeys() as a table.
showTable(simpleDataSelected);
Insert cell
Insert cell
// Keep only the employees whose salary is greater than 7000.
simpleDataFilteredValues = simpleDataSelected
  .clone()
  .filterValues({
    key: "salary",
    valueComparator: (amount) => amount > 7000
  });
Insert cell
// Display the data after filterValues() on "salary" as a table.
showTable(simpleDataFilteredValues);
Insert cell
Insert cell
// Keep the employees that are not in unit 100 and whose salary is greater than 7000.
// The unit is compared with strict inequality against the string "100": the notebook
// converts "unit" to strings with valuesToString(), and the added data also uses strings.
simpleDataFilteredItems = simpleDataSelected
  .clone()
  .filterItems({
    itemComparator: (employee) =>
      employee.unit !== "100" && employee.salary > 7000
  });
Insert cell
// Display the data after filterItems() as a table.
showTable(simpleDataFilteredItems);
Insert cell
Insert cell
Insert cell
// Display the result of describe(), run on a copy of simpleDataSelected, as a table.
showTable(simpleDataSelected.clone().describe());
Insert cell
Insert cell
// Display a copy of the data sorted on "salary" in descending order.
showTable(
  simpleDataSelected
    .clone()
    .sortValues({
      key: "salary",
      order: "descending"
    })
);
Insert cell
Insert cell
// Add a "decile" key computed from "salary", using 10 quantiles.
simpleDataQuantiles = simpleDataSelected
  .clone()
  .addQuantiles({key: "salary", newKey: "decile", nbQuantiles: 10});
Insert cell
// Display the data with the added "decile" key as a table.
showTable(simpleDataQuantiles);
Insert cell
Insert cell
// Add a "bin" key computed from "salary".
// NOTE(review): nbBins is 1000, which looks unusually fine-grained for this data set — confirm it is intended.
simpleDataBins = simpleDataSelected
  .clone()
  .addBins({key: "salary", newKey: "bin", nbBins: 1000});
Insert cell
// Display the data with the added "bin" key as a table.
showTable(simpleDataBins);
Insert cell

### addOutliers()

Identifying outliers can be useful when your data values cover a wide range. **addOutliers** creates a new key indicating whether the value is an outlier (**true**) or not (**false**).

The outlier definition used by this method is the same as the one used in **box plots**.


Insert cell
// Flag salary outliers in a new "outlier" key, then sort on that key
// in descending order so the outliers are listed first.
simpleDataOutliers = simpleDataSelected
  .clone()
  .addOutliers({key: "salary", newKey: "outlier"})
  .sortValues({key: "outlier", order: "descending"});
Insert cell
// Display the data with the "outlier" key, sorted on it, as a table.
showTable(simpleDataOutliers);
Insert cell
Insert cell
Insert cell
// Display simpleDataSelectedNoOutliers as a table.
// NOTE(review): simpleDataSelectedNoOutliers is not defined in the cells visible here — confirm it exists upstream.
showTable(simpleDataSelectedNoOutliers);
Insert cell
Insert cell
// Small hand-written data set used for the correlation examples.
// Each row is [temperature, iceCreamSales, hotChocholateSales] and is expanded into an object.
simpleDataSales = new SimpleData({
  data: [
    [2, 0, 15],
    [0, 0, 9],
    [7, 0, 3],
    [15, 1, 0],
    [12, 4, 1],
    [25, 10, 0],
    [31, 25, 0]
  ].map(([temperature, iceCreamSales, hotChocholateSales]) => ({
    temperature,
    iceCreamSales,
    hotChocholateSales
  }))
});
Insert cell
Insert cell
// Display the correlation() result for the "temperature" / "iceCreamSales" pair.
showTable(
  simpleDataSales
    .clone()
    .correlation({key1: "temperature", key2: "iceCreamSales"})
);
Insert cell
Insert cell
// Display the result of correlation() called without options on a copy of simpleDataSales.
showTable(simpleDataSales.clone().correlation());
Insert cell
Insert cell
// Display simpleDataSelected again as a table, before summarizing it.
showTable(simpleDataSelected);
Insert cell
Insert cell
// Display the result of summarize() called without options on a copy of simpleDataSelected.
showTable(simpleDataSelected.clone().summarize());
Insert cell
Insert cell
Insert cell
Insert cell
// Display the mean and median salary for each job.
// The key is "salary" in lower case, matching the key used by selectKeys, filterValues
// and addQuantiles elsewhere in this notebook; the previous "Salary" matched no key.
// NOTE(review): confirm that `value` is the option name summarize() expects in the pinned library version.
showTable(
  simpleDataSelected
    .clone()
    .summarize({
      value: "salary",
      keyCategory: "job",
      summary: ["mean", "median"]
    })
);
Insert cell
Insert cell
Insert cell
// Build a dot chart of "salary" against "job", coloured by job.
// NOTE(review): simpleDataSelectedNoOutliers is not defined in the cells visible here — confirm it exists upstream.
chart = simpleDataSelectedNoOutliers.getChart({
  type: "dot",
  x: "job",
  y: "salary",
  color: "job",
  marginLeft: 75,
  marginBottom: 50
});
Insert cell
Insert cell
// Display the chart produced by getChart().
showChart(chart);
Insert cell
Insert cell
// Options for Plot.plot: one facet row per job, with salaries drawn as dots coloured by job.
// getData() is called once and the same array is used for both facet.data and the
// mark's data; Plot's default facet matching relies on the two being strictly equal.
// NOTE(review): simpleDataSelectedNoOutliers is not defined in the cells visible here — confirm it exists upstream.
plotOptions = (() => {
  const rows = simpleDataSelectedNoOutliers.getData();

  return {
    grid: true,
    facet: {
      marginLeft: 100,
      data: rows,
      y: "job"
    },
    marks: [
      Plot.dotX(rows, {
        x: "salary",
        fill: "job"
      })
    ]
  };
})();

Insert cell
// Render the plot described by plotOptions.
Plot.plot(plotOptions);
Insert cell
Insert cell
Insert cell
// Call getKeys() on simpleDataSelected; the cell displays the returned value.
simpleDataSelected.getKeys();
Insert cell
Insert cell
// Call getArray() for the "job" key; the cell displays the returned value.
simpleDataSelected.getArray({key: "job"});
Insert cell
Insert cell
// Call getUniqueValues() for the "job" key; the cell displays the returned value.
simpleDataSelected.getUniqueValues({key: "job"});
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more