Dec 22, 2022
27 forks
217 stars
// Create a view of the summary statistics.
// SummaryTable returns an element whose .value holds the per-column summaries,
// so this viewof cell displays the table and exposes that value as summary_data.
viewof summary_data = SummaryTable(data, {label: "Penguins Data"})
// Use the summary data (an array of objects, with additional properties)
// A small inset showing the # of rows and columns, and their types.
//   data  - array of row objects; data.columns supplies the column names when present,
//           otherwise the keys of the first row are used.
//   label - heading text rendered above the card (defaults to "Summary").
// Returns an htl.html element whose .value is {n_rows, n_columns}.
// NOTE(review): this listing looks truncated. Several closing braces/parentheses, the
// expression that should precede "=> {" on the col_data line, and the end of the HTML
// template are not visible; restore them from the original notebook before running.
SummaryCard = (data, label = "Summary") => {
// Compute values
const sample = data[0] || {};
const cols = data.columns || Object.keys(sample);
const col_data = => {
return {
label:d === "" ? "unlabeled" : d,
type:getType(data, d)
const n_columns = col_data.length;
const n_rows = data.length;
// Create the header row as a plot
const header_plot = addTooltips(
{fill:d => colorMap.get(d.type).color, title: d => `${d.label}\n(${d.type})`}
domain:[...colorMap.values()].map(d => d.color)
{stroke:"black", "stroke-width":"3px"}
// Create the columns as a plot
const col_plot = Plot.cellX(col_data, {fill:d => colorMap.get(d.type).color, fillOpacity:.3}).plot({
width:100, height:80,
domain:[...colorMap.values()].map(d => d.color)
// Construct the element
const arrow_styles = {display: "inline-block",
verticalAlign: "top",
transformOrigin: "0 0",
transform: "rotate(90deg)",
marginTop: "20px",
left: "114px",
top: "54px"}
const ele = htl.html`<div style="font-family:sans-serif; font-size:13px; margin-right:10px;">
<span style="font-size:1.3em">${label}</span>
<div>${d3.format(",.0f")(n_columns)} ⟶</div>
<span style="display:inline-block">${col_plot}</span>
<span style="display:inline-block; vertical-align:top;">${d3.format(",.0f")(n_rows)}<br/></span>
<span style=${arrow_styles}>⟶</span>
ele.value = {n_rows, n_columns};
return ele
// A small inset showing the type of column, the categories, and a few (unlabeled) summary stats.
// Example usage of SummarizeColumn on the "sex" column; the returned row element carries
// its statistics on .value, which this viewof cell exposes as col_summary.
viewof col_summary = SummarizeColumn(data, "sex")
// A function to summarize a single column.
//   data - array of row objects.
//   col  - name of the column to summarize (an empty name is displayed as "unlabeled").
// The column type comes from getType(data, col) and selects the branch of the switch:
//   "ordinal" - counts and percentages per category (empty string / null shown as "(missing)"),
//               drawn with SmallStack.
//   "date"    - min/max of the numeric-coerced values, drawn with Histogram.
//   otherwise - min, max, mean, median and standard deviation, drawn with Histogram.
// Returns a table-row element whose .value is
//   {column, type, min, max, mean, median, sd, missing, n_categories}.
// NOTE(review): this listing looks truncated. The first arguments of d3.rollups, the closing
// braces, the break statements, the case label for the continuous branch and the ends of the
// HTML templates are not visible; restore them from the original notebook before running.
// NOTE(review): the date branch computes mean/median/sd but reports them as null in value;
// confirm whether that is intentional.
// NOTE(review): the trailing style element is built with html rather than htl.html as used
// elsewhere in this function; confirm both names are in scope.
SummarizeColumn = (data, col) => {
let content, value, format, finiteFormat, el, chart, missing_label, pct_missing, min, max, median, mean, sd;
const notFiniteFormat = d3.format(",.0f");

// Construct content based on type
const type = getType(data, col)
const col1 = htl.html`<td style="white-space: nowrap;vertical-align:middle;padding-right:5px;padding-left:3px;">${icon_fns[type]()}<strong style="vertical-align:middle;">${col === "" ? "unlabeled" : col}</strong></td>`
switch(type) {
// Categorical columns
case 'ordinal':
format = d3.format(",.0f")
// Calculate category percent and count
const categories = d3.rollups(
v => ({count:v.length, pct:v.length / data.length || 1}),
d => d[col]
).sort((a, b) => b[1].count - a[1].count)
.map(d => {
let obj = {}
obj[col] = (d[0] === null || d[0] === "") ? "(missing)" : d[0]
obj.count = d[1].count
obj.pct = d[1].pct
return obj
// Calculate pct. missing
pct_missing = data.filter(d => (d[col] === null || d[col] === "")).length / data.length
// Create the chart
const stack_chart = SmallStack(categories, col)
// element to return
el = htl.html`<tr style="font-family:sans-serif;font-size:13px;">
<td><div style="position:relative;">${stack_chart}</div></td>
value = {column: col, type, min:null, max: null, mean: null, median: null,
sd: null, missing:pct_missing, n_categories:categories.length}
// Date columns
case "date":
// Calculate and format start / end
const start = d3.min(data, d => +d[col])
const end = d3.max(data, d => +d[col])
mean = d3.mean(data, d => +d[col]);
median = d3.median(data, d => +d[col]);
sd = d3.deviation(data, d => +d[col]);
// Calculate pct. missing
pct_missing = data.filter(d => d[col] === null || d[col] === "").length / data.length
chart = Histogram(data, col, type)
// Element to return
el = htl.html`<tr style="font-family:sans-serif;font-size:13px;">
<td><div style="position:relative;">${chart}</div></td>
value = {column: col, type, min:start, max: end, mean: null, median: null,
sd: null, missing:pct_missing, n_categories:null}
// Continuous columns
// Compute values
min = d3.min(data, d => +d[col])
max = d3.max(data, d => +d[col])
mean = d3.mean(data, d => +d[col])
median = d3.median(data, d => +d[col])
sd = d3.deviation(data, d => +d[col])
if(Number.isFinite(sd)) {
finiteFormat = d3.format(",." + d3.precisionFixed(sd / 10) + "f");
format = x => Number.isFinite(x) ? finiteFormat(x) : notFiniteFormat(x);
else {
format = notFiniteFormat;
pct_missing = data.filter(d => d[col] === null || isNaN(d[col])).length / data.length
chart = Histogram(data, col, type)
// Element to return
el = htl.html`<tr style="font-family:sans-serif;font-size:13px;">
<td><div style="position:relative;top:3px;">${chart}</div></td>
value = {column: col, type, min, max, mean, median, sd, missing:pct_missing, n_categories:null}
el.value = value;
el.appendChild(html`<style>td {vertical-align:middle;} </style>`)
return el
// Function that returns a summary table.
//   dataObj - the dataset. An object with a numRows() method is converted via .objects();
//             an object with a toArray() method is converted via
//             toArray().map(r => Object.fromEntries(r)).
//             NOTE(review): the final fallback of that conditional is not visible here;
//             presumably dataObj is used as-is (an array of row objects) - confirm.
//   options.label - heading passed on to SummaryCard (defaults to "Summary").
// Returns an element whose .value is an array with one summary object per column
// (the .value of each SummarizeColumn row), plus the extra properties n_rows, n_columns
// and columns.
// NOTE(review): this listing looks truncated. The expression that should precede "=> {"
// inside the template, the closing tags/braces and the end of the template are not visible.
// NOTE(review): width is not defined in this function; presumably it is the notebook-level
// page width - confirm.
SummaryTable = (dataObj, {label="Summary"} = {}) => {
const data = typeof dataObj.numRows === "function" ? dataObj.objects() :
typeof dataObj.toArray === "function" ? dataObj.toArray().map((r) => Object.fromEntries(r)) :
const sample = data[0] || {};
const cols = data.columns || Object.keys(sample);
let value = []

// Create the summary card and track data shape
const summaryCard = SummaryCard(data, label)
value.n_rows = summaryCard.value.n_rows
value.n_columns = summaryCard.value.n_columns
value.columns = cols

// Compose the element
const element = htl.html`<div style="display:inline-block; vertical-align:top;">${summaryCard}</div>
<div style="display:inline-block; max-width:${width < 500 ? width : width - 160}px">
<table style="vertical-align:middle; display:block;overflow-x:auto; max-width:${width}px;">
<thead style="z-index:-999;">
<th style="min-width:250px">Snapshot</th>
${ => {
const ele = SummarizeColumn(data, d)
value.push(ele.value) // get the value from the element
return ele
element.value = value;
return element
// Horizontal stacked bar summarizing the categories of an ordinal column.
//   categoryData  - array of objects, each carrying the category under the key col plus
//                   count and pct (the shape SummarizeColumn builds for ordinal columns).
//   col           - column name; used as the fill channel and in the tooltip title.
//   maxCategories - when there are more categories than this (default 100), only the first
//                   maxCategories are charted and an "Other categories..." entry is computed
//                   from the remaining count.
// Returns the result of addTooltips(...) applied to a 205 x 30 Plot.barX chart; a text mark
// reports the total number of categories.
// NOTE(review): this listing looks truncated. The closing braces, the .plot({ call, the
// expression that should precede "=> d[col])" in the color domain, and the statement that
// adds the "other" entry to chartData are not visible; restore them before running.
// NOTE(review): the local "categories" is assigned but never read in the visible code.
SmallStack = (categoryData, col, maxCategories = 100) => {
// Get a horizontal stacked bar
const label = categoryData.length === 1 ? " category" : " categories";
let chartData = categoryData;
let categories = 0;
if (chartData.length > maxCategories) {
chartData = categoryData.filter((d, i) => i < maxCategories);
const total = d3.sum(categoryData, (d) => d.count);
const otherCount = total - d3.sum(chartData, (d) => d.count);
let other = {};
other[col] = "Other categories...";
other.count = otherCount;
other.pct = other.count / total;

return addTooltips(
Plot.barX(chartData, {
x: "count",
fill: col,
y: 0,
title: (d) => d[col] + "\n" + pct_format(d.pct)
color: { scheme: "blues" },
marks: [
Plot.text([0, 0], {
x: 0,
frameAnchor: "bottom",
dy: 10,
text: (d) => d3.format(",.0f")(categoryData.length) + `${label}`
style: {
paddingTop: "0px",
paddingBottom: "15px",
textAnchor: "start",
overflow: "visible"
x: { axis: null },
color: {
domain: => d[col]),
scheme: "blues",
reverse: true
height: 30,
width: 205,
y: {
axis: null,
range: [30, 3]
{ fill: "darkblue" }
// Small histogram of one column with a rule marking the mean.
//   data - array of row objects.
//   col  - name of the column to plot.
//   type - key into colorMap ("continuous" by default; "date" switches the tick and tooltip
//          formatting to getDateFormat).
// Tick labels show only the extent of the data. Numbers are formatted with two decimals
// when both ends of the range floor to the same integer, otherwise with none.
// Returns the result of addTooltips(...) applied to a 240 x 55 plot.
// NOTE(review): this listing looks truncated. The Plot.plot({ call, the binning mark that
// owns the y: "count" / title options, the argument of the first barFormat( call and many
// closing braces are not visible; restore them from the original notebook before running.
Histogram = (data, col, type = "continuous") => {
// Compute color + mean
const barColor = colorMap.get(type).brighter;
const mean = d3.mean(data, (d) => d[col]);

// Formatter for the mean
const extent = d3.extent(data, (d) => d[col]);
const format = type === "date" ? getDateFormat(extent) :
Math.floor(extent[0]) === Math.floor(extent[1]) ? d3.format(",.2f") : d3.format(",.0f");
const rules = [{ label: "mean", value: mean }];
return addTooltips(
height: 55,
width: 240,
style: {
display: "inline-block"
x: {
label: "",
ticks: extent,
tickFormat: format
y: {
axis: null
marks: [
y: "count",
title: (elems) => {
// compute range for the elements
const [start, end] = d3.extent(elems, (d) => d[col]);
let barFormat;
if (type === "date") {
barFormat = getDateFormat([start, end]);
} else {
barFormat = d3.format(
Math.floor(start) === Math.floor(end) ? ",.2f" : ",.0f"
return `${elems.length} rows\n[${barFormat(
)} to ${barFormat(end)}]`;
{ x: col, fill: barColor }
Plot.ruleX(rules, {
x: "value",
strokeWidth: 2,
title: (d) => `${d.label} ${col}: ${format(d.value)}`
style: {
marginLeft: -17,
background: "none",
overflow: "visible"
{ opacity: 1, fill: colorMap.get(type).color }
// Choose a UTC date formatter from the span of an extent, using an offset to pick the format.
//   extent - [start, end] pair of dates.
// The end of the extent is compared against the start shifted forward by one unit of each
// interval, from year down to millisecond; the first interval that the span exceeds selects
// the matching formatter (e.g. a span longer than a year formats as "%Y").
// Returns a formatter function created with d3.utcFormat.
// NOTE(review): the final fallback of the conditional chain and the closing brace are not
// visible in this listing; restore them from the original notebook before running.
getDateFormat = (extent) => {
const formatMillisecond = d3.utcFormat(".%L"),
formatSecond = d3.utcFormat(":%S"),
formatMinute = d3.utcFormat("%I:%M"),
formatHour = d3.utcFormat("%I %p"),
formatDay = d3.utcFormat("%a %d"),
formatWeek = d3.utcFormat("%b %d"),
formatMonth = d3.utcFormat("%B"),
formatYear = d3.utcFormat("%Y");

// Test on the difference between the extent, offset by 1
return extent[1] > d3.utcYear.offset(extent[0], 1)? formatYear :
extent[1] > d3.utcMonth.offset(extent[0], 1)? formatMonth :
extent[1] > d3.utcWeek.offset(extent[0], 1) ? formatWeek :
extent[1] > d3.utcDay.offset(extent[0], 1) ? formatDay :
extent[1] > d3.utcHour.offset(extent[0], 1) ? formatHour :
extent[1] > d3.utcMinute.offset(extent[0], 1) ? formatMinute :
extent[1] > d3.utcSecond.offset(extent[0], 1) ? formatSecond :
extent[1] > d3.utcMillisecond.offset(extent[0], 1) ? formatMillisecond :
// Multi-scale time formatter: formats a date with the specifier that matches the finest
// non-zero component of the date.
//   date - the Date to format.
// The date is compared with its own floor at each interval (second, minute, hour, day,
// month, year). The first interval whose floor is earlier than the date decides the format:
//   sub-second part -> ".%L", seconds -> ":%S", minutes -> "%I:%M", hours -> "%I %p",
//   a day inside a month -> "%a %d" ("%b %d" when the day starts a week),
//   the first day of a month -> "%B", and the first instant of a year -> "%Y".
// Returns the formatted string.
function dateFormat(date) {
  const formatMillisecond = d3.timeFormat(".%L");
  const formatSecond = d3.timeFormat(":%S");
  const formatMinute = d3.timeFormat("%I:%M");
  const formatHour = d3.timeFormat("%I %p");
  const formatDay = d3.timeFormat("%a %d");
  const formatWeek = d3.timeFormat("%b %d");
  const formatMonth = d3.timeFormat("%B");
  const formatYear = d3.timeFormat("%Y");

  // Walk from the finest interval to the coarsest; the order matters because a date with a
  // non-zero millisecond part is also later than its minute, hour, ... floors.
  const format =
    d3.timeSecond(date) < date ? formatMillisecond
    : d3.timeMinute(date) < date ? formatSecond
    : d3.timeHour(date) < date ? formatMinute
    : d3.timeDay(date) < date ? formatHour
    : d3.timeMonth(date) < date ? (d3.timeWeek(date) < date ? formatDay : formatWeek)
    : d3.timeYear(date) < date ? formatMonth
    : formatYear;
  return format(date);
}
// Icon factories keyed by column type ("ordinal", "date", "continuous").
// Each entry is a zero-argument function returning an html fragment: a 16px circular badge
// filled with colorMap.get(type).color that wraps an SVG glyph drawn from white rectangles.
// SummarizeColumn looks an icon up with icon_fns[type]().
// NOTE(review): this listing looks truncated. The closing svg/div tags, the terminating
// backtick of each template and the closing of the object literal are not visible, and the
// xmlns attribute value is empty; restore them from the original notebook before running.
// NOTE(review): each style attribute ends with a stray "}" after margin-right:8px; - confirm
// whether it is intended.
icon_fns = ({
ordinal:() => html`<div style="display:inline-block; border-radius:100%; width: 16px; height: 16px; background-color: ${colorMap.get("ordinal").color}; transform: scale(1.3); vertical-align: middle; align-items: center;margin-right:8px;}">
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="">
<rect x="4" y="4" width="2" height="2" fill="white"/>
<rect x="7" y="4" width="6" height="2" fill="white"/>
<rect x="4" y="7" width="2" height="2" fill="white"/>
<rect x="7" y="7" width="6" height="2" fill="white"/>
<rect x="4" y="10" width="2" height="2" fill="white"/>
<rect x="7" y="10" width="6" height="2" fill="white"/>
date: () => html`<div style="display:inline-block; border-radius:100%; width: 16px; height: 16px; background-color: ${colorMap.get("date").color}; transform: scale(1.3); vertical-align: middle; align-items: center;margin-right:8px;}">
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="">
<rect x="4" y="5" width="8" height="1" fill="white"/>
<rect x="5" y="4" width="2" height="1" fill="white"/>
<rect x="9" y="4" width="2" height="1" fill="white"/>
<rect x="4" y="7" width="8" height="5" fill="white"/>
continuous:() => html`<div style="display:inline-block; border-radius:100%; width: 16px; height: 16px; background-color: ${colorMap.get("continuous").color}; transform: scale(1.3); vertical-align: middle; align-items: center;margin-right:8px;}">
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="">
<rect x="4" y="12" width="4" height="2" transform="rotate(-90 4 12)" fill="white"/>
<rect x="7" y="12" width="6" height="2" transform="rotate(-90 7 12)" fill="white"/>
<rect x="10" y="12" width="8" height="2" transform="rotate(-90 10 12)" fill="white"/>
// Color palette keyed by column type ("ordinal", "continuous", "date").
// Each value is {color, brighter}: color is the base color formatted as an RGB string, and
// brighter is the same color with its opacity lowered to 0.6 (Histogram uses it for bars).
colorMap = new Map(
  [
    ["ordinal", "rgba(78, 121, 167, 1)"],
    ["continuous", "rgba(242, 142, 44, 1)"],
    ["date", "rgba(225,87,89, 1)"]
  ].map(d => {
    const col = d3.color(d[1]);
    // Work on a copy so lowering the opacity does not alter the base color.
    const color_copy = _.clone(col);
    color_copy.opacity = .6;
    return [d[0], {color: col.formatRgb(), brighter: color_copy.formatRgb()}];
  })
)
// Infer how a column should be summarized from its first non-null value.
//   data   - iterable of row objects.
//   column - key of the column to inspect.
// Returns "continuous" when that value is a number, "date" when it is a Date instance,
// and "ordinal" for anything else (strings, booleans, ...).
getType = (data, column) => {
  for (const d of data) {
    const value = d[column];
    // == null skips both null and undefined so missing cells do not decide the type
    if (value == null) continue;
    if (typeof value === "number") return "continuous";
    if (value instanceof Date) return "date";
    return "ordinal";
  }
  // if all are null, return ordinal
  return "ordinal";
}
// Percentage formatter with one decimal place (specifier ".1%"); used for the category
// share shown in SmallStack tooltips.
pct_format = d3.format(".1%");
import {addTooltips} from "@mkfreeman/plot-tooltip"
import {dataInput} from "@john-guerra/file-input-with-default-value"
