Published
Edited
Jun 22, 2021
1 fork
10 stars
Insert cell
Insert cell
Insert cell
Insert cell
// Create a view of the summary statistics.
// `viewof` is Observable notebook syntax: the cell shows the element returned by
// SummaryTable, and that element's `.value` becomes available as `summary_data`.
// `data` is defined outside this section — presumably the penguins dataset, going by the label.
viewof summary_data = SummaryTable(data, {label: "Penguins Data"})
Insert cell
Insert cell
// Use the summary data (an array of objects, with additional properties).
// SummaryTable pushes one summary object per column onto the array and also attaches
// `n_rows`, `n_columns` and `columns` to it.
summary_data
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// A small inset showing the # of rows and columns, and their types.
// No label is passed here, so SummaryCard falls back to its default label, "Summary".
SummaryCard(data)
Insert cell
// Builds the small "shape of the data" card: a title, the column count above a strip of
// type-coloured cells, and the row count beside a taller, faded copy of that strip.
//
// Parameters:
//   data  - array of row objects; `data.columns` is used for the column names when present,
//           otherwise the keys of the first row
//   label - title shown at the top of the card (default "Summary")
//
// Returns a <div> element whose `.value` is {n_rows, n_columns}.
SummaryCard = (data, label = "Summary") => {
  // Resolve the column names and classify each column
  const columnNames = data.columns || Object.keys(data[0] || {});
  const columnInfo = columnNames.map((name) => {
    return {label: name === "" ? "unlabeled" : name, type: getType(data, name)};
  });
  const columnCount = columnInfo.length;
  const rowCount = data.length;

  // Shared pieces of the two plots
  const fillByType = (column) => colorMap.get(column.type).color;
  const typeColors = () => Array.from(colorMap.values(), (entry) => entry.color);

  // Header strip: one cell per column, with a tooltip naming the column and its type
  const headerCells = Plot.cellX(columnInfo, {
    fill: fillByType,
    title: (column) => `${column.label}\n(${column.type})`
  });
  const headerPlot = addTooltips(
    headerCells.plot({
      x: {axis: null},
      width: 100,
      height: 10,
      color: {domain: typeColors()},
      style: {overflow: "visible"}
    }),
    {stroke: "black", "stroke-width": "3px"}
  );

  // Body strip: the same cells, taller and mostly transparent
  const bodyPlot = Plot.cellX(columnInfo, {fill: fillByType, fillOpacity: .3}).plot({
    x: {axis: null},
    width: 100,
    height: 80,
    color: {domain: typeColors()}
  });

  // Inline styles that turn the second arrow a quarter turn so it points down the rows
  const arrowStyles = {
    display: "inline-block",
    verticalAlign: "top",
    transformOrigin: "0 0",
    transform: "rotate(90deg)",
    marginTop: "20px",
    position: "absolute",
    left: "114px",
    top: "54px"
  };

  // Template text is kept flush-left so the generated markup stays exactly as before
  const formatCount = d3.format(",.0f");
  const card = htl.html`<div style="font-family:sans-serif; font-size:13px; margin-right:10px;">
<span style="font-size:1.3em">${label}</span>
<div>${formatCount(columnCount)} ⟶</div>
${headerPlot}
<span style="display:inline-block">${bodyPlot}</span>
<span style="display:inline-block; vertical-align:top;">${formatCount(rowCount)}<br/></span>
<span style=${arrowStyles}>⟶</span>
</div>`;

  card.value = {n_rows: rowCount, n_columns: columnCount};
  return card;
}
Insert cell
Insert cell
// A small inset showing the type of column, the categories, and a few (unlabeled) summary stats.
// SummarizeColumn returns a single <tr> element, so this cell shows one table row for the
// "sex" column; through `viewof`, the row's `.value` is available as `col_summary`.
viewof col_summary = SummarizeColumn(data, "sex")
Insert cell
// A function to summarize a single column as one table row.
//
// Parameters:
//   data - array of row objects
//   col  - key of the column to summarize
//
// Returns a <tr> element with six cells: icon + name, chart, share of missing values,
// mean, median and SD (shown as "-" for ordinal and date columns).
// The element's `.value` is
//   {column, type, min, max, mean, median, sd, missing, n_categories}
// where entries that do not apply to the column's type are null. For date columns `min`
// and `max` are numbers, because the cells are coerced with unary plus.
SummarizeColumn = (data, col) => {
  const type = getType(data, col);

  // A cell is missing when it is null or undefined; for ordinal columns the empty string
  // counts too, matching the "(missing)" category label below.
  const isMissing = (v) => v == null || (type === "ordinal" && v === "");

  // Numeric accessor for the d3 statistics. `+null` is 0, so coercing blindly would count
  // gaps as zeros; returning undefined lets d3 skip them instead.
  const numeric = (d) => (isMissing(d[col]) ? undefined : +d[col]);

  // Share of missing cells; 0 rather than NaN when there are no rows at all.
  const pct_missing = data.length === 0
    ? 0
    : data.filter((d) => isMissing(d[col])).length / data.length;

  // First cell: type icon and column name
  const col1 = htl.html`<td style="white-space: nowrap;vertical-align:middle;padding-right:5px;padding-left:3px;">${icon_fns[type]()}<strong style="vertical-align:middle;">${col === "" ? "unlabeled" : col}</strong></td>`;

  // Assemble the row from the chart cell and the three (already formatted) statistic cells
  const buildRow = (chartCell, [meanText, medianText, sdText]) => htl.html`<tr style="font-family:sans-serif;font-size:13px;">
    ${col1}
    ${chartCell}
    <td>${pct_format(pct_missing)}</td>
    <td>${meanText}</td>
    <td>${medianText}</td>
    <td>${sdText}</td>
  </tr>`;
  const NO_STATS = ["-", "-", "-"];

  let el;
  let value;
  switch (type) {
    // Categorical columns
    case "ordinal": {
      // Count and share per category, largest first
      const categories = d3.rollups(
        data,
        (v) => ({count: v.length, pct: v.length / data.length}),
        (d) => d[col]
      )
        .sort((a, b) => b[1].count - a[1].count)
        .map(([key, stats]) => ({
          [col]: isMissing(key) ? "(missing)" : key,
          count: stats.count,
          pct: stats.pct
        }));
      const stack_chart = SmallStack(categories, col);
      el = buildRow(
        htl.html`<td><div style="position:relative;">${stack_chart}</div></td>`,
        NO_STATS
      );
      value = {
        column: col, type, min: null, max: null, mean: null, median: null,
        sd: null, missing: pct_missing, n_categories: categories.length
      };
      break;
    }
    // Date columns: only the range is reported
    case "date": {
      const start = d3.min(data, numeric);
      const end = d3.max(data, numeric);
      const chart = Histogram(data, col, type);
      el = buildRow(
        htl.html`<td><div style="position:relative;">${chart}</div></td>`,
        NO_STATS
      );
      value = {
        column: col, type, min: start, max: end, mean: null, median: null,
        sd: null, missing: pct_missing, n_categories: null
      };
      break;
    }
    // Continuous columns
    default: {
      const format = d3.format(",.0f");
      const min = d3.min(data, numeric);
      const max = d3.max(data, numeric);
      const mean = d3.mean(data, numeric);
      const median = d3.median(data, numeric);
      const sd = d3.deviation(data, numeric);
      const chart = Histogram(data, col, type);
      el = buildRow(
        htl.html`<td><div style="position:relative;top:3px;">${chart}</div></td>`,
        [format(mean), format(median), format(sd)]
      );
      value = {column: col, type, min, max, mean, median, sd, missing: pct_missing, n_categories: null};
      break;
    }
  }

  el.value = value;
  // Vertically centre the cells; the rule travels with the row because a row can be
  // displayed on its own, outside SummaryTable.
  el.appendChild(html`<style>td {vertical-align:middle;} </style>`);
  return el;
}
Insert cell
Insert cell
// Function that returns a summary table: the SummaryCard next to a table holding one
// SummarizeColumn row per column.
//
// Parameters:
//   data    - array of row objects; `data.columns` supplies the column names when present,
//             otherwise the keys of the first row are used
//   options - {label}: title passed on to SummaryCard (default "Summary")
//
// Returns the composed element. Its `.value` is an array with one summary object per
// column, plus the extra properties `n_rows`, `n_columns` and `columns`.
SummaryTable = (data, {label = "Summary"} = {}) => {
  const cols = data.columns || Object.keys(data[0] || {});

  // The card already knows the shape of the data, so take the counts from it
  const summaryCard = SummaryCard(data, label);
  const {n_rows, n_columns} = summaryCard.value;
  const value = Object.assign([], {n_rows, n_columns, columns: cols});

  // Build the rows up front, collecting each row's summary as we go
  const rows = [];
  for (const col of cols) {
    const row = SummarizeColumn(data, col);
    value.push(row.value);
    rows.push(row);
  }

  // Template text is kept flush-left so the generated markup stays exactly as before
  const element = htl.html`<div style="display:inline-block; vertical-align:top;">${summaryCard}</div>
<div style="display:inline-block;">
<table style="vertical-align:middle; display:block; overflow-x:auto; max-width:${width}px;">
<thead style="z-index:-999;">
<th>Column</th>
<th style="min-width:250px">Snapshot</th>
<th>Missing</th>
<th>Mean</th>
<th>Median</th>
<th>SD</th>
</thead>
${rows}
</table>
</div>`;

  element.value = value;
  return element;
}
Insert cell
Insert cell
// Horizontal stacked bar showing the share of each category, with a caption underneath
// giving the number of categories.
//
// Parameters:
//   categoryData - array of objects carrying the category under the key `col`, plus
//                  `count` (bar length) and `pct` (shown in the tooltip)
//   col          - name of the key that holds the category
//
// Returns the plot after it has been passed through addTooltips.
SmallStack = (categoryData, col) => {
  const n = categoryData.length;
  const caption = d3.format(",.0f")(n) + (n === 1 ? " category" : " categories");

  const bars = Plot.barX(categoryData, {
    x: "count",
    fill: col,
    y: 0,
    title: (d) => d[col] + "\n" + pct_format(d.pct)
  });

  return addTooltips(
    bars.plot({
      marks: [
        // A single datum, so the caption is drawn exactly once
        Plot.text([0], {x: 0, dy: 13, text: () => caption})
      ],
      style: {
        paddingTop: 0,
        paddingBottom: 15,
        textAnchor: "start",
        overflow: "visible"
      },
      x: {axis: null},
      // The domain follows the order of categoryData; the scheme is reversed
      color: {
        domain: categoryData.map((d) => d[col]),
        scheme: "blues",
        reverse: true
      },
      height: 30,
      width: 205,
      y: {
        axis: null,
        range: [30, 3]
      }
    }),
    {fill: "darkblue"}
  );
}
Insert cell
// Small histogram of one column, with a vertical rule at the mean and axis ticks at the
// smallest and largest value.
//
// Parameters:
//   data - array of row objects
//   col  - key of the column to plot
//   type - key into colorMap; "date" also switches the labels to %m/%d/%Y
//          (default "continuous")
//
// Returns the plot after it has been passed through addTooltips.
Histogram = (data, col, type = "continuous") => {
  // `+null` is 0, so missing cells are mapped to undefined, which d3 ignores; otherwise
  // every gap would pull the mean and the lower tick towards zero.
  const numeric = (d) => (d[col] == null ? undefined : +d[col]);

  const palette = colorMap.get(type);
  const mean = d3.mean(data, numeric);
  const [lowest, highest] = d3.extent(data, numeric);

  // Formatter shared by the ticks and the tooltips
  const format = type === "date" ? d3.utcFormat("%m/%d/%Y") : d3.format(",.0f");
  const rules = [{label: "mean", value: mean}];

  // Tooltip text for one bin: number of rows and the range of values that fell into it
  const describeBin = (elems) => {
    const extent = d3.extent(elems, (d) => d[col]);
    return `${elems.length} rows\n[${format(extent[0])} to ${format(extent[1])}]`;
  };

  return addTooltips(
    Plot.plot({
      height: 55,
      width: 240,
      x: {
        label: "",
        ticks: [lowest, highest],
        tickFormat: format
      },
      y: {
        axis: null
      },
      marks: [
        Plot.rectY(data, Plot.binX({y: "count", title: describeBin}, {x: col, fill: palette.brighter})),
        Plot.ruleY([0]),
        Plot.ruleX(rules, {x: "value", strokeWidth: 2, title: (d) => `${d.label} ${col}: ${format(d.value)}`})
      ],
      style: {
        marginLeft: -17,
        background: "none",
        overflow: "visible"
      }
    }),
    {opacity: 1, fill: palette.color}
  );
}
Insert cell
// Icon factories, keyed by column type ("ordinal", "date", "continuous").
// Each function returns a fresh element: a round badge in the type's colour (looked up in
// colorMap when the function is called) with a white 16x16 glyph inside.
icon_fns = (() => {
  // Inline style shared by the three badges; only the background colour differs
  const badgeStyle = (type) =>
    `display:inline-block; border-radius:100%; width: 16px; height: 16px; background-color: ${colorMap.get(type).color}; transform: scale(1.3); vertical-align: middle; align-items: center;margin-right:8px;`;

  return {
    // Three rows of "bullet + line"
    ordinal: () => html`<div style="${badgeStyle("ordinal")}">
      <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
        <rect x="4" y="4" width="2" height="2" fill="white"/>
        <rect x="7" y="4" width="6" height="2" fill="white"/>
        <rect x="4" y="7" width="2" height="2" fill="white"/>
        <rect x="7" y="7" width="6" height="2" fill="white"/>
        <rect x="4" y="10" width="2" height="2" fill="white"/>
        <rect x="7" y="10" width="6" height="2" fill="white"/>
      </svg>
    </div>`,
    // Calendar page
    date: () => html`<div style="${badgeStyle("date")}">
      <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
        <rect x="4" y="5" width="8" height="1" fill="white"/>
        <rect x="5" y="4" width="2" height="1" fill="white"/>
        <rect x="9" y="4" width="2" height="1" fill="white"/>
        <rect x="4" y="7" width="8" height="5" fill="white"/>
      </svg>
    </div>`,
    // Three bars of increasing height
    continuous: () => html`<div style="${badgeStyle("continuous")}">
      <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
        <rect x="4" y="12" width="4" height="2" transform="rotate(-90 4 12)" fill="white"/>
        <rect x="7" y="12" width="6" height="2" transform="rotate(-90 7 12)" fill="white"/>
        <rect x="10" y="12" width="8" height="2" transform="rotate(-90 10 12)" fill="white"/>
      </svg>
    </div>`
  };
})()
Insert cell
// Colours per column type. Each entry maps a type name to
//   {color, brighter}
// where `color` is the base colour and `brighter` is, despite its name, the same colour
// at 60% opacity; both are stored as strings produced by formatRgb().
colorMap = new Map(
  [
    ["ordinal", "rgba(78, 121, 167, 1)"],
    ["continuous", "rgba(242, 142, 44, 1)"],
    ["date", "rgba(225,87,89, 1)"]
  ].map(([type, css]) => {
    const base = d3.color(css);
    // copy() returns a new colour, so `base` itself is left untouched
    const faded = base.copy({opacity: 0.6});
    return [type, {color: base.formatRgb(), brighter: faded.formatRgb()}];
  })
)
Insert cell
// Classifies a column by looking at its first cell that is neither null nor undefined:
//   a number -> "continuous", a Date -> "date", anything else -> "ordinal".
// A column with no such cell (including an empty dataset) is reported as "ordinal".
//
// Parameters:
//   data   - iterable of row objects
//   column - key of the column to inspect
getType = (data, column) => {
  for (const row of data) {
    const cell = row[column];
    const isMissing = cell === null || cell === undefined;
    if (!isMissing) {
      // The first usable cell decides; later rows are never inspected
      if (cell instanceof Date) return "date";
      return typeof cell === "number" ? "continuous" : "ordinal";
    }
  }
  // Nothing but missing cells
  return "ordinal";
}
Insert cell
// Shared percentage formatter: the ".1%" specifier multiplies by 100 and prints one decimal
// place followed by a percent sign. Used for the "Missing" cells in SummarizeColumn and the
// category tooltips in SmallStack.
pct_format = d3.format(".1%");
Insert cell
Insert cell
import {addTooltips} from "@mkfreeman/plot-tooltip"
Insert cell
import {dataInput} from "@john-guerra/file-input-with-default-value"

Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more