Public
Edited
Aug 19, 2024
Importers
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Rows of `data` whose bill_depth is below 18; passed as the `subset` option further down.
filtered_data = data.filter(({ bill_depth }) => bill_depth < 18)
Insert cell
//Create a view of the summary statistics
//viewof summary_data = ExtendedSummaryTable(data, { label: "Penguins Data" })
Insert cell
Insert cell
// Use the summary data (an array of objects, with additional properties)
//summary_data
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// viewof split_summary_table_1 = ExtendedSummaryTable(data, {
// label: "Penguin Data",
// split_attr: "sex",
// split_values: ["MALE", "FEMALE"]
// })

Insert cell
// viewof split_summary_table_2 = ExtendedSummaryTable(data, {
// label: "Penguin Data",
// subset: filtered_data
// })

Insert cell
// Summary table over the full `data`, combining both optional features:
//  - `subset`: the filtered rows (filtered_data) to compare against the whole data set
//  - `split_attr` / `split_values`: one chart column per listed value of the "sex" attribute
//  - `split_colors`: presumably paired by position with `split_values` (SummarizeColumn
//    indexes split_colors by split index) — confirm against ExtendedSummaryTable
// `label` is presumably the table's display title — ExtendedSummaryTable is not shown here.
viewof split_summary_table = ExtendedSummaryTable(data, {
label: "Penguin Data",
subset: filtered_data,
split_attr: "sex",
split_values: ["MALE", "FEMALE"],
split_colors: ["#e41a1c", "#377eb8"]
})
Insert cell
Insert cell
// A small inset showing the type of column, the categories, and a few (unlabeled) summary stats
//viewof col_summary = SummarizeColumn(data, "sex")
Insert cell
// Build one summary-table row (<tr>) for a single column of `data`.
//
// Parameters
//   data               array of row objects (the whole data set)
//   col                name of the column to summarize
//   options.subset             rows to highlight against the whole data set ([] = none)
//   options.split_attr         accepted for interface compatibility; not read here
//   options.split_values       accepted for interface compatibility; not read here
//   options.split_data_sets    one array of rows per split; when non-empty, one chart
//                              cell is rendered per split instead of a single chart
//   options.split_subset_sets  the subset rows for each split, indexed like split_data_sets
//   options.split_colors       optional color per split, indexed like split_data_sets
//
// Returns the <tr> element. Its `value` property holds
//   { column, type, missing, n_categories }
// where `missing` is the fraction of rows with a missing value and `n_categories`
// is only set for "ordinal" columns (null otherwise).
//
// Relies on helpers defined in other cells: getType, icon_fns, SmallStack, Histogram,
// get_subset_categories, get_combined_categoryData.
SummarizeColumn = (
  data,
  col,
  {
    subset = [],
    split_attr = "",
    split_values = [],
    split_data_sets = [],
    split_subset_sets = [],
    split_colors = []
  } = {}
) => {
  const type = getType(data, col);
  const hasSplit = split_data_sets.length !== 0;

  // Color for the i-th split, or "" so the chart falls back to its default palette.
  const splitColor = (index) =>
    split_colors.length !== 0 ? split_colors[index] : "";

  // Wrap a chart in the table cell markup shared by every chart column.
  const chartCell = (content) =>
    htl.html`<td><div style="position:relative;">${content}</div></td>`;

  // Count rows per category of `col`, most frequent first. `pct` is always relative
  // to the size of the whole data set, also when `rows` is the subset.
  const countCategories = (rows, domain) =>
    d3
      .rollups(
        rows,
        (v) => ({
          count: v.length,
          pct: v.length / data.length || 1,
          domain
        }),
        (d) => d[col]
      )
      .sort((a, b) => b[1].count - a[1].count)
      .map(([key, stats]) => ({
        [col]: key === null || key === "" ? "(missing)" : key,
        ...stats
      }));

  // Histogram cells shared by date and continuous columns: one per split, or a
  // single chart of the whole data set (with the subset highlighted) otherwise.
  const histogramCells = () =>
    hasSplit
      ? split_data_sets.map((split_data, index) =>
          chartCell(
            Histogram(split_data, col, {
              type,
              subset: split_subset_sets[index],
              split_color: splitColor(index)
            })
          )
        )
      : chartCell(Histogram(data, col, { type, subset }));

  // First cell: type icon + column name.
  const col1 = htl.html`<td style="white-space: nowrap;vertical-align:middle;padding-right:5px;padding-left:3px;">${icon_fns[
    type
  ]()}<strong style="vertical-align:middle;">${
    col === "" ? "unlabeled" : col
  }</strong></td>`;

  let cells;
  let pct_missing;
  let n_categories = null;

  switch (type) {
    // Categorical columns
    case "ordinal": {
      const categories = countCategories(data, "whole");
      n_categories = categories.length;

      if (hasSplit) {
        cells = split_data_sets.map((split_data, index) => {
          const combined = get_combined_categoryData(
            get_subset_categories(split_data, col, split_data.length),
            get_subset_categories(
              split_subset_sets[index],
              col,
              split_data.length
            ),
            col
          );
          return chartCell(
            SmallStack(combined, col, { split_color: splitColor(index) })
          );
        });
      } else {
        // Only merge whole/subset counts when a subset actually produced categories.
        const subset_categories =
          subset.length !== 0 ? countCategories(subset, "subset") : [];
        const stack_data =
          subset_categories.length !== 0
            ? get_combined_categoryData(categories, subset_categories, col)
            : categories;
        cells = chartCell(SmallStack(stack_data, col));
      }

      pct_missing =
        data.filter((d) => d[col] === null || d[col] === "").length /
        data.length;
      break;
    }

    // Date columns. Rendered with the same cell layout as continuous columns so
    // every row of the table has the same number of cells (the previous extra
    // leading chart cell is gone, and the per-split subsets come from
    // split_subset_sets rather than an undeclared variable).
    case "date": {
      cells = histogramCells();
      pct_missing =
        data.filter((d) => d[col] === null || d[col] === "").length /
        data.length;
      break;
    }

    // Continuous columns
    default: {
      cells = histogramCells();
      // No number formatter is built here: it was never used, and deriving its
      // precision from the standard deviation fails when the deviation is
      // undefined (fewer than two numeric values).
      pct_missing =
        data.filter((d) => d[col] === null || isNaN(d[col])).length /
        data.length;
      break;
    }
  }

  const el = htl.html`<tr style="font-family:sans-serif;font-size:13px;">
    ${col1}
    ${cells}
  </tr>`;

  el.value = {
    column: col,
    type,
    missing: pct_missing,
    n_categories
  };
  el.appendChild(html`<style>td {vertical-align:middle;} </style>`);
  return el;
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// viewof temp_table_1 = ExtendedSummaryTable(temp, {
// label: "Temp_Data",
// subset: temp_subset
// })

Insert cell
// viewof temp_table = ExtendedSummaryTable(temp, {
// label: "Temp_Data",
// subset: temp_subset,
// split_attr: "Sex",
// split_values: ["male", "female"],
// split_colors: ["#e41a1c", "#377eb8"]
// })

Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Render a small histogram of `col` over `data`, with a vertical rule at the mean
// and hover tooltips (via addTooltips).
//
// Parameters
//   data                 array of row objects
//   col                  name of the column to bin
//   options.type         "date" selects date tick/tooltip formatting; anything else
//                        is formatted as a number (default "continuous")
//   options.subset       rows to highlight; when non-empty the bars are stacked by
//                        the "in_subset" flag added by get_continuous_dataset
//   options.split_color  base color for the bars ("" = default grey / steelblue)
//
// Returns whatever addTooltips returns for the rendered plot.
Histogram = (
  data,
  col,
  { type = "continuous", subset = [], split_color = "" } = {}
) => {
  const hasSubset = subset.length !== 0;
  const hasSplitColor = split_color !== "";

  // With a subset, every row is tagged with in_subset so the bars can be stacked
  // and colored by that flag; otherwise the rows are plotted as they are.
  const plot_data = hasSubset
    ? get_continuous_dataset(data, subset, col)
    : data;

  // Mean rule + axis formatter
  const mean = d3.mean(data, (d) => d[col]);
  const extent = d3.extent(data, (d) => d[col]);
  const format =
    type === "date"
      ? getDateFormat(extent)
      : Math.floor(extent[0]) === Math.floor(extent[1])
      ? d3.format(",.2f")
      : d3.format(",.0f");
  const rules = [{ label: "mean", value: mean }];

  // Fill: a channel name ("in_subset") when highlighting a subset, otherwise a
  // literal color. `r` is the matching color range handed to the plot's color scale.
  let barColor = hasSubset ? "in_subset" : "lightgrey";
  let r;
  if (hasSplitColor && hasSubset) {
    // Same hue for both groups; the second entry is the 30%-opacity variant.
    const fadedSplitColor = d3.color(split_color);
    fadedSplitColor.opacity = 0.3;
    r = [split_color, fadedSplitColor];
  } else if (hasSplitColor) {
    r = split_color;
    barColor = split_color;
  } else if (hasSubset) {
    r = ["steelblue", "lightgrey"];
  } else {
    r = "lightgrey";
  }

  return addTooltips(
    Plot.plot({
      height: 75,
      width: 240,
      x: {
        label: "",
        ticks: extent,
        tickFormat: format
      },
      y: {
        axis: null
      },
      color: {
        range: r,
        reverse: true
      },
      marks: [
        Plot.rectY(
          plot_data,
          Plot.stackY({
            ...Plot.binX(
              {
                y: "count",
                // Tooltip text: row count and value range of the bin's elements.
                title: (elems) => {
                  const [start, end] = d3.extent(elems, (d) => d[col]);
                  const barFormat =
                    type === "date"
                      ? getDateFormat([start, end])
                      : d3.format(
                          Math.floor(start) === Math.floor(end)
                            ? ",.2f"
                            : ",.0f"
                        );
                  return `${elems.length} rows\n[${barFormat(
                    start
                  )} to ${barFormat(end)}]`;
                }
              },
              { x: col, fill: barColor, order: "count" }
            ),
            reverse: true
          })
        ),
        Plot.ruleY([0]),
        Plot.ruleX(rules, {
          x: "value",
          strokeWidth: 2,
          title: (d) => `${d.label} ${col}: ${format(d.value)}`
        })
      ],
      // The options object used to carry a second, earlier `style` key
      // ({ display: "inline-block" }) that this one silently overrode; only the
      // effective style is kept.
      style: {
        color: "lightgrey",
        marginLeft: -17,
        background: "none",
        overflow: "visible"
      }
    }),
    { opacity: 1, fill: r }
  );
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
/**
 * Tag every row of `data` with whether it also appears in `subset`.
 *
 * Rows are matched by their `id` property. The input rows are not modified:
 * each returned row is a shallow copy with an added `in_subset` property whose
 * value is the string "true" or "false" (strings, not booleans).
 *
 * @param {Array<Object>} data - all rows
 * @param {Array<Object>} subset - rows considered "in the subset"
 * @param {string} col - unused; kept so existing callers keep working
 * @returns {Array<Object>} shallow copies of `data`, in the same order, each with `in_subset`
 */
function get_continuous_dataset(data, subset, col) {
  // Build the id lookup once so each row is classified in O(1) instead of
  // scanning the subset for every row. (Set membership also matches NaN ids.)
  const subsetIds = new Set(subset.map((row) => row["id"]));

  return data.map((row) => ({
    ...row,
    in_subset: subsetIds.has(row["id"]) ? "true" : "false"
  }));
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
import {addTooltips} from "@mkfreeman/plot-tooltip"
Insert cell
import {dataInput} from "@john-guerra/file-input-with-default-value"
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more