/**
 * Build one summary-table row (`<tr>`) for a single column of `data`.
 *
 * The row holds a label cell (type icon + column name) followed by either one
 * chart cell for the whole data set, or one chart cell per entry of
 * `split_data_sets` when split data is supplied. Columns whose type is
 * "ordinal" are drawn with `SmallStack`; every other type is drawn with
 * `Histogram`.
 *
 * @param {Array<Object>} data - Rows of the full data set; `row[col]` is read.
 * @param {string} col - Column name. An empty string is labelled "unlabeled".
 * @param {Object} [options]
 * @param {Array<Object>} [options.subset=[]] - Rows of the current selection,
 *   overlaid on the unsplit chart.
 * @param {string} [options.split_attr=""] - Accepted for interface
 *   compatibility; not read by this function.
 * @param {Array} [options.split_values=[]] - Accepted for interface
 *   compatibility; not read by this function.
 * @param {Array<Array<Object>>} [options.split_data_sets=[]] - One array of
 *   rows per split; when non-empty, one chart cell is rendered per entry.
 * @param {Array<Array<Object>>} [options.split_subset_sets=[]] - Selection
 *   rows per split, indexed in parallel with `split_data_sets`.
 * @param {Array<string>} [options.split_colors=[]] - Optional colour per
 *   split, indexed in parallel with `split_data_sets`.
 * @returns {HTMLTableRowElement} The row element, with a `value` property
 *   `{ column, type, missing, n_categories }` attached (`n_categories` is
 *   `null` for non-ordinal columns).
 */
SummarizeColumn = (
  data,
  col,
  {
    subset = [],
    split_attr = "",
    split_values = [],
    split_data_sets = [],
    split_subset_sets = [],
    split_colors = []
  } = {}
) => {
  const type = getType(data, col);
  const hasSplit = split_data_sets.length !== 0;

  // Colour for the split at `index`, or "" when no colours were supplied.
  const colorFor = (index) =>
    split_colors.length !== 0 ? split_colors[index] : "";

  // A categorical/date value counts as missing when it is null or "".
  const isBlank = (v) => v === null || v === "";

  // Share of rows in `data` whose value in `col` satisfies `isMissing`.
  // NOTE: yields NaN for empty `data` (0 / 0), as before.
  const missingShare = (isMissing) =>
    data.filter((d) => isMissing(d[col])).length / data.length;

  // Wrap a chart node in the table cell markup shared by every chart column.
  const chartCell = (chartNode) =>
    htl.html`<td><div style="position:relative;">${chartNode}</div></td>`;

  const labelCell = htl.html`<td style="white-space: nowrap;vertical-align:middle;padding-right:5px;padding-left:3px;">${icon_fns[
    type
  ]()}<strong style="vertical-align:middle;">${
    col === "" ? "unlabeled" : col
  }</strong></td>`;

  let cells;
  let missing;
  let n_categories = null;

  switch (type) {
    // Categorical columns
    case "ordinal": {
      // Count rows per category, most frequent first. `pct` is always taken
      // relative to the full data set, so subset bars share the same scale.
      const tally = (rows, domain) =>
        d3
          .rollups(
            rows,
            (v) => ({
              count: v.length,
              // By operator precedence the `|| 1` applies to the ratio, so it
              // only kicks in when the ratio itself is 0 or NaN.
              pct: (v.length / data.length) || 1,
              domain
            }),
            (d) => d[col]
          )
          .sort((a, b) => b[1].count - a[1].count)
          .map(([key, stats]) => ({
            [col]: isBlank(key) ? "(missing)" : key,
            count: stats.count,
            pct: stats.pct,
            domain: stats.domain
          }));

      const categories = tally(data, "whole");
      n_categories = categories.length;
      missing = missingShare(isBlank);

      if (hasSplit) {
        cells = split_data_sets.map((rows, index) => {
          // Both the whole split and its selection are normalised by the
          // size of the split itself.
          const combined = get_combined_categoryData(
            get_subset_categories(rows, col, rows.length),
            get_subset_categories(split_subset_sets[index], col, rows.length),
            col
          );
          return chartCell(
            SmallStack(combined, col, { split_color: colorFor(index) })
          );
        });
      } else {
        const subsetCategories =
          subset.length !== 0 ? tally(subset, "subset") : [];
        const combined =
          subsetCategories.length !== 0
            ? get_combined_categoryData(categories, subsetCategories, col)
            : [];
        cells = [
          chartCell(
            SmallStack(combined.length !== 0 ? combined : categories, col)
          )
        ];
      }
      break;
    }

    // Date columns
    case "date": {
      missing = missingShare(isBlank);
      // One cell per split (or a single cell when unsplit), matching the
      // cell count of the other column types. No split colour is passed for
      // dates, mirroring the original options.
      cells = hasSplit
        ? split_data_sets.map((rows, index) =>
            chartCell(
              Histogram(rows, col, { type, subset: split_subset_sets[index] })
            )
          )
        : [chartCell(Histogram(data, col, { type, subset }))];
      break;
    }

    // Continuous columns
    default: {
      // Global isNaN is intentional: it coerces, so numeric strings are not
      // treated as missing.
      missing = missingShare((v) => v === null || isNaN(v));
      cells = hasSplit
        ? split_data_sets.map((rows, index) =>
            chartCell(
              Histogram(rows, col, {
                type,
                subset: split_subset_sets[index],
                split_color: colorFor(index)
              })
            )
          )
        : [chartCell(Histogram(data, col, { type, subset }))];
      break;
    }
  }

  // Element to return
  const el = htl.html`<tr style="font-family:sans-serif;font-size:13px;">
      ${labelCell}
      ${cells}
    </tr>`;
  el.value = { column: col, type, missing, n_categories };
  el.appendChild(htl.html`<style>td {vertical-align:middle;} </style>`);
  return el;
}