Public
Edited
May 24, 2023
1 fork
Importers
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Main example: culmen length of the penguins. When the notebook's `groupBy`
// toggle is on there is one violin per species, otherwise a single violin.
chart = ViolinPlot(penguins, {
  value: (row) => row.culmen_length_mm,
  groupBy: groupBy ? (row) => row.species : null,
  // ViolinPlot hands the fill accessor the per-group record, whose key is `id`.
  color: (group) => color(group.id),
  yLabel: "↑ Culmen Length (mm)",
  width,
  height: 600,
  // The remaining options are forwarded unchanged from the notebook's controls.
  bandwidth,
  thresholds,
  pad,
  drawBars,
  drawPoints,
  drawHistogram
})
Insert cell
// Small multiples: one violin plot per numeric attribute of the penguins
// table, driven by the same notebook controls as the main chart.
facetedChart = FacetViolinPlot(penguins, {
  // Keep the attributes whose value in the first row coerces to a number.
  attribs: Object.keys(penguins[0]).filter((attr) => !isNaN(penguins[0][attr])),
  violinOptions: {
    groupBy: groupBy ? (d) => d.species : null,
    // ViolinPlot calls the fill accessor with the per-group record
    // ({ id, densities, ... }), so the group key is `d.id`. The previous
    // `d[0]` was always undefined, which mapped every species to one colour.
    color: (d) => (groupBy ? color(d.id) : "steelblue"),
    bandwidth,
    thresholds,
    pad,
    drawBars,
    drawPoints,
    drawHistogram
  }
})
Insert cell
// Shared ordinal colour scale: each distinct key gets the next colour of the
// d3 Category10 palette.
color = d3.scaleOrdinal().range(d3.schemeCategory10)
Insert cell
// Second example: total salary per organization group from the compensation
// table, drawn in steel blue. Only the density bars toggle is forwarded here.
chart2 = ViolinPlot(compensation, {
  value: (row) => row["Total Salary"],
  groupBy: (row) => row["Organization Group"],
  // Optional: order the groups by descending salary by also passing
  // gDomain: d3.groupSort(compensation, ([d]) => -d["Total Salary"], d => d["Organization Group"]),
  yLabel: "Total Salary",
  yFormat: ",d",
  color: "steelblue",
  width,
  height: 500,
  bandwidth,
  thresholds,
  pad,
  drawBars
})
Insert cell
// Rows of the attached SF_Employee_Compensation_2022.csv file; `typed: true`
// asks the CSV parser to infer column types instead of leaving strings.
compensation = FileAttachment("SF_Employee_Compensation_2022.csv").csv({ typed: true })
Insert cell
Insert cell
// Copyright 2021 Observable, Inc.
// Copyright 2023 John Alexis Guerra Gomez
// Released under the ISC license.
// https://observablehq.com/@john-guerra/violin-plot
// Based on https://observablehq.com/@d3/bar-chart
/**
 * Renders a violin plot of one quantitative value as an SVG element: one
 * violin per group, with optional density bars, jittered raw points and a
 * histogram overlaid on each violin.
 *
 * Expects `d3` and `kde` (fast-kde) to be available in the enclosing scope.
 *
 * @param {Array} data - the rows to plot.
 * @param {Object} [options] - see the inline comments on each option below.
 * @returns {SVGSVGElement} the chart node. It carries an extra
 *   `notNullElements` property: the number of rows whose value is present
 *   and numeric (zero counts as a value).
 */
function ViolinPlot(
  data,
  {
    thresholds = 40, // forwarded to kde.density1d as `bins` and to d3.bin as its thresholds
    bandwidth = 7, // forwarded to kde.density1d as `bandwidth`
    pad = 4, // forwarded to kde.density1d as `pad`
    groupBy = null, // given d in data, returns the (ordinal) attribute to group by, null for no grouping
    value = (d) => d, // given d in data, returns the (quantitative) y-value
    title, // accepted for interface compatibility; not used by this function
    marginTop = 20, // the top margin, in pixels
    marginRight = 0, // the right margin, in pixels
    marginBottom = 30, // the bottom margin, in pixels
    marginLeft = 80, // the left margin, in pixels
    width = 640, // the outer width of the chart, in pixels
    height = 500, // the outer height of the chart, in pixels
    gDomain, // an array of (ordinal) group keys; defaults to the groups found in data
    xRange = [marginLeft, width - marginRight], // [left, right]
    yRange = [height - marginBottom, marginTop], // [bottom, top]
    yDomain, // [ymin, ymax]; defaults to the extent of the density sample positions
    xPadding = 0.1, // amount of x-range to reserve to separate violins
    yFormat, // a format specifier string (or a formatting function) for the y-axis
    yLabel = "↑ Frequency", // a label for the y-axis
    color = "currentColor", // violin fill: a colour, or a function of the group record ({ id, ... })
    drawBars = false, // overlay one rect per density sample
    drawPoints = false, // overlay the raw values as jittered circles
    drawHistogram = false, // overlay a d3.bin histogram of each group
    drawViolin = true, // draw the violin area itself
    pointsJitter, // horizontal jitter span in pixels; defaults to an eighth of the x-range end
    pointR = 2, // point radius, in pixels
    pointStroke = "#333", // point stroke colour
    drawXAxis = false // draw a density axis under each violin
  } = {}
) {
  // True for values that are present and numeric. null and "" are rejected
  // explicitly because they coerce to 0 and would slip through a bare isNaN test.
  const isNumeric = (v) =>
    v !== null && v !== undefined && v !== "" && !Number.isNaN(Number(v));

  // Without a groupBy accessor every row falls into a single group keyed "".
  const isGrouped = Boolean(groupBy);
  const groupOf = groupBy || (() => "");

  const groups = d3
    .groups(data, groupOf)
    .map(([id, V]) => ({ id, V, Y: d3.map(V, value) }));

  const groupDomain = gDomain === undefined ? groups.map((g) => g.id) : gDomain;
  const gScale = d3.scaleBand(groupDomain, xRange).padding(xPadding);
  const jitter =
    pointsJitter !== undefined ? pointsJitter : gScale.range()[1] / 8;

  // Per group: the KDE samples as [position, density] pairs and, when
  // requested, histogram bins annotated with their share of the group.
  const GDensities = groups.map(({ id, V, Y }) => {
    const densities = Array.from(
      kde.density1d(Y, { bandwidth, pad, bins: thresholds })
    ).map(({ x, y }) => [x, y]);

    const histogram = drawHistogram
      ? d3
          .bin()
          .thresholds(thresholds)(Y)
          .map((b) => Object.assign(b, { realDensity: b.length / Y.length }))
      : undefined;

    return { id, densities, histogram, Y, V };
  });

  const valueDomain =
    yDomain === undefined
      ? d3.extent(GDensities.map((g) => g.densities.map((d) => d[0])).flat())
      : yDomain;
  const yScale = d3.scaleLinear().domain(valueDomain).range(yRange);

  const maxDensity = d3.max(GDensities, ({ densities }) =>
    d3.max(densities, (d) => d[1])
  );

  // Density -> horizontal pixels, measured from the centre of each band.
  const xScale = d3
    .scaleLinear()
    .domain([0, maxDensity * 2])
    .nice()
    .range([0, gScale.bandwidth() * (2 - xPadding)]);

  // Construct axes and formats.
  const xAxis = d3.axisBottom(xScale).tickFormat(d3.format(".0%"));
  const gAxis = d3.axisBottom(gScale).tickSizeOuter(0);
  const yAxis = d3.axisLeft(yScale);
  if (typeof yFormat === "function") {
    yAxis.tickFormat(yFormat);
  } else if (yFormat !== undefined) {
    // axis.tickFormat expects a function; a specifier string has to go through
    // axis.ticks so the scale builds the formatter. 10 is the default tick count.
    yAxis.ticks(10, yFormat);
  }

  // Mirrored area: density to both sides of the band centre.
  const area = d3
    .area()
    .curve(d3.curveLinear)
    .x0((d) => xScale(-d[1] / 2))
    .x1((d) => xScale(d[1] / 2))
    .y((d) => yScale(d[0]));

  // Moves a per-group element to the horizontal centre of its band.
  const toBandCenter = ({ id }) =>
    `translate(${gScale(id) + gScale.bandwidth() / 2}, 0)`;

  const svg = d3
    .create("svg")
    .attr("width", width)
    .attr("height", height)
    .attr("viewBox", [0, 0, width, height])
    .attr(
      "style",
      "max-width: 100%; height: auto; height: intrinsic; overflow: visible"
    );

  // ------------- Axis --------------
  svg
    .append("g")
    .attr("transform", `translate(${marginLeft},0)`)
    .call(yAxis)
    .call((g) => g.select(".domain").remove())
    .call((g) =>
      g
        .selectAll(".tick line")
        .clone()
        .attr("x2", width - marginLeft - marginRight)
        .attr("stroke-opacity", 0.1)
    )
    .call((g) =>
      g
        .append("text")
        .attr("x", -marginLeft)
        .attr("y", 10)
        .attr("fill", "currentColor")
        .attr("text-anchor", "start")
        .text(yLabel)
    );

  if (drawXAxis) {
    svg
      .append("g")
      .selectAll("g.xAxis")
      .data(GDensities)
      .join("g")
      .attr("class", "xAxis")
      .attr(
        "transform",
        (g) =>
          `translate(${gScale(g.id) + gScale.bandwidth() / 2},${
            height - marginBottom
          })`
      )
      .call(xAxis);
  }

  // Only label the groups when a grouping accessor was supplied. Testing the
  // option itself (rather than the first row's key) also works for empty data
  // and for falsy group keys such as 0.
  if (isGrouped) {
    svg
      .append("g")
      .attr("id", "gAxis")
      .attr(
        "transform",
        `translate(0,${height - marginBottom + marginBottom / 2})`
      )
      .call(gAxis)
      .call((g) => g.select(".domain").remove())
      .call((g) => g.selectAll(".tick line").remove());
  }

  // ------------- Violin ---------------
  if (drawViolin) {
    svg
      .append("g")
      .selectAll(".violin")
      .data(GDensities)
      .join("path")
      .attr("fill", color)
      .attr("class", "violin")
      .attr("d", ({ densities }) => area(densities))
      .attr("transform", toBandCenter);
  }

  // ------------- Points -----------------
  if (drawPoints) {
    svg
      .append("g")
      .attr("opacity", 0.3)
      .selectAll(".points")
      // Skip missing values: null and "" would otherwise be drawn at y = 0.
      .data(data.filter((d) => isNumeric(value(d))))
      .join("circle")
      .attr("stroke", pointStroke)
      .attr("class", "points")
      .attr(
        "cx",
        (d) =>
          gScale(groupOf(d)) +
          gScale.bandwidth() / 2 +
          (Math.random() * jitter - jitter / 2)
      )
      .attr("cy", (d) => yScale(value(d)))
      .attr("r", pointR)
      .append("title")
      .text((d) => `${value(d)} y ${yScale(value(d))}`);
  }

  // -------- Density Bars ----------------
  if (drawBars) {
    const innerHeight = Math.abs(yRange[1] - yRange[0]);
    svg
      .append("g")
      .attr("class", "bars")
      .attr("fill", "#aaaa")
      .selectAll(".barGroup")
      .data(GDensities)
      .join("g")
      .attr("class", "barGroup")
      .each(function ({ densities, Y }) {
        d3.select(this)
          .selectAll("rect")
          .data(densities)
          .join("rect")
          .attr("x", 0)
          .attr("y", (d) => yScale(d[0]))
          .attr("width", (d) => xScale(d[1]))
          // Clamp: a negative height is invalid when there are many samples.
          .attr("height", Math.max(0, innerHeight / densities.length - 3))
          .append("title")
          // Each datum is a [position, density] pair, so only those two
          // numbers and the group size are meaningful here.
          .text((d) => `${d[0]} kde density ${d[1]} subsetLength ${Y.length}`);
      })
      .attr("transform", toBandCenter);
  }

  // --------- Histogram ----------------
  if (drawHistogram) {
    // Bins were computed per group above; see https://observablehq.com/@d3/histogram
    svg
      .append("g")
      .attr("class", "histogram")
      .attr("fill", "#a4c8caaa")
      .selectAll(".histGroup")
      .data(GDensities)
      .join("g")
      .attr("class", "histGroup")
      .each(function ({ histogram, Y }) {
        d3.select(this)
          .selectAll("rect")
          .data(histogram)
          .join("rect")
          .attr("x", (d) => -xScale(d.realDensity))
          .attr("y", (d) => yScale(d.x0))
          .attr("width", (d) => xScale(d.realDensity))
          // Clamp: very thin bins would otherwise yield a negative height.
          .attr("height", (d) => Math.max(0, yScale(d.x0) - yScale(d.x1) - 1))
          .append("title")
          .text(
            (d) =>
              `${d.x0} ${d.x1} ${d.length} subsetLength ${Y.length} densitiy ${
                d.length / Y.length
              } realDensity ${d.realDensity}`
          );
      })
      .attr("transform", toBandCenter);
  }

  const node = svg.node();
  // Count rows with a usable value; a plain truthiness test would drop zeros.
  node.notNullElements = data.filter((d) => isNumeric(value(d))).length;
  return node;
}
Insert cell
// Renders one ViolinPlot per attribute inside a wrapping flex container.
// Each facet shows the attribute name, the number of rows with a usable value
// (the `notNullElements` property set by ViolinPlot), and the plot itself.
FacetViolinPlot = (
  data,
  {
    columns = 2, // number of facets per row
    facetBy = null, // function over data whose distinct results are used as attribute names when attribs is empty
    attribs = [], // list of attribute names to plot, one facet each
    violinOptions = {
      // options forwarded to every ViolinPlot call
      width: width / columns,
      height: 150
    }
  } = {}
) => {
  // An empty array is truthy, so test the length explicitly; otherwise the
  // facetBy fallback could never be reached with the default attribs.
  let facets = attribs;
  if (!facets || facets.length === 0) {
    facets = facetBy ? d3.groups(data, facetBy).map(([key]) => key) : [];
  }

  const getViolinFacet = (attr) =>
    ViolinPlot(data, { ...violinOptions, value: (d) => d[attr] });

  return htl.html`<div style="display:flex; flex-wrap: wrap">
${facets.map((attr) => {
  const violin = getViolinFacet(attr);
  return html`<div style="flex: ${100 / columns}%">
<h2>${attr} (${violin.notNullElements})</h2>
${violin}
</div>`;
})}
</div>`;
}
Insert cell
// Culmen length of every penguin row, as a flat array for the 1D density demo.
values1d = penguins.map(({ culmen_length_mm }) => culmen_length_mm)
Insert cell
// Kernel density estimate of the penguins' culmen length, using the
// notebook's bandwidth / pad / thresholds controls.
pk = kde.density1d(
  penguins.map(({ culmen_length_mm }) => culmen_length_mm),
  { bins: thresholds, bandwidth, pad }
)
Insert cell
// Reference rendering of the same density with Observable Plot: the KDE as a
// grey area, with the raw values as black dots along the baseline.
Plot.plot({
  width,
  height: 250,
  y: { grid: true },
  marks: [
    // Calling bandwidth() on the existing estimator updates it without re-binning.
    Plot.areaY(pk.bandwidth(bandwidth), { x: "x", y: "y", fill: "#ccc" }),
    Plot.dot(values1d, { x: (v) => v, y: 0, fill: "black" })
  ]
})
Insert cell
import {howto, altplot} from "@d3/example-components"
Insert cell
kde = require("fast-kde")
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more