Public
Edited
May 24, 2023
1 fork
Importers
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Violin plot of penguin culmen length, optionally split by species.
// `groupBy`, `bandwidth`, `thresholds`, `pad` and the draw* flags are
// values supplied by other cells of the notebook.
chart = ViolinPlot(penguins, {
  value: (penguin) => penguin.culmen_length_mm,
  groupBy: groupBy ? (penguin) => penguin.species : null,
  // ViolinPlot invokes `color` with its per-group record; `id` is the group key.
  color: ({ id }) => color(id),
  yLabel: "↑ Culmen Length (mm)",
  width,
  height: 600,
  bandwidth,
  thresholds,
  pad,
  drawBars,
  drawPoints,
  drawHistogram
})
Insert cell
// Small multiples: one violin plot per penguin attribute.
facetedChart = FacetViolinPlot(penguins, {
  // Keep every column whose value in the first row coerces to a number.
  attribs: Object.keys(penguins[0]).filter((attr) => !isNaN(penguins[0][attr])),
  violinOptions: {
    groupBy: groupBy ? (d) => d.species : null,
    // ViolinPlot calls `color` with its per-group record ({ id, densities, ... }),
    // so the group key is `d.id`. Indexing with `d[0]` always produced
    // `undefined` and painted every group with the same colour.
    color: (d) => (groupBy ? color(d.id) : "steelblue"),
    bandwidth,
    thresholds,
    pad,
    drawBars,
    drawPoints,
    drawHistogram,
  }
})
Insert cell
// Shared ordinal colour scale (d3.schemeCategory10 palette) used to colour groups.
color = d3.scaleOrdinal().range(d3.schemeCategory10)
Insert cell
// Violin plot of total salary per organization group from the compensation table.
chart2 = ViolinPlot(compensation, {
  groupBy: ({ "Organization Group": organizationGroup }) => organizationGroup,
  value: ({ "Total Salary": totalSalary }) => totalSalary,
  // Optional: order the groups by descending salary.
  // gDomain: d3.groupSort(compensation, ([d]) => -d["Total Salary"], d => d["Organization Group"]),
  yLabel: "Total Salary",
  yFormat: ",d",
  color: "steelblue",
  width,
  height: 500,
  bandwidth,
  thresholds,
  pad,
  drawBars
})
Insert cell
// Rows of the attached SF_Employee_Compensation_2022.csv file, parsed with
// the `typed: true` option of the FileAttachment csv reader.
compensation = FileAttachment("SF_Employee_Compensation_2022.csv").csv({ typed: true })
Insert cell
Insert cell
// Copyright 2021 Observable, Inc.
// Copyright 2023 John Alexis Guerra Gomez
// Released under the ISC license.
// https://observablehq.com/@john-guerra/violin-plot
// Based on https://observablehq.com/@d3/bar-chart

/**
 * Draws one violin (a mirrored kernel-density area) per group of `data`,
 * with optional raw points, density bars and histogram bars on top.
 *
 * Densities are computed with `kde.density1d` (fast-kde) and histograms with
 * `d3.bin`; both receive `thresholds` (as `bins` / `thresholds` respectively).
 *
 * @param {Array} data - the rows to plot
 * @param {Object} [options] - see the inline comments on each option below
 * @returns {SVGSVGElement} the chart node; it also carries a
 *   `notNullElements` property holding how many rows have a usable
 *   (non-null, non-empty, non-NaN) value
 */
function ViolinPlot(
  data,
  {
    thresholds = 40, // passed to kde.density1d as `bins` and to d3.bin().thresholds()
    bandwidth = 7, // passed to kde.density1d
    pad = 4, // passed to kde.density1d
    groupBy = null, // given d in data, returns the (ordinal) attribute to group by, null for no grouping
    value = (d) => d, // given d in data, returns the (quantitative) y-value
    title, // given d in data, returns the title text (currently unused)
    marginTop = 20, // the top margin, in pixels
    marginRight = 0, // the right margin, in pixels
    marginBottom = 30, // the bottom margin, in pixels
    marginLeft = 80, // the left margin, in pixels
    width = 640, // the outer width of the chart, in pixels
    height = 500, // the outer height of the chart, in pixels
    gDomain, // an array of (ordinal) x-values
    xRange = [marginLeft, width - marginRight], // [left, right]
    yRange = [height - marginBottom, marginTop], // [bottom, top]
    yDomain, // [ymin, ymax]
    xPadding = 0.1, // amount of x-range to reserve to separate violins
    yFormat, // a format specifier string (or a formatting function) for the y-axis
    yLabel = "↑ Frequency", // a label for the y-axis
    color = "currentColor", // violin fill; a function receives the group record ({ id, ... })
    drawBars = false, // overlay one bar per KDE sample
    drawPoints = false, // overlay the raw values as jittered circles
    drawHistogram = false, // overlay d3.bin histogram bars
    drawViolin = true, // draw the violin areas themselves
    pointsJitter, // horizontal jitter width for points; defaults to gScale.range()[1] / 8
    pointR = 2, // point radius
    pointStroke = "#333", // point stroke colour
    drawXAxis = false // draw a density axis under each violin
  } = {}
) {
  // Decide up front whether the caller asked for grouping, so the group axis
  // does not depend on inspecting data[0] (which throws on empty data and
  // hides the axis whenever the first row's key happens to be falsy).
  const isGrouped = typeof groupBy === "function";
  const groupOf = isGrouped ? groupBy : () => "";

  // A value is usable when it is present and coerces to a number.
  // Unlike a truthiness test, this keeps legitimate zeros.
  const isUsable = (v) => v != null && v !== "" && !isNaN(v);

  const groups = d3
    .groups(data, groupOf)
    .map(([id, V]) => ({ id, V, Y: d3.map(V, value) }));

  const groupDomain =
    gDomain === undefined ? groups.map((group) => group.id) : gDomain;
  const gScale = d3.scaleBand(groupDomain, xRange).padding(xPadding);
  const jitter =
    pointsJitter !== undefined ? pointsJitter : gScale.range()[1] / 8;

  // Horizontal centre of the band that belongs to a group.
  const centerOf = (id) => gScale(id) + gScale.bandwidth() / 2;
  const centerTransform = ({ id }) => `translate(${centerOf(id)}, 0)`;

  // Per group: KDE samples as [value, density] pairs plus, when requested,
  // histogram bins annotated with their share of the group's values.
  const GDensities = groups.map(({ id, V, Y }) => {
    const densities = Array.from(
      kde.density1d(Y, {
        bandwidth,
        pad,
        bins: thresholds
      })
    ).map(({ x, y }) => [x, y]);

    const histogram = drawHistogram
      ? d3
          .bin()
          .thresholds(thresholds)(Y)
          .map((b) => Object.assign(b, { realDensity: b.length / Y.length }))
      : undefined;

    return { id, densities, histogram, Y, V };
  });

  const valueDomain =
    yDomain === undefined
      ? d3.extent(GDensities.flatMap((g) => g.densities.map((d) => d[0])))
      : yDomain;
  const yScale = d3.scaleLinear().domain(valueDomain).range(yRange);

  const maxDensity = d3.max(GDensities, ({ densities }) =>
    d3.max(densities, (d) => d[1])
  );

  const xScale = d3
    .scaleLinear()
    .domain([0, maxDensity * 2])
    .nice()
    .range([0, gScale.bandwidth() * (2 - xPadding)]);

  // Construct axes and formats.
  const xAxis = d3.axisBottom(xScale).tickFormat(d3.format(".0%"));
  const gAxis = d3.axisBottom(gScale).tickSizeOuter(0);
  const yAxis = d3.axisLeft(yScale);
  if (typeof yFormat === "function") {
    yAxis.tickFormat(yFormat);
  } else if (yFormat != null) {
    // axis.tickFormat() expects a function; handing it a specifier string
    // makes every tick render that literal string. Passing the specifier
    // through axis.ticks() lets the scale build the formatter instead.
    yAxis.ticks(null, yFormat);
  }

  // Mirrored area: density is spread symmetrically around the band centre.
  const area = d3
    .area()
    .curve(d3.curveLinear)
    .x0((d) => xScale(-d[1] / 2))
    .x1((d) => xScale(d[1] / 2))
    .y((d) => yScale(d[0]));

  const svg = d3
    .create("svg")
    .attr("width", width)
    .attr("height", height)
    .attr("viewBox", [0, 0, width, height])
    .attr(
      "style",
      "max-width: 100%; height: auto; height: intrinsic; overflow: visible"
    );

  // ------------- Axis --------------
  svg
    .append("g")
    .attr("transform", `translate(${marginLeft},0)`)
    .call(yAxis)
    .call((g) => g.select(".domain").remove())
    .call((g) =>
      g
        .selectAll(".tick line")
        .clone()
        .attr("x2", width - marginLeft - marginRight)
        .attr("stroke-opacity", 0.1)
    )
    .call((g) =>
      g
        .append("text")
        .attr("x", -marginLeft)
        .attr("y", 10)
        .attr("fill", "currentColor")
        .attr("text-anchor", "start")
        .text(yLabel)
    );

  if (drawXAxis) {
    svg
      .append("g")
      .selectAll("g.xAxis")
      .data(GDensities)
      .join("g")
      .attr("class", "xAxis")
      .attr(
        "transform",
        (g) => `translate(${centerOf(g.id)},${height - marginBottom})`
      )
      .call(xAxis);
  }

  // Don't draw axis for groups when no group selected
  if (isGrouped) {
    svg
      .append("g")
      .attr("id", "gAxis")
      .attr(
        "transform",
        `translate(0,${height - marginBottom + marginBottom / 2})`
      )
      .call(gAxis)
      .call((g) => g.select(".domain").remove())
      .call((g) => g.selectAll(".tick line").remove());
  }

  // ------------- Violin ---------------
  if (drawViolin) {
    svg
      .append("g")
      .selectAll(".violin")
      .data(GDensities)
      .join("path")
      .attr("fill", color)
      .attr("class", "violin")
      .attr("d", ({ densities }) => area(densities))
      .attr("transform", centerTransform);
  }

  // ------------- Points -----------------
  if (drawPoints) {
    svg
      .append("g")
      .attr("opacity", 0.3)
      .selectAll(".points")
      // Skip missing values too: a null would otherwise be coerced to 0
      // by the linear scale and drawn as a bogus point.
      .data(data.filter((d) => isUsable(value(d))))
      .join("circle")
      .attr("stroke", pointStroke)
      .attr("class", "points")
      .attr(
        "cx",
        (d) => centerOf(groupOf(d)) + (Math.random() * jitter - jitter / 2)
      )
      .attr("cy", (d) => yScale(value(d)))
      .attr("r", pointR)
      .append("title")
      .text((d) => `${value(d)} y ${yScale(value(d))}`);
  }

  // -------- Density Bars ----------------
  if (drawBars) {
    const innerHeight = Math.abs(yRange[1] - yRange[0]);
    svg
      .append("g")
      .attr("class", "bars")
      .attr("fill", "#aaaa")
      .selectAll(".barGroup")
      .data(GDensities)
      .join("g")
      .attr("class", "barGroup")
      .each(function ({ densities, Y }) {
        // Clamp: with many samples the per-bar height minus the gap goes
        // negative, which is not a valid SVG rect height.
        const barHeight = Math.max(0, innerHeight / densities.length - 3);
        d3.select(this)
          .selectAll("rect")
          .data(densities)
          .join("rect")
          .attr("x", 0)
          .attr("y", (d) => yScale(d[0]))
          .attr("width", (d) => xScale(d[1]))
          .attr("height", barHeight)
          .append("title")
          // Each datum is a [value, density] pair, so only those two numbers
          // (and the group size) are meaningful here.
          .text((d) => `${d[0]} kde density ${d[1]} subsetLength ${Y.length}`);
      })
      .attr("transform", centerTransform);
  }

  // --------- Histogram ----------------
  if (drawHistogram) {
    // https://observablehq.com/@d3/histogram
    svg
      .append("g")
      .attr("class", "histogram")
      .attr("fill", "#a4c8caaa")
      .selectAll(".histGroup")
      .data(GDensities)
      .join("g")
      .attr("class", "histGroup")
      .each(function ({ histogram, Y }) {
        d3.select(this)
          .selectAll("rect")
          .data(histogram)
          .join("rect")
          // Bars grow to the left of the band centre.
          .attr("x", (d) => -xScale(d.realDensity))
          .attr("y", (d) => yScale(d.x0))
          .attr("width", (d) => xScale(d.realDensity))
          .attr("height", (d) => Math.max(0, yScale(d.x0) - yScale(d.x1) - 1))
          .append("title")
          .text(
            (d) =>
              `${d.x0} ${d.x1} ${d.length} subsetLength ${Y.length} densitiy ${
                d.length / Y.length
              } realDensity ${d.realDensity}`
          );
      })
      .attr("transform", centerTransform);
  }

  const node = svg.node();
  // Expose how many rows carry a usable value (zeros included).
  node.notNullElements = data.filter((d) => isUsable(value(d))).length;
  return node;
}
Insert cell
// Lays out one ViolinPlot per facet in a wrapping flex container.
// Each facet is headed by its name and the plot's `notNullElements` count.
// Returns the container element built with htl.html.
FacetViolinPlot = (
  data,
  {
    columns = 2, // number of facets per row (each facet gets 100 / columns % of the width)
    facetBy = null, // accessor whose distinct keys become the facets when `attribs` is empty
    attribs = [], // list of attribute names to facet on; takes precedence over facetBy
    // Note: a caller-supplied violinOptions object replaces this default entirely.
    violinOptions = {
      width: width / columns,
      height: 150
    }
  } = {}
) => {
  // `attribs` defaults to [], which is truthy, so the previous
  // `attribs || d3.groups(...)` fallback could never run. Check for an empty
  // list instead, and only consult facetBy when it is actually a function
  // (d3.groups throws on a null key accessor).
  let facets = [];
  if (Array.isArray(attribs) && attribs.length > 0) {
    facets = attribs;
  } else if (typeof facetBy === "function") {
    // NOTE(review): as in the original fallback, each key is then used as a
    // property name (d => d[key]) — confirm facetBy is meant to yield
    // attribute names.
    facets = d3.groups(data, facetBy).map(([key]) => key);
  }

  const getViolinFacet = (attr) =>
    ViolinPlot(data, { ...violinOptions, value: (d) => d[attr] });

  return htl.html`<div style="display:flex; flex-wrap: wrap">
${facets.map((attr) => {
  const violin = getViolinFacet(attr);
  return html`<div style="flex: ${100 / columns}%">
<h2>${attr} (${violin.notNullElements})</h2>
${violin}
</div>`;
})}
</div>`;
}
Insert cell
// Culmen length (mm) of every penguin, as a flat array.
values1d = Array.from(penguins, (penguin) => penguin.culmen_length_mm)
Insert cell
// fast-kde 1D density estimate of penguin culmen length, driven by the
// notebook's `bandwidth`, `pad` and `thresholds` values.
pk = kde.density1d(
  Array.from(penguins, (penguin) => penguin.culmen_length_mm),
  { bins: thresholds, bandwidth, pad }
)
Insert cell
// Reference rendering of the `pk` density with Observable Plot, with the raw
// values drawn as dots along the baseline.
Plot.plot({
  width,
  height: 250,
  y: { grid: true },
  marks: [
    // use bandwidth method to update efficiently without re-binning
    Plot.areaY(pk.bandwidth(bandwidth), { x: "x", y: "y", fill: "#ccc" }),
    Plot.dot(values1d, { x: (v) => v, y: 0, fill: "black" })
  ]
})
Insert cell
import {howto, altplot} from "@d3/example-components"
Insert cell
kde = require("fast-kde")
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more