Public
Edited
Oct 12, 2023
Paused
Importers
7 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
/**
 * Gives the coefficient that describes the share of people born in `year`
 * with the specified sex who are still living.
 *
 * Reads the `lifetable` rows (`{ year, sex, survival }`) for that sex, picks
 * the nearest row at or before `year` and the nearest row after it, and
 * interpolates linearly between them using the actual gap between the two
 * rows. When no later row exists, the earlier row's value is used as-is.
 *
 * @param {number|string} year - Birth year; coerced with `Number`.
 * @param {string} sex - Sex code matched strictly against `row.sex`.
 * @returns {number} `survival / 100_000` (presumably life-table survivors
 *   per 100,000 births -- confirm against the data source).
 * @throws {RangeError} When `lifetable` has no row for `sex` at or before `year`.
 */
function survivalCoefficient(year, sex) {
  const targetYear = Number(year);

  // Single pass instead of sorting the whole table on every call.
  let left = null; // latest row with row.year <= targetYear
  let right = null; // earliest row with row.year > targetYear
  for (const row of lifetable) {
    if (row.sex !== sex) continue;
    const rowYear = Number(row.year);
    if (rowYear <= targetYear) {
      if (left === null || rowYear >= Number(left.year)) left = row;
    } else if (right === null || rowYear < Number(right.year)) {
      right = row;
    }
  }

  if (left === null) {
    throw new RangeError(
      `No life table entry for sex "${sex}" at or before year ${year}`
    );
  }

  // Past the last data point: nothing to interpolate towards.
  if (right === null) return left.survival / 100_000;

  // Linearly interpolate between years with no data
  const delta = right.survival - left.survival;
  const progress = (targetYear - left.year) / (right.year - left.year);

  return (left.survival + delta * progress) / 100_000;
}
Insert cell
Insert cell
// One { year, sex, survival } row for every year from DATA_YEAR_START through
// DATA_YEAR_END (inclusive) and each sex, emitted as "M" then "F" per year.
interpolatedLifetable = d3
  .range(DATA_YEAR_START, DATA_YEAR_END + 1)
  .flatMap((year) =>
    ["M", "F"].map((sex) => ({
      year,
      sex,
      survival: survivalCoefficient(year, sex)
    }))
  )
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// DuckDB client exposing two tables to the SQL cells in this notebook:
//   babynames - loaded from the attached parquet file
//   lifetable - the per-year interpolated survival rows built in this notebook
duckDb = DuckDBClient.of({
babynames: FileAttachment("babynames-3.parquet"),
lifetable: interpolatedLifetable
})
Insert cell
Insert cell
Insert cell
// Names of the first 10,000 rollup rows when ordered by totalAlive, highest first.
topNames = duckDb
  .query(`SELECT name FROM (${ROLLUP_QUERY}) ORDER BY totalAlive DESC LIMIT 10000`)
  .then((rows) => rows.map(({ name }) => name))
Insert cell
// The 25 male ('M') rollup rows with the highest totalAlive above
// aliveThreshold; their names are returned ordered by averageYear.
topMaleNames = duckDb
  .query(
    `
SELECT name
FROM
(SELECT * FROM (${ROLLUP_QUERY})
WHERE sex = 'M' AND totalAlive > ${aliveThreshold}
ORDER BY totalAlive DESC
LIMIT 25)
ORDER BY averageYear
`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
// The 25 female ('F') rollup rows with the highest totalAlive above
// aliveThreshold; their names are returned ordered by averageYear.
topFemaleNames = duckDb
  .query(
    `
SELECT name
FROM
(SELECT * FROM (${ROLLUP_QUERY})
WHERE sex = 'F' AND totalAlive > ${aliveThreshold}
ORDER BY totalAlive DESC
LIMIT 25)
ORDER BY averageYear`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
// Names of the 25 male ('M') rollup rows above aliveThreshold with the
// latest averageYear (NULL averageYear sorted last).
youngestMaleNames = duckDb
  .query(
    `SELECT name
FROM (${ROLLUP_QUERY})
WHERE sex = 'M' AND totalAlive > ${aliveThreshold}
ORDER BY averageYear DESC NULLS LAST
LIMIT 25;`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
// Names of the 25 male ('M') rollup rows above aliveThreshold with the
// earliest averageYear (NULL averageYear sorted last).
oldestMaleNames = duckDb
  .query(
    `SELECT name
FROM (${ROLLUP_QUERY})
WHERE sex = 'M' AND totalAlive > ${aliveThreshold}
ORDER BY averageYear ASC NULLS LAST
LIMIT 25;
`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
// Names of the 25 female ('F') rollup rows above aliveThreshold with the
// latest averageYear (NULL averageYear sorted last).
youngestFemaleNames = duckDb
  .query(
    `SELECT name
FROM (${ROLLUP_QUERY})
WHERE sex = 'F' AND totalAlive > ${aliveThreshold}
ORDER BY averageYear DESC NULLS LAST
LIMIT 25;
`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
// Names of the 25 female ('F') rollup rows above aliveThreshold with the
// earliest averageYear (NULL averageYear sorted last).
oldestFemaleNames = duckDb
  .query(
    `
SELECT name
FROM (${ROLLUP_QUERY})
WHERE sex = 'F' AND totalAlive > ${aliveThreshold}
ORDER BY averageYear ASC NULLS LAST
LIMIT 25;
`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
Insert cell
// Names of the 25 male ('M') rows from VARIANCE_QUERY with the smallest
// variance (NULL variance sorted last).
topMaleVariance = duckDb
  .query(
    `
SELECT name
FROM (${VARIANCE_QUERY})
WHERE sex = 'M'
ORDER BY variance ASC NULLS LAST
LIMIT 25
`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
// Names of the 25 female ('F') rows from VARIANCE_QUERY with the smallest
// variance (NULL variance sorted last).
topFemaleVariance = duckDb
  .query(
    `
SELECT name
FROM (${VARIANCE_QUERY})
WHERE sex = 'F'
ORDER BY variance ASC NULLS LAST
LIMIT 25
`
  )
  .then((rows) => rows.map(({ name }) => name))
Insert cell
Insert cell
// Returns the `date` of the row at which the running total of `alive` first
// reaches half of the overall total, i.e. the birth date of the median living
// person in `nameData`. Rows are scanned in the order given.
//
// nameData: array of rows carrying `alive` (number) and `date`.
// Returns new Date("1970-01-01") as a placeholder when nameData is empty.
getMedian = (nameData) => {
  if (!nameData.length) return new Date("1970-01-01");

  const alive = nameData.map((d) => d.alive);
  // d3.sum skips NaN/undefined exactly like d3.cumsum does, so the halfway
  // target and the running totals always agree; a lodash sum would turn NaN
  // on a single bad row and make the lookup below miss every index.
  const halfLiving = d3.sum(alive) / 2;
  const index = d3.cumsum(alive).findIndex((running) => running >= halfLiving);

  // Defensive: fall back to the last row rather than indexing with -1.
  return nameData[index === -1 ? nameData.length - 1 : index].date;
}
Insert cell
Insert cell
// Tooltip text for one data row: the birth year, the number born, and the
// estimated number still living ("NaN" when `alive` is missing or zero-ish
// falsy, as before).
//
// Rows built from the raw babynames data carry `year`; rows returned by the
// SQL name query only carry `date`, so fall back to the date's UTC year
// instead of printing "undefined".
tooltip = ({ year, date, born, alive }) => {
  const label = year ?? (date instanceof Date ? date.getUTCFullYear() : date);
  return `${label}\nBorn: ${born.toFixed(0)}\nLiving: ${(alive || NaN).toFixed(0)}`;
}
Insert cell
Insert cell
// Builds the titled figure for one name.
//
// name:  the first name to chart.
// sexes: optional array of sex codes; when given, one plot is rendered per
//        entry, otherwise a single plot is rendered without a sex argument.
// Resolves to an HTML <div> containing headings, the plot(s) and a source
// credit.
nameFigure = async ({ name, sexes }) => {
  const plots = await (sexes
    ? Promise.all(sexes.map((sex) => plotName({ name, sex })))
    : plotName({ name }));

  // The source link is explicitly closed; an unclosed <a> is re-opened by the
  // HTML parser around whatever follows the caption.
  return htl.html`
<div>
<h2 style="font-family: var(--sans-serif)">Age distribution of Americans named ${name}</h2>
<h3 style="font-family: var(--sans-serif)">By year of birth</h3>
${plots}
<figcaption>Source: <a href="https://www.ssa.gov/oact/babynames/limits.html">Social Security Administration</a></figcaption>
</div>`;
}
Insert cell
// Queries per-year rows for one name: sex, date (Jan 1 of the birth year),
// number born, and estimated number alive (born * lifetable survival).
//
// name: first name to look up (matched exactly).
// sex:  optional sex code; when omitted (null/undefined) rows for every sex
//       are returned instead of filtering on the literal text 'undefined'.
//
// Values are passed as prepared-statement parameters rather than spliced into
// the SQL text, so names containing quotes cannot break or alter the query.
getNameData = async ({ name, sex }) => {
  const conditions = ["babynames.name = ?"];
  const params = [name];
  if (sex != null) {
    conditions.push("babynames.sex = ?");
    params.push(sex);
  }

  return duckDb.query(
    `
    SELECT
      babynames.sex,
      make_date(babynames.year, 1, 1) as date,
      babynames.born,
      (lifetable.survival * babynames.born) as alive
    FROM
      lifetable JOIN babynames ON (lifetable.year = babynames.year AND lifetable.sex = babynames.sex)
    WHERE
      ${conditions.join("\n      AND ")}`,
    params
  );
}
Insert cell
// Renders the births / estimated-living chart for one name.
//
// name:    first name to chart.
// sex:     optional sex code; with a sex the plot is 350px tall, without one
//          it is 700px tall. The plot is always faceted vertically by sex.
// caption: optional caption forwarded to Plot.plot.
// Resolves to the element returned by Plot.plot.
plotName = async ({ name, sex, caption }) => {
  const nameData = await getNameData({ name, sex });

  // Selector that keeps the single row where the cumulative sum of the y
  // channel first reaches half of its total (the median of the distribution).
  // https://github.com/observablehq/plot/blob/main/README.md#plotselectselector-options
  const medianSelector = {
    y: (I, V) => {
      const sum = d3.sum(I, (i) => V[i]);
      const i = d3.cumsum(I, (i) => V[i]).findIndex((d) => d >= sum / 2);
      return [I[i]];
    }
  };

  return Plot.plot({
    caption,
    style: "overflow: visible;",
    width: 800,
    tip: true,
    height: sex ? 350 : 700,
    facet: {
      data: nameData,
      y: "sex"
    },
    color: {
      domain: ["M", "F"],
      scheme: "BuRd"
    },
    x: {
      domain: xDomain
    },
    y: {
      label: "↑ count"
    },
    grid: true,
    marks: [
      Plot.ruleY([0]),
      // Births per year.
      Plot.line(nameData, {
        clip: true,
        x: "date",
        y: "born",
        tip: true,
        title: tooltip
      }),
      // Estimated number of those births still alive.
      Plot.areaY(nameData, {
        x: "date",
        y: "alive",
        fill: "sex",
        fillOpacity: 0.6
      }),
      // Label anchored at the peak of the births line.
      Plot.text(
        nameData,
        Plot.selectMaxY({
          x: "date",
          frameAnchor: "middle",
          y: "born",
          stroke: "white",
          fill: "black",
          text: (d) =>
            `Number of ${EXPANDED_SEX[d.sex]} ${name}s\nborn each year`
        })
      ),
      // Vertical rule at the median living person's birth date.
      Plot.ruleX(
        nameData,
        Plot.select(medianSelector, {
          x: "date",
          y: "alive",
          stroke: "sex",
          strokeWidth: 4
        })
      ),
      // Label for the living area, placed at 10% of its height.
      Plot.text(
        nameData,
        Plot.selectMaxY({
          x: "date",
          frameAnchor: "bottom",
          y: (d) => d.alive * 0.1,
          stroke: "white",
          fill: "sex",
          opacity: 0.9,
          text: (d) =>
            `Number of ${EXPANDED_SEX[d.sex]} ${name}s born each year\nestimated to be alive on Jan 1 ${DATA_YEAR_END + 1}`
        })
      ),
      // Label stating the median age.
      Plot.text(
        nameData,
        Plot.select(medianSelector, {
          x: "date",
          frameAnchor: "top-right",
          y: "alive",
          stroke: "white",
          fill: "sex",
          // The query builds Jan 1 dates with no time zone, which surface as
          // UTC midnight; the local-time getFullYear() reports the previous
          // year west of UTC and inflates the age by one.
          text: (d) =>
            `The median living ${EXPANDED_SEX[d.sex]} ${name}\nin ${DATA_YEAR_END} was ${DATA_YEAR_END - d.date.getUTCFullYear()}`
        })
      )
    ]
  });
}
Insert cell
Insert cell
Insert cell
// Draws a small-multiples column (one facet row per name, in the order given
// by `names`) for a single sex: births per year as a line, estimated living
// as an area, and a dot at the median living birth year.
//
// names: array of first names to include.
// sex:   sex code the babynames rows must match.
plotSpread = ({ names, sex }) => {
  const isWanted = (row) => names.includes(row.name) && row.sex === sex;
  const withSurvivors = (row) => ({
    ...row,
    alive: survivalCoefficient(row.year, row.sex) * row.born,
    date: new Date(row.year + "-01-01")
  });
  const nameData = babynames.filter(isWanted).map((row) => withSurvivors(row));

  // Keeps the one row where the running total of the y channel first reaches
  // half of the overall total.
  // https://github.com/observablehq/plot/blob/main/README.md#plotselectselector-options
  const pickMedian = (I, V) => {
    const total = d3.sum(I, (i) => V[i]);
    const running = d3.cumsum(I, (i) => V[i]);
    const position = running.findIndex((value) => value >= total / 2);
    return [I[position]];
  };

  const bornLine = Plot.line(nameData, {
    clip: true,
    x: "date",
    y: "born",
    tip: true,
    title: tooltip
  });

  const aliveArea = Plot.areaY(nameData, {
    x: "date",
    y: "alive",
    fill: "sex"
  });

  const medianDot = Plot.dotX(
    nameData,
    Plot.select(
      { y: pickMedian },
      {
        x: "date",
        y: "alive",
        stroke: "white",
        fill: "red",
        title: (d) => `Median: ${d.year}`
      }
    )
  );

  return Plot.plot({
    grid: true,
    height: 600,
    width: 400,
    facet: { data: nameData, y: "name", marginRight: 100 },
    fy: { domain: names },
    x: { domain: xDomain },
    y: { ticks: false },
    color: { domain: ["M", "F"], scheme: "BuRd" },
    marginLeft: 100,
    marks: [bornLine, aliveArea, medianDot]
  });
}
Insert cell
// Builds a titled figure with two side-by-side spread plots: female names on
// the left and male names on the right, followed by a source credit.
//
// title:       heading text.
// maleNames:   names plotted with sex "M".
// femaleNames: names plotted with sex "F".
// Returns an HTML <div>.
spreadFigure = ({ title, maleNames, femaleNames }) => {
  // The source link is explicitly closed; an unclosed <a> is re-opened by the
  // HTML parser around whatever follows the caption.
  return htl.html`
<div>
<h2 style="font-family: var(--sans-serif)">${title}</h2>
<h3 style="font-family: var(--sans-serif)">Among Americans estimated to be alive as of Jan. 1, ${DATA_YEAR_END + 1}</h3>

<div style="display: flex">
${plotSpread({ names: femaleNames, sex: "F" })}
${plotSpread({ names: maleNames, sex: "M" })}
</div>

<figcaption>Source: <a href="https://www.ssa.gov/oact/babynames/limits.html">Social Security Administration</a></figcaption>
</div>`;
}
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more