Public
Edited
Feb 3
Insert cell
Insert cell
Insert cell
lodash = require('lodash@4.17.21')
Insert cell
// Load the attached census CSV file as the notebook-wide `CensusData` value.
// NOTE(review): other cells compare Year and Sex against string literals ("1900", "1"),
// so the CSV fields are presumably left as strings here — confirm.
CensusData = FileAttachment("census2000-1.csv").csv()
Insert cell
// Display the loaded census rows for inspection.
// NOTE(review): `printTable` is not defined in the visible part of this notebook;
// presumably it is imported or defined in another cell — confirm.
printTable(CensusData)
Insert cell
Nominal Data: categorical data (i.e. countries, sex, gender); is value A the same as or different from value B? (A = B)
- Yes, Data Analysis => compare categories(a = b?; a != b?)
- Yes, Visual Analysis => readily perceive which category looking at. use position, color hue (color - not perceptually ordered), shape.
-> i.e. green a, red b; star a, circle b
- No, Encoding Channel => using the size channel might mislead, suggesting a rank order or magnitude that does not exist

Ordinal Data: data with specific ordering (i.e. year); does value A come before or after value B? (A < B)
- Yes, Data Analysis => compare rank ordering of values (a < b?; a > b?)
- Yes, Visual Analysis => perceive sense of rank order. use position, size, color value (brightness-perceptually valued/ordered)

Quantitative Data: measure numerical differences among values; 0 is meaningful (fertility, life expectancy)
- Subtypes:
(A) Interval Quantitative Data: what is the distance to value A from value B? (A - B)
- measure distance/interval between points (a - b); year, year 0 is subjective
(B) Ratio Quantitative Data: zero meaningful for calculating proportions - value A is what proportion of B? (A/B) = “A is 10% of B” or “B is 7 times larger than A”.
- zero point is meaningful, can measure proportions or scale factors
- Yes, Data Analysis => position, size, color value, etc; axis with 0 baseline is ESSENTIAL for proportional comparisons or ratio values, can be safely omitted for interval comparisons

Temporal: time units

Moreover, these data types do not provide a fixed categorization. For example, just because a data field is represented using a number doesn't mean we have to treat it as a quantitative type! We might interpret a set of ages (10 years old, 20 years old, etc.) as nominal (underage or overage), ordinal (grouped by year), or quantitative (calculate average age).

Input Data Types:
1. Sex: "1", "2"
- GIVEN -> Categorical/Nominal (N): Sex consists of two categories: (1) Male (2) Female (3)..
- EXPRESS -> categorical with more categories?
2. Year: "1900", "2000"
- GIVEN -> Interval Quant or Ordinal
- EXPRESS ->

3. Age: 0, 5, 10, ..
- GIVEN -> Interval Quant or Ordinal

4. People: "1064581"
- Given -> Quant (ratio)
6.
Insert cell
{

// Group by Year and Age, calculating totals
const groupedData = lodash.groupBy(CensusData, d => `${d.Year}-${d.Age}`);
// Transform the grouped data to calculate percentages
const withTotals = lodash.map(groupedData, (group, key) => {
const [year, age] = key.split('-');
// Calculate total population for this age-year group
const total = lodash.sumBy(group, d => parseInt(d.People));
// Find population counts for each sex
const sex1 = lodash.find(group, {Sex: "1"})?.People || 0;
const sex2 = lodash.find(group, {Sex: "2"})?.People || 0;
return {
Year: year,
Age: age,
Sex1Percent: (parseInt(sex1) / total) * 100,
Sex2Percent: (parseInt(sex2) / total) * 100,
Total: total
};
});

// Further transform the data for visualization
const transformedData = withTotals.map(d => ({
Age: parseInt(d.Age),
Year: d.Year,
Sex1Percent: parseFloat(d.Sex1Percent.toFixed(2)),
Sex2Percent: parseFloat(d.Sex2Percent.toFixed(2)),
DiffFromEqual: parseFloat((d.Sex1Percent - 50).toFixed(2))
}));

// Create and render the chart
return vl.markLine()
.data(transformedData)
.encode(
vl.x().fieldQ('Age').title('Age Group'),
vl.y().fieldQ('DiffFromEqual').title('Difference from Equal Distribution (%)'),
vl.color().fieldN('Year')
)
.width(600)
.height(400)
.title('Change in Sex Distribution by Age (1900 vs 2000)')
.config({
axis: {
grid: true,
tickCount: 10
}
})
.render(); // This is the key addition for static rendering
}
Insert cell
// Normalized stacked bars of each sex's share per age group, faceted by census year.
vl.markBar()
  .data(CensusData)
  .transform(
    // Keep only the 1900 and 2000 censuses.
    vl.filter('datum.Year === "1900" || datum.Year === "2000"'),
    // Label the Sex code: "1" -> Male, "2" -> Female, any other value -> Other,
    // empty/absent -> Missing.
    vl.calculate(
      'datum.Sex ? (datum.Sex === "1" ? "Male" : datum.Sex === "2" ? "Female" : "Other") : "Missing"'
    ).as("sex"),
    // Convert People to a number for correct arithmetic.
    vl.calculate("toNumber(datum.People)").as("PeopleNum"),
    // Total People per (Age, Year) group. The frame must be unbounded on both
    // sides: the window default of [null, 0] produces a running sum, so every
    // row except the last in a group would see a partial total.
    vl.window([{ op: "sum", field: "PeopleNum", as: "TotalPeople" }])
      .frame([null, null])
      .groupby(["Age", "Year"]),
    // Share of each record within its Age-Year group.
    vl.calculate("datum.PeopleNum / datum.TotalPeople * 100").as("Percentage")
  )
  .encode(
    vl.x().fieldO("Age").title("Age Group"),
    vl.y().fieldQ("Percentage")
      .stack("normalize") // stack the segments of each bar to a full-height total
      .title("Percentage"),
    // Color distinguishes the sex categories, including Other/Missing.
    vl.color().fieldN("sex")
      .scale({ range: ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"] })
      .title("Sex"),
    vl.tooltip(["Year", "Age", "sex", "Percentage"])
  )
  // Facet by Year so 1900 and 2000 can be compared side by side.
  .facet(
    vl.facet().fieldN("Year").title("Year")
  )
  .resolve({ scale: { x: "independent", y: "independent" } })
  .config({
    // Facet cell dimensions.
    facet: { cell: { width: 250, height: 400 } }
  })
  .render();
Insert cell
vl.markPoint()
.data(CensusData)
.encode(
vl.x().fieldQ('People'),
vl.y().fieldT('Year')
)
.render()
Insert cell
vl.markBar()
.data(CensusData)
.encode(
vl.x().fieldQ('People').bin({maxbins: 20}),
vl.y().fieldT('Year')
)
.render()
Insert cell
{
// custom age axis to use between the mirrored plots
const customAxis = {
titleX: -10, titleY: -2, titleAngle: 0, titleAlign: 'center', // place title above
domain: false, ticks: false, // hide domain and tick lines
labelPadding: 10, labelAlign: 'center' // space and center axis labels
};
// generate a subplot specification
const plot = (sex, axis) => {
return vl
.markBar({ opacity: 0.4 })
.transform(vl.filter(`datum.Sex === "${sex}"`)) // filter to given value
.encode(
vl.x().sum('People')
.scale({ domain: [0, 12e6] })
.sort(axis ? 'ascending' : 'descending') // flip directions
.axis({ format: 's' })
.title(`${sex}s`),
vl.y().fieldO('Age')
.axis(axis) // pass in custom axis setting
.sort('descending')
.title('Age'),
vl.color().fieldN('sex')
.scale({ range: ['#675193', '#ca8861'] })
.title('Sex')
)
.width(250)
.height(350);
};

return vl
.data(CensusData)
.transform(
vl.calculate('datum.Sex === 1 ? "Male" : "Female"').as('Sex'),
vl.filter('datum.Year === 1900')
)
// .params(
// vl.param('Year').bind(viewof year) // bind year parameter to scrubber
// )
.hconcat(
plot('Female', null), // subplot with no age axis
plot('Male', customAxis) // subplot with custom age axis
)
.bounds('flush') // ignore axes when spacing subplots
.spacing(20) // place subplots 20 pixels apart
.render();
}
Insert cell
{
// Custom age axis to use between the mirrored plots
const customAxis = {
titleX: -10,
titleY: -2,
titleAngle: 0,
titleAlign: 'center', // Place title above
domain: false,
ticks: false, // Hide domain and tick lines
labelPadding: 10,
labelAlign: 'center' // Space and center axis labels
};
// Generate a subplot specification
const plot = (sex, axis) => {
return vl
.markBar({ opacity: 0.4 })
.transform(vl.filter(`datum.Sex === "${sex}"`)) // Filter to given value
.encode(
vl.x()
.sum('People')
.scale({ domain: [0, 12e6] })
.sort(axis ? 'ascending' : 'descending') // Flip directions
.axis({ format: 's' })
.title(`${sex}s`),
vl.y()
.fieldO('Age')
.axis(axis) // Pass in custom axis setting
.sort('descending')
.title('Age'),
vl.color()
.fieldN('Sex')
.scale({ range: ['#675193', '#ca8861'] })
.title('Sex')
)
.width(250)
.height(350);
};

return vl
.data(CensusData) // Ensure CensusData has fields: year, sex, people, age
.transform(
vl.calculate('datum.Sex === 1 ? "Male" : "Female"').as('sex'),
vl.filter('datum.Year === 2000') // Set a fixed year value
)
.hconcat(
plot('Female', null), // Subplot with no age axis
plot('Male', customAxis) // Subplot with custom age axis
)
.bounds('flush') // Ignore axes when spacing subplots
.spacing(20) // Place subplots 20 pixels apart
.render();
}

Insert cell
// Debug aid: write the loaded census rows to the browser console.
console.log(CensusData);
Insert cell

{
// Custom age axis to use between the mirrored plots
const customAxis = {
titleX: -10,
titleY: -2,
titleAngle: 0,
titleAlign: 'center', // Place title above
domain: false,
ticks: false, // Hide domain and tick lines
labelPadding: 10,
labelAlign: 'center' // Space and center axis labels
};
// Generate a subplot specification
const plot = (sex, axis) => {
return vl
.markBar({ opacity: 0.4 })
.transform(vl.filter(`datum.sex === "${sex}"`)) // Filter based on transformed `sex` field
.encode(
vl.x()
.sum('People')
.scale({ domain: [0, 12e6] })
.sort(axis ? 'ascending' : 'descending') // Flip directions
.axis({ format: 's' })
.title(`${sex}s`),
vl.y()
.fieldO('Age')
.axis(axis) // Pass in custom axis setting
.sort('descending')
.title('Age'),
vl.color()
.fieldN('sex') // Use the transformed `sex` field
.scale({ range: ['#675193', '#ca8861'] })
.title('Sex')
)
.width(250)
.height(350);
};

return vl
.data(CensusData) // Use the loaded and cleaned data
.transform(
vl.calculate('datum.Sex === "1" ? "Male" : "Female"').as('sex'), // Transform `Sex` to `sex`
vl.filter('datum.Year === "1900"') // Set a fixed year value (string)
)
.hconcat(
plot('Female', null), // Subplot with no age axis
plot('Male', customAxis) // Subplot with custom age axis
)
.bounds('flush') // Ignore axes when spacing subplots
.spacing(20) // Place subplots 20 pixels apart
.render();
}
Insert cell
{
// Custom age axis to use between the mirrored plots
const customAxis = {
titleX: -10,
titleY: -2,
titleAngle: 0,
titleAlign: 'center', // Place title above
domain: false,
ticks: false, // Hide domain and tick lines
labelPadding: 10,
labelAlign: 'center' // Space and center axis labels
};
// Generate a subplot specification
const plot = (sex, axis) => {
return vl
.markBar({ opacity: 0.7 }) // Increased opacity for better overlap visibility
.transform(vl.filter(`datum.sex === "${sex}"`)) // Filter based on transformed `sex` field
.encode(
vl.x()
.sum('People')
.scale({ domain: [0, 12e6] }) // Adjust population range if necessary
.sort(axis ? 'ascending' : 'descending') // Flip directions
.axis({ format: 's' })
.title(`${sex}s`),
vl.y()
.fieldO('Age')
.axis(axis) // Pass in custom axis setting
.sort('descending')
.title('Age'),
vl.color()
.fieldN('sex') // Use male/female for color encoding
.scale({ range: ['#1f77b4', '#ff7f0e'] }) // Male = blue, Female = orange
.title('Sex'),
vl.opacity()
.fieldN('Year') // Use year for opacity encoding
.scale({ range: [1, 0.4] }) // 1900 = lower opacity, 2000 = higher opacity
.title('Year')
)
.width(250)
.height(350);
};

return vl
.data(CensusData) // Use the loaded and cleaned data
.transform(
vl.calculate('datum.Sex === "1" ? "Male" : "Female"').as('sex'), // Transform `Sex` to `sex`
vl.filter('datum.Year === "1900" || datum.Year === "2000"') // Include data for both years
)
.hconcat(
plot('Female', null), // Subplot with no age axis
plot('Male', customAxis) // Subplot with custom age axis
)
.bounds('flush') // Ignore axes when spacing subplots
.spacing(20) // Place subplots 20 pixels apart
.render();
}
Insert cell

// Overlaid (unstacked) bars of each sex's share per age group for every year in
// the data, with a horizontal reference line at 50%.
vl.layer(
  // Layer 1: percentage bars.
  vl
    .markBar()
    .data(CensusData)
    .transform(
      // Convert the Sex code into a readable label.
      vl.calculate('datum.Sex === "1" ? "Male" : "Female"').as('sex'),
      // Total People per (Age, Year). The frame is unbounded on both sides;
      // the window default of [null, 0] would give a running sum instead.
      vl.window([{ op: "sum", field: "People", as: "TotalPeople" }])
        .frame([null, null])
        .groupby(["Age", "Year"]),
      // Share of the group total, in percent.
      vl.calculate("datum.People / datum.TotalPeople * 100").as("Percentage")
    )
    .encode(
      vl.x()
        .fieldO("Age") // Age groups on the x-axis
        .title("Age Group"),
      vl.y()
        .fieldQ("Percentage") // Percentage on the y-axis
        .stack(false) // Bars are overlaid from the baseline, not stacked
        .title("Percentage of Age Group"),
      vl.color()
        .fieldN("sex") // Color encodes the sex label
        .scale({ range: ["#1f77b4", "#ff7f0e"] })
        .title("Sex"),
      vl.opacity()
        .fieldN("Year") // Opacity encodes the year
        .scale({ range: [1, 0.2] }) // First year in the domain is opaque, second is faint
        .title("Year"),
      vl.tooltip(["Year", "Age", "sex", "Percentage"]) // Values on hover
    ),

  // Layer 2: 50% reference line. Percentage is on the y-axis, so the rule is
  // positioned with y; placing it on x would put it on the Age axis.
  vl.markRule()
    .encode(
      vl.y().datum(50),
      vl.color().value("black"),
      vl.size().value(1)
    )
)
  .width(500)
  .height(400)
  .render();

Insert cell
// Horizontal stacked bars of each sex's share per age group for 1900 and 2000.
vl.markBar()
  .data(CensusData)
  .transform(
    // Convert the Sex code into a readable label.
    vl.calculate('datum.Sex === "1" ? "Male" : "Female"').as('sex'),
    // Keep only the selected years.
    vl.filter('datum.Year === "1900" || datum.Year === "2000"'),
    // Total People per (Age, Year). The frame is unbounded on both sides;
    // the window default of [null, 0] would give a running sum instead.
    vl.window([{ op: "sum", field: "People", as: "TotalPeople" }])
      .frame([null, null])
      .groupby(["Age", "Year"]),
    // Share of the group total, in percent.
    vl.calculate("datum.People / datum.TotalPeople * 100").as("Percentage")
  )
  .encode(
    vl.y()
      .fieldO("Age") // Age groups on the y-axis
      .sort("descending")
      .title("Age Group"),
    vl.x()
      .fieldQ("Percentage") // Percentage on the x-axis
      // NOTE(review): both years share one bar per age, so the stacked
      // segments presumably total 200% rather than 100% — confirm intent.
      .stack(true)
      .title("Percentage of Age Group"),
    vl.color()
      .fieldN("sex") // Color encodes the sex label
      .scale({ range: ["#1f77b4", "#ff7f0e"] })
      .title("Sex"),
    vl.opacity()
      .fieldN("Year") // Opacity encodes the year
      .scale({ range: [0.4, 1] }) // First year in the domain is fainter, second is opaque
      .title("Year"),
    vl.tooltip(["Year", "Age", "sex", "Percentage"]) // Values on hover
  )
  .width(500)
  .height(400)
  .render();
Insert cell
{
return vl
.layer(
// Main stacked bar chart
vl
.markBar()
.data(CensusData)
.transform(
vl.calculate('datum.Sex === "1" ? "Male" : "Female"').as('sex'), // Convert sex encoding
vl.filter('datum.Year === "1900" || datum.Year === "2000"'), // Keep only selected years
vl.window([{ op: "sum", field: "People", as: "TotalPeople" }]).groupby(["Age", "Year"]), // Compute total per age group
vl.calculate("datum.People / datum.TotalPeople * 100").as("Percentage") // Convert to percentage
)
.encode(
vl.y()
.fieldO("Age") // Age groups on the y-axis
.sort("descending")
.title("Age Group"),
vl.x()
.fieldQ("Percentage") // X-axis shows percentage
.stack(true) // Stack male and female to sum to 100%
.title("Percentage of Age Group"),
vl.color()
.fieldN("sex") // Color encodes male vs female
.scale({ range: ["#377eb8", "#e78ac3"] }) // Blue for Male, Pink for Female
.title("Sex"),
vl.opacity()
.fieldN("Year") // Use year for opacity encoding
.scale({ range: [1, 0.2] }) // 1900 = lower opacity, 2000 = higher opacity
.title("Year"),
vl.tooltip(["Year", "Age", "sex", "Percentage"]) // Add tooltip to see values on hover
),

// 50% reference line
vl.markRule()
.encode(
vl.x().datum(50), // Draw at 50% on x-axis
vl.y().value(0), // Extend the line across the entire height
vl.y2().value(1), // Ensure the rule spans the entire visualization
vl.color().value("black"), // Make the line black
vl.size().value(2) // Make the line thicker for visibility
)
)
.width(500)
.height(400)
.render();
}

Insert cell
{

// First, let's calculate percentages for both males and females by age group
const groupedData = lodash.groupBy(CensusData, d => `${d.Year}-${d.Age}`);
// Transform data to get male and female percentages for each age group
const withPercentages = lodash.map(groupedData, (group, key) => {
const [year, age] = key.split('-');
const total = lodash.sumBy(group, d => parseInt(d.People));
const males = parseInt(lodash.find(group, {Sex: "1"})?.People || 0);
const females = parseInt(lodash.find(group, {Sex: "2"})?.People || 0);
return {
Year: year,
Age: parseInt(age),
MalePercentage: (males / total * 100).toFixed(1),
FemalePercentage: (females / total * 100).toFixed(1)
};
});

// Filter for just the year 2000 and reshape data for visualization
const year2000Data = lodash.filter(withPercentages, {Year: "2000"});
// Create a long-format dataset where each row represents either male or female percentage
const longFormatData = [];
year2000Data.forEach(d => {
longFormatData.push({
Age: d.Age,
Sex: "Male",
Percentage: parseFloat(d.MalePercentage)
});
longFormatData.push({
Age: d.Age,
Sex: "Female",
Percentage: parseFloat(d.FemalePercentage)
});
});

// Sort by age to ensure proper line connection
const sortedData = lodash.sortBy(longFormatData, 'Age');

return vl.markLine()
.data(sortedData)
.encode(
vl.x().fieldQ('Age').title('Age Group'),
vl.y().fieldQ('Percentage').title('Percentage of Population'),
vl.color().fieldN('Sex').title('Sex')
.scale({
domain: ['Male', 'Female'],
range: ['#4477AA', '#EE6677'] // Using colorblind-friendly colors
})
)
.width(600)
.height(400)
.title('Male and Female Population Distribution by Age (2000)')
.config({
axis: {
grid: true
}
})
.render();
}
Insert cell
// Overlaid (unstacked) horizontal bars of each sex's share per age group for
// the 1900 census, with a vertical reference line at 50%.
vl.layer([
  // Layer 1: percentage bars.
  vl.markBar()
    .data(CensusData)
    .transform([
      // Convert the Sex code into a readable label.
      vl.calculate('datum.Sex === "1" ? "Male" : "Female"').as('sex'),
      // Keep only the 1900 census.
      vl.filter('datum.Year === "1900"'),
      // Total People per age group (a single year remains after the filter).
      // The frame is unbounded on both sides; the window default of [null, 0]
      // would give a running sum instead of the group total.
      vl.window([{
        op: "sum",
        field: "People",
        as: "TotalPeople"
      }])
        .frame([null, null])
        .groupby(["Age"]),
      // Share of the age-group total, in percent.
      vl.calculate("datum.People / datum.TotalPeople * 100").as("Percentage")
    ])
    .encode(
      // Y-axis: age groups in descending order.
      vl.y()
        .fieldO("Age")
        .sort("descending")
        .title("Age Group"),
      // X-axis: percentage; bars are overlaid from the baseline, not stacked.
      vl.x()
        .fieldQ("Percentage")
        .stack(false)
        .title("Percentage of Age Group"),
      // Color encodes the sex label; range colors follow scale-domain order.
      vl.color()
        .fieldN("sex")
        .scale({ range: ["#1f77b4", "#ff7f0e"] })
        .title("Sex"),
      // Tooltips showing details on hover.
      vl.tooltip(["Year", "Age", "sex", "Percentage"])
    ),

  // Layer 2: vertical reference line at 50%.
  vl.markRule()
    .encode(
      vl.x().datum(50),
      vl.color().value("black"),
      vl.size().value(1)
    )
])
  .width(500)  // Visualization width
  .height(400) // Visualization height
  .render()
Insert cell
{
const bar =
vl
.markBar()
.data(CensusData)
.transform(
vl.calculate('datum.Sex === "1" ? "Male" : "Female"').as('sex'), // Convert sex encoding
// vl.filter('datum.Year === "1900" || datum.Year === "2000"'), // Keep only selected years
vl.window([{ op: "sum", field: "People", as: "TotalPeople" }]).groupby(["Age", "Year"]), // Compute total per age group
vl.calculate("datum.People / datum.TotalPeople * 100").as("Percentage") // Convert to percentage
)
.encode(
vl.x()
.fieldO("Age") // Age groups on the y-axis
// .sort("descending")
.title("Age Group"),
vl.y()
.fieldQ("Percentage") // X-axis shows percentage
.stack(false) // Stack male and female to sum to 100%
.title("Percentage of Age Group"),
vl.color()
.fieldN("sex") // Color encodes male vs female
.scale({ range: ["#1f77b4", "#ff7f0e"] }) // Blue for males, orange for females
.title("Sex"),
vl.opacity()
.fieldN("Year") // Use year for opacity encoding
.scale({ range: [1, 0.2] }) // 1900 = lower opacity, 2000 = higher opacity
.title("Year"),
vl.tooltip(["Year", "Age", "sex", "Percentage"]) // Add tooltip to see values on hover
);


return vl.layer(bar).render();

}

Insert cell
{// First, let's calculate percentages for both males and females by age group
const groupedData = lodash.groupBy(CensusData, d => `${d.Year}-${d.Age}`);

// Transform data to get male and female percentages for each age group
const withPercentages = lodash.map(groupedData, (group, key) => {
const [year, age] = key.split('-');
const total = lodash.sumBy(group, d => parseInt(d.People));
const males = parseInt(lodash.find(group, {Sex: "1"})?.People || 0);
const females = parseInt(lodash.find(group, {Sex: "2"})?.People || 0);
return {
Year: year,
Age: parseInt(age),
MalePercentage: (males / total * 100).toFixed(1),
FemalePercentage: (females / total * 100).toFixed(1)
};
});

// Create a long-format dataset for both years
const longFormatData = [];
withPercentages.forEach(d => {
// Add male percentage data point
longFormatData.push({
Age: d.Age,
Sex: "Male",
Year: d.Year,
Percentage: parseFloat(d.MalePercentage),
Category: `${d.Year} Male`
});
// Add female percentage data point
longFormatData.push({
Age: d.Age,
Sex: "Female",
Year: d.Year,
Percentage: parseFloat(d.FemalePercentage),
Category: `${d.Year} Female`
});
});

// Sort by age to ensure proper line connection
const sortedData = lodash.sortBy(longFormatData, ['Year', 'Age']);

return vl.markLine({
point: true, // Add points at each data point for better readability
strokeWidth: 2 // Make lines slightly thicker
})
.data(sortedData)
.encode(
vl.x().fieldQ('Age')
.title('Age Group')
.scale({zero: false}) // Start from the first age group
.axis({
grid: true,
tickMinStep: 5 // Show ticks every 5 years
}),
vl.y().fieldQ('Percentage')
.title('Percentage of Population')
.scale({
domain: [0, 100], // Force scale to go from 0 to 100
nice: true
})
.axis({
grid: true,
format: '.0f', // Remove decimal places
title: 'Percentage (%)'
}),
vl.color().fieldN('Category')
.title('Gender by Year')
.scale({
domain: ['1900 Male', '1900 Female', '2000 Male', '2000 Female'],
range: [
'#2E5C97', // Dark blue for 1900 Male
'#D64C66', // Deep rose for 1900 Female
'#66CCEE', // Light blue for 2000 Male
'#EE99AA' // Light rose for 2000 Female
]
}),
vl.tooltip([
{field: 'Age', title: 'Age'},
{field: 'Percentage', title: 'Percentage', format: '.1f'},
{field: 'Category', title: 'Group'}
])
)
.width(800)
.height(500)
.title({
text: 'Gender Distribution by Age: 1900 vs 2000',
fontSize: 16,
anchor: 'middle'
})
.config({
axis: {
gridColor: '#EEEEEE',
gridOpacity: 0.5,
labelFontSize: 12,
titleFontSize: 14
},
legend: {
orient: 'right',
labelFontSize: 12
},
point: {
size: 30, // Smaller points for cleaner look
filled: true
}
})
.render();}
Insert cell
{// Process the census data to calculate percentages
const processedData = CensusData.map(d => ({
Age: d.Age,
Gender: d.Sex === "1" ? "Male" : "Female",
Year: d.Year,
Value: parseInt(d.People)
}));

// Calculate percentages within each age group and year
const groupedByAge = _.groupBy(processedData, 'Age');
const percentageData = _.flatMap(groupedByAge, (group, age) => {
const byYear = _.groupBy(group, 'Year');
return _.flatMap(byYear, (yearGroup, year) => {
const total = _.sumBy(yearGroup, 'Value');
return yearGroup.map(g => ({
Age: age,
Gender: g.Gender,
Year: year,
Percentage: (g.Value / total * 100),
Category: `${g.Gender} ${year}`
}));
});
});

// Create a layered visualization
return vl.layer([
// Layer 1: 2000 data (shown on top with full opacity)
vl.markBar({
width: 20,
opacity: 1
})
.transform([
{filter: "datum.Year === '2000'"}
])
.encode(
vl.x().fieldO('Age').title('Age Group'),
vl.y().fieldQ('Percentage').title('Percentage of Age Group').scale({domain: [0, 100]}),
vl.color().fieldN('Category')
.scale({
domain: ['Male 2000', 'Female 2000'],
range: ['#4477AA', '#EE6677']
})
),
// Layer 2: 1900 data (shown underneath with transparency)
vl.markBar({
width: 20,
opacity: 0.6
})
.transform([
{filter: "datum.Year === '1900'"}
])
.encode(
vl.x().fieldO('Age'),
vl.y().fieldQ('Percentage'),
vl.color().fieldN('Category')
.scale({
domain: ['Male 1900', 'Female 1900'],
range: ['rgba(68,119,170,0.6)', 'rgba(238,102,119,0.6)']
})
)
])
.data(percentageData)
.width(800)
.height(500)
.config({
axis: {
grid: true,
gridColor: '#EEEEEE'
},
legend: {
title: 'Gender and Year',
orient: 'right'
},
view: {
stroke: null
}
})
.title({
text: 'Gender Distribution by Age Group: 1900 vs 2000',
fontSize: 16,
anchor: 'middle'
})
.render();}
Insert cell
{// Process the census data to calculate percentages
const processedData = CensusData.map(d => ({
Age: d.Age,
Gender: d.Sex === "1" ? "Male" : "Female",
Year: d.Year,
Value: parseInt(d.People)
}));

// Calculate percentages within each age group and year
const groupedByAge = _.groupBy(processedData, 'Age');
const percentageData = _.flatMap(groupedByAge, (group, age) => {
const byYear = _.groupBy(group, 'Year');
return _.flatMap(byYear, (yearGroup, year) => {
const total = _.sumBy(yearGroup, 'Value');
return yearGroup.map(g => ({
Age: age,
Gender: g.Gender,
Year: year,
Percentage: (g.Value / total * 100),
Category: `${g.Gender} ${year}`
}));
});
});

// Create a layered visualization with improved colors
return vl.layer([
// Layer 1: 2000 data (shown on top with full opacity)
// Using stronger, more saturated colors for the recent year
vl.markBar({
width: 20,
opacity: 1
})
.transform([
{filter: "datum.Year === '2000'"}
])
.encode(
vl.x().fieldO('Age').title('Age Group'),
vl.y().fieldQ('Percentage').title('Percentage of Age Group').scale({domain: [0, 100]}),
vl.color().fieldN('Category')
.scale({
domain: ['Male 2000', 'Female 2000'],
// Using more vibrant colors for 2000 data
range: ['#2E5C97', '#D64C66'] // Deeper blue and rose red
})
),
// Layer 2: 1900 data (shown underneath with pattern)
// Using lighter, desaturated colors for the historical year
vl.markBar({
width: 20,
opacity: 0.7 // Slightly increased opacity for better visibility
})
.transform([
{filter: "datum.Year === '1900'"}
])
.encode(
vl.x().fieldO('Age'),
vl.y().fieldQ('Percentage'),
vl.color().fieldN('Category')
.scale({
domain: ['Male 1900', 'Female 1900'],
// Using lighter, more transparent colors for 1900 data
range: ['rgba(147,180,220,0.8)', 'rgba(255,182,193,0.8)'] // Light blue and light pink
})
)
])
.data(percentageData)
.width(800)
.height(500)
.config({
axis: {
grid: true,
gridColor: '#EEEEEE',
labelFontSize: 12, // Improved readability of axis labels
titleFontSize: 14 // Improved readability of axis titles
},
legend: {
title: 'Gender and Year',
orient: 'right',
labelFontSize: 12 // Improved readability of legend labels
},
view: {
stroke: null
}
})
.title({
text: 'Gender Distribution by Age Group: 1900 vs 2000',
fontSize: 16,
anchor: 'middle',
font: 'Arial'
})
.render();}
Insert cell
{// First, let's transform our census data into a format suitable for a heatmap.
// We'll calculate the percentage of males for each age group and year, as this will
// give us a single value to represent the gender distribution (since females = 100 - males)
const groupedData = lodash.groupBy(CensusData, d => `${d.Year}-${d.Age}`);

// Transform the data to calculate male percentages for each age-year combination
const heatmapData = lodash.map(groupedData, (group, key) => {
const [year, age] = key.split('-');
const total = lodash.sumBy(group, d => parseInt(d.People));
const males = parseInt(lodash.find(group, {Sex: "1"})?.People || 0);
const malePercentage = (males / total * 100).toFixed(1);
return {
Year: year,
Age: parseInt(age),
// Using male percentage as our heatmap value
Value: parseFloat(malePercentage)
};
});

// Sort the data to ensure consistent ordering
const sortedData = lodash.sortBy(heatmapData, ['Year', 'Age']);

// Create the heatmap visualization
return vl.markRect()
.data(sortedData)
.encode(
// X-axis represents age groups
vl.x().fieldQ('Age')
.title('Age Group')
.axis({
grid: true,
tickMinStep: 5,
labelFontSize: 12
}),
// Y-axis shows the two years we're comparing
vl.y().fieldN('Year')
.title('Census Year')
.axis({
labelFontSize: 12
}),
// Color encoding represents the percentage of males
vl.color().fieldQ('Value')
.title('Percentage Male')
.scale({
domain: [0, 100],
// Using a diverging color scheme centered at 50%
range: ['#EE6677', '#FFFFFF', '#4477AA'],
// Center the color scale at 50% for balanced visualization
domainMid: 50
})
.legend({
title: 'Percentage Male',
format: '.0f',
labelFontSize: 12
}),
// Add tooltips for detailed information
vl.tooltip([
{field: 'Age', title: 'Age Group'},
{field: 'Year', title: 'Year'},
{field: 'Value', title: 'Male Percentage', format: '.1f'},
// Calculate and show female percentage in tooltip
{
expr: "100 - datum.Value",
title: 'Female Percentage',
format: '.1f'
}
])
)
.width(800)
.height(200) // Reduced height since we only have two years
.title({
text: 'Gender Distribution by Age Group and Year',
fontSize: 16,
anchor: 'middle'
})
.config({
view: {
stroke: null
},
axis: {
domain: true,
tickColor: '#888'
}
})
.render();}
Insert cell
{
// First, process the census data with improved data transformation
const groupedData = lodash.groupBy(CensusData, d => `${d.Year}-${d.Age}`);

// Transform data with enhanced precision
const withPercentages = lodash.map(groupedData, (group, key) => {
const [year, age] = key.split('-');
const total = lodash.sumBy(group, d => parseInt(d.People));
const males = parseInt(lodash.find(group, {Sex: "1"})?.People || 0);
const females = parseInt(lodash.find(group, {Sex: "2"})?.People || 0);
return {
Year: year,
Age: parseInt(age),
MalePercentage: (males / total * 100).toFixed(1),
FemalePercentage: (females / total * 100).toFixed(1)
};
});

// Create the long-format dataset with improved structure
const longFormatData = [];
withPercentages.forEach(d => {
longFormatData.push({
Age: d.Age,
Sex: "Male",
Year: d.Year,
Percentage: parseFloat(d.MalePercentage),
Category: `${d.Year} Male`
});
longFormatData.push({
Age: d.Age,
Sex: "Female",
Year: d.Year,
Percentage: parseFloat(d.FemalePercentage),
Category: `${d.Year} Female`
});
});

// Sort data properly to ensure line continuity
const sortedData = lodash.sortBy(longFormatData, ['Category', 'Age']);

// Create layered visualization with separate line and point layers
const base = vl.data(sortedData)
.encode(
vl.x().fieldQ('Age')
.scale({zero: false})
.axis({
grid: true,
tickMinStep: 5,
title: 'Age (years)',
labelFontSize: 11
}),
vl.y().fieldQ('Percentage')
.scale({domain: [0, 100]})
.axis({
grid: true,
title: 'Percentage (%)',
labelFontSize: 11
}),
vl.color().fieldN('Category')
.scale({
domain: ['1900 Male', '1900 Female', '2000 Male', '2000 Female'],
range: [
'#A8C8E8', // Light blue for 1900 Male
'#FFAFAF', // Light red for 1900 Female
'#1B4B82', // Dark blue for 2000 Male
'#B22222' // Dark red for 2000 Female
]
})
.title('Population Groups')
);

// Layer the visualization with lines and points
return vl.layer(
base.mark({
type: 'line',
strokeWidth: 2.5,
interpolate: 'linear'
}),
base.mark({
type: 'point',
size: 50,
filled: true
})
.encode(
vl.shape().field('Sex')
.scale({
domain: ['Male', 'Female'],
range: ['square', 'circle']
})
)
)
.width(800)
.height(500)
.title({
text: 'Has the distribution of sex across age groups changed from 1900 to 2000?',
subtitle: 'Showing percentage of population by gender across age groups, a century apart',
fontSize: 16,
subtitleFontSize: 12
})
.config({
axis: {
gridColor: '#EEEEEE',
gridOpacity: 0.5
},
legend: {
orient: 'right',
labelFontSize: 11
}
})
.render();}
Insert cell
{
const groupedData = lodash.groupBy(CensusData, d => `${d.Year}-${d.Age}`);
// Transform data with enhanced precision
const processedData = lodash.map(groupedData, (group, key) => {
const [year, age] = key.split('-');
const total = lodash.sumBy(group, d => parseInt(d.People));
const males = parseInt(lodash.find(group, {Sex: "1"})?.People || 0);
const females = parseInt(lodash.find(group, {Sex: "2"})?.People || 0);
return {
Year: year,
Age: parseInt(age),
MalePercentage: (males / total * 100).toFixed(1),
FemalePercentage: (females / total * 100).toFixed(1)
};
});

// Create long-format dataset with unified shape and color encoding
const longFormatData = [];
processedData.forEach(d => {
longFormatData.push({
Age: d.Age,
Year: d.Year,
Percentage: parseFloat(d.MalePercentage),
ShapeColor: `${d.Year} Male`
});
longFormatData.push({
Age: d.Age,
Year: d.Year,
Percentage: parseFloat(d.FemalePercentage),
ShapeColor: `${d.Year} Female`
});
});

// Sort data for line continuity
const sortedData = lodash.sortBy(longFormatData, ['ShapeColor', 'Age']);

// Create visualization with unified legend
const base = vl.data(sortedData)
.encode(
vl.x().fieldQ('Age')
.scale({zero: false})
.axis({
grid: true,
tickMinStep: 5,
title: 'Age (years)',
labelFontSize: 11
}),
vl.y().fieldQ('Percentage')
.scale({domain: [0, 100]})
.axis({
grid: true,
title: 'Percentage (%)',
labelFontSize: 11
}),
vl.color().fieldN('ShapeColor')
.scale({
domain: ['1900 Male', '1900 Female', '2000 Male', '2000 Female'],
range: [
'#A8C8E8', // Light blue for 1900 Male
'#FFAFAF', // Light red for 1900 Female
'#1B4B82', // Dark blue for 2000 Male
'#B22222' // Dark red for 2000 Female
]
})
.title('Sex by Year'),
vl.shape().fieldN('ShapeColor')
.scale({
domain: ['1900 Male', '1900 Female', '2000 Male', '2000 Female'],
range: ['square', 'circle', 'square', 'circle']
})
);

return vl.layer(
base.mark({
type: 'line',
strokeWidth: 2.5,
interpolate: 'linear'
}),
base.mark({
type: 'point',
size: 50,
filled: true
})
)
.width(800)
.height(500)
.title({
text: 'Has the distribution of sex across age groups evolved from 1900 to 2000?',
fontSize: 16,
subtitleFontSize: 12
})
.config({
axis: {
gridColor: '#EEEEEE',
gridOpacity: 0.5
},
legend: {
orient: 'right',
labelFontSize: 11
}
})
.render();
}
Insert cell
{// Data processing remains the same
const groupedData = lodash.groupBy(CensusData, d => `${d.Year}-${d.Age}`);

const withPercentages = lodash.map(groupedData, (group, key) => {
const [year, age] = key.split('-');
const total = lodash.sumBy(group, d => parseInt(d.People));
const males = parseInt(lodash.find(group, {Sex: "1"})?.People || 0);
const females = parseInt(lodash.find(group, {Sex: "2"})?.People || 0);
return {
Year: year,
Age: parseInt(age),
MalePercentage: (males / total * 100).toFixed(1),
FemalePercentage: (females / total * 100).toFixed(1)
};
});

const longFormatData = [];
withPercentages.forEach(d => {
longFormatData.push({
Age: d.Age,
Sex: "Male",
Year: d.Year,
Percentage: parseFloat(d.MalePercentage),
Category: `${d.Year} Male`
});
longFormatData.push({
Age: d.Age,
Sex: "Female",
Year: d.Year,
Percentage: parseFloat(d.FemalePercentage),
Category: `${d.Year} Female`
});
});

const sortedData = lodash.sortBy(longFormatData, ['Category', 'Age']);

// Common encoding specifications
const baseEncoding = (scale) => vl.data(sortedData)
.encode(
vl.x().fieldQ('Age')
.scale({zero: false})
.axis({
grid: true,
tickMinStep: 5,
title: 'Age (years)',
labelFontSize: 11
}),
vl.y().fieldQ('Percentage')
.scale(scale)
.axis({
grid: true,
labelFontSize: 11
}),
vl.color().fieldN('Category')
.scale({
domain: ['1900 Male', '1900 Female', '2000 Male', '2000 Female'],
range: [
'#A8C8E8', // Light blue for 1900 Male
'#FFAFAF', // Light red for 1900 Female
'#1B4B82', // Dark blue for 2000 Male
'#B22222' // Dark red for 2000 Female
]
})
.title('Gender and Year')
);

// Create both visualizations side by side
const percentageScale = {domain: [0, 100], type: 'linear'};
const logScale = {type: 'log', domain: [1, 100]};

return vl.hconcat(
// Percentage scale visualization
vl.layer(
baseEncoding(percentageScale).mark({
type: 'line',
strokeWidth: 2.5,
interpolate: 'linear'
}),
baseEncoding(percentageScale).mark({
type: 'point',
size: 100,
filled: true
})
.encode(
vl.shape().field('Sex')
.scale({
domain: ['Male', 'Female'],
range: ['square', 'circle']
})
)
)
.width(400)
.height(400)
.title({
text: 'Linear Percentage Scale',
subtitle: '0-100% scale',
fontSize: 14,
subtitleFontSize: 11
}),

// Logarithmic scale visualization
vl.layer(
baseEncoding(logScale).mark({
type: 'line',
strokeWidth: 2.5,
interpolate: 'linear'
}),
baseEncoding(logScale).mark({
type: 'point',
size: 100,
filled: true
})
.encode(
vl.shape().field('Sex')
.scale({
domain: ['Male', 'Female'],
range: ['square', 'circle']
})
)
)
.width(400)
.height(400)
.title({
text: 'Logarithmic Scale',
subtitle: 'Log scale emphasizes relative changes',
fontSize: 14,
subtitleFontSize: 11
})
)
.config({
axis: {
gridColor: '#EEEEEE',
gridOpacity: 0.5
},
legend: {
orient: 'right',
labelFontSize: 11
}
})
.render();}
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more