Public
Edited
Apr 16
2 forks
Insert cell
Insert cell
d3 = require('d3@7')
Insert cell
/**
 * Label-encode a qualitative value as its position in an ordered category list.
 *
 * @param {*} value - Raw category value to encode.
 * @param {Array} categories - Ordered list of known category values.
 * @returns {number} Zero-based index of `value` within `categories`, or 0 when
 *   `value` is not in the list.
 */
function encodeCategory(value, categories) {
  const index = categories.indexOf(value);
  // indexOf reports "not found" as -1; fall back to 0 so callers never receive a negative code.
  return index !== -1 ? index : 0;
}
Insert cell
// Linearly rescale a raw score so that -5 maps to -1 and 5 maps to 1.
// No clamping is configured on the scale, so inputs outside [-5, 5] land
// outside [-1, 1].
scaleCorrelation = (correlation) => {
  const rawBounds = [-5, 5];
  const unitBounds = [-1, 1];
  const toUnitRange = d3.scaleLinear().domain(rawBounds).range(unitBounds);

  return toUnitRange(correlation);
}
Insert cell
// Load the heart-disease CSV and convert every row into an object of numeric features.
//  - Yes/No columns become 1/0 (anything other than exactly 'Yes' becomes 0).
//  - Numeric columns are coerced with unary plus.
//  - Ordered/qualitative columns are label-encoded with encodeCategory.
// NOTE(review): columns that may contain other spellings or extra answer values
// (e.g. Diabetic, GenHealth) would silently fall into the 0 bucket — verify against the CSV.
data = d3.csv('https://raw.githubusercontent.com/balleromair12/DataViz_finalproject/main/heart_2020_cleaned.csv', (row) => {
  const yesNo = (answer) => (answer === 'Yes' ? 1 : 0);

  const ageBrackets = ['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older'];
  const races = ['American Indian/Alaskan Native', 'White', 'Black', 'Asian', 'Hispanic', 'Other'];
  const healthLevels = ['Excellent', 'Very Good', 'Good', 'Fair', 'Poor'];

  return {
    heartDisease: yesNo(row.HeartDisease),
    bmi: +row.BMI,
    smoking: yesNo(row.Smoking),
    alcoholDrinking: yesNo(row.AlcoholDrinking),
    stroke: yesNo(row.Stroke),
    physicalHealth: +row.PhysicalHealth,
    mentalHealth: +row.MentalHealth,
    diffWalking: yesNo(row.DiffWalking),
    // Sex is encoded as 1 for 'Male' and 0 for every other value.
    sex: row.Sex === 'Male' ? 1 : 0,
    ageCategory: encodeCategory(row.AgeCategory, ageBrackets),
    raceCategory: encodeCategory(row.Race, races),
    diabetic: yesNo(row.Diabetic),
    physicalActivity: yesNo(row.PhysicalActivity),
    genHealth: encodeCategory(row.GenHealth, healthLevels),
    sleepTime: +row.SleepTime,
    asthma: yesNo(row.Asthma),
    kidneyDisease: yesNo(row.KidneyDisease),
    skinCancer: yesNo(row.SkinCancer),
  };
});

Insert cell
// Score how strongly `variable` differs between the two classes of a 0/1 `target`.
//   data     - array of row objects
//   target   - key of the 0/1 column used to split the rows
//   variable - key of the column whose values are compared between the two groups
// Returns 0 when either group has zero deviation; otherwise returns the
// difference of group means divided by the denominator below, passed through
// scaleCorrelation. Undefined, NaN or infinite intermediate results are treated as 0.
// NOTE(review): the denominator used here does not look like the textbook
// point-biserial form (overall standard deviation with the group-size factor in
// the numerator) — confirm that this scoring is what is intended.
pointBiserialCorrelation = (data, target, variable) => {
  // Partition the variable's values by target class; rows whose target is
  // neither 1 nor 0 belong to neither group.
  const positives = [];
  const negatives = [];
  for (const row of data) {
    const label = row[target];
    if (label === 1) {
      positives.push(row[variable]);
    } else if (label === 0) {
      negatives.push(row[variable]);
    }
  }

  const spread1 = d3.deviation(positives);
  const spread0 = d3.deviation(negatives);

  // A group with no variance yields a score of 0.
  if (spread1 === 0 || spread0 === 0) {
    return 0;
  }

  const mean1 = d3.mean(positives);
  const mean0 = d3.mean(negatives);

  const n1 = positives.length;
  const n0 = negatives.length;
  const sizeTerm = (n1 * n0) / (data.length * (n1 + n0));
  const varianceTerm = Math.pow(spread1, 2) + Math.pow(spread0, 2);
  const denom = Math.sqrt(sizeTerm * varianceTerm);

  // A zero denominator keeps the score at 0; NaN/Infinity are reset to 0 as well.
  const raw = denom !== 0 ? (mean1 - mean0) / denom : 0;

  return scaleCorrelation(Number.isFinite(raw) ? raw : 0);
}
Insert cell
// Research question 1: factors correlations to diseases
{
const chartWidth = 1000;
const chartHeight = 600;
const margin = { top: 80, right: 110, bottom: 60, left: 90 };
const targets = ['heartDisease', 'kidneyDisease', 'skinCancer'];
const factors = ['bmi', 'smoking', 'alcoholDrinking', 'stroke', 'physicalHealth',
'mentalHealth', 'diffWalking', 'sex', 'ageCategory', 'raceCategory',
'diabetic', 'physicalActivity', 'genHealth', 'sleepTime', 'asthma'];

// Calculate the correlation matrix
const correlationMatrix = [];
targets.forEach(target => {
factors.forEach(factor => {
const correlation = pointBiserialCorrelation(data, target, factor);
correlationMatrix.push({target, factor, correlation});
});
});

// Set the scales for the x and y axes
const xScale = d3.scaleBand()
.domain(targets)
.range([margin.left, chartWidth - margin.right])
.padding(0.05);

const yScale = d3.scaleBand()
.domain(factors)
.range([margin.top, chartHeight - margin.bottom])
.padding(0.05);

// Define a color scale for correlations
const colorScale = d3.scaleSequential(d3.interpolateRdBu)
.domain([1, -1]);

// Make the labels easier to read
const labelMap = {heartDisease: 'Heart Disease',
bmi: 'BMI',
smoking: 'Smoking',
alcoholDrinking: 'Alcohol Drinking',
stroke: 'Stroke',
physicalHealth: 'Physical Health',
mentalHealth: 'Mental Health',
diffWalking: 'Difficulty Walking',
sex: 'Sex',
ageCategory: 'Age Category',
raceCategory: 'Race',
diabetic: 'Diabetic',
physicalActivity: 'Physical Activity',
genHealth: 'General Health',
sleepTime: 'Sleep Time',
asthma: 'Asthma',
kidneyDisease: 'Kidney Disease',
skinCancer: 'Skin Cancer'
};

// Transform the label
const formatLabel = (label) => labelMap[label] || label;

// Create the SVG element for the heatmap
const svg = d3.create('svg')
.attr('width', chartWidth)
.attr('height', chartHeight);

// Create cells for the heatmap
svg.selectAll('rect')
.data(correlationMatrix)
.enter()
.append('rect')
.attr('x', d => xScale(d.target))
.attr('y', d => yScale(d.factor))
.attr('width', xScale.bandwidth())
.attr('height', yScale.bandwidth())
.attr('fill', d => colorScale(d.correlation)); // Set color based on correlation value

// Add labels for the correlation values inside the cells
svg.selectAll('text')
.data(correlationMatrix)
.enter()
.append('text')
.attr('x', d => xScale(d.target) + xScale.bandwidth() / 2)
.attr('y', d => yScale(d.factor) + yScale.bandwidth() / 2)
.attr('dy', '.35em')
.attr('text-anchor', 'middle')
.text(d => d3.format('.2f')(d.correlation)) // Round to 2 decimal places
.attr('fill', 'black');

// Disease names on the x-axis
svg.append('g')
.attr('transform', `translate(0, ${margin.top})`)
.call(d3.axisTop(xScale))
.selectAll('text')
.text(d => formatLabel(d));

// Factors on the y-axis
svg.append('g')
.attr('transform', `translate(${margin.left}, 0)`)
.call(d3.axisLeft(yScale))
.selectAll('text')
.text(d => formatLabel(d));

return svg.node();
}
Insert cell
// Research question 4: disease co-occurrences. From the chart it looks like if you have heart disease, you likely have kidney disease as well.
{
const chartWidth = 1000;
const chartHeight = 600;
const margin = { top: 80, right: 110, bottom: 60, left: 90 };
const targets = ['heartDisease', 'kidneyDisease', 'skinCancer'];
const factors = ['heartDisease', 'kidneyDisease', 'skinCancer'];

// Calculate the correlation matrix
const correlationMatrix = [];
targets.forEach(target => {
factors.forEach(factor => {
const correlation = pointBiserialCorrelation(data, target, factor);
correlationMatrix.push({target, factor, correlation});
});
});

// Set the scales for the x and y axes
const xScale = d3.scaleBand()
.domain(targets)
.range([margin.left, chartWidth - margin.right])
.padding(0.05);

const yScale = d3.scaleBand()
.domain(factors)
.range([margin.top, chartHeight - margin.bottom])
.padding(0.05);

// Define a color scale for correlations
const colorScale = d3.scaleSequential(d3.interpolateRdBu)
.domain([1, -1]);

// Make the lables easier to read
const labelMap = {heartDisease: 'Heart Disease',
bmi: 'BMI',
smoking: 'Smoking',
alcoholDrinking: 'Alcohol Drinking',
stroke: 'Stroke',
physicalHealth: 'Physical Health',
mentalHealth: 'Mental Health',
diffWalking: 'Difficulty Walking',
sex: 'Sex',
ageCategory: 'Age Category',
raceCategory: 'Race',
diabetic: 'Diabetic',
physicalActivity: 'Physical Activity',
genHealth: 'General Health',
sleepTime: 'Sleep Time',
asthma: 'Asthma',
kidneyDisease: 'Kidney Disease',
skinCancer: 'Skin Cancer'
};

// Transform the label
const formatLabel = (label) => labelMap[label] || label;

// Create the SVG element for the heatmap
const svg = d3.create('svg')
.attr('width', chartWidth)
.attr('height', chartHeight);

// Create cells for the heatmap
svg.selectAll('rect')
.data(correlationMatrix)
.enter()
.append('rect')
.attr('x', d => xScale(d.target))
.attr('y', d => yScale(d.factor))
.attr('width', xScale.bandwidth())
.attr('height', yScale.bandwidth())
.attr('fill', d => colorScale(d.correlation)); // Set color based on correlation value

// Add labels for the correlation values inside the cells
svg.selectAll('text')
.data(correlationMatrix)
.enter()
.append('text')
.attr('x', d => xScale(d.target) + xScale.bandwidth() / 2)
.attr('y', d => yScale(d.factor) + yScale.bandwidth() / 2)
.attr('dy', '.35em')
.attr('text-anchor', 'middle')
.text(d => d3.format('.2f')(d.correlation)) // Round to 2 decimal places
.attr('fill', 'black');

// Disease names on the x-axis
svg.append('g')
.attr('transform', `translate(0, ${margin.top})`)
.call(d3.axisTop(xScale))
.selectAll('text')
.text(d => formatLabel(d));

// Factors on the y-axis
svg.append('g')
.attr('transform', `translate(${margin.left}, 0)`)
.call(d3.axisLeft(yScale))
.selectAll('text')
.text(d => formatLabel(d));

return svg.node();
}
Insert cell
Insert cell
// Interactive inputs, one notebook cell per control. Their values are the
// arguments handed to createFeatures further down.
// Checkbox controls each offer a single option and start with an empty
// selection (value: []); createFeatures tests the selection with
// .includes(<label>), so the option label text is load-bearing.

// BMI slider: 10 to 50 in steps of 0.1, initially 25.
viewof bmi = Inputs.range([10, 50], {label: 'BMI', step: 0.1, value: 25})
Insert cell
// Smoking checkbox.
viewof smoking = Inputs.checkbox(['Smoking?'], {value: []})
Insert cell
// Alcohol-drinking checkbox.
viewof alcoholDrinking = Inputs.checkbox(['Alcohol Drinking?'], {value: []})
Insert cell
// Stroke-history checkbox.
viewof stroke = Inputs.checkbox(['Stroke History?'], {value: []})
Insert cell
// Physical health slider: 0 to 30 in steps of 1, initially 5.
viewof physicalHealth = Inputs.range([0, 30], {label: 'Physical Health Status (smaller value indicates better status)', step: 1, value: 5})
Insert cell
// Mental health slider: 0 to 30 in steps of 1, initially 5.
viewof mentalHealth = Inputs.range([0, 30], {label: 'Mental Health Status (smaller value indicates better status)', step: 1, value: 5})
Insert cell
// Difficulty-walking checkbox.
viewof diffWalking = Inputs.checkbox(['Difficulty Walking?'], {value: []})
Insert cell
// Sex dropdown; createFeatures encodes 'Male' as 1 and anything else as 0.
viewof sex = Inputs.select(['Male', 'Female'], {label: 'Sex'})
Insert cell
// Age bracket dropdown; same option list, in the same order, as the one passed to encodeCategory.
viewof ageCategory = Inputs.select(['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older'], {label: 'Age Category'})
Insert cell
// Race dropdown; same option list, in the same order, as the one passed to encodeCategory.
viewof raceCategory = Inputs.select(['American Indian/Alaskan Native', 'White', 'Black', 'Asian', 'Hispanic', 'Other'], {label: 'Race'})
Insert cell
// Diabetic checkbox.
viewof diabetic = Inputs.checkbox(['Diabetic?'], {value: []})
Insert cell
// Physical-activity checkbox.
viewof physicalActivity = Inputs.checkbox(['Physically Active?'], {value: []})
Insert cell
// General health dropdown; same option list, in the same order, as the one passed to encodeCategory.
viewof genHealth = Inputs.select(['Excellent', 'Very Good', 'Good', 'Fair', 'Poor'], {label: 'General Health'})
Insert cell
// Sleep time slider: 0 to 24 in steps of 1, initially 1.
viewof sleepTime = Inputs.range([0, 24], {label: 'Sleep Time', step: 1, value: 1})
Insert cell
// Asthma checkbox.
viewof asthma = Inputs.checkbox(['Has Asthma?'], {value: []})
Insert cell
/**
 * Logistic (sigmoid) function: 1 / (1 + e^(-z)).
 *
 * @param {number} z - Input value.
 * @returns {number} The logistic of `z`.
 */
function sigmoid(z) {
  const denominator = 1 + Math.exp(-z);
  return 1 / denominator;
}
Insert cell
/**
 * Min-max normalization: express `value` as a fraction of the span from `min` to `max`.
 * Values outside the span are not clamped, and `max === min` is not guarded
 * (the division then yields NaN or +/-Infinity).
 *
 * @param {number} value - Value to normalize.
 * @param {number} min - Value that maps to 0.
 * @param {number} max - Value that maps to 1.
 * @returns {number} (value - min) / (max - min).
 */
function normalize(value, min, max) {
  const offset = value - min;
  const span = max - min;
  return offset / span;
}
Insert cell
/**
 * Compute sigmoid(bias + sum(input[i] * weights[i])).
 *
 * @param {number[]} input - Feature values.
 * @param {number[]} weights - Per-feature weights, matched to `input` by index.
 * @param {number} [bias=0] - Constant added to the weighted sum.
 * @returns {number} The sigmoid of the weighted sum.
 */
function predict(input, weights, bias = 0) {
  let weightedSum = bias;
  // Accumulate in index order; weights are looked up by the input's position.
  for (const [position, feature] of input.entries()) {
    weightedSum += feature * weights[position];
  }
  return sigmoid(weightedSum);
}
Insert cell
/**
 * Convert the raw input-control values into the numeric feature object used
 * for prediction.
 *
 * Sliders are min-max normalized over their control range, checkbox selections
 * become 1/0, `sex` becomes 1 for 'Male' and 0 otherwise, and the dropdown
 * values are label-encoded with encodeCategory. The key order of the returned
 * object is significant: callers flatten it with Object.values and pair it
 * positionally with weight arrays.
 *
 * @param {Object} inputs - Current values of the input controls.
 * @param {number} inputs.bmi
 * @param {string[]} inputs.smoking - Checkbox selection (array of ticked labels).
 * @param {string[]} inputs.alcoholDrinking - Checkbox selection.
 * @param {string[]} inputs.stroke - Checkbox selection.
 * @param {number} inputs.physicalHealth
 * @param {number} inputs.mentalHealth
 * @param {string[]} inputs.diffWalking - Checkbox selection.
 * @param {string} inputs.sex
 * @param {string} inputs.ageCategory
 * @param {string} inputs.raceCategory
 * @param {string[]} inputs.diabetic - Checkbox selection.
 * @param {string[]} inputs.physicalActivity - Checkbox selection.
 * @param {string} inputs.genHealth
 * @param {number} inputs.sleepTime
 * @param {string[]} inputs.asthma - Checkbox selection.
 * @returns {Object} Numeric features keyed by the same names as the inputs.
 */
function createFeatures({
  bmi,
  smoking,
  alcoholDrinking,
  stroke,
  physicalHealth,
  mentalHealth,
  diffWalking,
  sex,
  ageCategory,
  raceCategory,
  diabetic,
  physicalActivity,
  genHealth,
  sleepTime,
  asthma
}) {
  // Checkbox selections are treated as arrays of ticked option labels, so each
  // label below must match the option text of its checkbox control exactly
  // ('Smoking?', 'Alcohol Drinking?' and 'Has Asthma?' previously did not,
  // which left those three features stuck at 0).
  const isTicked = (selection, label) => (selection.includes(label) ? 1 : 0);

  return {
    bmi: normalize(bmi, 10, 50),
    smoking: isTicked(smoking, 'Smoking?'),
    alcoholDrinking: isTicked(alcoholDrinking, 'Alcohol Drinking?'),
    stroke: isTicked(stroke, 'Stroke History?'),
    physicalHealth: normalize(physicalHealth, 0, 30),
    mentalHealth: normalize(mentalHealth, 0, 30),
    diffWalking: isTicked(diffWalking, 'Difficulty Walking?'),
    sex: sex === 'Male' ? 1 : 0,
    ageCategory: encodeCategory(ageCategory, ['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older']),
    raceCategory: encodeCategory(raceCategory, ['American Indian/Alaskan Native', 'White', 'Black', 'Asian', 'Hispanic', 'Other']),
    diabetic: isTicked(diabetic, 'Diabetic?'),
    physicalActivity: isTicked(physicalActivity, 'Physically Active?'),
    genHealth: encodeCategory(genHealth, ['Excellent', 'Very Good', 'Good', 'Fair', 'Poor']),
    sleepTime: normalize(sleepTime, 0, 24),
    asthma: isTicked(asthma, 'Has Asthma?')
  };
}
Insert cell
// Build the numeric feature object from the current values of the input
// controls (the same-named notebook cells are passed through by shorthand).
features = createFeatures({
bmi,
smoking,
alcoholDrinking,
stroke,
physicalHealth,
mentalHealth,
diffWalking,
sex,
ageCategory,
raceCategory,
diabetic,
physicalActivity,
genHealth,
sleepTime,
asthma
});
Insert cell
// Flatten the feature object into an array of its values, in the key order
// produced by createFeatures. predict() pairs this array with a weight array
// by index, so that order must line up with the weight order.
inputVector = Object.values(features);
Insert cell
/**
 * Return the hard-coded per-disease weight vectors (15 numbers each).
 * Per the original author, the weights were obtained from the correlation matrix.
 * NOTE(review): the entries are presumably in the same order as the keys that
 * createFeatures returns — verify before reordering either side.
 *
 * @returns {{heartDisease: number[], kidneyDisease: number[], skinCancer: number[]}}
 *   A freshly built object on every call.
 */
function getWeights() {
  const heartDisease = [0.09, 0.2, -0.06, 0.24, 0.25, 0.05, 0.31, 0.13, 0.5, -0.08, 0.28, -0.17, 0.39, 0.01, 0.07];
  const kidneyDisease = [0.19, 0.14, -0.13, 0.26, 0.45, 0.13, 0.50, -0.4, 0.55, -0.05, 0.50, -0.3, 0.62, 0.02, 0.15];
  const skinCancer = [-0.06, 0.06, -0.01, 0.07, 0.07, -0.06, 0.1, 0.02, 0.53, -0.23, 0.06, 0, 0.06, 0.07, 0];

  return { heartDisease, kidneyDisease, skinCancer };
}
Insert cell
// Per-disease weight vectors, fetched once into a notebook cell.
weights = getWeights();
Insert cell
// Predict the probability for heart disease
// (sigmoid of the weighted feature sum; no bias is passed, so predict uses its default of 0).
predict(inputVector, weights.heartDisease)
Insert cell
// Predict the probability for kidney disease
predict(inputVector, weights.kidneyDisease)
Insert cell
// Predict the probability for skin cancer
predict(inputVector, weights.skinCancer)
Insert cell
// Heart disease (yes/no): 'Yes' only when the predicted value exceeds 0.98.
heartPrediction = predict(inputVector, weights.heartDisease) > 0.98 ? 'Yes' : 'No';
Insert cell
// Kidney disease (yes/no): 'Yes' only when the predicted value exceeds 0.98.
kidneyPrediction = predict(inputVector, weights.kidneyDisease) > 0.98 ? 'Yes' : 'No';
Insert cell
// Skin cancer (yes/no): 'Yes' only when the predicted value exceeds 0.98.
skinPrediction = predict(inputVector, weights.skinCancer) > 0.98 ? 'Yes' : 'No';
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more