/**
 * Builds Kaplan-Meier survival curve points, with Greenwood-based 95%
 * confidence limits, from a column-oriented cohort table.
 *
 * Time is measured from study entry. For each row the observed time is
 * min(time_of_onset - study_entry_age, observed_followup); the row counts as
 * an event when onset happens no later than the end of follow-up, otherwise it
 * is censored at observed_followup.
 *
 * Rows whose time_of_onset is missing (null, undefined, NaN, or not numeric)
 * are treated as censored at observed_followup. Rows whose observed time is
 * negative or NaN (e.g. onset before study entry) are excluded.
 *
 * @param {{names: string[], data: Array<Array<*>>}} cohort - `names` lists the
 *   column names; each entry of `data` is one row, indexed by the position of
 *   the column in `names`. The columns "study_entry_age", "observed_followup",
 *   "study_exit_age" and "time_of_onset" must all be present.
 * @returns {Array<{time: number, survival: number, lower: number, upper: number}>}
 *   One point at time 0 (survival 1), followed by one point per distinct
 *   observed time in ascending order. `lower`/`upper` are clamped to [0, 1].
 * @throws {Error} "Invalid cohort input" if `cohort`, `cohort.names` or
 *   `cohort.data` is missing; "Missing required columns in cohort data" if any
 *   required column name is absent.
 */
function generateKaplanMeierData(cohort) {
  if (!cohort || !cohort.names || !cohort.data) {
    throw new Error("Invalid cohort input");
  }

  const colStudyEntry = cohort.names.indexOf("study_entry_age");
  const colFollowup = cohort.names.indexOf("observed_followup");
  const colExitAge = cohort.names.indexOf("study_exit_age");
  const colOnset = cohort.names.indexOf("time_of_onset");
  if (
    colStudyEntry === -1 ||
    colFollowup === -1 ||
    colExitAge === -1 ||
    colOnset === -1
  ) {
    throw new Error("Missing required columns in cohort data");
  }

  // ~95% two-sided normal quantile used for the confidence limits.
  const z = 1.96;

  const subjects = cohort.data.map((row) => {
    const entryAge = row[colStudyEntry];
    const censorTime = row[colFollowup]; // = exitAge - entryAge
    const onsetAge = row[colOnset];

    // A missing onset means no event was observed: keep the subject in the
    // risk set as censored instead of letting a NaN/negative time drop it.
    const onsetMissing = onsetAge == null || Number.isNaN(Number(onsetAge));
    if (onsetMissing) {
      return { time: Number(censorTime), event: 0 };
    }

    const eventTimeSinceEntry = onsetAge - entryAge;
    return {
      // Observed time is whichever comes first: onset or end of follow-up.
      time: Math.min(eventTimeSinceEntry, censorTime),
      // Event iff onset happens no later than the end of follow-up.
      event: eventTimeSinceEntry <= censorTime ? 1 : 0,
    };
  });

  // Exclude negative times (onset before entry) and NaN times (unusable
  // inputs). A time of exactly zero is kept.
  const cleaned = subjects.filter((subj) => subj.time >= 0);
  cleaned.sort((a, b) => a.time - b.time);

  // Kaplan-Meier calculation
  //   S(0) = 1
  //   For each distinct time t_j:
  //     r_j = number at risk just prior to t_j
  //     d_j = number of events at t_j
  //     S(t_j) = S(t_{j-1}) * (1 - d_j / r_j)
  //
  // Greenwood variance:
  //   var(S(t_j)) = S(t_j)^2 * sum_{i=1..j} [ d_i / (r_i * (r_i - d_i)) ]
  //   95% CI => S(t_j) +/- 1.96 * sqrt(var(S(t_j)))
  let atRisk = cleaned.length; // at time = 0
  let survival = 1.0; // S(0)
  let greenwoodSum = 0.0; // sum_i [ d_i / (r_i * (r_i - d_i)) ]
  let lower = 1.0;
  let upper = 1.0;
  const kmData = [{ time: 0, survival: 1.0, lower: 1.0, upper: 1.0 }];

  let idx = 0;
  while (idx < cleaned.length) {
    const thisTime = cleaned[idx].time;
    let nEventsAtThisTime = 0;
    let nTotalAtThisTime = 0;
    // Consume every subject tied at this time (events and censorings).
    while (idx < cleaned.length && cleaned[idx].time === thisTime) {
      nTotalAtThisTime++;
      if (cleaned[idx].event === 1) {
        nEventsAtThisTime++;
      }
      idx++;
    }

    const rj = atRisk; // at risk just prior to t_j
    const dj = nEventsAtThisTime;

    if (dj > 0) {
      survival *= 1 - dj / rj;
      if (dj < rj) {
        greenwoodSum += dj / (rj * (rj - dj));
        const sdSurv = Math.sqrt(survival * survival * greenwoodSum);
        lower = Math.max(0, survival - z * sdSurv);
        upper = Math.min(1, survival + z * sdSurv);
      } else {
        // Everyone still at risk had the event, so S(t_j) = 0 and the
        // Greenwood term would divide by zero (0 * Infinity = NaN).
        // Report the degenerate interval [0, 0] instead.
        lower = 0;
        upper = 0;
      }
    }
    // With no events the estimate and its variance are unchanged, so the
    // previous survival and confidence limits are carried forward as-is.
    kmData.push({ time: thisTime, survival, lower, upper });

    // Everyone who had time == thisTime is no longer at risk afterwards
    // (whether event or censored).
    atRisk -= nTotalAtThisTime;
  }

  return kmData;
}