Public
Edited
Jan 30, 2024
Insert cell
Insert cell
import { aq, op } from '@uwdata/arquero'
Insert cell
// Straight-line (Euclidean) distance between two 2-D points.
// point1, point2: objects exposing numeric `x` and `y` properties.
// Returns the non-negative distance as a number.
function euclideanDistance(point1, point2) {
  // The debug console.log that used to live here ran once per distance
  // evaluation, flooding the console inside the clustering loops.
  // Math.hypot computes sqrt(dx^2 + dy^2).
  return Math.hypot(point1.x - point2.x, point1.y - point2.y);
}

Insert cell
// Function to assign each point to the nearest medoid
// function assignToMedoids(data, medoids) {
// const assignments = [];
// for (const point of data) {
// const distances = medoids.map(medoid =>
// euclideanDistance(point, medoid)
// );
// const minDistanceIndex = distances.indexOf(Math.min(...distances));
// assignments.push(minDistanceIndex);
// }
// return assignments;
// }
Insert cell
// Function to calculate the total cost of a clustering (sum of distances to medoids)
// function calculateCost(data, medoids, assignments) {
// let cost = 0;
// for (let i = 0; i < data.length; i++) {
// cost += euclideanDistance(data[i], medoids[assignments[i]]);
// }
// return cost;
// }
Insert cell
// Find the best medoid for one cluster.
// cluster: array of indices into `data` identifying the cluster's members.
// data:    the full array of points.
// Returns the index (into `data`) of the member whose summed distance to the
// other members of the same cluster is smallest, or -1 for an empty cluster.
function findBestMedoid(cluster, data) {
  let bestMedoidIndex = -1;
  let minCost = Infinity;

  for (const candidateIndex of cluster) {
    const candidate = data[candidateIndex];

    // Only members of this cluster contribute to the cost. Summing over the
    // whole data set would rank candidates by their distance to points that
    // belong to other clusters.
    let clusterCost = 0;
    for (const memberIndex of cluster) {
      clusterCost += euclideanDistance(data[memberIndex], candidate);
    }

    // Strict comparison: on ties the earliest candidate wins.
    if (clusterCost < minCost) {
      minCost = clusterCost;
      bestMedoidIndex = candidateIndex;
    }
  }

  return bestMedoidIndex;
}
Insert cell
// K-Medoids main function (alternating assign / update heuristic).
// data:          array of points ({x, y} objects).
// k:             number of clusters; must be an integer in [1, data.length].
// maxIterations: upper bound on medoid-update rounds (default 100).
// Returns { medoids, assignments, cost } where `medoids` holds k elements of
// `data`, `assignments[i]` is the position in `medoids` of the medoid nearest
// to data[i], and `cost` is the value of calculateCost for that clustering.
// Throws RangeError when k is not a valid cluster count for `data`.
function kMedoids(data, k, maxIterations = 100) {
  const n = data.length;
  if (!Number.isInteger(k) || k < 1 || k > n) {
    throw new RangeError(`k must be an integer between 1 and ${n}, got ${k}`);
  }

  // Nearest-medoid lookup on the plain array. The notebook's assignToMedoids
  // operates on an Arquero table, so it cannot be used with `data` here.
  // Ties go to the medoid with the lowest cluster id.
  const assignPoints = (medoidIndices) =>
    data.map((point) => {
      let nearestCluster = 0;
      let nearestDistance = Infinity;
      medoidIndices.forEach((medoidIndex, clusterId) => {
        const distance = euclideanDistance(point, data[medoidIndex]);
        if (distance < nearestDistance) {
          nearestDistance = distance;
          nearestCluster = clusterId;
        }
      });
      return nearestCluster;
    });

  // Draw k distinct starting medoids (partial Fisher-Yates shuffle of the
  // index list) so the same point can never be picked twice.
  const pool = Array.from({ length: n }, (_, index) => index);
  for (let i = 0; i < k; i++) {
    const j = i + Math.floor(Math.random() * (n - i));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }
  let medoidIndices = pool.slice(0, k);

  // Holds the current cluster assignment for each data point.
  let assignments = assignPoints(medoidIndices);

  for (let iteration = 0; iteration < maxIterations; iteration++) {
    // Update medoids: group point indices by cluster, then pick each
    // cluster's best representative.
    const nextIndices = medoidIndices.map((currentIndex, clusterId) => {
      const cluster = [];
      assignments.forEach((assigned, pointIndex) => {
        if (assigned === clusterId) cluster.push(pointIndex);
      });
      const bestMedoidIndex = findBestMedoid(cluster, data);
      // findBestMedoid reports -1 for an empty cluster (which can happen when
      // two medoids share the same coordinates); keep the previous medoid
      // instead of reading data[-1].
      return bestMedoidIndex === -1 ? currentIndex : bestMedoidIndex;
    });

    // Converged when no medoid moved; assignments are then already current.
    const converged = nextIndices.every(
      (index, clusterId) => index === medoidIndices[clusterId]
    );
    medoidIndices = nextIndices;
    if (converged) break;

    assignments = assignPoints(medoidIndices);
  }

  const medoids = medoidIndices.map((index) => data[index]);
  const cost = calculateCost(data, medoids, assignments);

  return { medoids, assignments, cost };
}
Insert cell
// Sample set of ten 2-D points ({x, y}). No other cell visible in this
// notebook reads it.
test = [
{ x: 6, y: 9 },
{ x: 4, y: 10 },
{ x: 4, y: 4 },
{ x: 8, y: 5 },
{ x: 8, y: 3 },
{ x: 5, y: 2 },
{ x: 5, y: 8 },
{ x: 6, y: 4 },
{ x: 4, y: 8 },
{ x: 3, y: 9 },
]
Insert cell
// Draw up to `n` random elements from `list` without replacement.
// list: array to sample from; it is deep-copied first, so the caller's array
//       is never modified and the returned elements are copies.
// n:    requested sample size.
// Returns an array of min(n, list.length) elements in draw order.
function randomChoice(list, n) {
  const pool = structuredClone(list);
  // Never draw more than the pool holds; otherwise the extra draws would
  // push `undefined` once the pool runs dry.
  const count = Math.min(n, pool.length);
  const choices = [];
  for (let i = 0; i < count; i++) {
    const index = Math.floor(Math.random() * pool.length);
    // splice removes the drawn element so it cannot be picked again.
    choices.push(...pool.splice(index, 1));
  }
  return choices;
}
Insert cell
// Six 2-D points ({x, y}). This cell feeds `initialMedoids`, the `dt` table
// and the cost calculation further down.
data = [
{ x: 1, y: 2 },
{ x: 5, y: 8 },
{ x: 1, y: 6 },
{ x: 8, y: 3 },
{ x: 6, y: 2 },
{ x: 7, y: 8 }
];
Insert cell
// Two starting medoids drawn at random from `data` by randomChoice, which
// samples without replacement and returns deep copies of the points.
initialMedoids = randomChoice(data, 2)
Insert cell
// Arquero table built from `data`; configureDataTable attaches the medoid
// params to this notebook-level table.
dt = aq.from(data)
Insert cell
// Display the table in the notebook (presumably an HTML preview of the rows;
// confirm against the Arquero `view` documentation).
dt.view()
Insert cell
// Turn a list of medoids into a lookup object keyed by position:
// [m0, m1, ...] -> { c0: m0, c1: m1, ... }. The medoid values are stored
// as-is (no copying).
function buildMedoidObj(medoids) {
  const entries = Array.from(medoids, (medoid, position) => [`c${position}`, medoid]);
  return Object.fromEntries(entries);
}
Insert cell
// Names of the per-medoid distance columns, one `c<i>Dist` entry per element
// of initialMedoids. These are the column names addMedoidDistances creates
// and that addClosestMedoids reads back through the table params.
allColumns = initialMedoids.map((c, i) => `c${i}Dist`) // ['c0Dist', 'c1Dist']
Insert cell
// Attach the medoids to the notebook-level table `dt` as Arquero params.
// Table expressions cannot see outside variables by default, so each medoid
// is exposed as a param: { c0: medoid0, c1: medoid1, ... }.
// `allColumns` lists the matching distance-column names (`c<i>Dist`).
// medoids: array of medoid points.
// Returns the result of dt.params(...) with those params set.
function configureDataTable(medoids) {
  const params = {};
  medoids.forEach((medoid, index) => {
    params[`c${index}`] = medoid;
  });
  // Derive the column names from the medoids actually passed in. The
  // notebook-level `allColumns` cell is built from initialMedoids and would be
  // out of step whenever a different number of medoids is supplied.
  params.allColumns = medoids.map((_, index) => `c${index}Dist`);
  return dt.params(params);
}
Insert cell
// Append one distance column per medoid to the table.
// For medoid i a column `c<i>Dist` is derived, holding the distance from each
// row to the table param `c<i>` (the params must already be set on `dt`).
// dt:      table to extend.
// medoids: array of medoids; only its length is used here.
// Returns the table produced by the last derive call (or `dt` itself when
// `medoids` is empty).
function addMedoidDistances(dt, medoids) {
  let table = dt;
  for (let position = 0; position < medoids.length; position++) {
    const paramName = `c${position}`;
    const columnName = `${paramName}Dist`;
    // aq.escape lets the expression call euclideanDistance, a plain JS
    // function defined outside the table.
    const distanceToMedoid = aq.escape((row, params) =>
      euclideanDistance(row, params[paramName])
    );
    table = table.derive({ [columnName]: distanceToMedoid });
  }
  return table;
}
Insert cell
// Position of the largest value in `arr` (first occurrence on ties).
// Returns -1 for an empty array, and also when the maximum cannot be found
// by strict equality (e.g. the array contains NaN).
function indexOfMax(arr) {
  if (arr.length === 0) return -1;

  let largest = -Infinity;
  for (const value of arr) {
    largest = Math.max(largest, value);
  }
  return arr.indexOf(largest);
}
Insert cell
// Label every row with the medoid it is closest to.
// Expects `dt` to already carry the distance columns named in the table param
// `allColumns` (one `c<i>Dist` column per medoid).
// Returns the derived `cluster` column as a plain array: for each row, the
// position in `allColumns` of the smallest distance (-1 if `allColumns` is
// empty). Ties resolve to the lowest position.
function addClosestMedoids(dt) {
  return dt
    .derive({
      cluster: aq.escape((d, $) => {
        const distances = $.allColumns.map((col) => d[col]);
        // Closest means the SMALLEST distance; taking the maximum here would
        // assign each point to its farthest medoid.
        return distances.indexOf(Math.min(...distances));
      })
    })
    .array('cluster');
}
Insert cell
// Assign every row of the table to a medoid in two steps:
//   1. addMedoidDistances stores the distance from each point to every medoid
//      in its own column;
//   2. addClosestMedoids turns those columns into a per-row cluster value.
// Returns whatever addClosestMedoids returns.
function assignToMedoids(dt, medoids) {
  const withDistances = addMedoidDistances(dt, medoids);
  return addClosestMedoids(withDistances);
}
Insert cell
// Compute the cluster assignment of every point for the given medoids.
// dt:      accepted for the caller's convenience but not read here;
//          configureDataTable works on the notebook-level table instead.
// medoids: array of medoid points.
// Returns the assignments produced by assignToMedoids.
function getClusters(dt, medoids) {
  // Expose the medoids to the table as params, then assign each point to
  // its medoid.
  const configuredTable = configureDataTable(medoids);
  return assignToMedoids(configuredTable, medoids);
}
Insert cell
// Per-row cluster values for the initial medoids: the `cluster` column that
// addClosestMedoids returns as an array, one entry per row of the table.
assignments = getClusters(dt, initialMedoids)
Insert cell
// Total cost of a clustering: the sum, over all points, of the distance from
// the point to the medoid it is assigned to.
// data:        array of points.
// medoids:     array of medoid points.
// assignments: assignments[i] is the position in `medoids` of data[i]'s medoid.
// Returns the summed distance (0 for empty `data`).
function calculateCost(data, medoids, assignments) {
  let total = 0;
  for (const [position, point] of data.entries()) {
    const assignedMedoid = medoids[assignments[position]];
    total += euclideanDistance(point, assignedMedoid);
  }
  return total;
}
Insert cell
// Cost of the initial clustering: summed distance from each point in `data`
// to the initial medoid selected by its entry in `assignments`.
calculateCost(data, initialMedoids, assignments)
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more