Published
Edited
Jun 21, 2019
23 stars
Insert cell
Insert cell
// Build a generator of random centroid locations in `ndim` dimensions.
// Returns a zero-argument function; each call yields a fresh array of `ndim`
// coordinates, every one drawn uniformly from [-spread/2, spread/2).
make_centroid = (ndim) => {
  const half_width = spread / 2;
  const draw_coordinate = d3.randomUniform(-half_width, half_width);
  return () => Array.from(new Array(ndim), () => draw_coordinate());
}
Insert cell
Insert cell
// Produce `n` random centroids, each an array of `ndim` coordinates.
// A single generator from make_centroid is reused for every centroid.
gen_centroids = (n, ndim) => {
  const next_center = make_centroid(ndim);
  return Array.from(new Array(n), () => next_center());
}
Insert cell
// The randomly placed cluster centers: number_of_centroids arrays, each holding number_of_dimensions coordinates.
centroids = gen_centroids(number_of_centroids, number_of_dimensions)
Insert cell
Insert cell
// Draw `nsamples` noisy points around a single centroid.
// Each sample is {group, location}: `group` is the given centroid_id and
// `location` is the centroid with independent normal noise (mean 0, standard
// deviation sample_std_dev) added to every coordinate.
sample_from_centroid = (nsamples, centroid, centroid_id) => {
  const noise = d3.randomNormal(0, sample_std_dev);
  const jitter = (coordinate) => coordinate + noise();
  return Array.from(new Array(nsamples), () => ({
    group: centroid_id,
    location: centroid.map(jitter),
  }));
}
Insert cell
// Sample `nsamples` points around every centroid and return them as one flat array.
// Each point keeps the fields produced by sample_from_centroid (including the index
// of the centroid it came from) and additionally gets a running `id` equal to its
// position in the returned array.
function gen_data_from_centroids(centroids, nsamples){
  const all_points = [];
  centroids.forEach((centroid, centroid_index) => {
    for (const sample of sample_from_centroid(nsamples, centroid, centroid_index)) {
      // all_points.length is the position this point is about to occupy.
      all_points.push(Object.assign({id: all_points.length}, sample));
    }
  });
  return all_points;
}
Insert cell
// Generate our data: number_of_samples points around each centroid, each carrying
// the index of its centroid (group) and a running id.
point_data = gen_data_from_centroids(centroids, number_of_samples)
Insert cell
Insert cell
pairwise_distances = {
const distances = [];
for(let i = 0; i < point_data.length; i++){
for(let j = i; j < point_data.length; j++){
const p1_loc = point_data[i].location;
const p2_loc = point_data[j].location;
const sum_of_square_diffs = p1_loc
.map((d, ind) => Math.pow(d - p2_loc[ind], 2))
.reduce((summed, d) => summed + d, 0);
distances.push({source: i, target: j, value: Math.sqrt(sum_of_square_diffs)})
}
}
// filter out the self links as they are not needed.
return distances.filter(({value}) => value > 0)
}
Insert cell
Insert cell
number_of_dimensions = 10 // dimensionality of our generated data (coordinates per centroid and per sample)
Insert cell
spread = 25 // width of the uniform range, centered on 0, from which each centroid's coordinate is drawn in every dimension
Insert cell
sample_std_dev = 1 // standard deviation of the zero-mean normal noise added to each centroid coordinate when sampling
Insert cell
number_of_samples = 25 // how many samples are generated per centroid
Insert cell
number_of_centroids = 4 // how many centroids are generated
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more