Published
Edited
Jul 10, 2019
79 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
pairwise_distances = {
// initialize some variables we use inside our loops
let x_i, x_j, distance;
// make_array is another helper function that just generates a new empty array of a given size.
const pw_dists = make_array(squared(num_points));
for(let i = 0; i < num_points; i++){
x_i = data[i].location;
// deal with the point itself here.
pw_dists[mat_ind(i,i)] = {source: i, target: i, distance:0};
for(let j = i + 1; j < num_points; j++){
x_j = data[j].location;
distance = calc_distance(x_i, x_j);
pw_dists[mat_ind(i,j)] = {source: i, target: j, distance};
pw_dists[mat_ind(j,i)] = {source: j, target: i, distance};
}
}
return pw_dists
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
function calc_entropy(probs){
  // Shannon entropy of a probability vector: -sum_p p * log(p).
  let entropy = 0;
  for (const p of probs) {
    // Super small probabilities contribute ~0 and would destabilize log(p),
    // so they are skipped entirely.
    if (p > 1e-7) {
      entropy -= p * log(p);
    }
  }
  return entropy;
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Perplexity is the exponential of entropy, so the entropy level we binary
// search for is simply log(perplexity).
target_entropy = log(perplexity) // entropy level we're shooting for given perplexity
Insert cell
Insert cell
Insert cell
function gaussian_prob(distance, variance){
  // Un-normalized Gaussian kernel: exp(-d^2 / (2 * variance)).
  const exponent = -squared(distance) / (2 * variance);
  return exp(exponent);
}
Insert cell
Insert cell
Insert cell
Insert cell
function calc_gaussian_probs(distances, variance){
  // Converts one row of pairwise distances into a normalized probability
  // vector using a Gaussian kernel with the supplied variance.
  // A distance of exactly 0 marks the point itself and gets probability 0.
  //
  // Fix: the original accumulated the sum inside a .map callback (a side
  // effect inside map); this version keeps the map pure and sums with reduce.
  const raw_probs = distances.map(
    dist => dist !== 0 ? gaussian_prob(dist, variance) : 0
  );
  // Normalize so the probabilities sum to one.
  const sum_probs = raw_probs.reduce((sum, p) => sum + p, 0);
  return raw_probs.map(p => p / sum_probs);
}
Insert cell
Insert cell
Insert cell
Insert cell
P_i_given_j = {
let p_row, i, j;
const P_i_given_j = make_array(num_points*num_points);
for(i = 0; i < num_points; i++){
p_row = variance_binary_search(i, pairwise_distances) // probs from best variance.
for(j=0;j<num_points;j++) {
P_i_given_j[mat_ind(i,j)] = p_row[j];
}
}
return P_i_given_j
}
Insert cell
Insert cell
P = {
const P = make_array(num_points*num_points);
for(let i=0; i<num_points; i++) {
for(let j=0; j<num_points; j++) {
P[mat_ind(i,j)] = Math.max(
(P_i_given_j[mat_ind(i,j)] + P_i_given_j[mat_ind(j,i)])/(num_points*2),
1e-10 // substitute 0s with very small number to improve stability due to rounding errors.
);
}
}
return P
}
Insert cell
Insert cell
Insert cell
function t_prob(distance){
  // Un-normalized Student-t kernel (1 degree of freedom): 1 / (1 + d^2).
  const d_squared = squared(distance);
  return 1 / (1 + d_squared);
}
Insert cell
function find_Q(Y){
// Takes a given low-dimensional mapping Y and produces the NORMALIZED q
// matrix for it (the final .map divides by q_sum, so despite the original
// comment the returned values are normalized).
const Q = make_array(num_points*num_points);
let distances_from_i,
q_sum = 0,
dist_ij,
q_ij; // given combination's q value
// scan over all points
for(let i=0; i<num_points; i++){
for(let j=0; j<num_points; j++){
// get distance for pair of points
dist_ij = calc_distance(Y[i], Y[j]);
// calculate un-normalized probability
q_ij = t_prob(dist_ij);
// add to sum of q's for later normalization.
// The diagonal (i === j) is excluded from the sum but its q_ij is still
// stored below, so diagonal entries end up as t_prob(0)/q_sum rather than 0.
// NOTE(review): downstream code appears to skip the diagonal, so this looks
// intentional-but-fragile — confirm before relying on diagonal values.
q_sum += i !== j ? q_ij: 0;
// fill in big Q matrix with the found q.
// NOTE(review): since j already covers the full 0..num_points range, the
// mirror write below is redundant (each cell is written twice with the
// same value, assuming calc_distance is symmetric).
Q[mat_ind(i,j)] = q_ij;
Q[mat_ind(j,i)] = q_ij;
}
}
// normalize Q values
return Q.map(q => q/q_sum);
}
Insert cell
Insert cell
Insert cell
Insert cell
function KLD(p, q){
  // Pointwise Kullback-Leibler divergence contribution: p * log(p / q).
  const ratio = p / q;
  return p * log(ratio);
}
Insert cell
Insert cell
Insert cell
Insert cell
function cost_grad_calc(Y, i, P, Q, iter){
  // Computes point i's contribution to the KL-divergence cost and the
  // gradient of that cost with respect to Y[i] (one value per map dimension).
  let grad = make_array(map_dims).map( () => 0),
      cost = 0,
      // the paper recommends exaggerating the distances of the high dimensional
      // points for a few iterations at first; this checks if we are still
      // in the exaggeration phase.
      early_exag = iter < early_exag_itts ? early_exag_amount : 1,
      grad_scaler, p_ij, q_ij;
  for(let j = 0; j < num_points; j++){
    // BUG FIX: the original used `break` here, which aborted the whole loop
    // at j === i and silently dropped every pair with j > i. `continue`
    // skips only the self-pair, matching the stated intent
    // ("ignore the points to themselves").
    if(i === j) continue;
    p_ij = P[mat_ind(i,j)];
    q_ij = Q[mat_ind(i,j)];

    cost += KLD(p_ij, q_ij); // accumulate our cost/KLD measure

    grad_scaler = 4*(early_exag*p_ij - q_ij*q_ij);
    // loop over the mapping dimensions and update gradient
    for(let d = 0; d < map_dims; d++){
      grad[d] += grad_scaler * (Y[i][d] - Y[j][d]);
    }
  }
  return {cost, grad}
}
Insert cell
Insert cell
function step_down_gradient(iter, Y, Y_step){
  // One gradient-descent iteration with momentum. Mutates Y and Y_step in
  // place, then re-centers Y on the origin. Returns the updated mapping,
  // the per-point step matrix, and the total cost for this iteration.
  const momentum = momentums[iter > momentum_switch ? 1 : 0];
  const Q = find_Q(Y);
  const y_totals = make_array(map_dims).map(() => 0);
  let cost_total = 0;
  for (let point = 0; point < num_points; point++) {
    const { cost, grad } = cost_grad_calc(Y, point, P, Q, iter);
    cost_total += cost; // accumulate the cost/KLD measure over all points

    // update each dimension of our mapping
    for (let d = 0; d < map_dims; d++) {
      // momentum step: blend the previous step with the new gradient direction
      const new_step = momentum * Y_step[point][d] - learning_rate * grad[d];
      Y_step[point][d] = new_step;

      // move point `point` along dimension d
      Y[point][d] += new_step;

      // track per-dimension totals so we can subtract the mean afterwards
      y_totals[d] += Y[point][d];
    }
  }

  // center Y so the embedding doesn't drift away from the origin
  for (let point = 0; point < num_points; point++) {
    for (let d = 0; d < map_dims; d++) {
      Y[point][d] -= y_totals[d] / num_points;
    }
  }
  return { Y_new: Y, Y_step, cost: cost_total };
}
Insert cell
Insert cell
Y_and_costs = {
// Generator cell: runs gradient descent and yields the current embedding
// plus the cost history after every iteration, so downstream cells can
// animate the optimization as it progresses.
// Referencing reset_button makes this cell re-run (restart the descent)
// whenever the button cell changes.
reset_button;
let iter = 1,
costs = [],
gradient_step,
Y_new = initialize_points({n: num_points, dim: map_dims}),
// Step matrix starts at all zeros (fill: 0) so the first momentum term is 0.
Y_step = initialize_points({n:num_points, dim: map_dims, fill:0});

while(iter < num_itterations){
iter++
gradient_step = step_down_gradient(iter, Y_new, Y_step);
Y_new = gradient_step.Y_new;
Y_step = gradient_step.Y_step;
costs.push(gradient_step.cost);
yield {Y:Y_new, costs}
}
// Final yield repeats the last state so the cell settles on the finished
// embedding after the loop ends.
yield {Y:Y_new, costs}
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more