Published
Edited
Jul 10, 2019
79 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
pairwise_distances = {
// initialize some variables we use inside our loops
let x_i, x_j, distance;
// make_array is another helper function that just generates a new empty array of a given size.
const pw_dists = make_array(squared(num_points));
for(let i = 0; i < num_points; i++){
x_i = data[i].location;
// deal with the point itself here.
pw_dists[mat_ind(i,i)] = {source: i, target: i, distance:0};
for(let j = i + 1; j < num_points; j++){
x_j = data[j].location;
distance = calc_distance(x_i, x_j);
pw_dists[mat_ind(i,j)] = {source: i, target: j, distance};
pw_dists[mat_ind(j,i)] = {source: j, target: i, distance};
}
}
return pw_dists
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
function calc_entropy(probs){
  // Shannon entropy of a probability vector: -sum_p p * log(p).
  let entropy = 0;
  for (const p of probs) {
    // Super small probabilities contribute ~0 and would destabilize log(p),
    // so they are skipped entirely.
    if (p > 1e-7) {
      entropy -= p * log(p);
    }
  }
  return entropy;
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Perplexity is the exponential of entropy, so the entropy level we binary
// search for is simply log(perplexity).
target_entropy = log(perplexity) // entropy level we're shooting for given perplexity
Insert cell
Insert cell
Insert cell
function gaussian_prob(distance, variance){
  // Un-normalized Gaussian kernel: exp(-d^2 / (2 * variance)).
  const exponent = -squared(distance) / (2 * variance);
  return exp(exponent);
}
Insert cell
Insert cell
Insert cell
Insert cell
function calc_gaussian_probs(distances, variance){
  // Converts one row of pairwise distances into a normalized probability
  // vector using a Gaussian kernel with the supplied variance.
  // A distance of exactly 0 marks the point itself and gets probability 0.
  //
  // Fix: the original accumulated the sum inside a .map callback (a side
  // effect inside map); this version keeps the map pure and sums with reduce.
  const raw_probs = distances.map(
    dist => dist !== 0 ? gaussian_prob(dist, variance) : 0
  );
  // Normalize so the probabilities sum to one.
  const sum_probs = raw_probs.reduce((sum, p) => sum + p, 0);
  return raw_probs.map(p => p / sum_probs);
}
Insert cell
Insert cell
Insert cell
Insert cell
P_i_given_j = {
let p_row, i, j;
const P_i_given_j = make_array(num_points*num_points);
for(i = 0; i < num_points; i++){
p_row = variance_binary_search(i, pairwise_distances) // probs from best variance.
for(j=0;j<num_points;j++) {
P_i_given_j[mat_ind(i,j)] = p_row[j];
}
}
return P_i_given_j
}
Insert cell
Insert cell
P = {
const P = make_array(num_points*num_points);
for(let i=0; i<num_points; i++) {
for(let j=0; j<num_points; j++) {
P[mat_ind(i,j)] = Math.max(
(P_i_given_j[mat_ind(i,j)] + P_i_given_j[mat_ind(j,i)])/(num_points*2),
1e-10 // substitute 0s with very small number to improve stability due to rounding errors.
);
}
}
return P
}
Insert cell
Insert cell
Insert cell
function t_prob(distance){
  // Un-normalized Student-t kernel (1 degree of freedom): 1 / (1 + d^2).
  const d_squared = squared(distance);
  return 1 / (1 + d_squared);
}
Insert cell
function find_Q(Y){
// Takes a given low-dimensional mapping Y and produces the NORMALIZED q
// matrix for it (the final .map divides by q_sum, so despite the original
// comment the returned values are normalized).
const Q = make_array(num_points*num_points);
let distances_from_i,
q_sum = 0,
dist_ij,
q_ij; // given combination's q value
// scan over all points
for(let i=0; i<num_points; i++){
for(let j=0; j<num_points; j++){
// get distance for pair of points
dist_ij = calc_distance(Y[i], Y[j]);
// calculate un-normalized probability
q_ij = t_prob(dist_ij);
// add to sum of q's for later normalization.
// The diagonal (i === j) is excluded from the sum but its q_ij is still
// stored below, so diagonal entries end up as t_prob(0)/q_sum rather than 0.
// NOTE(review): downstream code appears to skip the diagonal, so this looks
// intentional-but-fragile — confirm before relying on diagonal values.
q_sum += i !== j ? q_ij: 0;
// fill in big Q matrix with the found q.
// NOTE(review): since j already covers the full 0..num_points range, the
// mirror write below is redundant (each cell is written twice with the
// same value, assuming calc_distance is symmetric).
Q[mat_ind(i,j)] = q_ij;
Q[mat_ind(j,i)] = q_ij;
}
}
// normalize Q values
return Q.map(q => q/q_sum);
}
Insert cell
Insert cell
Insert cell
Insert cell
function KLD(p, q){
  // Pointwise Kullback-Leibler divergence contribution: p * log(p / q).
  const ratio = p / q;
  return p * log(ratio);
}
Insert cell
Insert cell
Insert cell
Insert cell
function cost_grad_calc(Y, i, P, Q, iter){
  // Computes point i's contribution to the KL-divergence cost and the
  // gradient of that cost with respect to Y[i] (one value per map dimension).
  let grad = make_array(map_dims).map( () => 0),
      cost = 0,
      // the paper recommends exaggerating the distances of the high dimensional
      // points for a few iterations at first; this checks if we are still
      // in the exaggeration phase.
      early_exag = iter < early_exag_itts ? early_exag_amount : 1,
      grad_scaler, p_ij, q_ij;
  for(let j = 0; j < num_points; j++){
    // BUG FIX: the original used `break` here, which aborted the whole loop
    // at j === i and silently dropped every pair with j > i. `continue`
    // skips only the self-pair, matching the stated intent
    // ("ignore the points to themselves").
    if(i === j) continue;
    p_ij = P[mat_ind(i,j)];
    q_ij = Q[mat_ind(i,j)];

    cost += KLD(p_ij, q_ij); // accumulate our cost/KLD measure

    grad_scaler = 4*(early_exag*p_ij - q_ij*q_ij);
    // loop over the mapping dimensions and update gradient
    for(let d = 0; d < map_dims; d++){
      grad[d] += grad_scaler * (Y[i][d] - Y[j][d]);
    }
  }
  return {cost, grad}
}
Insert cell
Insert cell
function step_down_gradient(iter, Y, Y_step){
  // One gradient-descent iteration with momentum. Mutates Y and Y_step in
  // place, then re-centers Y on the origin. Returns the updated mapping,
  // the per-point step matrix, and the total cost for this iteration.
  const momentum = momentums[iter > momentum_switch ? 1 : 0];
  const Q = find_Q(Y);
  const y_totals = make_array(map_dims).map(() => 0);
  let cost_total = 0;
  for (let point = 0; point < num_points; point++) {
    const { cost, grad } = cost_grad_calc(Y, point, P, Q, iter);
    cost_total += cost; // accumulate the cost/KLD measure over all points

    // update each dimension of our mapping
    for (let d = 0; d < map_dims; d++) {
      // momentum step: blend the previous step with the new gradient direction
      const new_step = momentum * Y_step[point][d] - learning_rate * grad[d];
      Y_step[point][d] = new_step;

      // move point `point` along dimension d
      Y[point][d] += new_step;

      // track per-dimension totals so we can subtract the mean afterwards
      y_totals[d] += Y[point][d];
    }
  }

  // center Y so the embedding doesn't drift away from the origin
  for (let point = 0; point < num_points; point++) {
    for (let d = 0; d < map_dims; d++) {
      Y[point][d] -= y_totals[d] / num_points;
    }
  }
  return { Y_new: Y, Y_step, cost: cost_total };
}
Insert cell
Insert cell
Y_and_costs = {
// Generator cell: runs gradient descent and yields the current embedding
// plus the cost history after every iteration, so downstream cells can
// animate the optimization as it progresses.
// Referencing reset_button makes this cell re-run (restart the descent)
// whenever the button cell changes.
reset_button;
let iter = 1,
costs = [],
gradient_step,
Y_new = initialize_points({n: num_points, dim: map_dims}),
// Step matrix starts at all zeros (fill: 0) so the first momentum term is 0.
Y_step = initialize_points({n:num_points, dim: map_dims, fill:0});

while(iter < num_itterations){
iter++
gradient_step = step_down_gradient(iter, Y_new, Y_step);
Y_new = gradient_step.Y_new;
Y_step = gradient_step.Y_step;
costs.push(gradient_step.cost);
yield {Y:Y_new, costs}
}
// Final yield repeats the last state so the cell settles on the finished
// embedding after the loop ends.
yield {Y:Y_new, costs}
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more