t-SNE in plain javascript / Nick Strayer

Nick Strayer

Engineer on the Shiny team at RStudio. Biostats Ph.D. from Vanderbilt.

Workspace

Published

Edited

Feb 2, 2018

1 fork

1 star

pairwise_distances = {

let x_i, x_j, distance;

const pw_dists = new Array(num_points*num_points);

for(let i = 0; i < num_points; i++){

x_i = data[i].location;

for(let j = i+1; j < num_points; j++){

x_j = data[j].location;

distance = calc_distance(x_i, x_j);

pw_dists[i*num_points+j] = {source: i, target: j, distance};

pw_dists[j*num_points+i] = {source: j, target: i, distance};

}

return pw_dists

}

target_entropy = log(perplexity) // entropy level we're shooting for given perplexity

function beta_binary_search(p_row, i, pw_dists, entropy_tol = 1e-3, max_attempts = 40){

let beta_min = -Infinity, // beta upper bound

beta_max = Infinity, // beta lower bound

beta = 1, // initial beta value

searching_for_beta = true,

attempts = 0; // number of iterations through while looking

while(searching_for_beta){

// binary search to find precision beta so our output entropy is what we want.

// we need to zero out our sum of probabilities and entropy at each iteration

let sum_p = 0;

let entropy = 0; // initial entropy for this loop

attempts++ // increment out attempts counter

// first we calculate the vector of probabilties for pi_{all other points}

for(let j=0; j<num_points; j++){

//calculate the p_(i_j), give diagonals 0 as we don't care about them.

p_row[j] = i === j ? 0 : exp(-pw_dists[i*num_points+j].distance * beta);

// add to our sum of probabilities holder with this conditional

sum_p += p_row[j];

}

// normalize our conditional probs and compute entropy

for(let j = 0; j < num_points; j++){

// divide conditional prob by the sum of all in row to normalize.

p_row[j] = sum_p !== 0 ? p_row[j]/sum_p : 0;

// if the conditional probability is bigger than our threshold, subtract its log from our entropy.

if(p_row[j] > 1e-7){

entropy -= p_row[j]*log(p_row[j])

}

// check where our entropy lies in context to the target

if(entropy > target_entropy){ // distribution is too even.

beta_min = beta; // move our min bound up

// set new beta based on our bounds.

beta = beta_max === Infinity ? beta*2 : (beta+beta_max)/2;

} else { // distribution is too clumped.

beta_max = beta; // move our max bound down

// set new beta based on our bounds.

beta = beta_min === -Infinity ? beta/2 : (beta+beta_min)/2;

}

const too_many_attempts = attempts > max_attempts;

const close_to_target = Math.abs(entropy - target_entropy) < entropy_tol;

searching_for_beta = !(too_many_attempts || close_to_target)

}

return p_row

}

P_i_given_j = {

const P_i_given_j = make_array(num_points*num_points);

const p_row = make_array(num_points);

for(let i = 0; i < num_points; i++){

// run binary search to find the best beta.

let new_p_row = beta_binary_search(p_row, i, pairwise_distances)

for(let j=0;j<num_points;j++) {

P_i_given_j[i*num_points+j] = p_row[j];

}

return P_i_given_j

}

P = {

const P = make_array(num_points*num_points);

for(let i=0; i<num_points; i++) {

for(let j=0; j<num_points; j++) {

P[i*num_points+j] = Math.max(

(P_i_given_j[i*num_points+j] + P_i_given_j[j*num_points+i])/(num_points*2),

1e-10

);

}

return P

}

function find_Q(Y_curr){

// takes the a given Y mapping and produces the unnormalized q values for it.

const Q = make_array(num_points*num_points);

let sum_q = 0,

q_ij, // given combination's q value

dim_sum; // dimension sum

for(let i=0; i<num_points; i++){

for(let j=0; j<num_points; j++){

//scan over the mapping dimensions of i and j and sum their squared differences

dim_sum = make_array(map_dims)

.reduce((sum, _, dim) => squared(Y_curr[i][dim] - Y_curr[j][dim]));

q_ij = 1/(1 + dim_sum); // set the q value for this combination

Q[i*num_points+j] = q_ij; // assign it to the big Q array

Q[j*num_points+i] = q_ij;

sum_q += 2*q_ij; // accumulate for later normalization

}

// normalize Q values

return Q.map(q => max(q/sum_q, 1e-99));

}

function calc_cost_grad(Y, iters){

let cost_acc = 0,

pre_mult, // this is poorly named;

grad_sum,

early_exag = iters < early_exag_itts? early_exag_amount: 1;

const Q = find_Q(Y),

grad_acc = make_array(num_points);

for(let i=0; i<num_points; i++){

grad_sum = make_array(map_dims).map(()=>0); // initialize array of zeros for gradient accumulation

for(let j=0; j<num_points; j++){

cost_acc += -P[i*num_points+j] * log(Q[i*num_points+j]);

// this may be a mistake

pre_mult = 4*(early_exag*P[i*num_points+j] - Q[i*num_points+j])*Q[i*num_points+j]

grad_sum = grad_sum.map((grad, dim) => grad + (pre_mult * (Y[i][dim] - Y[j][dim])))

}

grad_acc[i] = grad_sum;

}

return {cost: cost_acc, grad: grad_acc}

}

Y_costs = {

let iters = 0

const costs = [];

// initial random y positions along with a vector to store the last y gradient step and the gains.

const Y_new = initialize_points({n: num_points, dim: map_dims}),

Y_step = initialize_points({n:num_points, dim: map_dims, fill:0}),

Y_gains = initialize_points({n: num_points, dim: map_dims, fill: 1});

let average_y = make_array(map_dims);

// calculate the cost and gradient for current y locations

while(iters < num_itterations){

iters++

average_y = average_y.map(() => 0) // zero out y average for new iteration

let {cost, grad} = calc_cost_grad(Y_new, iters),

momentum = momentums[iters > momentum_switch ? 1: 0],

grad_id,

step_id,

gain_id,

gain_id_new,

step_id_new;

costs.push(cost);

for(let i=0; i<num_points; i++){

for(let d=0; d<map_dims; d++){

// pull out the gradient, last step and gain for point i dimension d

grad_id = grad[i][d];

step_id = Y_step[i][d];

gain_id = Y_gains[i][d];

// compute the gain component and clamp above some constant

gain_id_new = max(

sign(gain_id) === sign(step_id) ?

gain_id * gain_scales_same:

gain_id + gain_scales_diff,

gain_clamp

)

Y_gains[i][d] = gain_id_new;

// compute the update amount

step_id_new = momentum*step_id - learning_rate*gain_id_new*grad_id;

Y_step[i][d] = step_id_new

// update the y value

Y_new[i][d] += step_id_new;

// accumulate the mean so we can center

average_y[d] += Y_new[i][d];

}

// center Y

for(let i=0; i<num_points; i++){

for(let d=0; d<map_dims; d++){

Y_new[i][d] -= average_y[d]/num_points

}

//yield Promises.delay(100,Y_new)

if(iters % 5) yield {Y:Y_new, costs}

}

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.

Learn more