pairwise_distances = {
  // initialize some variables we use inside our loops
  let x_i, x_j, distance;
  // make_array is another helper function that just generates a new empty array of a given size.
  const pw_dists = make_array(squared(num_points));
  for (let i = 0; i < num_points; i++) {
    x_i = data[i].location;
    // deal with the point itself here: its distance to itself is zero.
    pw_dists[mat_ind(i, i)] = {source: i, target: i, distance: 0};
    for (let j = i + 1; j < num_points; j++) {
      x_j = data[j].location;
      distance = calc_distance(x_i, x_j);
      // distances are symmetric, so fill in (i, j) and (j, i) at the same time.
      pw_dists[mat_ind(i, j)] = {source: i, target: j, distance};
      pw_dists[mat_ind(j, i)] = {source: j, target: i, distance};
    }
  }
  return pw_dists;
}
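
The cell above leans on a handful of helpers (make_array, squared, mat_ind, calc_distance) that live in other cells of this notebook. As a rough sketch of the shapes they need to have here — assuming each location is an array of coordinates, row-major indexing into the flat pairwise array, and squared Euclidean distance (the usual choice for t-SNE); the notebook's own definitions may differ:

make_array_sketch = size => new Array(size).fill(0)       // "a new empty array of a given size"
squared_sketch = x => x * x
mat_ind_sketch = (i, j) => i * num_points + j              // flatten (row, column) into the 1D pairwise array
calc_distance_sketch = (a, b) =>                           // squared Euclidean distance between two locations
  a.reduce((sum, a_d, d) => sum + squared_sketch(a_d - b[d]), 0)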
target_entropy = log(perplexity) // entropy level we're shooting for given perplexity
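
Perplexity is just the exponential of the Shannon entropy of a point's conditional distribution (assuming log here is the natural logarithm), so aiming for a given perplexity is the same as aiming for its log as an entropy:

H(P_i) = -\sum_j p_{j|i} \log p_{j|i}, \qquad \mathrm{Perp}(P_i) = e^{H(P_i)} \;\Rightarrow\; H_{\mathrm{target}} = \log(\mathrm{perplexity})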
function gaussian_prob(distance, beta){
  return exp(-distance * beta);
}
function calc_gaussian_probs(distances, beta){
  return distances
    .map(dist => ({dist, prob: (dist !== 0 ? gaussian_prob(dist, beta) : 0)}));
}
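
Written out, gaussian_prob is the unnormalized Gaussian affinity from the t-SNE paper, with beta playing the role of the precision of point i's Gaussian (this assumes calc_distance returns squared distances):

p_{j|i} \propto \exp\!\left(-\beta_i \lVert x_i - x_j \rVert^2\right), \qquad \beta_i = \frac{1}{2\sigma_i^2}

calc_gaussian_probs zeroes out the dist === 0 case, matching the convention p_{i|i} = 0: a point is never treated as its own neighbor.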
function calc_entropy(probs){
  const sum_probs = probs.reduce((sum, d) => sum + d, 0);
  return probs.reduce(
    (sum, prob) => {
      const prob_normed = prob / sum_probs;
      return sum - (prob_normed > 1e-7 ? prob_normed * log(prob_normed) : 0);
    },
    0
  );
}
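
calc_entropy normalizes its input internally, so it can be handed the raw (unnormalized) Gaussian affinities. As a quick sanity check of the behavior the upcoming binary search relies on — a flat distribution over N neighbors has entropy log(N), a concentrated one has entropy near 0 (values below are illustrative, not from the notebook):

entropy_examples = ({
  uniform: calc_entropy([1, 1, 1, 1]),         // ≈ log(4) ≈ 1.386: every neighbor equally likely
  peaked: calc_entropy([1, 1e-4, 1e-4, 1e-4])  // close to 0: nearly all mass on one neighbor
})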
P_i_given_j = {
  const P_i_given_j = make_array(num_points*num_points);
  for (let i = 0; i < num_points; i++) {
    // run binary search to find the best beta.
    let p_row = beta_binary_search(i, pairwise_distances);
    for (let j = 0; j < num_points; j++) {
      P_i_given_j[i*num_points + j] = p_row[j];
    }
  }
  return P_i_given_j;
}
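
beta_binary_search is defined elsewhere in the notebook, but given the pieces above its job is pinned down: adjust beta for point i until the entropy of its Gaussian affinities hits target_entropy, then hand back the normalized row of conditional probabilities. A hypothetical sketch (the max_tries and tolerance parameters and the exact search bounds are assumptions):

function beta_binary_search_sketch(i, pw_dists, max_tries = 50, tolerance = 1e-4){
  // pull out the i-th row of distances from the flat pairwise array
  const dist_row = new Array(num_points).fill(0)
    .map((_, j) => pw_dists[mat_ind(i, j)].distance);

  let beta = 1, beta_min = -Infinity, beta_max = Infinity, probs = [];
  for (let tries = 0; tries < max_tries; tries++) {
    probs = calc_gaussian_probs(dist_row, beta).map(d => d.prob);
    const entropy = calc_entropy(probs);
    if (Math.abs(entropy - target_entropy) < tolerance) break;
    if (entropy > target_entropy) {
      // too spread out: raise beta to narrow the Gaussian
      beta_min = beta;
      beta = beta_max === Infinity ? beta * 2 : (beta + beta_max) / 2;
    } else {
      // too concentrated: lower beta to widen the Gaussian
      beta_max = beta;
      beta = beta_min === -Infinity ? beta / 2 : (beta + beta_min) / 2;
    }
  }
  // normalize so the row of p_{j|i} values sums to 1
  const sum_probs = probs.reduce((sum, p) => sum + p, 0);
  return probs.map(p => p / sum_probs);
}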
P = {
  const P = make_array(num_points*num_points);
  for (let i = 0; i < num_points; i++) {
    for (let j = 0; j < num_points; j++) {
      P[i*num_points + j] = Math.max(
        (P_i_given_j[i*num_points + j] + P_i_given_j[j*num_points + i]) / (num_points*2),
        1e-10 // substitute 0s with a very small number to improve stability against rounding errors.
      );
    }
  }
  return P;
}
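
This is the symmetrization step from the t-SNE paper: average the two conditional probabilities for each pair and divide by the number of points, so the full matrix P sums to 1:

p_{ij} = \frac{p_{j|i} + p_{i|j}}{2N}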
function find_Q(Y_curr){
  // takes a given Y mapping and produces the normalized q values for it.
  const Q = make_array(num_points*num_points);
  let sum_q = 0,
      q_ij,    // a given combination's q value
      dim_sum; // sum of squared differences across the mapping dimensions
  for (let i = 0; i < num_points; i++) {
    for (let j = 0; j < num_points; j++) {
      // scan over the mapping dimensions of i and j and sum their squared differences
      dim_sum = make_array(map_dims)
        .reduce((sum, _, dim) => sum + squared(Y_curr[i][dim] - Y_curr[j][dim]), 0);
      q_ij = 1/(1 + dim_sum);      // set the q value for this combination
      Q[i*num_points + j] = q_ij;  // assign it to the big Q array
      Q[j*num_points + i] = q_ij;
      sum_q += 2*q_ij;             // accumulate for later normalization
    }
  }
  // normalize Q values, clamping away from zero for numerical stability
  return Q.map(q => max(q/sum_q, 1e-99));
}
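
In the low-dimensional map, t-SNE swaps the Gaussian for a Student-t kernel with one degree of freedom, which is what the 1/(1 + dim_sum) term implements. The definition from the paper is

q_{ij} = \frac{\left(1 + \lVert y_i - y_j \rVert^2\right)^{-1}}{\sum_{k \neq l} \left(1 + \lVert y_k - y_l \rVert^2\right)^{-1}}

The heavy tails let moderately dissimilar points sit further apart in the map, which is what relieves the crowding problem.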
function calc_cost_grad(Y, iters){
  let cost_acc = 0,
      pre_mult, // per-pair multiplier shared across dimensions: 4 * (exaggerated p_ij - q_ij) * q_ij
      grad_sum,
      early_exag = iters < early_exag_itts ? early_exag_amount : 1;

  const Q = find_Q(Y),
        grad_acc = make_array(num_points);
  for (let i = 0; i < num_points; i++) {
    grad_sum = make_array(map_dims).map(() => 0); // initialize array of zeros for gradient accumulation
    for (let j = 0; j < num_points; j++) {
      // accumulate the cost for this pair
      cost_acc += -P[i*num_points + j] * log(Q[i*num_points + j]);
      // note: the exact t-SNE gradient uses the unnormalized student-t weight in place of the normalized q here
      pre_mult = 4*(early_exag*P[i*num_points + j] - Q[i*num_points + j])*Q[i*num_points + j];
      grad_sum = grad_sum.map((grad, dim) => grad + (pre_mult * (Y[i][dim] - Y[j][dim])));
    }
    grad_acc[i] = grad_sum;
  }
  return {cost: cost_acc, grad: grad_acc};
}
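
The quantity being accumulated is (up to a constant) the KL divergence between P and Q, and the gradient has the compact form from the t-SNE paper; early exaggeration just scales P for the first early_exag_itts iterations to push clusters to form early:

C = \mathrm{KL}(P \,\|\, Q) = \sum_{i \neq j} p_{ij} \log \frac{p_{ij}}{q_{ij}}, \qquad
\frac{\partial C}{\partial y_i} = 4 \sum_j (p_{ij} - q_{ij})(y_i - y_j)\left(1 + \lVert y_i - y_j \rVert^2\right)^{-1}

The code only tracks the -p log q part of the cost; the dropped sum of p log p is constant in Y, so it shifts the reported cost without affecting the gradient.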
Y_costs = {
  let iters = 0;
  const costs = [];
  // initial random y positions, along with arrays to store the last gradient step and the gains.
  const Y_new = initialize_points({n: num_points, dim: map_dims}),
        Y_step = initialize_points({n: num_points, dim: map_dims, fill: 0}),
        Y_gains = initialize_points({n: num_points, dim: map_dims, fill: 1});
  let average_y = make_array(map_dims);
  // calculate the cost and gradient for the current y locations, then take a gradient step
  while (iters < num_itterations) {
    iters++;
    average_y = average_y.map(() => 0); // zero out the y average for the new iteration
    let {cost, grad} = calc_cost_grad(Y_new, iters),
        momentum = momentums[iters > momentum_switch ? 1 : 0],
        grad_id,
        step_id,
        gain_id,
        gain_id_new,
        step_id_new;

    costs.push(cost);

    for (let i = 0; i < num_points; i++) {
      for (let d = 0; d < map_dims; d++) {
        // pull out the gradient, last step and gain for point i, dimension d
        grad_id = grad[i][d];
        step_id = Y_step[i][d];
        gain_id = Y_gains[i][d];
        // scale the gain based on whether the gradient agrees in sign with the last step, then clamp it above a constant
        gain_id_new = max(
          sign(grad_id) === sign(step_id) ?
            gain_id * gain_scales_same :
            gain_id + gain_scales_diff,
          gain_clamp
        );
        Y_gains[i][d] = gain_id_new;

        // compute the update amount
        step_id_new = momentum*step_id - learning_rate*gain_id_new*grad_id;
        Y_step[i][d] = step_id_new;

        // update the y value
        Y_new[i][d] += step_id_new;
        // accumulate the mean so we can re-center afterwards
        average_y[d] += Y_new[i][d];
      }
    }
    // center Y
    for (let i = 0; i < num_points; i++) {
      for (let d = 0; d < map_dims; d++) {
        Y_new[i][d] -= average_y[d]/num_points;
      }
    }
    // only yield every 5th iteration to keep re-rendering cheap
    if (iters % 5 === 0) yield {Y: Y_new, costs};
  }
}
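
Y_costs is a generator cell: every yield updates its value and re-runs any cell that references it, which is what drives the animation of the embedding. It also depends on initialize_points and a set of hyperparameter cells (momentums, momentum_switch, learning_rate, gain_scales_same, gain_scales_diff, gain_clamp, num_itterations) defined elsewhere in the notebook. A plausible sketch of initialize_points, assuming points start at small random offsets when no fill value is given:

initialize_points_sketch = ({n, dim, fill}) =>
  Array.from({length: n}, () =>
    Array.from({length: dim}, () =>
      fill !== undefined ? fill : (Math.random() - 0.5) * 1e-2  // small random start when no fill is given
    )
  )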