Published
Edited
May 29, 2019
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
pairwise_distances = {
// Observable cell: builds a flattened (num_points x num_points) symmetric matrix
// of pairwise distances between all high-dimensional points. Each entry is an
// object {source, target, distance} so the matrix can double as link data.
// initialize some variables we use inside our loops
let x_i, x_j, distance;
// make_array is another helper function that just generates a new empty array of a given size.
// squared(num_points) entries = one slot per (i, j) ordered pair.
const pw_dists = make_array(squared(num_points));
for(let i = 0; i < num_points; i++){
x_i = data[i].location;
// deal with the point itself here: distance from a point to itself is 0.
pw_dists[mat_ind(i,i)] = {source: i, target: i, distance:0};
// only walk j > i, then mirror each result, so each pair is computed once.
for(let j = i + 1; j < num_points; j++){
x_j = data[j].location;
distance = calc_distance(x_i, x_j);
// fill both (i,j) and (j,i) — the distance matrix is symmetric.
pw_dists[mat_ind(i,j)] = {source: i, target: j, distance};
pw_dists[mat_ind(j,i)] = {source: j, target: i, distance};
}
}
return pw_dists
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Shannon entropy of a probability vector: -sum(p * log(p)).
// Probabilities at or below 1e-7 contribute nothing (lim p->0 of p*log(p) is 0,
// and tiny values would only add floating-point noise).
function calc_entropy(probs){
  let entropy = 0;
  for(const p of probs){
    if(p > 1e-7){
      entropy -= p * log(p);
    }
  }
  return entropy;
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
target_entropy = log(perplexity) // entropy level we're shooting for: perplexity = exp(entropy), so the desired entropy is log(perplexity)
Insert cell
Insert cell
Insert cell
// Un-normalized gaussian affinity for a pairwise distance:
// exp(-d^2 / (2 * variance)). Normalization happens later in
// calc_gaussian_probs, so the 1/sqrt(2*pi*variance) factor is omitted.
function gaussian_prob(distance, variance){
  const exponent = -squared(distance) / (2 * variance);
  return exp(exponent);
}
Insert cell
Insert cell
Insert cell
Insert cell
// Converts a vector of pairwise distances into a normalized gaussian
// probability vector for a given variance (bandwidth).
//
// distances: array of distance values from one point to every point
//            (the self-distance is 0 and is assigned probability 0).
// variance:  gaussian bandwidth used by gaussian_prob.
// returns:   array of the same length whose entries sum to 1.
//
// NOTE(review): if every distance is 0 the normalization divides by 0 and
// yields NaNs — callers appear to guarantee at least one non-zero distance.
function calc_gaussian_probs(distances, variance){
  let sum_probs = 0;

  // calc un-normalized probs and accumulate sum of probs for normalization.
  // (the original callback declared an unused index parameter — removed.)
  return distances.map(dist => {
    const curr_prob = dist !== 0 ? gaussian_prob(dist, variance) : 0;
    sum_probs += curr_prob; // accumulate the sum
    return curr_prob;       // send probability to our array
  }).map(p => p / sum_probs); // normalize using the accumulated sum
}
Insert cell
Insert cell
Insert cell
Insert cell
P_i_given_j = {
// Observable cell: flattened matrix of conditional probabilities for the
// high-dimensional data. Row i holds the probability vector returned by the
// per-point variance search (presumably P(j|i) — the naming suggests the
// transpose; verify against variance_binary_search).
let p_row, i, j;
const P_i_given_j = make_array(num_points*num_points);
for(i = 0; i < num_points; i++){
// binary-search the gaussian variance for point i so its distribution's
// entropy matches target_entropy, and take the resulting probability row.
p_row = variance_binary_search(i, pairwise_distances) // probs from best variance.
for(j=0;j<num_points;j++) {
P_i_given_j[mat_ind(i,j)] = p_row[j];
}
}
return P_i_given_j
}
Insert cell
Insert cell
P = {
// Observable cell: symmetrized joint probabilities for the high-dimensional
// data: P_ij = (P(i|j) + P(j|i)) / (2N), floored at 1e-10 for stability.
const P = make_array(num_points*num_points);
for(let i=0; i<num_points; i++) {
for(let j=0; j<num_points; j++) {
P[mat_ind(i,j)] = Math.max(
// average the two conditionals and divide by num_points so the whole
// matrix sums to (approximately) 1.
(P_i_given_j[mat_ind(i,j)] + P_i_given_j[mat_ind(j,i)])/(num_points*2),
1e-10 // substitute 0s with very small number to improve stability due to rounding errors.
);
}
}
return P
}
Insert cell
Insert cell
Insert cell
// Un-normalized Student-t (1 degree of freedom) affinity used for the
// low-dimensional map: 1 / (1 + d^2). Normalized later in find_Q.
function t_prob(distance){
  const denom = 1 + squared(distance);
  return 1 / denom;
}
Insert cell
function find_Q(Y){
// takes a given low-dimensional mapping Y and produces the normalized
// Student-t probability matrix Q for it (flattened num_points x num_points).
const Q = make_array(num_points*num_points);
let distances_from_i,
q_sum = 0,
dist_ij,
q_ij; // given combination's q value
// scan over all ordered pairs of points. Because j runs over ALL points
// (not just j > i), every unordered pair is visited twice, so q_sum correctly
// accumulates over all ordered pairs with i !== j.
for(let i=0; i<num_points; i++){
for(let j=0; j<num_points; j++){
// get distance for pair of points
dist_ij = calc_distance(Y[i], Y[j]);
// calculate un-normalized probability
q_ij = t_prob(dist_ij);
// add to sum of q's for later normalization; self-pairs are excluded.
q_sum += i !== j ? q_ij: 0;
// fill in big Q matrix with the found q.
// NOTE(review): the (j,i) write is redundant — the loop reaches (j,i)
// itself and writes the same value. Also note the diagonal entries end
// up as 1/q_sum (t_prob(0) = 1), not 0 — confirm consumers skip i === j.
Q[mat_ind(i,j)] = q_ij;
Q[mat_ind(j,i)] = q_ij;
}
}
// normalize Q values
return Q.map(q => q/q_sum);
}
Insert cell
Insert cell
Insert cell
Insert cell
// Single-term Kullback-Leibler divergence contribution: p * log(p / q).
// Summing this over all pairs gives the total KL divergence KL(P || Q).
function KLD(p, q){
  const ratio = p / q;
  return p * log(ratio);
}
Insert cell
Insert cell
Insert cell
Insert cell
// Computes point i's contribution to the KL-divergence cost and its gradient
// with respect to the low-dimensional position Y[i].
//
// Y:    array of low-dimensional points (each an array of map_dims values).
// i:    index of the point whose cost/gradient we compute.
// P, Q: flattened joint-probability matrices (high- and low-dimensional).
// iter: current iteration, used to decide whether early exaggeration applies.
// returns: {cost, grad} where grad has map_dims entries.
//
// BUG FIX: the original used `break` when i === j, which exited the loop as
// soon as j reached i and silently dropped every pair with j > i. `continue`
// skips only the self-pair, as intended.
function cost_grad_calc(Y, i, P, Q, iter){
  let grad = make_array(map_dims).map( () => 0),
      cost = 0,
      // the paper recommends exaggerating the high-dimensional probabilities
      // for a few iterations at first; this checks if we are still exaggerating.
      early_exag = iter < early_exag_itts? early_exag_amount: 1,
      grad_scaler, p_ij, q_ij;
  for(let j=0; j<num_points; j++){
    if(i === j) continue; // skip the self-pair, but keep scanning j > i
    p_ij = P[mat_ind(i,j)];
    q_ij = Q[mat_ind(i,j)];

    cost += KLD(p_ij,q_ij); // accumulate our cost/KLD measure

    // NOTE(review): the standard t-SNE gradient factor is
    // 4*(p_ij - q_ij)/(1 + ||y_i - y_j||^2); using q_ij*q_ij with a
    // normalized q differs from that — confirm against the derivation used.
    grad_scaler = 4*(early_exag*p_ij - q_ij*q_ij);
    // loop over the mapping dimensions and update gradient
    for(let d=0; d<map_dims; d++){
      grad[d] += grad_scaler * (Y[i][d] - Y[j][d]);
    }
  }
  return {cost, grad}
}
Insert cell
Insert cell
function step_down_gradient(iter, Y, Y_step){
// Performs one gradient-descent step with momentum over every point of the
// low-dimensional mapping, updating Y and Y_step IN PLACE, then re-centers Y.
// iter:   current iteration (selects the momentum value).
// Y:      current low-dimensional positions (mutated).
// Y_step: previous update amounts per point/dimension (mutated).
// returns {Y_new, Y_step, cost} — Y_new is the same (mutated) Y array.
const momentum = momentums[iter > momentum_switch ? 1: 0],
Q = find_Q(Y);
let average_y = make_array(map_dims).map(d=>0),
cost_total = 0,
grad_id,
step_id,step_id_new;
for(let i=0; i<num_points; i++){
// P here is the notebook-level joint-probability cell, not a parameter.
let {cost, grad} = cost_grad_calc(Y, i, P, Q, iter);
cost_total += cost;

// update each dimension of our mapping
for(let d=0; d<map_dims; d++){
// pull out the gradient, last step and gain for point i dimension d
grad_id = grad[d];
step_id = Y_step[i][d];

// compute the update amount: momentum term minus the scaled gradient
step_id_new = momentum*step_id - learning_rate*grad_id;
Y_step[i][d] = step_id_new

// update point i's y value at dimension d
Y[i][d] += step_id_new;

// accumulate the mean so we can center
average_y[d] += Y[i][d];
}
}

// center Y so the embedding doesn't drift away from the origin
for(let i=0; i<num_points; i++){
for(let d=0; d<map_dims; d++){
Y[i][d] -= average_y[d]/num_points
}
}
return {Y_new: Y, Y_step, cost: cost_total}
}
Insert cell
Insert cell
Y_and_costs = {
// Observable generator cell: runs the full t-SNE optimization loop, yielding
// the current embedding and cost history after every gradient step so the
// notebook can animate progress.
// referencing reset_button makes this cell re-run when the button is pressed.
reset_button;
let iter = 1,
costs = [],
gradient_step,
// random initial positions for the embedding, and zeroed previous steps.
Y_new = initialize_points({n: num_points, dim: map_dims}),
Y_step = initialize_points({n:num_points, dim: map_dims, fill:0});

while(iter < num_itterations){
iter++
gradient_step = step_down_gradient(iter, Y_new, Y_step);
Y_new = gradient_step.Y_new;
Y_step = gradient_step.Y_step;
costs.push(gradient_step.cost);
// yield intermediate state so the visualization updates each iteration.
yield {Y:Y_new, costs}
}
// final yield so downstream cells see the finished embedding.
yield {Y:Y_new, costs}
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more