Published
Edited
Jul 13, 2019
1 star
Insert cell
Insert cell
Insert cell
// Reshape each attendance record into an {event, ind} pair, renaming
// `individual` to `ind`.
edges = southern_women.map(({event, individual}) => ({event, ind: individual}))
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
/**
 * Build the cluster bookkeeping for a bipartite stochastic block model.
 *
 * Every node of `bigraph` starts in its own freshly created cluster; the returned
 * helpers then let a sampler move nodes between clusters of the same type.
 *
 * @param {Object} bigraph - Graph exposing `a_type`, `b_type` and `all_nodes`. Each node
 *   must carry `id` and `type`; its `cluster` property is overwritten here.
 * @param {number} [epsilon=0.1] - Arbitrary constant that scales the probability of
 *   choosing a completely random cluster (see `get_prob_random_group`).
 * @returns {Object} `{clusters, create_cluster, get_random_cluster, clean_clusters,
 *   n_clusters, get_prob_random_group, swap_node_cluster, snapshot}`.
 */
make_bisbm = function(bigraph, epsilon = 0.1){
  const types = [bigraph.a_type, bigraph.b_type];
  const [a_type, b_type] = types;

  // Cluster id -> Cluster, kept separately for each node type.
  const clusters = {
    [a_type]: new Map(),
    [b_type]: new Map(),
  };

  // Running count of clusters ever created per type, so ids are never reused.
  const cluster_inds = {
    [a_type]: 0,
    [b_type]: 0,
  };

  // Start with every node alone in its own cluster.
  // (create_cluster is a hoisted function declaration, so it is usable here.)
  bigraph.all_nodes.forEach(node => {
    const cluster = create_cluster(node.type);
    cluster.add_member(node);
    node.cluster = cluster;
  });

  // Number of clusters currently registered for `type`.
  function n_clusters(type){
    return clusters[type].size;
  }

  // Create an empty cluster named `<type>_c<index>` and register it under `type`.
  function create_cluster(type){
    const cluster = new Cluster(`${type}_c${cluster_inds[type]++}`);
    clusters[type].set(cluster.id, cluster);
    return cluster;
  }

  // Move `node` into `cluster_to_swap`; a no-op when it is already a member.
  function swap_node_cluster(node, cluster_to_swap){
    if(node.cluster.id === cluster_to_swap.id){
      return;
    }
    node.cluster.remove_member(node);
    cluster_to_swap.add_member(node);
    node.cluster = cluster_to_swap;
  }

  // Choose a random cluster of the node's type. With probability
  // 1 / (number of clusters of that type + 1) a brand new cluster is created instead.
  function get_random_cluster(node) {
    const make_new_cluster = Math.random() < 1/(n_clusters(node.type) + 1);

    return make_new_cluster ?
      create_cluster(node.type) :
      sample(Array.from(clusters[node.type].values()));
  }

  // Drop every cluster that no longer has any members.
  function clean_clusters(){
    types.forEach(type => {
      for(const [id, cluster] of clusters[type].entries()){
        if(cluster.members.length === 0){
          clusters[type].delete(id);
        }
      }
    });
  }

  // Probability of proposing a completely random cluster: it shrinks as `num_edges`
  // grows and grows with epsilon and the number of clusters of `type`.
  function get_prob_random_group(type, num_edges){
    const size_scalar = epsilon*(n_clusters(type) + 1);
    return size_scalar / (num_edges + size_scalar);
  }

  // Record the current node -> cluster assignment, tagged with iteration `i`.
  // `cluster` holds the Cluster object itself (not a copy); `cluster_of_origin`
  // is the id of the cluster whose member list the node was read from.
  function snapshot(i){
    const node_status = [];
    const num_clusters = {};
    types.forEach(type => {
      num_clusters[type] = clusters[type].size;
      clusters[type].forEach(cluster => {
        cluster.members.forEach(node => {
          node_status.push({
            node: node.id,
            cluster: node.cluster,
            cluster_of_origin: cluster.id,
            type: node.type,
          });
        });
      });
    });
    return {i, node_status, num_clusters};
  }

  return {
    clusters,
    create_cluster,
    get_random_cluster,
    clean_clusters,
    n_clusters,
    get_prob_random_group,
    swap_node_cluster,
    snapshot,
  }
}
Insert cell
/**
 * A group of nodes, together with a flat list of every edge its members contribute.
 */
class Cluster {
  /**
   * @param {string} id - Identifier for this cluster.
   */
  constructor(id){
    this.id = id;
    // Nodes currently assigned to this cluster
    this.members = [];
    // One {source_id, target, count} record per edge of every member
    this.member_edges = [];
  }

  /**
   * Add a node to the cluster and record each of its edges.
   * @param {Object} node - Must expose `id` and an `edge_map` whose values are
   *   `{node, count}` records.
   */
  add_member(node){
    this.members.push(node);

    node.edge_map.forEach(edge => {
      this.member_edges.push({
        source_id: node.id, // Which member contributed this edge
        target: edge.node,  // Direct link to the node at the other end
        count: edge.count,  // Multiplicity of the edge
      });
    });
  }

  /**
   * Remove a node (matched by `id`) along with every edge it contributed.
   * @param {Object} node - Node to remove.
   */
  remove_member(node){
    this.members = this.members.filter(n => n.id !== node.id);
    this.member_edges = this.member_edges.filter(edge => edge.source_id !== node.id);
  }

  /**
   * Sum the members' edge counts by the cluster of each edge's target.
   * @returns {{clusters: Array, counts: number[]}} Parallel arrays: each distinct
   *   `target.cluster` value and the summed edge count into it, in first-seen order.
   */
  get_edges(){
    const edges = new Map();
    for(const {target, count} of this.member_edges){
      edges.set(target.cluster, (edges.get(target.cluster) || 0) + count);
    }
    return {clusters: Array.from(edges.keys()), counts: Array.from(edges.values())};
  }

  /**
   * Draw one of the clusters connected to this cluster, weighted by the number of
   * edges between the two clusters.
   * @returns {*} Whatever `weighted_sample` picks from the connected clusters.
   */
  sample_from_neighbors(){
    const all_edges = this.get_edges();
    return weighted_sample(
      normalize(all_edges.counts),
      all_edges.clusters
    );
  }
}
Insert cell
model_fit = {
const bisbm = make_bisbm(sw_graph);
const mcmc_step = () => {
console.log('Running MCMC Step')
// Starting from the first node
sw_graph.all_nodes.forEach((node,i) => {
console.log(`\tStarting node ${node.id}`)
// How many clusters of the current type do we have?
const n_type_clusters = bisbm.n_clusters(node.type);
// Randomly choose neighbor node of other type that the current node
const neighbor_node = node.get_random_edge();
const num_edges_into_neighbor_cluster = node.edges
.filter(e => e.node.cluster == neighbor_node.cluster)
.reduce((total_count, edge) => total_count + edge.count, 0);
// W/ prob proportional to number of edges node i has into group s...
const prob_of_rand_group = bisbm.get_prob_random_group(node.type, num_edges_into_neighbor_cluster);
// Choose a completely random group (could be new)...
// ...or get group from neighbor node's connections
const next_cluster = Math.random() < prob_of_rand_group ?
bisbm.get_random_cluster(node):
neighbor_node.cluster.sample_from_neighbors()
// Swap the nodes cluster with its new cluster value
bisbm.swap_node_cluster(node, next_cluster);
// Clean up any now empty clusters
bisbm.clean_clusters();
});
};
let snapshots = [];
for(let itt = 0; itt < num_itts; itt++){
// perform one MCMC step
mcmc_step();
const snapshot = bisbm.snapshot(itt);
// Record snapshot of results
snapshots.push(snapshot);
yield snapshots;
await Promises.delay(1);
}
}
Insert cell
// Number of MCMC iterations model_fit runs; one snapshot is recorded per iteration.
num_itts = 50;
Insert cell
Insert cell
Insert cell
Insert cell
chart = {
const svg = d3.create("svg")
.attr("viewBox", [0, 0, width, height]);

const is_event = RegExp('event');

const {nodes, links} = sankey(sankey_data);
svg.append("g")
.attr("stroke", "#000")
.selectAll("line")
.data(nodes)
.join("line")
.attr("x1", d => d.x0)
.attr("x2", d => d.x0)
.attr("y1", d => d.y0)
.attr("y2", d => d.y1)
.attr("stroke", 'grey')
.attr("stroke-width", 1)
const link = svg.append("g")
.attr("fill", "none")
.attr("stroke-opacity", 0.5)
.selectAll("g")
.data(links)
.join("g")
.style("mix-blend-mode", "multiply");

link.append("path")
.attr("d", d3.sankeyLinkHorizontal())
.attr("stroke", d => is_event.test(d.source.name) ? 'steelblue': 'orangered')
.attr("stroke-width", d => Math.max(1, d.width));

return svg.node();
}
Insert cell
sankey = {
const sankey = d3.sankey()
.nodeAlign(d3.sankeyCenter)
.nodeWidth(0.1)
.nodePadding(6)
.nodeId(n => n.name)
.extent([[1, 5], [width - 1, height - 5]]);

return ({nodes, links}) => sankey({
nodes: nodes.map(d => Object.assign({}, d)),
links: links.map(d => Object.assign({}, d))
});
}
Insert cell
// How many of the most recent snapshots sankey_data keeps (via slice(-num_itts_to_show)).
num_itts_to_show = 25;
Insert cell
sankey_data = {
const group_by_cluster = node_statuses => d3_arr.rollup(
node_statuses,
members => members.map(d => d.node),
n => n.cluster.id
);
const steps_to_show = model_fit.slice(-num_itts_to_show);
const clusters_by_step = steps_to_show.map(step => group_by_cluster(step.node_status));
let cluster_edges = [];
const cluster_nodes = new Set();
let last_step, current_step;
for(let i = 1; i < steps_to_show.length; i++){
last_step = clusters_by_step[i-1];
current_step = clusters_by_step[i];
// Go through each cluster from the last step and find how many of its members went
// to each of the clusters in the next step.
last_step.forEach((members_last, cluster_last) => {
current_step.forEach((members_current, cluster_current) => {
const n_shared_nodes = intersection(members_last, members_current).length;
if(n_shared_nodes !== 0){
const source = `${cluster_last}_step${i-1}`;
cluster_nodes.add(source);

const target = `${cluster_current}_step${i}`;
cluster_nodes.add(target);

cluster_edges.push({source, target, value: n_shared_nodes})
}
})
})
}
return {links: cluster_edges, nodes: [...cluster_nodes].map(d => ({name: d}))}
}
Insert cell
Insert cell
// Inspection cell: every node of both types in the graph.
sw_graph.all_nodes
Insert cell
// Inspection cell: every edge of the graph as node-object pairs with their counts.
sw_graph.edges
Insert cell
sw_graph = {
const sw_graph = new BiGraph('event', 'ind');
sw_graph.add_edges(edges);
return sw_graph;
}
Insert cell
/**
 * A single node of a bipartite graph, tracking its connections and cluster.
 */
class Node {
  /**
   * @param {*} id - Unique identifier for this node.
   * @param {*} type - Which of the two node types this node belongs to.
   * @param {Object} [props={}] - Any extra properties supplied for the node; kept
   *   as-is on `this.props` instead of being silently discarded.
   */
  constructor(id, type, props = {}){
    // Unique identifier for this node
    this.id = id;
    // Type of the node of the two possible
    this.type = type;
    // Map of connected node id -> {node, count}
    this.edge_map = new Map();
    // How many total edges this node has (repeated edges counted each time)
    this.degree = 0;
    // Cluster membership of this node
    this.cluster = null;
    // Extra caller-supplied properties
    this.props = props;
  }

  /**
   * Register a connection to `edge_node`: bump the count if the two are already
   * connected, otherwise add a new entry with a count of 1.
   * @param {Node} edge_node - Node at the other end of the edge.
   */
  set_edge(edge_node){
    const existing_edge = this.edge_map.get(edge_node.id);
    if(existing_edge){
      existing_edge.count++;
    } else {
      this.edge_map.set(edge_node.id, {node: edge_node, count: 1});
    }
    this.degree += 1;
  }

  /**
   * Add an edge between two nodes, recording it on both of them.
   * @param {Node} a_node
   * @param {Node} b_node
   */
  static new_edge(a_node, b_node){
    a_node.set_edge(b_node);
    b_node.set_edge(a_node);
  }

  /**
   * The node's connections as an array of `{node, count}` records (rather than a Map).
   */
  get edges(){
    return Array.from(this.edge_map.values());
  }

  /**
   * Pick one of the nodes connected to this node using `sample`.
   * @returns {Node} The node at the other end of the chosen connection.
   */
  get_random_edge(){
    const random_connection_id = sample(Array.from(this.edge_map.keys()));
    return this.edge_map.get(random_connection_id).node;
  }
}
Insert cell
/**
 * A bipartite graph: two named node types, with edges only ever added between a
 * node of the first type and a node of the second.
 */
class BiGraph {
  /**
   * @param {string} a_type - Name of the first node type.
   * @param {string} b_type - Name of the second node type.
   */
  constructor(a_type, b_type){
    Object.assign(this, {a_type, b_type});

    // Per-type registry of node id -> Node
    this.nodes = {
      [a_type]: new Map(),
      [b_type]: new Map(),
    };
  }

  /**
   * Create a node and store it in the registry for its type.
   * @param {Object} node_spec - `{id, type, ...other_props}`; the remaining
   *   properties are handed to the Node constructor as a third argument.
   * @returns {BiGraph} This graph, for chaining.
   */
  add_node({id, type, ...other_props}){
    const registry = this.nodes[type];
    registry.set(id, new Node(id, type, other_props));
    return this;
  }

  /**
   * Add several nodes at once.
   * @param {Object[]} node_arr - Node specs as accepted by add_node.
   * @returns {BiGraph} This graph, for chaining.
   */
  add_nodes(node_arr){
    for(const node_spec of node_arr){
      this.add_node(node_spec);
    }
    return this;
  }

  /**
   * Add an edge given as `{[a_type]: a_id, [b_type]: b_id}`, creating either
   * endpoint first if it is not in the graph yet.
   * @param {Object} edge - Holds one node id under each type name.
   * @returns {BiGraph} This graph, for chaining.
   */
  add_edge(edge){
    // Look up the endpoint of the given type, registering it on first sight
    const endpoint_for = type => {
      const id = edge[type];
      const registry = this.nodes[type];
      if(!registry.has(id)){
        this.add_node({id, type});
      }
      return registry.get(id);
    };

    const a_node = endpoint_for(this.a_type);
    const b_node = endpoint_for(this.b_type);

    // Record the connection on both endpoints
    Node.new_edge(a_node, b_node);

    return this;
  }

  /**
   * Add several edges at once.
   * @param {Object[]} edge_arr - Edges as accepted by add_edge.
   * @returns {BiGraph} This graph, for chaining.
   */
  add_edges(edge_arr){
    for(const edge of edge_arr){
      this.add_edge(edge);
    }
    return this;
  }

  /**
   * Every edge of the graph, listed from the first type's side. Each record holds
   * both endpoint ids (`<type>_id`), both node objects (`<type>`) and `count`.
   */
  get edges(){
    const {a_type, b_type} = this;
    const rows = [];
    for(const a_node of this.nodes[a_type].values()){
      for(const {node: b_node, count} of a_node.edges){
        rows.push({
          [`${a_type}_id`]: a_node.id,
          [`${b_type}_id`]: b_node.id,
          [a_type]: a_node,
          [b_type]: b_node,
          count,
        });
      }
    }
    return rows;
  }

  /**
   * All nodes of one type, as an array.
   * @param {string} type - One of the graph's two type names.
   */
  get_nodes(type){
    return [...this.nodes[type].values()];
  }

  /** All nodes of both types: the first type's nodes followed by the second's. */
  get all_nodes(){
    return this.get_nodes(this.a_type).concat(this.get_nodes(this.b_type));
  }

  /** Node counts keyed by each type name, plus the total under `all`. */
  get num_nodes(){
    const {a_type, b_type} = this;
    const a_total = this.nodes[a_type].size;
    const b_total = this.nodes[b_type].size;
    return {
      [a_type]: a_total,
      [b_type]: b_total,
      all: a_total + b_total,
    };
  }

  /** Factory equivalent to calling the constructor with `new`. */
  static create(a_type, b_type){
    return new this(a_type, b_type);
  }
}
Insert cell
Insert cell
Insert cell
// Height used for the chart's SVG viewBox and for the sankey layout extent.
height = 400
Insert cell
Insert cell
// Return the element of `arr` at the index gen_discrete_unif picks between the
// first and last positions (presumably uniform and inclusive — confirm against
// the imported helper).
sample = arr => {
  const last_index = arr.length - 1;
  return arr[gen_discrete_unif(0, last_index)];
}
Insert cell
// Do a pseudo deep copy on an array of objects.
import {copy_array} from "@nstrayer/javascript-statistics-snippets"
Insert cell
import {product} from "@nstrayer/javascript-statistics-snippets"
Insert cell
import {gen_discrete_unif} from "@nstrayer/javascript-statistics-snippets@84"
Insert cell
import {init_array} from "@nstrayer/javascript-statistics-snippets@84"
Insert cell
import {weighted_sample} from "@nstrayer/javascript-statistics-snippets"
Insert cell
import {unique} from "@nstrayer/javascript-statistics-snippets"
Insert cell
import {normalize} from "@nstrayer/javascript-statistics-snippets"
Insert cell
import {flatten} from "@nstrayer/javascript-statistics-snippets"
Insert cell
import {intersection} from "@nstrayer/javascript-statistics-snippets"
Insert cell
southern_women = [
{"event":"event_0","individual":"ind_0"},{"event":"event_1","individual":"ind_0"},{"event":"event_2","individual":"ind_0"},{"event":"event_3","individual":"ind_0"},{"event":"event_4","individual":"ind_0"},{"event":"event_5","individual":"ind_0"},{"event":"event_7","individual":"ind_0"},{"event":"event_8","individual":"ind_0"},{"event":"event_0","individual":"ind_1"},{"event":"event_1","individual":"ind_1"},{"event":"event_2","individual":"ind_1"},{"event":"event_4","individual":"ind_1"},{"event":"event_5","individual":"ind_1"},{"event":"event_6","individual":"ind_1"},{"event":"event_7","individual":"ind_1"},{"event":"event_1","individual":"ind_2"},{"event":"event_2","individual":"ind_2"},{"event":"event_3","individual":"ind_2"},{"event":"event_4","individual":"ind_2"},{"event":"event_5","individual":"ind_2"},{"event":"event_6","individual":"ind_2"},{"event":"event_7","individual":"ind_2"},{"event":"event_8","individual":"ind_2"},{"event":"event_0","individual":"ind_3"},{"event":"event_2","individual":"ind_3"},{"event":"event_3","individual":"ind_3"},{"event":"event_4","individual":"ind_3"},{"event":"event_5","individual":"ind_3"},{"event":"event_6","individual":"ind_3"},{"event":"event_7","individual":"ind_3"},{"event":"event_2","individual":"ind_4"},{"event":"event_3","individual":"ind_4"},{"event":"event_4","individual":"ind_4"},{"event":"event_6","individual":"ind_4"},{"event":"event_2","individual":"ind_5"},{"event":"event_4","individual":"ind_5"},{"event":"event_5","individual":"ind_5"},{"event":"event_7","individual":"ind_5"},{"event":"event_4","individual":"ind_6"},{"event":"event_5","individual":"ind_6"},{"event":"event_6","individual":"ind_6"},{"event":"event_7","individual":"ind_6"},{"event":"event_5","individual":"ind_7"},{"event":"event_7","individual":"ind_7"},{"event":"event_8","individual":"ind_7"},{"event":"event_4","individual":"ind_8"},{"event":"event_6","individual":"ind_8"},{"event":"event_7","individual":"ind_8"},{"event":"event_8","individual":
"ind_8"},{"event":"event_6","individual":"ind_9"},{"event":"event_7","individual":"ind_9"},{"event":"event_8","individual":"ind_9"},{"event":"event_11","individual":"ind_9"},{"event":"event_7","individual":"ind_10"},{"event":"event_8","individual":"ind_10"},{"event":"event_9","individual":"ind_10"},{"event":"event_11","individual":"ind_10"},{"event":"event_7","individual":"ind_11"},{"event":"event_8","individual":"ind_11"},{"event":"event_9","individual":"ind_11"},{"event":"event_11","individual":"ind_11"},{"event":"event_12","individual":"ind_11"},{"event":"event_13","individual":"ind_11"},{"event":"event_6","individual":"ind_12"},{"event":"event_7","individual":"ind_12"},{"event":"event_8","individual":"ind_12"},{"event":"event_9","individual":"ind_12"},{"event":"event_11","individual":"ind_12"},{"event":"event_12","individual":"ind_12"},{"event":"event_13","individual":"ind_12"},{"event":"event_5","individual":"ind_13"},{"event":"event_6","individual":"ind_13"},{"event":"event_8","individual":"ind_13"},{"event":"event_9","individual":"ind_13"},{"event":"event_10","individual":"ind_13"},{"event":"event_11","individual":"ind_13"},{"event":"event_12","individual":"ind_13"},{"event":"event_13","individual":"ind_13"},{"event":"event_6","individual":"ind_14"},{"event":"event_7","individual":"ind_14"},{"event":"event_9","individual":"ind_14"},{"event":"event_10","individual":"ind_14"},{"event":"event_11","individual":"ind_14"},{"event":"event_7","individual":"ind_15"},{"event":"event_8","individual":"ind_15"},{"event":"event_8","individual":"ind_16"},{"event":"event_10","individual":"ind_16"},{"event":"event_8","individual":"ind_17"},{"event":"event_10","individual":"ind_17"}]
Insert cell
Insert cell
d3 = require("d3@5", "d3-sankey@0.12")
Insert cell
d3_arr = require("d3-array@^2.2")
Insert cell
vegalite = require("@observablehq/vega-lite@0.1")
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more