Public
Edited
Nov 15
Paused
2 forks
Importers
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// clusterHDBScanChart = {
// const edges = vl
// .markLine({ filled: false })
// .encode(
// vl.x().fieldQ("edge[0][0]"),
// vl.y().fieldQ("edge[0][1]"),
// vl.x2().fieldQ("edge[1][0]"),
// vl.y2().fieldQ("edge[1][1]")
// )
// .width(600)
// .height(400)
// .data(hdbscanClusters);

// const boxes = vl
// .markRect({ filled: false })
// .encode(
// vl.x().fieldQ("bbox.minX"),
// vl.y().fieldQ("bbox.minY"),
// vl.x2().fieldQ("bbox.maxX"),
// vl.y2().fieldQ("bbox.maxY")
// )
// .width(Math.max(320, width - 100))
// .height(500)
// .data(hdbscanClusters);

// const mapClusters = new Map(
// hdbscanClusters.map((d, c) => d.opt.map((e) => [e, c])).flat()
// );

// const points = vl
// .markPoint({ tooltip: true })
// .encode(
// vl.x().fieldQ("x"),
// vl.y().fieldQ("y"),
// vl.shape().fieldN("type"),
// vl.size().fieldQ("score"),
// vl.tooltip(["title", "authors", "abstract", "id", "score", "cluster"]),
// vl
// .color()
// .if("datum.cluster == null", vl.value("#ccc7"))
// .fieldQ("cluster")
// .scale({ scheme: "turbo" })
// )
// .data(
// dataToPlotWithClusters.map((d) => ({
// ...d,
// cluster: mapClusters.get(d.id)
// }))
// );

// return vl
// .layer(
// ...[
// edges,
// clusteringSpace === "Reduced Space" ? boxes : null,
// points
// ].filter((d) => d)
// )
// .render();
// }
Insert cell
Insert cell
clusteredTreeViz = Tree(convertTree(hdbScanClusteredTree), {
height: 400,
width,
fit: true,
r: 1,
tree: d3.cluster,
sort: (a,b) => d3.descending(a.data.data.length, b.data.data.length)
})
Insert cell
paperClicked = drSelection.clicked
Insert cell
papersHighlighted = drSelection.brushed
Insert cell
sessionsToAttend = {
// TODO not working until we have dates
query;
semanticSearchResults;
return d3
.groups(
navioSelected.filter((d) => d.score > minScore/100),
// (v) => v.length,
(d) => d.timestamp.toLocaleDateString(...dateOptions),
(d) =>
`${d.timestamp.toLocaleTimeString(...dateOptions)} ${d.roomName} - ${d.sessionName}`
)
.sort((a, b) => d3.ascending(a[0], b[0]))
.map(([g, v]) => [g, v.sort((a, b) => d3.ascending(a[0], b[0]))]);
}
Insert cell
dateOptions = ["en-US", { timeZone: "utc" }]
Insert cell
/**
 * Builds the layered vega-lite-api chart used by the scatterplot.
 *
 * Layers, in order: the points (every row whose `type` is not "keyword"),
 * optionally the HDBScan bounding boxes, and optionally the keyword labels.
 *
 * @param {Object[]} dataToPlot rows to draw; rows with `type === "keyword"` are drawn as text.
 * @param {Object} options field names (`x`, `y`, `color`, `size`, `shape`, `id`), `tooltip`,
 *   `colorScheme`, `colorSchemeQuantitative`, `colorDomain`, `shapeDomain`, `sizeRange`,
 *   `title`, `width`, `height` and the `interactive` flag.
 * @returns the vega-lite-api layer chart (not rendered).
 */
function getVegaView(dataToPlot, options) {
  const {
    interactive,
    colorScheme,
    color,
    shape,
    x,
    y,
    size,
    id,
    tooltip,
    colorDomain,
    title,
    shapeDomain
  } = options;

  // Make it depend on clustering
  clusters;

  // Points without a cluster are drawn in translucent grey. The color field is
  // encoded as nominal when clusters are computed and as quantitative otherwise.
  const colorBase = vl
    .color()
    .if("datum.cluster == undefined", vl.value("#ccc7"));
  const colorField = (
    computeClusters ? colorBase.fieldN(color) : colorBase.fieldQ(color)
  ).scale({
    domain: colorDomain,
    scheme: colorScheme || options.colorSchemeQuantitative
  });

  let base = vl
    .markPoint({ opacity: 0.6, filled: true })
    .encode(vl.x().fieldQ(x).axis(null), vl.y().fieldQ(y).axis(null))
    .data(dataToPlot.filter((d) => d.type !== "keyword"));

  if (color) base = base.encode(colorField);

  if (size) {
    base = base.encode(
      vl.size().fieldQ(size).scale({ range: options.sizeRange }),
      vl.order().fieldQ(size)
    );
  }

  if (shape) {
    let shapeEncoding = vl.shape().fieldN(shape);

    if (shapeDomain) {
      shapeEncoding = shapeEncoding.scale({ domain: shapeDomain });
    }
    base = base.encode(shapeEncoding);
  }

  const boxes = vl
    .markRect({ filled: false })
    .encode(
      vl.x().fieldQ("bbox.minX"),
      vl.y().fieldQ("bbox.minY"),
      vl.x2().fieldQ("bbox.maxX"),
      vl.y2().fieldQ("bbox.maxY")
    )
    .data(hdbscanClusters);

  const keywords = vl
    .markText({ fontSize: 8, tooltip: true, opacity: 0.7 })
    .encode(vl.x().fieldQ("x"), vl.y().fieldQ("y"), vl.text().fieldN("title"))
    .data(dataToPlot.filter((d) => d.type === "keyword"));

  // Default to the plain points so the layer is still drawn when `interactive`
  // is falsy (previously the points layer was undefined and got filtered out).
  let pointsLayer = base;

  const events = "mouseover,pointerover,touchmove,click";
  if (interactive) {
    const hover = vl
      .selectSingle("hover")
      .nearest(true)
      .on(events)
      .clear("none")
      .init({ x: [], y: [] });
    const drag = vl.selectInterval("drag");

    const click = vl
      .selectPoint("click")
      .fields([id])
      .nearest(true)
      .on("click")
      .init({ id: [] });

    const shiftClick = vl
      .selectPoint("shiftClick")
      .fields([id])
      .on("click[event.altKey]")
      .toggle("false")
      .init({ id: [] });

    pointsLayer = base.params(click, hover, drag, shiftClick).encode(
      // Clicked points get a black outline
      vl
        .stroke()
        .condition({ param: "click", value: "black", empty: false })
        .value(null),
      vl.tooltip(tooltip)
    );

    if (color) {
      // Only hovered or brushed points keep their color; the rest turn grey
      pointsLayer = pointsLayer.encode(
        vl.color().if(vl.or(hover, drag), colorField).value("grey")
      );
    }
  }

  const chart = vl.layer(
    ...[
      pointsLayer,
      clusteringSpace === "Reduced Space" && clusteringType === "HDBScan"
        ? boxes
        : null,
      includeKeywords ? keywords : null
    ].filter((d) => d)
  );

  return chart.title(title).width(options.width).height(options.height);
}
Insert cell
/**
 * Wraps a vega-lite-api spec in a layer chart together with a text layer that
 * labels the keyword rows.
 *
 * The keyword layer is derived from `spec` itself (`spec.markText()`), with
 * x / y / text encodings and its data replaced by the rows of the notebook's
 * `dataToPlot` cell whose `type` is "keyword".
 *
 * @param spec a vega-lite-api chart object.
 * @returns the layered chart (not rendered).
 */
function vegaSpecWrapper(spec) {
// const edges = vl
// .markLine({ filled: false })
// .encode(
// vl.x().fieldQ("edge[0][0]"),
// vl.y().fieldQ("edge[0][1]"),
// vl.x2().fieldQ("edge[1][0]"),
// vl.y2().fieldQ("edge[1][1]")
// )
// .width(600)
// .height(400)
// .data(hdbscanClusters);

// NOTE(review): `boxes` is built but not added to the layer below (its entry
// is commented out). It still makes this function reference `width` and
// `hdbscanClusters`.
const boxes = vl
.markRect({ filled: false })
.encode(
vl.x().fieldQ("bbox.minX"),
vl.y().fieldQ("bbox.minY"),
vl.x2().fieldQ("bbox.maxX"),
vl.y2().fieldQ("bbox.maxY")
)
.width(Math.max(320, width - 100))
.height(500)
.data(hdbscanClusters);

// const mapClusters = new Map(
// hdbscanClusters.map((d, c) => d.opt.map((e) => [e, c])).flat()
// );

// const hello = vl
// .markText()
// .encode(
// vl.text().fieldN("text"),
// // vl.x().fieldQ("x"),
// // vl.y().fieldQ("y")
// )
// .data([{ text: "Hello", type: "text", x: 0, y: 0 }]);

// const points = vl
// .markPoint()
// .encode(
// vl.x().fieldQ("x"),
// vl.y().fieldQ("y"),
// vl.shape().fieldN("type"),
// vl.size().fieldQ("score"),
// vl.tooltip(["title", "authors", "abstract", "id", "score", "cluster"]),
// vl
// .color()
// .if("datum.cluster == null", vl.value("#ccc7"))
// .fieldQ("cluster")
// .scale({ scheme: "turbo" })
// )
// .data(dataToPlot.map((d) => ({ ...d, cluster: mapClusters.get(d.id) })));

// Text layer with the keyword titles, positioned by their x / y fields
const keywords = spec
.markText()
.encode(
vl.x().fieldQ("x").axis(null),
vl.y().fieldQ("y").axis(null),
vl.text().fieldN("title")
)
.data(dataToPlot.filter((d) => d.type === "keyword"));

// The layers are passed as a single array here (not spread)
const chart = vl.layer(
[
// computeClusters && clusteringType === "HDBScan" && boxes,
spec,
keywords,
].filter((d) => d)
);

// Debug output of the generated spec
console.log("chart", chart.toObject());
return chart;
}
Insert cell
roomsToGo = {
// TODO not working until we have dates
query;
semanticSearchResults;
return d3
.groups(
navioSelected.filter((d) => d.score > minScore / 100),
// (v) => v.length,
(d) => d.timestamp.toLocaleDateString(...dateOptions),
(d) => `${d.roomName}`
// (d) => d.title
)
.sort((a, b) => d3.ascending(a[0], b[0]))
.map(([g, v]) => [g, v.sort((a, b) => b[1].length - a[1].length)]);
}
Insert cell
Insert cell
viewof maxPapersSpinner = Inputs.bind(htl.html`<input type="number" style="width: 3rem" min=1 value=10>`, viewof maxPapers)
Insert cell
Insert cell
Insert cell
navioSelected
Insert cell
Insert cell
// navio(embeddings.map(d => d.data), {attribWidth: 2, height: 600})
Insert cell
Insert cell
Insert cell
Insert cell
computeEmbeddings && DOM.download(new Blob(
[JSON.stringify(embeddings_computed)],
{type: "application/json"}
), "embeddings.json", "Download Embeddings")
Insert cell
Insert cell
// Computes one embedding per row of `sample`, sequentially, when
// `computeEmbeddings` is enabled. The cell first yields a progress bar and then
// resolves it with the collected `{ id, embedding }` values; when disabled it
// yields the "please enable" placeholder instead.
viewof embeddings_computed = {
if (computeEmbeddings) {
const before = performance.now();
// Bare reference: in Observable it makes this cell re-run when `restart` changes
restart;
const values = [];
const bar = progress({ interval: 50, invalidation });
// Display the progress bar while the embeddings are being computed
yield bar;
let i =0;
for (let row of sample) {
// The text to embed for a row comes from embeddingAccessor(row)
values.push({id: row.id,
embedding: await extractor(embeddingAccessor(row), extractorOptions)
});
// NOTE(review): the index is passed before being incremented, so the last
// call reports sample.length - 1 out of sample.length — confirm intended
await bar.progress(i++, sample.length);
// console.log("i", i);
}
console.log("progress completed in", performance.now() - before);
// Presumably sets the final value of the view — verify against the progress helper
bar.resolve(values);
} else {
// Not computing embeddings, show the placeholder (its value is an empty array)
yield pleaseEnableComputeEmbeddings();
}
}
Insert cell
// Freshly computed embeddings when computing is enabled, the cached file otherwise.
embeddings = computeEmbeddings ? embeddings_computed : embeddings_cached
Insert cell
embeddings_cached = (embeddingsFileSelected === "Title and Abstract Full + TVCG"
? await FileAttachment(
"2024_10Oct_27_vis24_embeddings_full_tvcg_keywords.json"
)
: // (
// await FileAttachment(
// "2024_10Oct_11_vis24_embeddings_full_tvcg_keywords.json"
// ).zip()
// ).file("2024_10Oct_11_vis24_embeddings_full_tvcg_keywords.json")
// TODO add other cached embeddgins here
null
)
.json()
.then((res) => {
for (let row of res) {
row.embedding.data = new Float32Array(
Array.from(Object.values(row.embedding.data))
);
// newRes.push({
// data: new Float32Array(Array.from(Object.values(row.data)))
// });
}
return res;
})
Insert cell
viewof embeddingsFileSelected = Inputs.select(
[
"Title and Abstract Full + TVCG",
],
{ label: "Procomputed embeddings file"}
)
Insert cell
embeddingAccessor = (d) =>
// [d.paper_type].join("\n\n")
computeEmbeddingsUsing.map(a => d[a]).join("\n\n")
Insert cell
embeddingsHash = {
console.log("computing embHash");
return new Map(embeddings.map((d, i) => [d.id, d.embedding]))
}
Insert cell
Insert cell
onClustersChangeColor = {
if (computeClusters === true) {
viewof colorBy.value = "cluster";
viewof colorBy.dispatchEvent(new CustomEvent("input"), { bubbles: true });
viewof colorScheme.value = "turbo";
viewof colorScheme.dispatchEvent(new CustomEvent("input"), { bubbles: true });
} else {
viewof colorBy.value = "score";
viewof colorBy.dispatchEvent(new CustomEvent("input"), { bubbles: true });
viewof colorScheme.value = "brownbluegreen";
viewof colorScheme.dispatchEvent(new CustomEvent("input"), { bubbles: true });
}
}
Insert cell
/**
 * Renders one paper as a 150px-wide card using htl: linked title, similarity
 * score as a percentage (followed by the awards, if any), the paper image when
 * `p.has_image` is truthy, author names, "type - session" and the abstract.
 *
 * @param {Object} p paper row; reads url, title, score, awards, has_image, UID,
 *   image_caption, authorNames, type, sessionName and abstract.
 * @returns the element produced by the htl.html template.
 */
function renderItem(p) {
return htl.html`
<div style="width: 150px; flex: 1; padding-right: 10px; padding-bottom: 15px">
<strong><a href=${p.url}>${p.title}</a></strong>
<div>Similarity score: ${(p.score * 100).toFixed(2)}% ${
p.awards ? p.awards : ""
}</div>
${p.has_image ? htl.html`<img width= "150px" src="https://ieeevis.b-cdn.net/vis_2024/paper_images/${p.UID}_Image.png" alt="${p.image_caption}"/>` : ""}
<div style="font-style: italic; max-height: 4em; overflow: auto;">${
p.authorNames
}</div>
<div>${p.type} - ${p.sessionName}</div>
<div style="margin-top: 0.5em; max-height: 70px; overflow: auto">${
p.abstract
}</div>
</div>
`;
}
Insert cell
facetedSelected.filter(d => d.title === "A Preliminary Roadmap for LLMs as Visual Data Analysis Assistants" )
Insert cell
facetedSelected = {
const hasKeywordSelected = (d) =>
!keywordsSelected.length ||
keywordsSelected.some((a) =>
d.keywordsList.includes(a.toLocaleLowerCase())
);

return facetedSelectedPhase1.filter(
(d) =>
typesSelected.includes(d.type) &&
(!authorsSelected.length ||
authorsSelected.some((a) => d.authorsList.includes(a))) &&
(!affiliationsSelected.length ||
affiliationsSelected.some((a) =>
d.affiliationsList.includes(a.toLocaleLowerCase())
)) &&
hasKeywordSelected(d)
);
}
Insert cell
papers.filter(d => !d.abstract)
Insert cell
typesSelected
Insert cell
/**
 * Placeholder view shown while embeddings are not being computed.
 * @returns the htl node with the message, carrying an empty array as `value`.
 */
function pleaseEnableComputeEmbeddings() {
  const placeholder = htl.html`Please enable compute embeddings`;
  placeholder.value = [];
  return placeholder;
}
Insert cell
sample = navioSelected
Insert cell
attrs = Object.keys(papers[0]).concat(["score"])
Insert cell
Insert cell
sample.map((d) => embeddingsHash.get(d.id)?.data)
Insert cell
// Runs the dimensionality reduction (DruidGenerator) over the embeddings of
// `sample`, unless the cached reduction is in use, in which case it returns an
// empty array.
reducerResultComputed = {
// Bare reference: in Observable it makes this cell re-run when `embeddings` changes
embeddings;
console.log(
"Launching DR Worker",
[...embeddingsHash.keys()].length,
sample.length,
sample.at(-1),
typesSelected
);
if (useCachedDR) return [];
// Bare reference: re-run when `restartDR` changes
restartDR;

console.log("👏🏻 Running DR");
try {
// Rows without an embedding contribute `undefined` (optional chaining)
return DruidGenerator(
sample.map((d) => embeddingsHash.get(d.id)?.data),
druid_method,
{... druid_params, d: dimensionality}
);
} finally {
// NOTE(review): this runs as soon as DruidGenerator(...) has returned its
// value; if that value is a generator or promise, the message and timestamp
// are written before the reduction actually finishes — confirm
console.log("👏🏻 Completed DR");
mutable lastReducerTimestamp = performance.now();
}
// for await (const step of DruidGenerator(
// sample.map((d) => embeddingsHash.get(d.id).data),
// druid_method,
// druid_params
// )) {
// yield step;
// mutable lastReducerTimestamp = performance.now();
// }
}
Insert cell
Insert cell
oneD = DruidGenerator(
sample.map((d) => embeddingsHash.get(d.id)?.data),
druid_method,
{ ...druid_params, d: 1 }
)
Insert cell
vl
.markCircle({ tooltip: { data: true } })
.encode(
vl.y().fieldQ("pos"),
vl.x().fieldN("cluster").sort(vl.median("y")),
vl.color().fieldO("cluster")
)
.height(900)
.width(width-100)
.data(dataToPlotWithClusters.map((d, i) => ({ ...d, pos: oneD[i] })))
.render()
Insert cell
druid_params
Insert cell
// To keep track of last time the reducer computed
mutable lastReducerTimestamp = performance.now()
Insert cell
reducerResult = {
const res = useCachedDR
? reducerResultCached
: Object.fromEntries(
sample.map((d, id) => [d.id, reducerResultComputed[id]])
);

for (let p in res) {
if (res[p]?.length === 1) {
// for one dimensional reduction
res[p] = [0, res[p][0]];
}
}
return res;
}
Insert cell
{
// DOWNLOAD UMAP
if (!reducerResultComputed.length)
return html`Enable compute UMAP to download`;

return sample.map((d, i) => {
const [x, y] = reducerResultComputed[i];
return { id: d.id, x, y };
});
}
Insert cell
reducerResultCached = FileAttachment("2024_10Oct_27_vis24_umap_full_tvcg_keywords.csv")
.csv({ typed: true })
.then((res) => Object.fromEntries(res.map((d) => [d.id, [d.x, d.y]])))
Insert cell
dataToPlot = {
return (semanticSearchResults ? semanticSearchResults : navioSelected).map(
(row, id) => ({
...row,
embedding: embeddingsHash.get(row.id)?.data,
x: reducerResult[row.id] && reducerResult[row.id][0],
y: reducerResult[row.id] && reducerResult[row.id][1]
})
);
}
Insert cell
dataToPlotWithClusters = {
console.log("🏘️ Recompute dataToPlotWithClusters", clusters);
clusters;
return dataToPlot;
}
Insert cell
Insert cell
semanticSearchResults = cosineSimilarity(query, sample)
Insert cell
// https://github.com/xenova/transformers.js/blob/main/examples/semantic-image-search-client/src/app/worker.js#L46
/**
 * Scores every row of `data` against `query`, writing the result to `row.score`.
 *
 * With a falsy query every score is reset to 0 and nothing is returned.
 * Otherwise the query is embedded with `extractor`, each row with a stored
 * embedding gets its cosine similarity to the query, rows without one are
 * logged and left unchanged, and the same `data` array is returned.
 */
async function cosineSimilarity(query, data) {
  if (query) {
    const { data: queryVector } = await extractor(query, extractorOptions);

    for (const paper of data) {
      const paperVector = embeddingsHash.get(paper.id)?.data;
      if (!paperVector) {
        console.log("no embedding found for", paper.id);
        continue;
      }
      paper.score = pairCosineSimilarity(queryVector, paperVector);
    }

    return data;
  }

  // No query: clear the scores
  for (const paper of data) {
    paper.score = 0;
  }
  return undefined;
}
Insert cell
/**
 * Computes the cosine similarity between two embeddings.
 *
 * Only the first `embeddingA.length` components are read from both vectors.
 *
 * @param {ArrayLike<number>} embeddingA
 * @param {ArrayLike<number>} embeddingB
 * @returns {number} dot(A, B) / (|A| * |B|), or 0 when either vector has zero
 *   norm (the ratio would otherwise be NaN).
 */
function pairCosineSimilarity(embeddingA, embeddingB) {
  const EMBED_DIM = embeddingA.length;
  let dotProduct = 0;
  let normEmbeds = 0;
  let normDB = 0;

  for (let j = 0; j < EMBED_DIM; ++j) {
    const embedValue = embeddingA[j];
    const dbValue = embeddingB[j];

    dotProduct += embedValue * dbValue;
    normEmbeds += embedValue * embedValue;
    normDB += dbValue * dbValue;
  }

  const denominator = Math.sqrt(normEmbeds) * Math.sqrt(normDB);
  // An all-zero (or empty) vector has no direction: report "no similarity"
  if (denominator === 0) return 0;

  return dotProduct / denominator;
}
Insert cell
/**
 * Builds the symmetric matrix of pairwise cosine similarities.
 *
 * Each pair is computed once and mirrored; the diagonal is left at 0.
 *
 * @param {Array} embeddings list of vectors.
 * @returns {number[][]} square matrix with one row and column per embedding.
 */
function cosineMatrix(embeddings) {
  const count = embeddings.length;
  const matrix = Array.from({ length: count }, () => new Array(count).fill(0));

  for (const [i, current] of embeddings.entries()) {
    for (let j = i + 1; j < count; ++j) {
      const similarity = pairCosineSimilarity(current, embeddings[j]);
      matrix[i][j] = similarity;
      matrix[j][i] = similarity;
    }
  }

  return matrix;
}
Insert cell
import {
DruidGenerator,
viewof druid_method,
viewof druid_params
} with { default_method } from "@john-guerra/druidjs-generator"
Insert cell
default_method = "UMAP"
Insert cell
transformers = import("https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.1")
Insert cell
Insert cell
extractor = await transformers.pipeline(
"feature-extraction",
"Xenova/all-MiniLM-L6-v2",
// "allenai/specter2",
// { quantize: false }
extractorOptions
)
Insert cell
visKeywords = (await FileAttachment("visKeywords@1.csv").csv()).filter(d => d.keyword)
Insert cell
extractorOptions = ({
pooling: "mean",
quantize: false,
// normalize: true,
// quantize: true,
// precision: "binary"
})
Insert cell
// Papers loaded from the published Google Sheet. Each row gets an id (its
// title) plus list versions of its keywords, authors and affiliations.
gDocsPapers = d3
  .csv(
    "https://docs.google.com/spreadsheets/d/e/2PACX-1vRB4np6KAnB8s1KkX7o8EYu0ii7aQyC3mOd40nj-h9R4j9Qt5cjImyZhyHkTQ_nirK4KZxyvPT_KQ5Z/pub?gid=481334436&single=true&output=csv",
    {}
  )
  .then((rows) => {
    const trimmed = (text) => text.trim();
    const trimmedLower = (text) => text.trim().toLocaleLowerCase();
    const nonEmpty = (text) => text;

    for (const paper of rows) {
      paper.id = paper.title;
      // Keywords and affiliations are lower-cased; author names keep their case
      paper.keywordsList = paper.keywords
        .split(",")
        .map((k) => trimmedLower(k))
        .filter(nonEmpty);
      paper.authorsList = paper.authors
        .split(",")
        .map((a) => trimmed(a))
        .filter(nonEmpty);
      paper.authorNames = paper.authors;
      // Rows without an affiliations column value get an empty list
      paper.affiliationsList =
        paper?.affiliations
          ?.split(", ")
          .map((a) => trimmedLower(a))
          .filter(nonEmpty) || [];
    }

    return rows.map((paper) => paper);
  })
Insert cell
// Papers from the attached JSON file, normalized to the fields the rest of the
// notebook reads: keywordsList, authorsList, authorNames, affiliationsList,
// type, url, timestamp, sessionName and roomName.
visPapersGithub = FileAttachment("2024_10Oct_27_papers_from_github.json")
  .json()
  .then((res) => {
    return res.map((d) => {
      // Each keywords entry may itself hold a comma-separated list
      d.keywordsList = d?.keywords?.length
        ? d.keywords
            .flatMap((a) => a.split(",").map((k) => k.trim().toLocaleLowerCase()))
            .filter((k) => k)
        : [];

      const names = d.authors.map((a) => a.name);
      d.authorsList = names.filter((n) => n);
      d.authorNames = names.join(", ");

      // First affiliation of each author. Lower-cased (like gDocsPapers does)
      // because the affiliation facet compares against lower-cased selections.
      d.affiliationsList = d.authors
        .map(
          (a) =>
            a?.affiliations?.length &&
            a.affiliations[0].trim().toLocaleLowerCase()
        )
        .filter((f) => f);

      d.type = d.paper_type_name;
      d.url = `https://ieeevis.org/year/2024/program/paper_${d.UID}.html`;
      d.timestamp = new Date(d.time_stamp);
      d.sessionName = d.session_title;
      d.roomName = d.session_room;
      return d;
    });
  })
Insert cell
visPapersGithub.filter(d => d.keywordsList?.length)
Insert cell
papersSource = visPapersGithub
// papersSource = gDocsPapers
Insert cell
papers = {
let res = papersSource;

if (includeKeywords) {
res = res.concat(
visKeywords
.map((k, i) => ({
title: k.keyword,
abstract: k.description,
authorList: [],
authors: "",
authorNames: "",
affiliationsList: [],
id: `keyword:${i}`,
type: "keyword"
}))
);
}
return res;
}
Insert cell
// for CHI
// papers = chiConference.contents.map((d) => {
// d.id = +d.id;
// d.sessions = d.sessionIds.map((s) => {
// const session = maps.get("sessions").get(s);
// session.room = maps.get("rooms").get(session.roomId);
// session.timeSlot = maps.get("timeSlots").get(session.timeSlotId);
// return session;
// });
// d.sessionName = d.sessions[0].name;
// d.roomName = d.sessions[0].room?.name;
// d.timestamp = new Date(d.sessions[0].timeSlot?.startDate);
// d.sessionNameWord = d.sessions[0].name.split(" ")[0];
// d.type = maps.get("tracks").get(d.trackId).name;
// d.authorsExpanded = d.authors.map((a) => maps.get("people").get(a.personId));
// d.authorsList = d.authorsExpanded.map((a) =>
// `${a.firstName?.trim()} ${a.lastName?.trim()}`.toLocaleLowerCase()
// );
// d.authorNames = d.authorsList.join(", ");
// d.affiliationsList = [
// ...new Set(
// d.authors
// .map((a) =>
// a.affiliations.map((f) => f.institution.trim().toLocaleLowerCase())
// )
// .flat()
// )
// ];
// d.url = `https://programs.sigchi.org/chi/2024/program/content/${d.id}`;


// return d;
// })
Insert cell
// maps = {
// const attrs = [
// // "conference",
// // "publicationInfo",
// "sponsors",
// "sponsorLevels",
// "floors",
// "rooms",
// "tracks",
// "contentTypes",
// "timeSlots",
// "sessions",
// "events",
// "contents",
// "people",
// "recognitions"
// ];

// const maps = new Map(
// attrs.map((a) => [a, new Map(chiConference[a].map((d) => [d.id, d]))])
// );
// return maps;
// }
Insert cell
// chiConference = (await FileAttachment("CHI_2024_program_may13.json.zip").zip()).file("CHI_2024_program_may13.json").json()
Insert cell
Insert cell
clusters = {
clusteringSpace;
maxClusters
papersPerClusterRange;
hdbScanClusteredTree;
if (!computeClusters) return null;
console.log("💳 Computing clusters ", clusteringType, clusteringSpace);

if (clusteringType === "Kmeans") {
return await clustersKmeans;
} else {
return await hdbscanClusters;
}
}
Insert cell
clustersKmeans = {
if (
!computeClusters ||
clusteringType !== "Kmeans"
)
return {};
const before = performance.now();
let dataset;

if (clusteringSpace === "Embeddings") {
dataset = navioSelected.map(({ id }) => embeddingsHash.get(id).data);
} else {
dataset = dataToPlot.map(({ x, y }) => [x, y]);
// dataset = Object.entries(reducerResult).map(([k, v]) => v);
}

const clusters = await kmeans.kmeans(dataset, maxClusters);
clusters.clusters.map((c, i) => {
dataToPlot[i].cluster = c;
});
console.log(
"🦜🦜🦜 Kmeans clusters computed in",
(performance.now() - before) / 1000
);
return clusters;
}
Insert cell
HDBScanJS = import('https://cdn.skypack.dev/hdbscanjs@1.0.12?min').then(res => res.default)
Insert cell
Insert cell
hdbScanClusteredTree?.dist || 1
Insert cell
// function enoughTimeSinceLastDRResult() {
// let diff;
// console.log(
// "⏰ enoughTimeSince edit",
// (diff = performance.now() - lastReducerTimestamp),
// diff > minTimeDeltaForClustering
// );
// return (
// performance.now() - lastReducerTimestamp > minTimeDeltaForClustering
// );
// }
Insert cell
hdbScanClusteredTree = {
reducerResult;


if (
!computeClusters ||
clusteringType != "HDBScan"
)
return {};
const before = performance.now();
console.log("🦜 Running HDBScan");

let dataset, distFunc;

if (clusteringSpace === "Embeddings") {
// Cluster embeddings
dataset = dataToPlot.map(({ id }) => ({
data: embeddingsHash.get(id).data,
opt: id
}));
distFunc = pairCosineSimilarity;
} else {
// two distance measure functions are supported:
// 1) euclidean
// 2) geoDist (take inputs as lonlat points)
distFunc = HDBScanJS.distFunc.euclidean;

// // Cluster DR
dataset = dataToPlot.map(({ id, x, y }) => ({ data: [x, y], opt: id }));
// dataset = Object.entries(reducerResult).map(([k, v]) => ({
// data: v,
// opt: k
// }));
}

const cluster = new HDBScanJS(dataset, distFunc);
const treeNode = cluster.getTree();
console.log("🦜 HDBScan ran in ", performance.now() - before);

return treeNode;
}
Insert cell
{
hdbScanClusteredTree;
console.log("⏰⏰⏰⏰")
}
Insert cell
maxClusteringDistance
Insert cell
hdbscanClusters = {
hdbScanClusteredTree;
console.log("🦜🤯 Trying to copy HDBScans clusters");
if (!computeClusters || clusteringType != "HDBScan") return [];

const filterCond = (val) =>
val.data.length >= papersPerClusterRange[0] &&
val.data.length < papersPerClusterRange[1] &&
val.dist <= maxClusteringDistance;

const hdbscanClusters = hdbScanClusteredTree.filter(filterCond);

const mapClusters = new Map(
hdbscanClusters.map((d, c) => d.opt.map((e) => [e, c])).flat()
);

dataToPlot.forEach((d) => {
d.cluster = mapClusters.get(d.id);
// d.cluster = d.cluster === null ? -1 : d.cluster;
});

console.log("🦜🤯 HDBScans clusters copied!");

return hdbscanClusters;
}
Insert cell
corpus = {
if (!computeClusters) return {};
const groupedClusters = hdbscanClusters.map((c, i) =>
c.opt.map((id) => mapPaperId.get(id))
);

return new tfidf.Corpus(
groupedClusters.map((_, i) => i),
groupedClusters.map((c) =>
c
.map(
(d) => d?.title
+ d?.abstract
)
.join("\n")
),
true,
[],
5,
0.9
);
}
Insert cell
groupedClusters = hdbscanClusters.map((c, i) =>
c.opt.map((id) => mapPaperId.get(id))
);
Insert cell
mapPaperId
Insert cell
mapPaperId = new Map(dataToPlot.map(d => [d.id, d]))
Insert cell
/**
 * Converts a binary tree node (`left` / `right`) into a copy that also has a
 * `children` array, recursively, left child first.
 *
 * @param {Object} t tree node; a missing node yields `{ children: [] }`.
 * @returns {Object} shallow copy of `t` with `children` set (replacing any
 *   existing `children` property).
 */
function convertTree(t) {
  const children = [t?.left, t?.right]
    .filter((branch) => branch)
    .map((branch) => convertTree(branch));

  return { ...t, children };
}
Insert cell
authors = [...new Set(papers.map(d => d.authorsList).flat())].sort()
Insert cell
affiliations = [
...new Set(
papers
.map((d) => d.affiliationsList.map((i) => i.toLocaleLowerCase()))
.flat()
)
].sort()
Insert cell
viewof initialAffiliations = {
const widget = PersistInput(
"affiliations",
Inputs.checkbox(affiliations, { maxHeight: "100px" })
);
widget.style.height = "100px";
widget.style.overflow = "scroll";
return widget;
}
Insert cell
viewof initialAuthors = {
const widget = await PersistInput(
"authors",
Inputs.checkbox(authors, { maxHeight: 100 })
);
widget.style.height = "100px";
widget.style.overflow = "scroll";
return widget;
}
Insert cell
import {PersistInput} from '@john-guerra/persist-input'

Insert cell
import {multiAutoSelect} from "@john-guerra/multi-auto-select@262"
Insert cell
tfidf = import('https://unpkg.com/tiny-tfidf@0.9.1/index.js?module')
Insert cell
import {Tree} from "@john-guerra/tree-fit"
// import {Tree} from "@d3/cluster"
// https://observablehq.com/@d3/cluster
Insert cell
Insert cell
// visPapers = FileAttachment("vis24a_camera_metadata_downloaded_20240717.json").json()
Insert cell
// visPapers.subs.map((d) => ({
// id: d.id,
// title: d.title,
// abstract: d.abstract,
// authors: d.authors
// .map((a) => `${a.author.first_name} ${a.author.last_name}`)
// .join(", "),
// contact: `${d.contact.first_name} ${d.contact.last_name}`,
// authorIds: d.authors.map((a) => a.author.id).join(", "),
// type: "Full",
// affiliations: d.authors
// .map((a) => `${a.affiliations[0].institution}`)
// .join(", ")
// }))
Insert cell
kmeans = import('https://cdn.skypack.dev/ml-kmeans@6.0.0?min')
Insert cell
import {interval} from '@mootari/range-slider'
Insert cell
import { vl } from "@vega/vega-lite-api-v5"
Insert cell
import {conditionalShow} from "@john-guerra/conditional-show"
Insert cell
import {FacetedSearch} from "@john-guerra/faceted-search"
Insert cell
import {progress} from "@mootari/displaying-progress"
Insert cell
import {navio} from "@john-guerra/navio"
Insert cell
import {scentedCheckbox} from "@john-guerra/scented-checkbox"
Insert cell
import { BrushableScatterPlot } with { getVegaView } from "@john-guerra/brushable-scatterplot"
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more