Public
Edited
Oct 10, 2023
Fork of Worker
Importers
5 stars
Insert cell
Insert cell
Insert cell
// Embed the strings "0" … "999"; `numChunks` is a third of the logical core
// count reported by the browser.
resultN = getEmbeddings(
  d3.range(1000).map((n) => String(n)),
  { numChunks: navigator.hardwareConcurrency / 3 }
)
Insert cell
// Elapsed milliseconds recorded on the `resultN` embeddings.
resultN._time
Insert cell
// Same 1000 strings as `resultN`, embedded with the "Xenova/gte-small" model.
resultN_2 = getEmbeddings(
  d3.range(1000).map((n) => String(n)),
  {
    numChunks: navigator.hardwareConcurrency / 3,
    model: "Xenova/gte-small"
  }
)
Insert cell
// Elapsed milliseconds recorded on the `resultN_2` embeddings.
resultN_2._time
Insert cell
// Minimal usage example: embed three short strings with the default options
// (no `numChunks`, so `getEmbeddings` takes its single-worker path).
example_embeddings = getEmbeddings(["foo", "bar", "blah"])
Insert cell
// Input control for choosing the reduction method; `druid_method` is the
// currently selected method name.
viewof druid_method = druidMethodSelector()
Insert cell
// Reduce the `resultN` embeddings with the method currently selected above.
reduceEmbeddings(druid_method, resultN)
Insert cell
Insert cell
/**
 * Builds the dropdown used to pick a dimensionality-reduction method.
 *
 * The method names are sorted with `d3.ascending` and handed to
 * `Inputs.select`, with "UMAP" as the initial value.
 *
 * @returns {*} Whatever `Inputs.select` returns for the sorted option list.
 */
function druidMethodSelector() {
  const methodNames = [
    "UMAP",
    "TSNE",
    "TriMap",
    "PCA",
    "LLE",
    "LTSA",
    "ISOMAP",
    "FASTMAP",
    "MDS",
    "LSP",
    /* "LDA", // LDA needs labels, skip this example */
    "TopoMap"
  ];
  methodNames.sort(d3.ascending);

  const selectConfig = {
    label: "Please pick a Dimensionality Reduction Method.",
    value: "UMAP"
  };

  return Inputs.select(methodNames, selectConfig);
}
Insert cell
// based on https://observablehq.com/@fil/druidjs-worker
/**
 * Runs a DruidJS reduction on `embeddings` inside a web worker.
 *
 * The worker imports DruidJS via `importScripts`, looks up
 * `self.druid[druid_method]` and calls its `transform(data, {})`.
 *
 * @param {string} druid_method - Property name looked up on `self.druid`
 *   inside the worker (e.g. "UMAP").
 * @param {*} embeddings - Data handed to `transform`; posted to the worker
 *   unchanged.
 * @returns {Promise<*>} Resolves with the first message the worker posts:
 *   the transform result, or the caught error object if `transform` threw.
 *   Rejects once the cell's `invalidation` promise resolves.
 */
async function reduceEmbeddings(druid_method, embeddings) {
  const druidjs_url = await require.resolve("@saehrimnir/druidjs@^0.7.3");
  const text = workertext(function* ({ dr_method, data }) {
    // see https://github.com/saehm/DruidJS/issues/34#issuecomment-1671538179
    // and https://github.com/saehm/DruidJS/issues/13#issuecomment-718252666
    try {
      yield self.druid[dr_method].transform(data, {}); // show the end result
    } catch (e) {
      // Deliberately forwarded as a value so the caller sees the failure.
      yield e;
    }
  }, `importScripts(${JSON.stringify(druidjs_url)});`);

  const w = worker(text, {
    dr_method: druid_method,
    data: embeddings
  });

  return new Promise((resolve, reject) => {
    // `w` returns a disposer that terminates the worker and revokes its blob
    // URL. Worker messages are delivered asynchronously, so `dispose` is
    // always initialised by the time the callback runs.
    const dispose = w((result) => {
      dispose();
      resolve(result);
    });
    invalidation.then(() => {
      // Stop any in-flight work when the cell is re-run or removed.
      dispose();
      reject(new Error("reduceEmbeddings: cell was invalidated"));
    });
  });
}
Insert cell
Insert cell
Insert cell
/**
 * Computes embeddings for `data`, optionally splitting the work into chunks
 * that are embedded concurrently.
 *
 * @param {Array} data - Items to embed; forwarded to `internal_getEmbeddings`.
 * @param {Object} [options]
 * @param {number} [options.numChunks=1] - Passed to `chunkArray` as its second
 *   argument when greater than 1. Any other value (including `NaN`, e.g. from
 *   `navigator.hardwareConcurrency / 3` where that property is unavailable)
 *   uses a single `internal_getEmbeddings` call.
 * @param {...*} [options.restOptions] - Remaining options, forwarded unchanged
 *   to `internal_getEmbeddings`.
 * @returns {Promise<Array>} On the single-call path, whatever
 *   `internal_getEmbeddings` resolves with. On the chunked path, the per-chunk
 *   results flattened one level, with a `_time` property holding the elapsed
 *   milliseconds measured here.
 */
async function getEmbeddings(data, { numChunks = 1, ...restOptions } = {}) {
  // Written as `!(numChunks > 1)` rather than `numChunks <= 1` so that NaN
  // also falls back to the single-call path instead of reaching chunkArray.
  if (!(numChunks > 1)) {
    return internal_getEmbeddings(data, restOptions);
  }

  const t0 = performance.now();
  const chunks = chunkArray(data, numChunks);
  const perChunk = await Promise.all(
    chunks.map((chunk) => internal_getEmbeddings(chunk, restOptions))
  );
  return Object.assign(perChunk.flat(), { _time: performance.now() - t0 });
}
Insert cell
/**
 * Embeds `strings` in a dedicated module worker.
 *
 * The worker imports transformers.js 2.5.4 from unpkg, creates a
 * "feature-extraction" pipeline for `model`, runs it on every input with mean
 * pooling and normalisation, and posts back each result's `.data`.
 *
 * @param {Array} strings - Inputs posted to the worker; each element is passed
 *   to the extractor individually.
 * @param {Object} [options]
 * @param {string} [options.model="Xenova/all-MiniLM-L6-v2"] - Model id given
 *   to `transformers.pipeline`.
 * @returns {Promise<Array>} Resolves with the first message posted by the
 *   worker: the array of per-input `.data` values, with a `_time` property
 *   (milliseconds elapsed inside the worker) assigned onto it. Rejects once the
 *   cell's `invalidation` promise resolves.
 */
function internal_getEmbeddings(strings, { model = "Xenova/all-MiniLM-L6-v2" } = {}) {
  // JSON.stringify yields a correctly escaped JS string literal, so a model id
  // containing quotes or backslashes cannot break the generated worker source.
  const modelLiteral = JSON.stringify(model);

  const w = worker(
    `
import * as transformers from 'https://unpkg.com/@xenova/transformers@2.5.4/dist/transformers.min.js'

function isIterable(obj) {
  return (
    typeof obj[Symbol.iterator] === "function" &&
    typeof obj["next"] === "function"
  );
}

const __run__ = async function (data) {
  const extractor = await transformers.pipeline(
    "feature-extraction",
    ${modelLiteral}
  )

  const embeddings = await Promise.all(
    data.map((i) =>
      extractor(i, {
        pooling: "mean",
        normalize: true
      })
    )
  )

  return embeddings.map((i) => i.data)
};

self.onmessage = async function(e) {
  const t0 = performance.now();
  let result = await __run__(e.data);
  if (typeof result !== "undefined") {
    if (!isIterable(result)) result = [result];
    for (const p of result) {
      postMessage(typeof p !== "object" ? p : Object.assign(p, {_time: performance.now() - t0}));
    }
    close();
  }
}
`,
    strings,
    null,
    { type: "module" }
  );

  return new Promise((resolve, reject) => {
    // `w` returns a disposer that terminates the worker and revokes its blob
    // URL. Worker messages are delivered asynchronously, so `dispose` is
    // always initialised by the time the callback runs.
    const dispose = w((result) => {
      dispose();
      resolve(result);
    });
    invalidation.then(() => {
      // Stop any in-flight work when the cell is re-run or removed.
      dispose();
      reject(new Error("internal_getEmbeddings: cell was invalidated"));
    });
  });
}
Insert cell
/**
 * Prepares a blob-backed web worker and returns a starter function for it.
 *
 * Nothing runs until the starter is called. Each call to the starter creates
 * an object URL for the source blob, constructs a `Worker` from it, forwards
 * the `data` of every "message" event to `notify`, and posts `initialData`
 * (with `transferList`) to the worker.
 *
 * @param {string} thetext - Worker source code.
 * @param {*} initialData - First message posted to the worker.
 * @param {*} transferList - Second argument given to `postMessage`.
 * @param {Object} [workerOptions={}] - Options for the `Worker` constructor.
 * @returns {function(function): function} Starter taking `notify` and
 *   returning a function that terminates the worker and revokes the URL.
 */
worker = function (thetext, initialData, transferList, workerOptions = {}) {
  const sourceBlob = new Blob([thetext], { type: "text/javascript" });

  return function (notify) {
    const blobUrl = URL.createObjectURL(sourceBlob);
    const instance = new Worker(blobUrl, workerOptions);

    instance.addEventListener("message", (event) => notify(event.data));
    instance.postMessage(initialData, transferList);

    const stop = () => {
      instance.terminate();
      URL.revokeObjectURL(blobUrl);
    };
    return stop;
  };
}
Insert cell
// https://observablehq.com/@fil/druidjs-worker
// just like Observable, we don't want to iterate on arrays
// so we check if our iterator has a method .next()
/**
 * Builds the source text of a web worker around a user-supplied function.
 *
 * In the generated worker, the function is called with the data of the
 * incoming message and awaited. If the result is not `undefined`, it is posted
 * back: an iterator (an object with both `Symbol.iterator` and `next`) has each
 * of its values posted, anything else is posted once. Non-null objects get a
 * `_time` property (milliseconds since the message arrived) assigned before
 * posting. The worker then closes itself.
 *
 * @param {Function|string} f - The function to run, or its source text.
 *   Functions are converted with `function_stringify`.
 * @param {string} [preamble=""] - Source placed before everything else
 *   (e.g. an `importScripts(...)` statement).
 * @returns {string} Worker source code.
 */
function workertext(f, preamble = "") {
  const runSource = typeof f === "function" ? function_stringify(f) : f;

  return `
${preamble}

function isIterable(obj) {
  return (
    obj != null &&
    typeof obj[Symbol.iterator] === "function" &&
    typeof obj["next"] === "function"
  );
}

const __run__ = ${runSource};

self.onmessage = async function(e) {
  const t0 = performance.now();
  let result = await __run__(e.data);
  if (typeof result !== "undefined") {
    if (!isIterable(result)) result = [result];
    for (const p of result) {
      // typeof null is "object", so null is posted as-is like a primitive.
      postMessage(
        typeof p !== "object" || p === null
          ? p
          : Object.assign(p, {_time: performance.now() - t0})
      );
    }
    close();
  }

}`;
}
Insert cell
// https://observablehq.com/@fil/druidjs-worker
// On iOS, [generator].toString() doesn't give "function*" but "function". Fix this.
/**
 * Returns the source text of `f`, making sure generator functions are spelled
 * with `function*`.
 *
 * @param {Function} f - Function to stringify.
 * @returns {string} `f.toString()`, with the first `function` keyword
 *   normalised to `function*` when `f.prototype` stringifies to
 *   "[object Generator]".
 */
function function_stringify(f) {
  const source = f.toString();
  const isGenerator =
    f.prototype && f.prototype.toString() === "[object Generator]";
  if (!isGenerator) return source;

  // Consume optional whitespace and an existing star, then put the whitespace
  // back after the star. This keeps `function *g` from becoming
  // `function* *g` while leaving `function* g` untouched.
  return source.replace(/function(\s*)\*?/, "function*$1");
}
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more