Public
Edited
Feb 8, 2024
1 star
Insert cell
Insert cell
Insert cell
// Chart cell: draws the rows of `transformedArray` with a single barX mark.
// Each row supplies `dim` (x position), `value` (fill color) and `word`
// (fy, i.e. one vertical facet per word).
Plot.plot({
// x scale: rounding enabled, axis hidden.
x: {round: true, axis: false},
// Color scale uses the "BuRd" scheme.
color: {scheme: "BuRd"},
marks: [
Plot.barX(transformedArray, {
x: "dim",
fill: "value",
fy: "word",
inset: 0 // no gaps
})
]
})
Insert cell
// Checkbox input labelled "Mot" offering six candidate words; "toto" is
// checked initially. The selected values are exposed as `words`, which the
// `transformedArray` cell passes to create_features_words.
viewof words = Inputs.checkbox(["toto", "chat", "chien", "roi", "reine", "data scientist"], {label: "Mot", value: ["toto"]})
Insert cell
Insert cell
// Builds a pipeline for the 'embeddings' task with the
// 'Xenova/bert-base-cased' model id. Elsewhere in this notebook the result is
// called as `bert(word)` and the `.data` property of what it resolves to is read.
bert = await transformers.pipeline('embeddings', 'Xenova/bert-base-cased');
Insert cell
// Dynamically imports the @xenova/transformers bundle from the jsDelivr CDN.
// NOTE(review): the URL carries no version, so the CDN presumably serves
// whatever release is current — consider pinning one so the 'embeddings'
// task and the AutoTokenizer/AutoModel APIs used below stay available.
transformers = import("https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/transformers.min.js")
Insert cell
/**
 * Runs the `bert` pipeline on a single input.
 *
 * @param {string} word - Text handed to the pipeline as-is.
 * @returns {Promise<*>} Resolves to whatever `bert(word)` resolves to; callers
 *   in this notebook read its `.data` property.
 */
async function compute_embedding(word) {
  // Returning the pipeline's promise from an async function resolves to the
  // same value as awaiting it first.
  return bert(word);
}
Insert cell
/**
 * Builds plot-ready rows for one word: one row per element of the embedding's
 * `data`.
 *
 * @param {string} word - Word to embed.
 * @returns {Promise<Array<{dim: number, value: number, word: string}>>}
 *   One `{dim, value, word}` object per element of `data`, where `dim` is the
 *   element's index and `value` is the element itself.
 */
async function create_features_word(word) {
  // compute_embedding is async: without `await`, `features` would be a Promise,
  // `features.data` would be undefined and Array.from would throw.
  const features = await compute_embedding(word);
  return Array.from(features.data, (value, index) => ({
    dim: index,
    value: value,
    word: word,
  }));
}
Insert cell
/**
 * Builds plot-ready rows for several words at once.
 *
 * `bert` is invoked once per word; all invocations are started before any is
 * awaited, and the per-word row lists are concatenated in input order.
 *
 * @param {string[]} words - Words to embed.
 * @returns {Promise<Array<{dim: number, value: number, word: string}>>}
 *   A flat array holding, for every word, one `{dim, value, word}` row per
 *   element of that word's embedding `data`.
 */
async function create_features_words(words) {
  // Rows for a single word: element index -> dim, element -> value.
  const rowsForWord = async (word) => {
    const { data } = await bert(word);
    return Array.from(data, (value, dim) => ({ dim, value, word }));
  };

  const rowsPerWord = await Promise.all(words.map(rowsForWord));
  return rowsPerWord.flat();
}
Insert cell
// Rows for every word currently selected in the `words` checkbox; this is the
// data the Plot.plot cell draws. create_features_words is async, so the cell's
// value is a promise — presumably resolved by the notebook runtime before
// dependent cells run.
transformedArray = create_features_words(words)
Insert cell
Insert cell
// Sample input for the tokenizer/model cell, which reads it as `word`.
word = "chat"
Insert cell
{
let tokenizer = await transformers.AutoTokenizer.from_pretrained('Xenova/bert-base-cased')
let model = await transformers.AutoModel.from_pretrained('Xenova/bert-base-cased')
let text = word
let encoded_input = await tokenizer(text)
let output = await model(encoded_input)
return output
}
Insert cell
/**
 * Euclidean length of a vector: the square root of the sum of its squared
 * components.
 *
 * @param {ArrayLike<number>} vec - Any value with a `reduce` method over
 *   numbers (plain or typed array).
 * @returns {number} The vector's length; 0 for an empty vector.
 */
function calcVectorSize(vec) {
  const addSquare = (total, component) => total + Math.pow(component, 2);
  const sumOfSquares = vec.reduce(addSquare, 0);
  return Math.sqrt(sumOfSquares);
}
Insert cell
/**
 * Cosine similarity of two vectors: their dot product divided by the product
 * of their lengths (as computed by calcVectorSize).
 *
 * The dot product runs over the indices of `vec1`. The result is NaN when
 * either vector has length 0 (division 0/0) or when `vec2` is shorter than
 * `vec1`.
 *
 * @param {ArrayLike<number>} vec1 - First vector (plain or typed array).
 * @param {ArrayLike<number>} vec2 - Second vector, indexed in step with vec1.
 * @returns {number} The cosine of the angle between the vectors.
 */
function cosineSimilarity(vec1, vec2) {
  // Fold the products straight into a Number accumulator. Using `map` first
  // would, for typed-array inputs such as Float32Array, store every product in
  // a typed array of the same element type and round it before summation.
  const dotProduct = vec1.reduce((accum, val, i) => accum + val * vec2[i], 0);
  const vec1Size = calcVectorSize(vec1);
  const vec2Size = calcVectorSize(vec2);

  return dotProduct / (vec1Size * vec2Size);
}
Insert cell
/**
 * Embeds two words and compares the embeddings.
 *
 * The embeddings are computed one after the other (first `word1`, then
 * `word2`), and cosineSimilarity is applied to their `.data`.
 *
 * @param {string} word1 - First word.
 * @param {string} word2 - Second word.
 * @returns {Promise<Array>} `[similarity, embedding1, embedding2]`.
 */
async function compute_similarity(word1, word2) {
  const first = await compute_embedding(word1);
  const second = await compute_embedding(word2);
  return [cosineSimilarity(first.data, second.data), first, second];
}

Insert cell
// Demo: similarity of 'foot' and 'rugby'. The cell value is the
// [similarity, emb_1, emb_2] triple returned by compute_similarity.
compute_similarity('foot', 'rugby')
Insert cell
// Demo: similarity of 'foot' and 'python'. The cell value is the
// [similarity, emb_1, emb_2] triple returned by compute_similarity.
compute_similarity('foot', 'python')
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more