vector_search = ({
  prompt:
    "similarity search needs to return the cell_id, cell_path of the top matches",
  time: 1727050000000
} &&
  /**
   * Rank every row of `compressed_embeddings` by cosine similarity to the
   * query and resolve the best matches through `get_code`.
   *
   * The query is embedded with `get_vector_embedding`, projected with
   * `embedding_to_pca(…, means, proj)`, and compared against each row's `pca`
   * column. All rows are loaded and scored in memory (brute-force scan).
   *
   * @param {*} query - Passed unchanged to `get_vector_embedding`.
   * @param {Object} [options]
   * @param {number} [options.n=5] - How many top matches to return. Any falsy
   *   value (including 0) falls back to 5.
   * @returns {Promise<Array>} Results of `get_code` for the top `n` entries,
   *   best match first. Each entry handed to `get_code` has the shape
   *   `{ cell_id, cell_path, similarity }`.
   */
  async function vector_search(query, options = {}) {
    const n = options.n || 5;
    const embedding_full = await get_vector_embedding(query);
    const embedding = embedding_to_pca(embedding_full, means, proj);
    const rows = await embedding_db.query(
      `SELECT cell_id, cell_path, pca FROM compressed_embeddings`
    );

    // Euclidean length of a vector.
    const norm = (vec) =>
      Math.sqrt(vec.reduce((acc, val) => acc + val * val, 0));

    // The query vector is the same for every row, so measure it only once.
    const query_norm = norm(embedding);

    function cosine_similarity(stored) {
      const dot = embedding.reduce((acc, val, i) => acc + val * stored[i], 0);
      const sim = dot / (query_norm * norm(stored));
      // A zero-length vector (0 / 0) or a stored vector shorter than the query
      // yields NaN. NaN would make the sort comparator below inconsistent and
      // the resulting order implementation-defined, so score such rows as 0.
      return Number.isFinite(sim) ? sim : 0;
    }

    const similarities = rows.map((row) => ({
      cell_id: row.cell_id,
      cell_path: row.cell_path,
      // Spread copies `row.pca` into a plain array before scoring
      // (presumably it arrives as a typed array or other iterable — confirm).
      similarity: cosine_similarity([...row.pca])
    }));

    // Highest similarity first.
    similarities.sort((a, b) => b.similarity - a.similarity);
    return Promise.all(similarities.slice(0, n).map(get_code));
  })