Public
Edited
Nov 28
Importers
1 star
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
load_indexes = {
// Barrier cell: the two bare references below produce no value of their own.
// In an Observable notebook, naming another cell makes this cell wait for it,
// so "Loaded" is only returned after both referenced cells have resolved.
load_compressed_embeddings;
load_pca_proj;
return "Loaded";
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
sources = {
({
prompt:
'list all files in "https://github.com/endpointservices/observable-notebooks"',
time: 1726857989123
});
fetchFromGithub; // Only if asked
const owner = "endpointservices";
const repo = "observable-notebooks";
const branch = "main";

const response = await fetch(
`https://api.github.com/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`
);
const data = await response.json();
const files = data.tree
.filter((item) => item.type === "blob")
.map((item) => item.path)
.filter((item) => item.endsWith(".js"));
return files;
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
parsedContent = {
const filedata = await kb.query(`SELECT content, path FROM files LIMIT 100;`);
await kb.query(`DROP TABLE files;`); // clear memory
return filedata.map(({ content, path }) => ({
parsed: acorn.parse(content, { ecmaVersion: 2022, sourceType: "module" }),
content,
path
}));
}
Insert cell
processedContent = ({
prompt:
"For each element of parsedContent, add a list of top level function declarations, and slice the content to product a string under a key cells.",
time: 1726905252540
} &&
parsedContent.map((item) => ({
cells: item.parsed.body
.filter((node) => node.type === "FunctionDeclaration")
.map((fn, i) => ({
id: i,
name: fn.id.name,
code: item.content.slice(fn.start, fn.end),
fn
})),
export: item.parsed.body
.filter((node) => node.type === "ExportDefaultDeclaration")
.map((fn, i) => ({
code: item.content.slice(fn.start, fn.end),
fn
}))[0],

...item
})))
Insert cell
Insert cell
Insert cell
// Preview cell: runs extract_definitions on the second processedContent entry
// (index 1) and passes the result to highlight for display.
highlight(extract_definitions(processedContent[1]))
Insert cell
linkedContent = processedContent.map((content) => {
  // Pairs each processed file with the definitions extracted from it, and
  // annotates every cell whose function name has a definition with that
  // definition's name (as cell_name) and dependencies.
  const definitions = extract_definitions(content);

  // Build annotated copies rather than writing onto the cell objects owned by
  // processedContent: those objects are the value of another notebook cell, and
  // mutating them in place changes that cell's value behind its back.
  const cells = content.cells.map((cell) => {
    const definition = definitions[cell.name];
    if (!definition) return cell;
    return {
      ...cell,
      cell_name: definition.name,
      dependencies: definition.dependencies
    };
  });

  return {
    definitions,
    ...content,
    cells
  };
})
Insert cell
Insert cell
create_tables = {
({
prompt:
"Now insert into the database a tables called notebooks, and cells which contain the all the information in cells. \n\nNotebooks should contain the path (thats their primary key)\n\nCells should contain a reference to their notebook (foreign key), their id, their cell_name, their code.\n\nCross dependancies between cells should be in a separate table called deps",
time: 1726936221270
});
const kb = await DuckDBClient.of({});
kb.query("SET memory_limit = '14GB';");
kb.query(`
CREATE TABLE IF NOT EXISTS notebooks (
path TEXT PRIMARY KEY
);`);

kb.query(`CREATE TABLE IF NOT EXISTS cells (
id INTEGER,
cell_name TEXT,
code TEXT,
notebook_path TEXT,
FOREIGN KEY (notebook_path) REFERENCES notebooks(path),
PRIMARY KEY (id, notebook_path)
);`);

kb.query(`CREATE TABLE IF NOT EXISTS deps (
cell_id INTEGER,
cell_path TEXT,
depends_on TEXT,
PRIMARY KEY (cell_id, cell_path, depends_on),
FOREIGN KEY (cell_id, cell_path) REFERENCES cells(id, notebook_path)
);`);
return kb;
}
Insert cell
Insert cell
{
// Bare reference: makes this cell run only after insertLinks has resolved.
insertLinks;
// Store the database client returned by create_tables as the value of the
// populated_kb view, then fire an "input" event on that view so cells reading
// populated_kb are notified of the new value.
viewof populated_kb.value = create_tables;
viewof populated_kb.dispatchEvent(new Event("input"));
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// File input labelled "Upload cell file"; it accepts ".zip" files and is marked
// as required. The selected file is passed to importDuckDB by the cell_kb cell.
viewof cell_file = Inputs.file({
label: "Upload cell file",
accept: ".zip",
required: true
})
Insert cell
// Database handle obtained by passing the uploaded cell_file to importDuckDB.
// The embedding cells below call cell_kb.query(...) on it.
cell_kb = importDuckDB(cell_file)
Insert cell
get_vector_embedding = ({
  prompt:
    "create a function call get vector embedding that will call OpenAI and retreive the vector embedding for a peice of text",
  time: 1727006386551
} &&
  /**
   * Requests a "text-embedding-3-small" embedding for `text` from the OpenAI
   * embeddings endpoint, authenticating with OPENAI_API_KEY.
   *
   * @param {string} text - Text to embed; sent as the request's `input`.
   * @returns {Promise<number[]>} The `embedding` of the first item in the response.
   * @throws {Error} When the HTTP response is not ok, or the payload holds no
   *   embedding array.
   */
  async function get_vector_embedding(text) {
    const response = await fetch("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${OPENAI_API_KEY}`
      },
      body: JSON.stringify({
        input: text,
        model: "text-embedding-3-small"
      })
    });

    if (!response.ok) {
      // statusText is frequently empty, so also report the numeric status and
      // whatever body the server sent back to make the failure diagnosable.
      const detail = await response.text().catch(() => "");
      throw new Error(
        `Error fetching embedding: ${response.status} ${response.statusText} ${detail}`.trim()
      );
    }

    const data = await response.json();
    const embedding = data?.data?.[0]?.embedding;
    if (!Array.isArray(embedding)) {
      throw new Error("Error fetching embedding: response contained no embedding");
    }
    return embedding;
  })
Insert cell
create_embeddings_table = ({
prompt:
'create an embeddings table with column "text-embedding-3-small" that is a vec FLOAT[1536] and has a foreign key to cells',
time: 1727006849001
} &&
(await cell_kb.query(`
CREATE TABLE IF NOT EXISTS cell_embeddings (
cell_id INTEGER,
cell_path TEXT,
"text-embedding-3-small" FLOAT[1536],
PRIMARY KEY (cell_id, cell_path),
FOREIGN KEY (cell_id, cell_path) REFERENCES cells(id, notebook_path)
);
`)))
Insert cell
cellsWithoutEmbedding = {
create_embeddings_table;
return await cell_kb.query(`
SELECT c.id, c.cell_name, c.code, c.notebook_path
FROM cells c
LEFT JOIN cell_embeddings ce
ON c.id = ce.cell_id AND c.notebook_path = ce.cell_path
WHERE ce."text-embedding-3-small" IS NULL
`);
}
Insert cell
insert_embeddings = {
({
prompt:
'Now insert an embedding for every cell that does not already have an embedding based on its "code"',
time: 1727007112473
});

for (const cell of cellsWithoutEmbedding) {
const embedding = await get_vector_embedding(cell.code);
await cell_kb.query(`
INSERT INTO cell_embeddings (cell_id, cell_path, "text-embedding-3-small")
VALUES (${cell.id}, '${cell.notebook_path}', [${embedding}])`);
}
}
Insert cell
Insert cell
embedding_db
-- Shows every column of every row in the cells table.
SELECT * FROM cells
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Range input for `cutoff`, starting at 200 and moving in steps of 1 between 1
// and the length of `variance`. `cutoff` sets how many leading entries of each
// pca.U row are kept in `proj` and the width of the "pca" FLOAT[...] column.
// NOTE(review): when `variance` is null/undefined, `variance?.length` is
// undefined and the range has no upper bound — confirm Inputs.range handles that
// acceptably, or supply an explicit fallback maximum.
viewof cutoff = Inputs.range([1, variance?.length], { value: 200, step: 1 })
Insert cell
Insert cell
// Float32Array of means: copied from pca.means when `pca` is truthy, otherwise
// decoded from load_pca_proj.means with base64ToFloat32.
means = pca ? new Float32Array(pca.means) : base64ToFloat32(load_pca_proj.means)
Insert cell
Insert cell
proj = pca
? new Float32Array(pca.U.data.map((row) => [...row.slice(0, cutoff)]).flat())
: base64ToFloat32(load_pca_proj.proj)
Insert cell
create_compressed_table = Promise.all([
embedding_db.query(`
CREATE TABLE IF NOT EXISTS compressed_embeddings (
cell_id INTEGER,
cell_path TEXT,
"pca" FLOAT[${cutoff}],
PRIMARY KEY (cell_id, cell_path),
FOREIGN KEY (cell_id, cell_path) REFERENCES cells(id, notebook_path)
);
`)
])
Insert cell
insert_compressed_embeddings = {
({
prompt:
'Now insert an embedding for every cell that does not already have an embedding based on its "code"',
time: 1727007112473
});
create_compressed_table;
return Promise.all([
...embeddings.map((cell, i) =>
embedding_db.query(`
INSERT INTO compressed_embeddings (cell_id, cell_path, pca)
VALUES (${cell.cell_id}, '${cell.cell_path}', [${embedding_to_pca(
[...cell["text-embedding-3-small"]],
means,
proj
)}])`)
)
]);
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more