Public
Edited
Apr 21
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// OpenAI client used by this notebook, authenticated with OPENAI_API_KEY.
openai = new OpenAI({
  // dangerouslyAllowBrowser is switched on only because this Observable notebook
  // runs locally, and the notebook does not keep your API key in persistent memory.
  // Remove this flag if you run the code anywhere else.
  dangerouslyAllowBrowser: true,
  apiKey: OPENAI_API_KEY
})
Insert cell
// Scorecard SDK client, authenticated with SCORECARD_API_KEY.
scorecard = new ScorecardClient({ apiKey: SCORECARD_API_KEY })
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Create a Testset containing multiple Testcases and add the Testset to a project.
//
// Parameters:
//   projectId   - project the Testset is created in (required).
//   testsetName - name given to the new Testset (required).
//   testcases   - non-empty list of Testcase payloads; each one is passed to
//                 scorecard.testcase.create together with the new Testset's id.
// Returns the object returned by scorecard.testset.create.
// Throws an Error when an argument is missing or `testcases` is empty; rejections
// from the Scorecard client propagate to the caller.
async function createTestset(projectId, testsetName, testcases) {
  if (!projectId) throw new Error("projectId is required");
  if (!testsetName) throw new Error("testsetName is required");
  // Guard against null/undefined as well as an empty list, so callers get the
  // intended message instead of a TypeError from reading `.length`.
  if (!testcases || !testcases.length) {
    throw new Error("Expected to have some testcases");
  }

  const testset = await scorecard.testset.create({
    name: testsetName,
    description: "Testset demo created using the SDK on Observable",
    projectId
  });

  // Since this is in a Promise.all(), we create the testcases in arbitrary order.
  await Promise.all(
    testcases.map((testcase) => scorecard.testcase.create(testset.id, testcase))
  );

  return testset;
}
Insert cell
Insert cell
// This button is just for the Observable notebook.
// In a non-notebook, you can just do: `const testset = createTestset(projectId, "Emojis", emojiTestcases)`
viewof testset = Inputs.button("Create testset in Scorecard", {
value: null,
reduce: () => createTestset(projectId, "Emojis SDK", emojiTestcases),
// Prevent clicking this without a Scorecard API key or a projectId
disabled: !SCORECARD_API_KEY || !projectId
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// The "system under test": sends one system prompt plus one user query to
// GPT-4o mini and returns the text content of the first choice in the reply.
// Throws when either systemPrompt or userQuery is missing/empty.
async function runModel(systemPrompt, userQuery) {
  if (!systemPrompt) {
    throw new Error("systemPrompt is required");
  }
  if (!userQuery) {
    throw new Error("userQuery is required");
  }

  // Conversation sent to the model: the system prompt first, then the user's query.
  const messages = [
    { role: "system", content: systemPrompt },
    { role: "user", content: userQuery }
  ];

  const completion = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    messages
  });

  const { message } = completion.choices[0];
  return message.content;
}
Insert cell
// Executes a single Testcase: asks the model (via runModel) to answer the
// Testcase's userQuery under systemPrompt, then stores the answer as a
// testrecord on the given Scorecard run. Resolves with no value.
async function runTestcase(systemPrompt, run, testcase) {
  // Get the model's answer for this testcase.
  const response = await runModel(systemPrompt, testcase.userQuery);

  // Testrecord payload: the testcase's fields plus the model response.
  const record = {
    testcaseId: testcase.id,
    testsetId: run.testsetId,
    userQuery: testcase.userQuery,
    context: testcase.context,
    ideal: testcase.ideal,
    response
  };

  // Save the record in Scorecard under this run.
  await scorecard.testrecord.create(run.id, record);
}
Insert cell
// Runs all testcases in a given Testset and scores them with the given Scoring config.
//
// Parameters:
//   systemPrompt    - system prompt passed to runTestcase for every testcase (required).
//   testsetId       - id of the Testset whose testcases are executed (required).
//   scoringConfigId - id of the Scoring config forwarded to scorecard.run.create.
// Returns the run object returned by scorecard.run.create.
// Throws an Error when systemPrompt or testsetId is missing; rejections from the
// Scorecard client or from any individual testcase propagate to the caller.
async function runTest(systemPrompt, testsetId, scoringConfigId) {
  // Validate before creating anything remotely: otherwise a bad argument is only
  // detected after the run exists and has been moved to "running_execution".
  if (!systemPrompt) throw new Error("systemPrompt is required");
  if (!testsetId) throw new Error("testsetId is required");

  const run = await scorecard.run.create({
    testsetId,
    scoringConfigId
  });

  // Optional: set run status to executing tests
  await scorecard.run.updateStatus(run.id, {
    status: "running_execution"
  });

  // Run all testcases in parallel
  const testcases = await scorecard.testset.getTestcases(testsetId);
  await Promise.all(
    testcases.results.map((testcase) =>
      runTestcase(systemPrompt, run, testcase)
    )
  );

  // Set run status to waiting for metric scoring
  await scorecard.run.updateStatus(run.id, {
    status: "awaiting_scoring"
  });

  return run;
}
Insert cell
Insert cell
// In a non-notebook, you can just do: `const createdRun = runTest(SYSTEM_PROMPT, testset.id, scoringConfigId)`
viewof createdRun = Inputs.button("Run all tests against system prompt", {
// This function gets called when the button is pressed.
reduce: () => runTest(SYSTEM_PROMPT, testset.id, +scoringConfigId)
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more