Public
Edited
Apr 11, 2023
1 star
Insert cell
Insert cell
viewof TEXT = Inputs.textarea({
rows: 24,
cols: 80,
})
Insert cell
viewof DOWNLOAD_BUTTON = DOM.download(() => {
const ndjson = DATASET.map((sample) => JSON.stringify(sample)).join('\n');
return new Blob([ndjson], { type: 'application/x-ndjson' });
}, "Text Splitting Dataset.ndjson");
Insert cell
// Number of tokens in each sliding window that the DATASET cell decodes into a
// sample's `input`; the token right after the window determines its `output`.
CONTEXT_SIZE = 16
Insert cell
DATASET = {
const tokenizer = GPT2Tokenizer;

const [nl] = tokenizer.encode('\n');

const tokens = tokenizer.encode(TEXT);

const ret = [];
for (let i=0, n=tokens.length; i<n+CONTEXT_SIZE+1; ++i) {
const context = tokens.slice(i, i+CONTEXT_SIZE);

const input = tokenizer.decode(context);
const output = tokens[i+CONTEXT_SIZE] === nl;

ret.push({ input, output });
}
return ret;
}
Insert cell
GPT3Tokenizer = {
const library = await import("https://cdn.skypack.dev/gpt3-tokenizer@1.1.5/dist-browser/gpt3-tokenizer.js");
return library.default;
}
Insert cell
// Module namespace of the tokenizer loaded from deno.land. The DATASET cell
// calls its `encode` and `decode` exports.
GPT2Tokenizer = await import("https://deno.land/x/gpt_2_3_tokenizer@v0.0.2/mod.js")
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more