Public
Edited
Apr 6, 2023
1 fork
1 star
Insert cell
Insert cell
viewof OPENAI_API_KEY = Inputs.text({
label: 'OpenAI API Key',
});
Insert cell
htl.html`<textarea>${eglSimplifiedHtmlPages[0].html}`;
Insert cell
eglSimplifiedHtmlPages = (await FileAttachment("EGL Simplified HTML Pages.json").json()).filter((d, i) => d.name === 'eglChooseConfig')
Insert cell
Inputs.textarea({
value: eglManPages[0].man,
})
Insert cell
{
const { total } = costs;
return htl.html`<p>Expected costs: $${(0.002 * total / 1000).toFixed(3)}`;
}
Insert cell
costs = {
const ret = [];
let total = 0;
for (const { name, html } of eglSimplifiedHtmlPages) {
const fragments = SPLIT(html, { maxTokens: 2048 });

let tokens = 0;

for (let i=0, n=fragments.length; i<n; ++i) {
const fragment = fragments[i];

tokens += 2048;

if (i > 0) {
tokens += 512;
tokens += 512;
}
}

total += tokens;

ret.push({ name, tokens });
}

return Object.assign(ret, { total });
}
Insert cell
{
// return;
const z = zip();
for (const { name, man } of eglManPages) {
z.file(`${name}.7`, man);
}

const blob = await z.generateAsync({ type: 'blob' });
return DOM.download(blob, `egl-doc-man.zip`, `Download egl-doc-man.zip`);
}
Insert cell
// Display the `approval` view imported from '@player1537/utilities'.
// eglManPages awaits `approval(message)` before sending each API request;
// presumably this is the control the user confirms with (verify against the
// imported notebook).
viewof approval
Insert cell
eglManPages = {
// await (yield button('Start')).promise;
// return
// const begin = Date.now();

let usage = 0;

const promises = [];
for (let j=0, m=eglSimplifiedHtmlPages.length; j<m; ++j) {
promises.push(SIMPLIFY(j, m));
}

return await Promise.all(promises);

async function SIMPLIFY(j, m) {
const { name, html } = eglSimplifiedHtmlPages[j];
const fragments = SPLIT(html, { maxTokens: 2048 });

let man = [];
let prevInput = null;
let prevOutput = null;
for (let i=0, n=fragments.length; i<n; ++i) {
const fragment = fragments[i];

await approval(`Page "${name}" (${j+1}/${m}): Fragment ${i+1}/${n}: ${usage} tokens (Approx. \$${(0.002*usage/1000).toFixed(3)})`);

// await (yield button(`${name}: ${i+1}/${fragments.length}`)).promise;

const messages = [];
messages.push({ role: 'system', content: `You are a document transforming AI bot. The user will provide you with a snippet of HTML markup from some online documentation. You respond with the converted markup using syntactically correct Linux mandoc format.` });
if (i > 0) {
messages.push({ role: 'user', content: TAIL(prevInput, { maxTokens: 512 }) });
messages.push({ role: 'assistant', content: TAIL(prevOutput, { maxTokens: 512 }) });
messages.push({ role: 'system', content: `Continue where you left off. Convert the following documentation from HTML to mandoc format.` });
}
messages.push({ role: 'user', content: fragment });

const request = new Request('https://api.openai.com/v1/chat/completions', {
method: 'POST',
mode: 'cors',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: 'gpt-3.5-turbo',
messages,
}),
});

let response = await FETCH(request);
usage += response.usage.total_tokens;
// console.log(response);
response = response.choices[0].message.content;

man.push(response);
prevOutput = response;
prevInput = fragment;
}

man = man.join('\n');

return { name, html, man };
}
}
Insert cell
/**
 * Split a document into fragments of at most `maxTokens` tokens each.
 *
 * The document is tokenized, then a window of `maxTokens` tokens is moved over
 * the token list in steps of `maxTokens - overlapTokens`; the text pieces of
 * each window are concatenated into one fragment string.
 *
 * @param {string} document - Text to split.
 * @param {object} [options]
 * @param {?number} [options.maxTokens=null] - Window size in tokens.
 * @param {?number} [options.nonOverlapTokens=null] - Step between windows; when
 *   `maxTokens` is not given it is derived as `nonOverlapTokens + overlapTokens`.
 * @param {number} [options.overlapTokens=0] - Tokens shared by adjacent windows.
 * @param {object} [options.tokenizer] - Object whose `encode(document)` returns
 *   `{ text }`, an array of token strings. Defaults to a new GPT3Tokenizer.
 * @returns {string[]} The fragments in document order (empty for no tokens).
 * @throws {Error} If neither size is given, or both are given inconsistently.
 * @throws {RangeError} If the resulting step between windows is not positive.
 */
function SPLIT(document, {
  maxTokens = null,
  nonOverlapTokens = null,
  overlapTokens = 0,
  tokenizer = new GPT3Tokenizer({ type: 'gpt3' }),
} = {}) {
  if (maxTokens === null && nonOverlapTokens === null) {
    throw new Error('one of [maxTokens | nonOverlapTokens] is required');
  }
  if (maxTokens !== null && nonOverlapTokens !== null && maxTokens !== nonOverlapTokens + overlapTokens) {
    throw new Error('if both maxTokens and nonOverlapTokens are passed, then they need to be consistent');
  }

  const windowSize = maxTokens !== null ? maxTokens : nonOverlapTokens + overlapTokens;
  const stride = windowSize - overlapTokens;

  // A zero or negative stride would never advance the window, so the loop
  // below would run forever; reject it up front.
  if (!(stride > 0)) {
    throw new RangeError(`maxTokens (${windowSize}) must be greater than overlapTokens (${overlapTokens})`);
  }

  const { text } = tokenizer.encode(document);

  const fragments = [];
  for (let start = 0; start < text.length; start += stride) {
    fragments.push(text.slice(start, start + windowSize).join(''));
  }

  return fragments;
}
Insert cell
/**
 * Return the last `maxTokens` tokens of a document as a string.
 *
 * @param {string} document - Text to take the tail of.
 * @param {object} [options]
 * @param {number} options.maxTokens - Maximum number of trailing tokens to keep.
 * @param {object} [options.tokenizer] - Object whose `encode(document)` returns
 *   `{ text }`, an array of token strings. Defaults to a new GPT3Tokenizer.
 * @returns {string} The concatenated trailing tokens; the whole document when
 *   it has fewer than `maxTokens` tokens, and '' when `maxTokens` is 0.
 * @throws {Error} If the selected tail is longer than `maxTokens` (which
 *   happens for a negative `maxTokens`).
 */
function TAIL(document, {
  maxTokens,
  tokenizer = new GPT3Tokenizer({ type: 'gpt3' }),
} = {}) {
  const { text } = tokenizer.encode(document);

  // slice(-0) is slice(0) and would select every token, so a zero-token tail
  // has to be answered explicitly.
  if (maxTokens === 0) return '';

  const tail = text.slice(-maxTokens);
  if (tail.length > maxTokens) throw new Error(`bad length: ${tail.length}`);
  return tail.join('');
}
Insert cell
/**
 * Send a request with a JSON body and return the parsed JSON response, using
 * localStorage as a persistent cache.
 *
 * The cache key is the sha256 of the JSON-serialized { url, method, body } of
 * the request. Headers are not part of the key and are not stored. On a cache
 * hit the stored response is returned without touching the network.
 *
 * @param {...*} args - Arguments forwarded to `new Request(...)`.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} If the response status is not 200.
 */
async function FETCH(...args) {
  const request = new Request(...args);
  // DIGEST consumes the body, so it works on a clone and leaves `request`
  // usable for the real fetch.
  const { key, digest } = await DIGEST(request.clone());

  console.log({ key, digest });

  const cached = localStorage.getItem(key);
  if (cached !== null) {
    const { response } = JSON.parse(cached);
    return response;
  }

  const response = await fetch(request);
  if (response.status !== 200) {
    throw new Error(`response.status = ${response.status}; request.url = ${request.url}`);
  }
  const json = await response.json();

  // Caching is best-effort: a failing write (for example a full store) must
  // not discard a response that has already been fetched.
  try {
    localStorage.setItem(key, JSON.stringify({ key, request: digest, response: json }));
  } catch (err) {
    console.warn(`FETCH: could not cache response for key ${key}`, err);
  }
  return json;

  // Builds the cache identity of a request: its url, method and JSON body, in
  // that property order, plus the hash of their JSON serialization.
  // NOTE(review): assumes sha256 is synchronous and returns a string; confirm
  // against the imported notebook.
  async function DIGEST(req) {
    const digest = {
      url: req.url,
      method: req.method,
      body: await req.json(),
    };

    const key = sha256(JSON.stringify(digest));
    return { key, digest };
  }
}
Insert cell
import {zip, zipreader} from "@fil/jszip"
Insert cell
import { sha256 } from '@critesjosh/cryptographic-hash-functions';
Insert cell
GPT3Tokenizer = (await import("https://cdn.skypack.dev/gpt3-tokenizer@1.1.5/dist-browser/gpt3-tokenizer.js")).default;
Insert cell
import { viewof approval } from '@player1537/utilities';
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more