viewof API_KEY = Inputs.text({ label: 'OpenAI API Key' })
Array.from(openai.complete`
${openai.model('foobar')}
Summarize this passage: ${'this is the passage'.split(' ')}
Summary:`)
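
A note on the cell above: `complete` already returns an array of request objects, one per element of any interpolated array (a cross product over array-valued parts), so the `Array.from` is optional. A minimal sketch, using only names defined in the `openai` cell further down, of inspecting that fan-out without sending anything:

{
  // One request per word of the split passage; nothing is sent here.
  const requests = openai.complete`
    Summarize this passage: ${'this is the passage'.split(' ')}
    Summary:`;
  return requests.map((r) => r.prompt);
}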
[{ 'hello': 'world' }, 'this is a string', ['this', 'is', 'an', 'array']].map((d) => {
  return Array.isArray(d) ? 'array' : typeof d;
})
viewof DOCUMENT = Inputs.textarea({
  label: 'document',
  rows: 24,
  cols: 80,
  value: await FileAttachment("xeaiso.net - blog - a weapon to surpass metal gear.txt").text(),
});
{
  await (yield button('Execute')).promise;

  let keyIdeas;
  let results;

  // Map/fold summarization: MAP extracts key ideas from overlapping
  // chunks of the document, then FOLD merges adjacent idea lists
  // pairwise until a single ranked list remains.
  yield VIEW('initial');
  keyIdeas = await MAP(DOCUMENT);
  yield VIEW('map');
  results = await FOLD(keyIdeas.responses);
  yield VIEW('complete');

  function VIEW(status) {
    console.log({ status, keyIdeas, results });
    return htl.html`
      <details>
        <summary>
          Status: ${status}
          ${results && htl.html.fragment`
            <textarea rows=24 cols=24>${results.responses[results.responses.length-1]}</textarea>
          `/* htl.html.fragment */}
        </summary>
        ${PARTS('key ideas', keyIdeas)}
        ${PARTS('results', results)}
    `/* htl.html */;

    function PARTS(label, { requests=[], responses=[] }={}) {
      const parts = [];
      for (let i=0, n=requests.length; i<n; ++i) {
        const part = htl.html.fragment`
          <textarea rows=24 cols=80>${requests[i].prompt}${responses[i]}</textarea>
        `/* htl.html.fragment */;
        parts.push(part);
      }

      return htl.html`
        <details>
          <summary>${label}</summary>
          ${parts}
      `/* htl.html */;
    }
  }

  async function MAP(document) {
    // One completion request per overlapping token window of the document.
    const requests = openai.complete`
      Using Markdown, extract key ideas from this passage.
      ${openai.split`
        ${openai.maxTokens(2*256)}
        ${openai.overlap(1*256)}
        ${document}
      `/* openai.split */}

      ***

      Key Ideas:
      1.
    `/* openai.complete */;

    let responses = await openai.request(requests);
    responses = responses.map((d) => `1. ${d}`); // restore the "1." the prompt primed
    return { requests, responses };
  }

  async function FOLD(keyIdeas) {
    const requests = [];
    const responses = [];
    let prev = keyIdeas[0];
    for (let i=1, n=keyIdeas.length; i<n; ++i) {
      const curr = keyIdeas[i];

      // Merge the running result with the next chunk's ideas.
      const [request] = openai.complete`
        Rank the top 5 most important ideas that follow.
        ${prev}
        ${curr}

        ***

        Key ideas:
        1.
      `/* openai.complete */;

      let response = await openai.request(request);
      response = `1. ${response}`;
      prev = response;

      requests.push(request);
      responses.push(response);
    }

    return { requests, responses };
  }
}
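
The cell above is a map/fold summarizer: MAP splits the document into overlapping token windows and extracts key ideas per window, and FOLD merges adjacent idea lists left to right until one ranked list remains. A minimal sketch of that fold shape on its own, with a hypothetical `merge` standing in for the ranking prompt:

function fold(chunks, merge) {
  // Sequential left fold: prev = merge(prev, curr) for each remaining chunk.
  let prev = chunks[0];
  for (const curr of chunks.slice(1)) {
    prev = merge(prev, curr);
  }
  return prev;
}

For example, fold(['a', 'b', 'c'], (p, c) => `${p}+${c}`) yields 'a+b+c'.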
{
  await (yield button('Execute')).promise;
  // Build a single combined prompt end-to-end: split the abstract into
  // overlapping token windows, summarize each window, then ask for one
  // summary of the joined window summaries. Only the final prompt is shown.
  return yield htl.html`
    <textarea rows=24 cols=80>
${openai.complete`
    Combine and shorten these summaries:
    ${(await openai.request(openai.complete`
      Summarize this passage:
      ${openai.split`${openai.maxTokens(3*64)}${openai.overlap(32)}
        Many data-intensive scientific analysis techniques require global
        domain traversal, which over the years has been a bottleneck for
        efficient parallelization across distributed-memory architectures.
        Inspired by MapReduce and other simplified parallel programming
        approaches, we have designed DStep, a flexible system that greatly
        simplifies efficient parallelization of domain traversal techniques
        at scale. In order to deliver both simplicity to users as well as
        scalability on HPC platforms, we introduce a novel two-tiered
        communication architecture for managing and exploiting asynchronous
        communication loads. We also integrate our design with advanced
        parallel I/O techniques that operate directly on native simulation
        output. We demonstrate DStep by performing teleconnection analysis
        across ensemble runs of terascale atmospheric CO2 and climate data,
        and we show scalability results on up to 65,536 IBM BlueGene/P cores.
      `.map((d) => `... ${d} ...`)}
      Summary:
    `)).join('\n')}`[0].prompt}`;

  // yield requests;
  // return;

  // await (yield button('Execute')).promise;

  // const responses = [];
  // for (const request of requests) {
  //   const response = await openai.request(request);
  //   responses.push(response);

  //   yield htl.html`
  //     <details>
  //       ${responses.map((response) => htl.html.fragment`
  //         <textarea rows=24 cols=80>${response}
  //       `)}
  //   `; // htl.html
  // } // for
}
{
  await (yield button('Execute')).promise;

  const request = openai.chat`
    ${openai.role('system')}
    You are a helpful AI assistant.
    ${openai.role('user')}
    I want you to summarize this text for me.

    Many data-intensive scientific analysis techniques require global
    domain traversal, which over the years has been a bottleneck for
    efficient parallelization across distributed-memory architectures.
    Inspired by MapReduce and other simplified parallel programming
    approaches, we have designed DStep, a flexible system that greatly
    simplifies efficient parallelization of domain traversal techniques
    at scale. In order to deliver both simplicity to users as well as
    scalability on HPC platforms, we introduce a novel two-tiered
    communication architecture for managing and exploiting asynchronous
    communication loads. We also integrate our design with advanced
    parallel I/O techniques that operate directly on native simulation
    output. We demonstrate DStep by performing teleconnection analysis
    across ensemble runs of terascale atmospheric CO2 and climate data,
    and we show scalability results on up to 65,536 IBM BlueGene/P cores.
  `/* openai.chat */;

  return request;
}
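
The cell above only builds the chat request; a minimal sketch of actually sending it through `request` (this assumes a valid key in the API key input at the top, and it spends tokens):

{
  await (yield button('Execute')).promise;
  // request() POSTs to the chat completions endpoint and returns the
  // assistant's message text; the response is memoized in localStorage.
  return await openai.request(openai.chat`
    ${openai.role('user')}
    Say hello in five words or fewer.`);
}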
openai = {
  const $url = Symbol('url');
  const $model = Symbol('model');
  const $maxTokens = Symbol('maxTokens');
  const $overlap = Symbol('tokenOverlap');
  const $temperature = Symbol('temperature');
  const $role = Symbol('role');
  const $which = Symbol('which');
  const $chat = Symbol('chat');
  const $text = Symbol('text');
  const tokenizer = new GPT3Tokenizer({ type: 'gpt3' });

  const self = {};
  Object.assign(self, {
    // primary interface
    request,
    split,
    complete,
    chat,
    tokenizer,

    // secondary interface: option factories to interpolate inside the
    // tagged templates, e.g. ${openai.maxTokens(512)}
    url: (x) => ({ [$url]: x }),
    model: (x) => ({ [$model]: x }),
    maxTokens: (x) => ({ [$maxTokens]: x }),
    overlap: (x) => ({ [$overlap]: x }),
    temperature: (x) => ({ [$temperature]: x }),
    role: (x) => ({ [$role]: x }),

    // tertiary interface: the raw option symbols themselves
    $url,
    $model,
    $maxTokens,
    $overlap,
    $temperature,
    $role,
  });
  return self;

  async function request(parameters) {
    // An array of requests fans out into parallel requests.
    if (Array.isArray(parameters)) {
      return await Promise.all(parameters.map(request));
    }

    const { [$which]: which } = parameters;
    if (which === $text) {
      const { prompt, model, maxTokens, url, temperature } = parameters;
      // Responses are memoized in localStorage under an md5 hash of the
      // full request, so re-running a cell does not re-hit the API.
      const key = JSON.stringify([prompt, model, maxTokens, url, temperature]);
      const hash = md5(key);
      if (localStorage.getItem(hash) !== null) {
        const { value } = JSON.parse(localStorage.getItem(hash));
        return Promise.resolve(value);
      }
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${API_KEY}`,
        },
        mode: 'cors',
        body: JSON.stringify({
          model,
          max_tokens: maxTokens,
          temperature,
          prompt,
        }),
      });
      const json = await response.json();
      const value = json.choices[0].text;
      localStorage.setItem(hash, JSON.stringify({ key, value }));
      return value;

    } else if (which === $chat) {
      const { messages, model, maxTokens, url } = parameters;
      const key = JSON.stringify([messages.map(({ role, content }) => ([role, content])), model, maxTokens, url]);
      const hash = md5(key);
      if (localStorage.getItem(hash) !== null) {
        const { value } = JSON.parse(localStorage.getItem(hash));
        return Promise.resolve(value);
      }
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${API_KEY}`,
        },
        mode: 'cors',
        body: JSON.stringify({
          model,
          max_tokens: maxTokens,
          messages,
        }),
      });
      const json = await response.json();
      const value = json.choices[0].message.content;
      localStorage.setItem(hash, JSON.stringify({ key, value }));
      return value;
    }
  }

  function chat(strings, ...expressions) {
    strings = NORMALIZE(strings);
    const options = {
      [$url]: 'https://api.openai.com/v1/chat/completions',
      [$maxTokens]: 1024,
      [$role]: 'user',
      [$model]: 'gpt-3.5-turbo',
    };
    const parts = [];
    for (let i=0, n=strings.length+expressions.length; i<n; ++i) {
      const part = [strings, expressions][ (i%2)|0 ][ (i/2)|0 ];
      switch (TYPE(part)) {
        case 'object':
          Object.assign(options, part);
          break;
        case 'array':
          parts.push(part.map((text) => ({ text, ...options })));
          break;
        case 'string':
          parts.push([{ text: part, ...options }]);
          break;
      } // switch
    } // for

    const { [$url]: url } = options;
    const { [$maxTokens]: maxTokens } = options;
    const { [$model]: model } = options;

    // Each text part carries the role that was current when it appeared;
    // consecutive non-empty parts with the same role merge into one message.
    const messages = [];
    for (const combination of d3.cross(...parts)) {
      for (const { text, [$role]: role } of combination) {
        const content = text.trim();
        if (!content) continue;
        const last = messages[messages.length - 1];
        if (last !== undefined && last.role === role) {
          last.content += '\n' + content;
        } else {
          messages.push({ role, content });
        }
      }
    }

    return { [$which]: $chat, url, model, maxTokens, messages };
  }

  function split(strings, ...expressions) {
    strings = NORMALIZE(strings);
    const options = {
      [$maxTokens]: 256,
      [$overlap]: 32,
    };
    const parts = [];
    for (let i=0, n=strings.length+expressions.length; i<n; ++i) {
      const part = [strings, expressions][ (i%2)|0 ][ (i/2)|0 ];
      switch (TYPE(part)) {
        case 'object':
          Object.assign(options, part);
          break;
        case 'array':
          parts.push(part);
          break;
        case 'string':
          parts.push([part]);
          break;
      } // switch
    } // for

    const { [$maxTokens]: maxTokens } = options;
    const { [$overlap]: overlap } = options;

    if (maxTokens - overlap <= 0) {
      throw new Error(`bad config: maxTokens (${maxTokens}) - overlap (${overlap}) <= 0`);
    }

    // Tokenize each prompt and emit overlapping windows of at most
    // maxTokens tokens, advancing by (maxTokens - overlap) each step.
    const ret = [];
    for (let prompt of d3.cross(...parts)) {
      prompt = prompt.join('');
      prompt = prompt.trim();
      const { text } = tokenizer.encode(prompt);
      for (let i=0, n=text.length; i<n; i += maxTokens - overlap) {
        ret.push(text.slice(i, i + maxTokens).join(''));
      }
    }

    return ret;
  }

  function complete(strings, ...expressions) {
    strings = NORMALIZE(strings);
    const options = {
      [$url]: `https://api.openai.com/v1/completions`,
      [$model]: 'text-curie-001',
      [$maxTokens]: 256,
      [$temperature]: 0.2,
    };

    const parts = [];
    for (let i=0, n=strings.length+expressions.length; i<n; ++i) {
      const part = [strings, expressions][ (i%2)|0 ][ (i/2)|0 ];
      switch (TYPE(part)) {
        case 'object':
          Object.assign(options, part);
          break;
        case 'array':
          parts.push(part);
          break;
        case 'string':
          parts.push([part]);
          break;
      } // switch
    } // for

    const { [$url]: url } = options;
    const { [$model]: model } = options;
    const { [$maxTokens]: maxTokens } = options;
    const { [$temperature]: temperature } = options;

    // The cross product over array-valued parts turns one template
    // into a batch of requests, one per combination.
    const ret = [];
    for (let prompt of d3.cross(...parts)) {
      prompt = prompt.join('');
      prompt = prompt.trim();
      ret.push({ [$which]: $text, prompt, model, maxTokens, url, temperature });
    }
    return ret;
  }

  function TYPE(x) {
    return Array.isArray(x) ? 'array' : typeof x;
  }

  // Strip the common leading indentation from every line of a template
  // so prompts can be written indented in source without the indent
  // leaking into the API request.
  function NORMALIZE(strings) {
    let prefix = Infinity;
    const getPrefix = /^( +)[^ ]/;
    for (const string of strings) {
      for (const line of string.split('\n')) {
        const match = getPrefix.exec(line);
        if (match) {
          prefix = Math.min(prefix, match[1].length);
        }
      }
    }

    if (prefix === Infinity) {
      return strings;
    }

    const matchPrefix = new RegExp(String.raw`^ {${prefix}}`);
    const ret = [];
    for (const string of strings) {
      const parts = [];
      for (let line of string.split('\n')) {
        line = line.replace(matchPrefix, '');
        parts.push(line);
      }
      ret.push(parts.join('\n'));
    }

    return ret;
  } // function NORMALIZE
};
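
A usage sketch for the cell above: option factories can be interpolated anywhere in a template to override the defaults, and the resulting request objects can be inspected before anything is sent. The model name here is only an example, not a recommendation:

{
  const [request] = openai.complete`
    ${openai.model('text-davinci-003')}
    ${openai.temperature(0.7)}
    ${openai.maxTokens(64)}
    Write a haiku about notebooks.`;
  return request; // inspect prompt, model, maxTokens, url, temperature
}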
GPT3Tokenizer = {
  const library = await import("https://cdn.skypack.dev/gpt3-tokenizer@1.1.5/dist-browser/gpt3-tokenizer.js");
  return library.default;
}
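
A quick sketch of the tokenizer interface that `split` depends on: `encode` returns parallel arrays of BPE token ids and their text pieces, so token windows can be re-joined into substrings of the original:

{
  const tokenizer = new GPT3Tokenizer({ type: 'gpt3' });
  const { bpe, text } = tokenizer.encode('hello world');
  // bpe: numeric token ids; text: matching string pieces,
  // with text.join('') === 'hello world'
  return { tokens: bpe.length, pieces: text };
}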
import { md5 } from '@elmisback/zero-import-md5-hash';
import { button } from '@player1537/utilities';