viewof API_KEY = Inputs.text({ label: 'OpenAI API Key' })
Array.from(openai.complete`
${openai.model('foobar')}
Summarize this passage: ${'this is the passage'.split(' ')}
Summary:`)
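
A note on the cell above: `complete` already returns an array of request objects, one per element of any interpolated array (a cross product over array-valued parts), so the `Array.from` is optional. A minimal sketch, using only names defined in the `openai` cell further down, of inspecting that fan-out without sending anything:

{
  // One request per word of the split passage; nothing is sent here.
  const requests = openai.complete`
    Summarize this passage: ${'this is the passage'.split(' ')}
    Summary:`;
  return requests.map((r) => r.prompt);
}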
[{ 'hello': 'world' }, 'this is a string', ['this', 'is', 'an', 'array']].map((d) => {
  return Array.isArray(d) ? 'array' : typeof d;
})
viewof DOCUMENT = Inputs.textarea({
  label: 'document',
  rows: 24,
  cols: 80,
  value: await FileAttachment("xeaiso.net - blog - a weapon to surpass metal gear.txt").text(),
});
{
  await (yield button('Execute')).promise;

  let keyIdeas;
  let results;

  // Map/fold summarization: MAP extracts key ideas from overlapping
  // chunks of the document, then FOLD merges adjacent idea lists
  // pairwise until a single ranked list remains.
  yield VIEW('initial');
  keyIdeas = await MAP(DOCUMENT);
  yield VIEW('map');
  results = await FOLD(keyIdeas.responses);
  yield VIEW('complete');

  function VIEW(status) {
    console.log({ status, keyIdeas, results });
    return htl.html`
      <details>
        <summary>
          Status: ${status}
          ${results && htl.html.fragment`
            <textarea rows=24 cols=24>${results.responses[results.responses.length-1]}</textarea>
          `/* htl.html.fragment */}
        </summary>
        ${PARTS('key ideas', keyIdeas)}
        ${PARTS('results', results)}
    `/* htl.html */;

    function PARTS(label, { requests=[], responses=[] }={}) {
      const parts = [];
      for (let i=0, n=requests.length; i<n; ++i) {
        const part = htl.html.fragment`
          <textarea rows=24 cols=80>${requests[i].prompt}${responses[i]}</textarea>
        `/* htl.html.fragment */;
        parts.push(part);
      }

      return htl.html`
        <details>
          <summary>${label}</summary>
          ${parts}
      `/* htl.html */;
    }
  }

  async function MAP(document) {
    // One completion request per overlapping token window of the document.
    const requests = openai.complete`
      Using Markdown, extract key ideas from this passage.
      ${openai.split`
        ${openai.maxTokens(2*256)}
        ${openai.overlap(1*256)}
        ${document}
      `/* openai.split */}

      ***

      Key Ideas:
      1.
    `/* openai.complete */;

    let responses = await openai.request(requests);
    responses = responses.map((d) => `1. ${d}`); // restore the "1." the prompt primed
    return { requests, responses };
  }

  async function FOLD(keyIdeas) {
    const requests = [];
    const responses = [];
    let prev = keyIdeas[0];
    for (let i=1, n=keyIdeas.length; i<n; ++i) {
      const curr = keyIdeas[i];

      // Merge the running result with the next chunk's ideas.
      const [request] = openai.complete`
        Rank the top 5 most important ideas that follow.
        ${prev}
        ${curr}

        ***

        Key ideas:
        1.
      `/* openai.complete */;

      let response = await openai.request(request);
      response = `1. ${response}`;
      prev = response;

      requests.push(request);
      responses.push(response);
    }

    return { requests, responses };
  }
}
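
The cell above is a map/fold summarizer: MAP splits the document into overlapping token windows and extracts key ideas per window, and FOLD merges adjacent idea lists left to right until one ranked list remains. A minimal sketch of that fold shape on its own, with a hypothetical `merge` standing in for the ranking prompt:

function fold(chunks, merge) {
  // Sequential left fold: prev = merge(prev, curr) for each remaining chunk.
  let prev = chunks[0];
  for (const curr of chunks.slice(1)) {
    prev = merge(prev, curr);
  }
  return prev;
}

For example, fold(['a', 'b', 'c'], (p, c) => `${p}+${c}`) yields 'a+b+c'.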
{
  await (yield button('Execute')).promise;
  // Build a single combined prompt end-to-end: split the abstract into
  // overlapping token windows, summarize each window, then ask for one
  // summary of the joined window summaries. Only the final prompt is shown.
  return yield htl.html`
    <textarea rows=24 cols=80>
${openai.complete`
    Combine and shorten these summaries:
    ${(await openai.request(openai.complete`
      Summarize this passage:
      ${openai.split`${openai.maxTokens(3*64)}${openai.overlap(32)}
        Many data-intensive scientific analysis techniques require global
        domain traversal, which over the years has been a bottleneck for
        efficient parallelization across distributed-memory architectures.
        Inspired by MapReduce and other simplified parallel programming
        approaches, we have designed DStep, a flexible system that greatly
        simplifies efficient parallelization of domain traversal techniques
        at scale. In order to deliver both simplicity to users as well as
        scalability on HPC platforms, we introduce a novel two-tiered
        communication architecture for managing and exploiting asynchronous
        communication loads. We also integrate our design with advanced
        parallel I/O techniques that operate directly on native simulation
        output. We demonstrate DStep by performing teleconnection analysis
        across ensemble runs of terascale atmospheric CO2 and climate data,
        and we show scalability results on up to 65,536 IBM BlueGene/P cores.
      `.map((d) => `... ${d} ...`)}
      Summary:
    `)).join('\n')}`[0].prompt}`;

  // yield requests;
  // return;

  // await (yield button('Execute')).promise;

  // const responses = [];
  // for (const request of requests) {
  //   const response = await openai.request(request);
  //   responses.push(response);

  //   yield htl.html`
  //     <details>
  //       ${responses.map((response) => htl.html.fragment`
  //         <textarea rows=24 cols=80>${response}
  //       `)}
  //   `; // htl.html
  // } // for
}
{
  await (yield button('Execute')).promise;

  const request = openai.chat`
    ${openai.role('system')}
    You are a helpful AI assistant.
    ${openai.role('user')}
    I want you to summarize this text for me.

    Many data-intensive scientific analysis techniques require global
    domain traversal, which over the years has been a bottleneck for
    efficient parallelization across distributed-memory architectures.
    Inspired by MapReduce and other simplified parallel programming
    approaches, we have designed DStep, a flexible system that greatly
    simplifies efficient parallelization of domain traversal techniques
    at scale. In order to deliver both simplicity to users as well as
    scalability on HPC platforms, we introduce a novel two-tiered
    communication architecture for managing and exploiting asynchronous
    communication loads. We also integrate our design with advanced
    parallel I/O techniques that operate directly on native simulation
    output. We demonstrate DStep by performing teleconnection analysis
    across ensemble runs of terascale atmospheric CO2 and climate data,
    and we show scalability results on up to 65,536 IBM BlueGene/P cores.
  `/* openai.chat */;

  return request;
}
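
The cell above only builds the chat request; a minimal sketch of actually sending it through `request` (this assumes a valid key in the API key input at the top, and it spends tokens):

{
  await (yield button('Execute')).promise;
  // request() POSTs to the chat completions endpoint and returns the
  // assistant's message text; the response is memoized in localStorage.
  return await openai.request(openai.chat`
    ${openai.role('user')}
    Say hello in five words or fewer.`);
}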
openai = {
  const $url = Symbol('url');
  const $model = Symbol('model');
  const $maxTokens = Symbol('maxTokens');
  const $overlap = Symbol('tokenOverlap');
  const $temperature = Symbol('temperature');
  const $role = Symbol('role');
  const $which = Symbol('which');
  const $chat = Symbol('chat');
  const $text = Symbol('text');
  const tokenizer = new GPT3Tokenizer({ type: 'gpt3' });

  const self = {};
  Object.assign(self, {
    // primary interface
    request,
    split,
    complete,
    chat,
    tokenizer,

    // secondary interface: option factories to interpolate inside the
    // tagged templates, e.g. ${openai.maxTokens(512)}
    url: (x) => ({ [$url]: x }),
    model: (x) => ({ [$model]: x }),
    maxTokens: (x) => ({ [$maxTokens]: x }),
    overlap: (x) => ({ [$overlap]: x }),
    temperature: (x) => ({ [$temperature]: x }),
    role: (x) => ({ [$role]: x }),

    // tertiary interface: the raw option symbols themselves
    $url,
    $model,
    $maxTokens,
    $overlap,
    $temperature,
    $role,
  });
  return self;

  async function request(parameters) {
    // An array of requests fans out into parallel requests.
    if (Array.isArray(parameters)) {
      return await Promise.all(parameters.map(request));
    }

    const { [$which]: which } = parameters;
    if (which === $text) {
      const { prompt, model, maxTokens, url, temperature } = parameters;
      // Responses are memoized in localStorage under an md5 hash of the
      // full request, so re-running a cell does not re-hit the API.
      const key = JSON.stringify([prompt, model, maxTokens, url, temperature]);
      const hash = md5(key);
      if (localStorage.getItem(hash) !== null) {
        const { value } = JSON.parse(localStorage.getItem(hash));
        return Promise.resolve(value);
      }
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${API_KEY}`,
        },
        mode: 'cors',
        body: JSON.stringify({
          model,
          max_tokens: maxTokens,
          temperature,
          prompt,
        }),
      });
      const json = await response.json();
      const value = json.choices[0].text;
      localStorage.setItem(hash, JSON.stringify({ key, value }));
      return value;

    } else if (which === $chat) {
      const { messages, model, maxTokens, url } = parameters;
      const key = JSON.stringify([messages.map(({ role, content }) => ([role, content])), model, maxTokens, url]);
      const hash = md5(key);
      if (localStorage.getItem(hash) !== null) {
        const { value } = JSON.parse(localStorage.getItem(hash));
        return Promise.resolve(value);
      }
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${API_KEY}`,
        },
        mode: 'cors',
        body: JSON.stringify({
          model,
          max_tokens: maxTokens,
          messages,
        }),
      });
      const json = await response.json();
      const value = json.choices[0].message.content;
      localStorage.setItem(hash, JSON.stringify({ key, value }));
      return value;
    }
  }

  function chat(strings, ...expressions) {
    strings = NORMALIZE(strings);
    const options = {
      [$url]: 'https://api.openai.com/v1/chat/completions',
      [$maxTokens]: 1024,
      [$role]: 'user',
      [$model]: 'gpt-3.5-turbo',
    };
    const parts = [];
    for (let i=0, n=strings.length+expressions.length; i<n; ++i) {
      const part = [strings, expressions][ (i%2)|0 ][ (i/2)|0 ];
      switch (TYPE(part)) {
        case 'object':
          Object.assign(options, part);
          break;
        case 'array':
          parts.push(part.map((text) => ({ text, ...options })));
          break;
        case 'string':
          parts.push([{ text: part, ...options }]);
          break;
      } // switch
    } // for

    const { [$url]: url } = options;
    const { [$maxTokens]: maxTokens } = options;
    const { [$model]: model } = options;

    // Each text part carries the role that was current when it appeared;
    // consecutive non-empty parts with the same role merge into one message.
    const messages = [];
    for (const combination of d3.cross(...parts)) {
      for (const { text, [$role]: role } of combination) {
        const content = text.trim();
        if (!content) continue;
        const last = messages[messages.length - 1];
        if (last !== undefined && last.role === role) {
          last.content += '\n' + content;
        } else {
          messages.push({ role, content });
        }
      }
    }

    return { [$which]: $chat, url, model, maxTokens, messages };
  }

  function split(strings, ...expressions) {
    strings = NORMALIZE(strings);
    const options = {
      [$maxTokens]: 256,
      [$overlap]: 32,
    };
    const parts = [];
    for (let i=0, n=strings.length+expressions.length; i<n; ++i) {
      const part = [strings, expressions][ (i%2)|0 ][ (i/2)|0 ];
      switch (TYPE(part)) {
        case 'object':
          Object.assign(options, part);
          break;
        case 'array':
          parts.push(part);
          break;
        case 'string':
          parts.push([part]);
          break;
      } // switch
    } // for

    const { [$maxTokens]: maxTokens } = options;
    const { [$overlap]: overlap } = options;

    if (maxTokens - overlap <= 0) {
      throw new Error(`bad config: maxTokens (${maxTokens}) - overlap (${overlap}) <= 0`);
    }

    // Tokenize each prompt and emit overlapping windows of at most
    // maxTokens tokens, advancing by (maxTokens - overlap) each step.
    const ret = [];
    for (let prompt of d3.cross(...parts)) {
      prompt = prompt.join('');
      prompt = prompt.trim();
      const { text } = tokenizer.encode(prompt);
      for (let i=0, n=text.length; i<n; i += maxTokens - overlap) {
        ret.push(text.slice(i, i + maxTokens).join(''));
      }
    }

    return ret;
  }

  function complete(strings, ...expressions) {
    strings = NORMALIZE(strings);
    const options = {
      [$url]: `https://api.openai.com/v1/completions`,
      [$model]: 'text-curie-001',
      [$maxTokens]: 256,
      [$temperature]: 0.2,
    };

    const parts = [];
    for (let i=0, n=strings.length+expressions.length; i<n; ++i) {
      const part = [strings, expressions][ (i%2)|0 ][ (i/2)|0 ];
      switch (TYPE(part)) {
        case 'object':
          Object.assign(options, part);
          break;
        case 'array':
          parts.push(part);
          break;
        case 'string':
          parts.push([part]);
          break;
      } // switch
    } // for

    const { [$url]: url } = options;
    const { [$model]: model } = options;
    const { [$maxTokens]: maxTokens } = options;
    const { [$temperature]: temperature } = options;

    // The cross product over array-valued parts turns one template
    // into a batch of requests, one per combination.
    const ret = [];
    for (let prompt of d3.cross(...parts)) {
      prompt = prompt.join('');
      prompt = prompt.trim();
      ret.push({ [$which]: $text, prompt, model, maxTokens, url, temperature });
    }
    return ret;
  }

  function TYPE(x) {
    return Array.isArray(x) ? 'array' : typeof x;
  }

  // Strip the common leading indentation from every line of a template
  // so prompts can be written indented in source without the indent
  // leaking into the API request.
  function NORMALIZE(strings) {
    let prefix = Infinity;
    const getPrefix = /^( +)[^ ]/;
    for (const string of strings) {
      for (const line of string.split('\n')) {
        const match = getPrefix.exec(line);
        if (match) {
          prefix = Math.min(prefix, match[1].length);
        }
      }
    }

    if (prefix === Infinity) {
      return strings;
    }

    const matchPrefix = new RegExp(String.raw`^ {${prefix}}`);
    const ret = [];
    for (const string of strings) {
      const parts = [];
      for (let line of string.split('\n')) {
        line = line.replace(matchPrefix, '');
        parts.push(line);
      }
      ret.push(parts.join('\n'));
    }

    return ret;
  } // function NORMALIZE
};
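
A usage sketch for the cell above: option factories can be interpolated anywhere in a template to override the defaults, and the resulting request objects can be inspected before anything is sent. The model name here is only an example, not a recommendation:

{
  const [request] = openai.complete`
    ${openai.model('text-davinci-003')}
    ${openai.temperature(0.7)}
    ${openai.maxTokens(64)}
    Write a haiku about notebooks.`;
  return request; // inspect prompt, model, maxTokens, url, temperature
}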
GPT3Tokenizer = {
  const library = await import("https://cdn.skypack.dev/gpt3-tokenizer@1.1.5/dist-browser/gpt3-tokenizer.js");
  return library.default;
}
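
A quick sketch of the tokenizer interface that `split` depends on: `encode` returns parallel arrays of BPE token ids and their text pieces, so token windows can be re-joined into substrings of the original:

{
  const tokenizer = new GPT3Tokenizer({ type: 'gpt3' });
  const { bpe, text } = tokenizer.encode('hello world');
  // bpe: numeric token ids; text: matching string pieces,
  // with text.join('') === 'hello world'
  return { tokens: bpe.length, pieces: text };
}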
import { md5 } from '@elmisback/zero-import-md5-hash';
import { button } from '@player1537/utilities';