Public
Edited
Mar 12, 2024
Importers
Insert cell
Insert cell
/**
 * Re-place a delimited annotation on an updated version of a file.
 *
 * Asks the OpenAI chat model (JSON mode) where the annotated snippet now lives
 * in the updated code, then wraps the matching text in `delimiter`.
 *
 * @param {string} codeWithSnippetDelimited - original code with the snippet wrapped in `delimiter`
 * @param {string} updatedCodeWithoutDelimiters - updated code, without any delimiters
 * @param {string} delimiter - marker placed before and after the snippet
 * @returns {Promise<object>} `{gptRetaggingJSON, out}` on success, otherwise
 *   `{error, errorType, ...}` where errorType is 'model', 'JSON parse' or 'snippet matching'.
 */
retagUpdate = async (codeWithSnippetDelimited, updatedCodeWithoutDelimiters, delimiter) => {
  console.log(codeWithSnippetDelimited, updatedCodeWithoutDelimiters, delimiter)
  // const gptOut = await getFirstChoice(await getChatCompletion(
  // 'gpt-4-turbo-preview',
  // [{role: 'user',
  // content: prompt_breakdown9({codeWithSnippetDelimited,
  // updatedCodeWithSnippetDelimited: updatedCodeWithoutDelimiters,
  // delimiter})}]))
  // console.log(gptOut)
  let gptOut;
  try {
    const gptOutCompletion = await openai.chat.completions.create({
      messages: [
        {
          role: "system",
          content: "You are a helpful assistant designed to output JSON.",
        },
        { role: "user", content: prompt_breakdown11({codeWithSnippetDelimited,
          updatedCodeWithSnippetDelimited: updatedCodeWithoutDelimiters,
          delimiter })}
      ],
      model: "gpt-4-0125-preview",
      response_format: { type: "json_object" },
    });
    console.log(gptOutCompletion)

    gptOut = gptOutCompletion.choices[0].message.content
    console.log(gptOut)
  } catch (e) {
    return {error: e, errorType: 'model'}
  }
  let gptRetaggingJSON;
  try {
    gptRetaggingJSON = JSON.parse(gptOut)
    console.log(gptRetaggingJSON)
  } catch (e) {
    // (This should never happen based on the OpenAI documentation.)
    return {error: e, errorType: 'JSON parse', gptOut}
  }
  // console.log(completion.choices[0].message.content);
  /* Unhandled issues:
   * the response may be incorrect; could check across several tries to mitigate
   * the response may be correct but there may be multiple correct responses; disambiguation needed
   * the response may have an unreadable format, leading to failure in the next part
   */
  // const gptRetaggingJSONString = (await askMX([
  // { role: 'system',
  // content: 'You are a text parser designed to output JSON.'},
  // { role: 'user',
  // content: retagPromptGPT(gptOut)}
  // ])).trim() // sometimes Mixtral puts a space in front of the response...
  // console.log(gptRetaggingJSONString)
  // const gptRetaggingJSON = JSON.parse(gptRetaggingJSONString)
  /* Unhandled issues:
   * the response may be incorrect given the input
   * failure to parse
   */
  const computeUpdatedCodeWithSnippetRetagged = ({code, snippet, lineStart, lineEnd, nthOccurrence, delimiterStart, delimiterEnd}) => {
    // Note lineStart and lineEnd are 1-indexed.
    /* We expand the search by one line if it fails on the identified segment to handle off-by-one issues. */
    /* NOTE expanded search was introduced after the initial evaluation. */
    /* Unhandled issues:
     * any non-whitespace typos in the output (even e.g. missing comments) will cause a failure to match
     * potentially allowing the model to place the delimiter interactively
       would guarantee placement in the "intended" location,
       but this is slow
     */
    const lines = code.split('\n')
    // The model may answer with numeric strings; coerce so `lastLine + 1` is arithmetic, not concatenation.
    const firstLine = Number(lineStart)
    const lastLine = Number(lineEnd)
    if (!Number.isInteger(firstLine) || !Number.isInteger(lastLine)) {
      throw new Error(`Model returned a non-integer line range: ${JSON.stringify([lineStart, lineEnd])}`)
    }
    // Search lines [from, to] (1-indexed, inclusive) and remember the character offset of the section.
    const searchSection = (from, to) => {
      const sectionString = lines.slice(from - 1, to).join('\n')
      const lenUpToSection = lines.slice(0, from - 1).map(s => s + '\n').join('').length
      return {lenUpToSection, snippetIdxInSection: findStartAndEndNormalized(sectionString, snippet, nthOccurrence)}
    }
    // Line numbers are 1-indexed, so never go below 1: a 0 here would turn into slice(-1, ...).
    let found = searchSection(Math.max(1, firstLine), lastLine)
    if (found.snippetIdxInSection.start === -1) {
      found = searchSection(Math.max(1, firstLine - 1), Math.min(lastLine + 1, lines.length))
    }
    if (found.snippetIdxInSection.start === -1) {
      // Fail loudly instead of inserting delimiters at a meaningless offset.
      throw new Error(`Snippet not found in lines ${firstLine}-${lastLine} (or the one-line expansion) of the updated code`)
    }
    const {lenUpToSection, snippetIdxInSection} = found
    const leftIdx = lenUpToSection + snippetIdxInSection.start
    const rightIdx = leftIdx + snippetIdxInSection.end - snippetIdxInSection.start
    return code.slice(0, leftIdx) + delimiterStart + code.slice(leftIdx, rightIdx) + delimiterEnd + code.slice(rightIdx, code.length)
  }
  try {
    // Keys 1-4 of the model's JSON are read as snippet text, start line, end line and occurrence number.
    const out = computeUpdatedCodeWithSnippetRetagged({
      code:updatedCodeWithoutDelimiters,
      snippet:gptRetaggingJSON[1],
      lineStart:gptRetaggingJSON[2],
      lineEnd:gptRetaggingJSON[3],
      nthOccurrence:gptRetaggingJSON[4],
      delimiterStart:delimiter,
      delimiterEnd:delimiter})
    console.log(out)
    return {gptRetaggingJSON, out}
  } catch (e) {
    return {error: e, errorType: 'snippet matching', gptOut, gptRetaggingJSON}
  }
}
Insert cell
import {train, showUpdateDiff, copyUpdateParts, removeFirstAndLastLines} from "4ac2191d97f52603"
Insert cell
import {openai, getChatCompletion, getFirstChoice} from "9f772e010064bd3e"
Insert cell
Insert cell
// Kill switch for the Mixtral helper: while this is truthy, askMX returns undefined without sending a request.
disableMxRequests = false
Insert cell
/**
 * Send a chat request to Mixtral-8x7B-Instruct through DeepInfra's OpenAI-compatible endpoint.
 *
 * @param {Array<{role: string, content: string}>} messages - chat messages to send
 * @param {number} [max_tokens=1000] - completion token limit forwarded to the API
 * @returns {Promise<string|undefined>} content of the first choice, or undefined
 *   when requests are disabled via `disableMxRequests`
 * @throws {Error} when the HTTP request fails or the response has no completion choice
 */
async function askMX (messages, max_tokens=1000) {
  if (disableMxRequests) { return undefined }
  const response = await fetch('https://api.deepinfra.com/v1/openai/chat/completions', {
    method: 'POST',
    body: JSON.stringify({
      model: "mistralai/Mixtral-8x7B-Instruct-v0.1",
      messages,
      max_tokens,
    }),
    headers: {
      "Content-Type": "application/json",
      authorization: `Bearer ${DEEPINFRA_KEY}`,
    }
  });
  if (!response.ok) {
    // Surface the HTTP failure (bad key, rate limit, ...) instead of a TypeError on `data.choices`.
    const detail = await response.text().catch(() => '');
    throw new Error(`DeepInfra request failed: ${response.status} ${response.statusText}${detail ? ` - ${detail}` : ''}`);
  }
  const data = await response.json();
  const firstChoice = data?.choices?.[0];
  if (firstChoice?.message === undefined) {
    throw new Error(`DeepInfra response had no completion choice: ${JSON.stringify(data)}`);
  }
  return firstChoice.message.content
}
Insert cell
import {secret} from "@tmcw/secret"
Insert cell
// Input view (from @tmcw/secret) holding the DeepInfra API key; askMX sends its value as the Bearer token.
viewof DEEPINFRA_KEY = secret("Save deepinfra API key", {description: "This is an deepinfra API token whose value is only available to your notebooks.", submit: "Save deepinfra API key"})
Insert cell
// Builds the 8-step free-text retagging prompt.
// Expects {codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}; both files are shown
// with 1-based line numbers and the updated file has every delimiter stripped.
prompt_breakdown3 = ({codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}) => {
  const withLineNumbers = text => text.split('\n').map((line, idx) => `${idx + 1}:${line}`).join('\n')
  const numberedInput = withLineNumbers(codeWithSnippetDelimited)
  const numberedUpdated = withLineNumbers(updatedCodeWithSnippetDelimited.replaceAll(delimiter, ''))
  return `Complete the following 8 tasks in order. Do not look ahead as you complete them.

Consider the following file:

<INPUT>
${numberedInput}
</INPUT>

1) A specific segment of code has been marked with "${delimiter}". The segment is ONLY THE TEXT BETWEEN THE "${delimiter}" marks. Print the text between the "${delimiter}" marks only.

For example, in this code, the specific segment is the value 42 returned by the foo() function ONLY. It is not the return statement, or the foo() function.

def foo():
return ${delimiter}42${delimiter}

For that code, a correct response would be "42".

2) Describe the segment.

Next, consider the following updated file:

<UPDATED>
${numberedUpdated}
</UPDATED>

Something like the same segment appears in the updated file. We will now locate it.

3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

6) Print the part of (5) that should be highlighted without any context or "${delimiter}" characters. Anything that was inside the "${delimiter}" should still be present, and anything that was outside should be absent.

7) Print the line number in UPDATED that (6) starts on.

8) Print the line number in UPDATED that (6) ends on.

Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.`
}

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
// Builds the 11-step retagging prompt with a self-critique step.
// `t` is {codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}.
// The segment is the text between the first and last delimiter; the delimiter may be
// any length (the offset past the opening mark is delimiter.length, not a fixed 1).
prompt_breakdown4 = t => {
  const code = t.codeWithSnippetDelimited
  const delimiter = t.delimiter
  const segmentStart = code.indexOf(delimiter) + delimiter.length
  const segmentEnd = code.lastIndexOf(delimiter)
  const segment = code.slice(segmentStart, segmentEnd)
  const firstChar = code.slice(segmentStart, segmentStart + 1)
  const lastChar = code.slice(segmentEnd - 1, segmentEnd)
  // Prefix every line with its 1-based line number so the model can cite positions.
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  return `Complete the following 11 tasks in order, writing in pen. Do not look ahead as you complete them.

Consider the following file:

<INPUT>
${numberLines(code)}
</INPUT>

A specific segment of code has been marked with "${delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${delimiter}" marks:

${segment}

The first character of this segment is "${firstChar}" and the last character is "${lastChar}".

For example, in this code, the specific segment is the value 42 returned by the foo() function ONLY. It is not the return statement, or the foo() function.

def foo():
return ${delimiter}42${delimiter}

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

1) Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

2) State the line number in UPDATED that (1) starts on.

3) State the line number in UPDATED that (1) ends on.

4) Describe what should be marked in the updated code. Think through this question step-by-step. What is the first character that should be marked? What is the last character that should be marked? Be specific and match the original specific segment as carefully as possible, down to the starting and stopping character.

5) Print the text that should be marked in the updated code. Do not include line numbers.

6) Print the part of (5) that should be highlighted without any context or "${delimiter}" characters. Anything that was inside the "${delimiter}" should still be present, and anything that was outside should be absent.

7) Review your answer to (4) and look again at the INPUT. Criticize your choices. Has anything been excluded that should have been included? Has anything been included that should have been excluded? You must write at least one valid criticism.

8) Review your answers to (5) and (6), and look at your response to (7). Pick a new most correct answer for (6), adjusting if needed.

9) Print the line number in UPDATED that (8) starts on.

10) Print the line number in UPDATED that (8) ends on.

11) Report your confidence in your answer as a percentage.

Now respond with ALL of the answers to 1-11 in a SINGLE MESSAGE. The single message must contain ALL of 1-11.`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
// Builds the retagging prompt that also shows the same-line text excluded before and after the segment.
// `t` is {codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}.
// The segment is the text between the first and last delimiter; the delimiter may be any length.
prompt_breakdown5 = t => {
  const code = t.codeWithSnippetDelimited
  const delimiter = t.delimiter
  const firstMark = code.indexOf(delimiter)
  const lastMark = code.lastIndexOf(delimiter)
  const segmentStart = firstMark + delimiter.length
  const segment = code.slice(segmentStart, lastMark)
  // Text on the opening mark's line that precedes the mark.
  const excludedPrefix = code.slice(code.slice(0, firstMark).lastIndexOf('\n') + 1, firstMark)
  // Text on the closing mark's line that follows the mark (up to the newline or end of file).
  const suffixStart = lastMark + delimiter.length
  const nextNewline = code.indexOf('\n', suffixStart)
  const excludedSuffix = code.slice(suffixStart, nextNewline === -1 ? code.length : nextNewline)
  const firstChar = code.slice(segmentStart, segmentStart + 1)
  const lastChar = code.slice(lastMark - 1, lastMark)
  // Prefix every line with its 1-based line number so the model can cite positions.
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  return `Consider the following file:

<INPUT>
${numberLines(code)}
</INPUT>

A specific segment of code has been marked with "${delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${delimiter}" marks:

<SEGMENT>
${segment}
</SEGMENT>

<EXCLUDED_PREFIX>
${excludedPrefix}
</EXCLUDED_PREFIX>

<EXCLUDED_SUFFIX>
${excludedSuffix}
</EXCLUDED_SUFFIX>

Note NOTHING OUTSIDE OF THE "${delimiter}" IS INCLUDED. The first character of this segment is "${firstChar}" and the last character is "${lastChar}".

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice. Remember to be detailed about the start and stop of the segment. If the segment has been updated, it may need to expand or shrink. BE CAREFUL TO INCLUDE NOTHING EXTRA. Then, provide the following numbered answers:

1) Print ONLY the text of the updated specific segment. You must print all of the text here.

2) State ONLY the line number in UPDATED that (1) starts on.

3) State ONLY the line number in UPDATED that (1) ends on.

4) Put yourself in another programmer's shoes. Think carefully about whether someone else might provide a different answer to this problem. If so, state it here.`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
// Builds the three-answer retagging prompt that shows the same-line text excluded before and after the segment.
// `t` is {codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}.
// The segment is the text between the first and last delimiter; the delimiter may be any length.
prompt_breakdown6 = t => {
  const code = t.codeWithSnippetDelimited
  const delimiter = t.delimiter
  const firstMark = code.indexOf(delimiter)
  const lastMark = code.lastIndexOf(delimiter)
  const segmentStart = firstMark + delimiter.length
  const segment = code.slice(segmentStart, lastMark)
  // Text on the opening mark's line that precedes the mark.
  const excludedPrefix = code.slice(code.slice(0, firstMark).lastIndexOf('\n') + 1, firstMark)
  // Text on the closing mark's line that follows the mark (up to the newline or end of file).
  const suffixStart = lastMark + delimiter.length
  const nextNewline = code.indexOf('\n', suffixStart)
  const excludedSuffix = code.slice(suffixStart, nextNewline === -1 ? code.length : nextNewline)
  const firstChar = code.slice(segmentStart, segmentStart + 1)
  const lastChar = code.slice(lastMark - 1, lastMark)
  // Prefix every line with its 1-based line number so the model can cite positions.
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  return `Consider the following file:

<INPUT>
${numberLines(code)}
</INPUT>

A specific segment of code has been marked with "${delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${delimiter}" marks:

<SEGMENT>
${segment}
</SEGMENT>

<EXCLUDED_PREFIX>
${excludedPrefix}
</EXCLUDED_PREFIX>

<EXCLUDED_SUFFIX>
${excludedSuffix}
</EXCLUDED_SUFFIX>

Note NOTHING OUTSIDE OF THE "${delimiter}" IS INCLUDED. The first character of this segment is "${firstChar}" and the last character is "${lastChar}".

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice. Remember to be detailed about the start and stop of the segment. If the segment has been updated, it may need to expand or shrink. BE CAREFUL TO INCLUDE NOTHING EXTRA. Then, provide the following numbered answers:

1) Print ONLY the text of the updated specific segment. You must print all of the text here.

2) State ONLY the line number in UPDATED that (1) starts on.

3) State ONLY the line number in UPDATED that (1) ends on.`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
// Builds the three-answer free-text retagging prompt (segment only, no excluded prefix/suffix).
// `t` is {codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}.
// The segment is the text between the first and last delimiter; the delimiter may be any length.
prompt_breakdown7 = t => {
  const code = t.codeWithSnippetDelimited
  const delimiter = t.delimiter
  const segment = code.slice(code.indexOf(delimiter) + delimiter.length, code.lastIndexOf(delimiter))
  // Prefix every line with its 1-based line number so the model can cite positions.
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  return `Consider the following file:

<INPUT>
${numberLines(code)}
</INPUT>

A specific segment of code has been marked with "${delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${delimiter}" marks:

<SEGMENT>
${segment}
</SEGMENT>

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice. Remember to be detailed about the start and stop of the segment. If the segment has been updated, it may need to expand or shrink. BE CAREFUL TO INCLUDE NOTHING EXTRA. Then, provide the following numbered answers:

1) Print ONLY the text of the updated specific segment. You must print all of the text here.

2) State ONLY the line number in UPDATED that (1) starts on.

3) State ONLY the line number in UPDATED that (1) ends on.`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
/* Problems: leads to markdown output (of the JSON object), increases likelihood of skipping the discussion section when it may be needed.
*/
// Builds the retagging prompt that asks for a JSON object with keys 1-3 (segment text, start line, end line).
// `t` is {codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}.
// The segment is the text between the first and last delimiter; the delimiter may be any length.
prompt_breakdown8 = t => {
  const code = t.codeWithSnippetDelimited
  const delimiter = t.delimiter
  const segment = code.slice(code.indexOf(delimiter) + delimiter.length, code.lastIndexOf(delimiter))
  // Prefix every line with its 1-based line number so the model can cite positions.
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  return `Consider the following file:

<INPUT>
${numberLines(code)}
</INPUT>

A specific segment of code has been marked with "${delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${delimiter}" marks:

<SEGMENT>
${segment}
</SEGMENT>

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice. Remember to be detailed about the start and stop of the segment. If the segment has been updated, it may need to expand or shrink. BE CAREFUL TO INCLUDE NOTHING EXTRA. Then, provide the following numbered answers as a JSON object:

1) Print ONLY the text of the updated specific segment. You must print all of the text here.

2) State ONLY the line number in UPDATED that (1) starts on.

3) State ONLY the line number in UPDATED that (1) ends on.

The object must look like: {1: <code>, 2: <number>, 3: <number>}

The answer to 1 should be a code string only, without markdown formatting or extra notes.`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
// Builds the four-answer free-text retagging prompt (segment text, start line, end line, occurrence number).
// `t` is {codeWithSnippetDelimited, updatedCodeWithSnippetDelimited, delimiter}.
// The segment is the text between the first and last delimiter; the delimiter may be any length.
prompt_breakdown9 = t => {
  const code = t.codeWithSnippetDelimited
  const delimiter = t.delimiter
  const segment = code.slice(code.indexOf(delimiter) + delimiter.length, code.lastIndexOf(delimiter))
  // Prefix every line with its 1-based line number so the model can cite positions.
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  return `Consider the following file:

<INPUT>
${numberLines(code)}
</INPUT>

A specific segment of code has been marked with "${delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${delimiter}" marks:

<SEGMENT>
${segment}
</SEGMENT>

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice. Remember to be detailed about the start and stop of the segment. If the segment has been updated, it may need to expand or shrink. BE CAREFUL TO INCLUDE NOTHING EXTRA. Then, provide the following numbered answers:

1) Print ONLY the text of the updated specific segment. You must print all of the text here.

2) State ONLY the line number in UPDATED that (1) starts on.

3) State ONLY the line number in UPDATED that (1) ends on.

4) (1) may occur multiple times in the section given by [(2),(3)]. Which number occurrence, as ONLY a 1-indexed number, is (1)?`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
/**
 * Builds the retagging prompt (variant 10, with a free-form "notes" field in
 * the requested JSON object).
 *
 * @param {{codeWithSnippetDelimited: string, updatedCodeWithSnippetDelimited: string, delimiter: string}} t
 *   Example whose original code has the segment wrapped in `delimiter`; any
 *   delimiters in the updated code are stripped before it is shown.
 * @returns {string} The prompt text, with both files line-numbered from 1.
 */
prompt_breakdown10 = t => {
  // Prefix every line with its 1-indexed line number ("1:...", "2:...").
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  const source = t.codeWithSnippetDelimited
  // Skip the WHOLE opening delimiter: a fixed "+ 1" only works for
  // single-character delimiters and would leak delimiter text into SEGMENT.
  const segment = source.slice(source.indexOf(t.delimiter) + t.delimiter.length, source.lastIndexOf(t.delimiter))
  return `Consider the following file:

<INPUT>
${numberLines(source)}
</INPUT>

A specific segment of code has been marked with "${t.delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${t.delimiter}" marks:

<SEGMENT>
${segment}
</SEGMENT>

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice. Remember to be detailed about the start and stop of the segment. If the segment has been updated, it may need to expand or shrink. BE CAREFUL TO INCLUDE NOTHING EXTRA. Then, provide the following numbered answers as a JSON object:

1) Print ONLY the text of the updated specific segment. You must print all of the text here.

2) State ONLY the line number in UPDATED that (1) starts on.

3) State ONLY the line number in UPDATED that (1) ends on.

4) (1) may occur multiple times in the section given by [(2),(3)]. Which number occurrence, as ONLY a 1-indexed number, is (1)?

The object must look like: {notes: <string>, 1: <code>, 2: <number>, 3: <number>, 4: <number>}

Use "notes" for your unstructured thoughts about the problem. The answer to 1 should be a code string only, without markdown formatting or extra notes.`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
/**
 * Builds the retagging prompt (variant 11): asks the model to locate, in the
 * updated file, the segment that was delimited in the original file, and to
 * answer as a JSON object with keys 1-4.
 *
 * @param {{codeWithSnippetDelimited: string, updatedCodeWithSnippetDelimited: string, delimiter: string}} t
 *   Example whose original code has the segment wrapped in `delimiter`; any
 *   delimiters in the updated code are stripped before it is shown.
 * @returns {string} The prompt text, with both files line-numbered from 1.
 */
prompt_breakdown11 = t => {
  // Prefix every line with its 1-indexed line number ("1:...", "2:...").
  const numberLines = text => text.split('\n').map((l, i) => i+1 + ':' + l).join('\n')
  const source = t.codeWithSnippetDelimited
  // Skip the WHOLE opening delimiter: a fixed "+ 1" only works for
  // single-character delimiters and would leak delimiter text into SEGMENT.
  const segment = source.slice(source.indexOf(t.delimiter) + t.delimiter.length, source.lastIndexOf(t.delimiter))
  return `Consider the following file:

<INPUT>
${numberLines(source)}
</INPUT>

A specific segment of code has been marked with "${t.delimiter}". The segment refers to ONLY THE TEXT BETWEEN THE "${t.delimiter}" marks:

<SEGMENT>
${segment}
</SEGMENT>

Next, consider the following updated file:

<UPDATED>
${numberLines(t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, ''))}
</UPDATED>

You are responsible for placing an identical annotation on this updated file. It is extremely important that you place the annotation in the correct place. Important metadata is attached to this segment.

Describe possible sections the specific segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice. Remember to be detailed about the start and stop of the segment. If the segment has been updated, it may need to expand or shrink. BE CAREFUL TO INCLUDE NOTHING EXTRA. Then, provide the following numbered answers as a JSON object:

1) Print ONLY the text of the updated specific segment. You must print all of the text here.

2) State ONLY the line number in UPDATED that (1) starts on.

3) State ONLY the line number in UPDATED that (1) ends on.

4) (1) may occur multiple times in the section given by [(2),(3)]. Which number occurrence, as ONLY a 1-indexed number, is (1)?

The object must look like: {1: <code>, 2: <number>, 3: <number>, 4: <number>}

The answer to 1 should be a code string only, without markdown formatting or extra notes.`
}
// 3) Describe possible sections the segment could be said to be located in. It is possible the segment has not changed, or that it has been refactored. Pick the most correct choice.

// 4) Fully reprint the updated code of the chosen section. Do not add any additional marks yet.

// 5) Describe what should be marked in the updated code. Print the part of (4) that should be marked in the updated code. Make sure the marking is as close as possible to the original intent. If you added marks in 4, copy those here.

// 6) Print the part of (5) that should be highlighted without any context or "${t.delimiter}" characters. Anything that was inside the "${t.delimiter}" should still be present, and anything that was outside should be absent.

// 7) Print the line number in UPDATED that (6) starts on.

// 8) Print the line number in UPDATED that (6) ends on.

// Now respond with ALL of the answers to 1-8 in a SINGLE MESSAGE. The single message must contain ALL of 1-8.

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
md`


Rarely works for fixing duplication:
4) The string (1) may occur multiple times in the section of UPDATED given by [(2),(3)], especially if [(2),(3)] is a single line. If someone searched for that string in this section, would they get the wrong occurrence?

5) Which number occurrence, as ONLY a 1-indexed number, is (1)?


`
Insert cell
copy(prompt_breakdown11(train[3]))
Insert cell
copy(prompt_breakdown11(t))
Insert cell
md`\`\`\`
${prompt_breakdown11(t)}
\`\`\``
Insert cell
copy(prompt_breakdown3(t))
Insert cell
t = train[3]
Insert cell
diff(updatedCodeWithSnippetRetagged2, t.updatedCodeWithSnippetDelimited)
Insert cell
diffTxt('ab', 'abc')
Insert cell
import {jsdiff} from "@jobleonard/diff"
Insert cell
/**
 * Produces a two-file patch between `left` and `right` via
 * jsdiff.createTwoFilesPatch.
 *
 * @param {*} left  Old text; its `name` property (if truthy) is used as the
 *   old file name, otherwise "left". The value is string-coerced for diffing.
 * @param {*} right New text; named the same way, defaulting to "right".
 * @param {{context?: number}} [options] `context` is forwarded to jsdiff as
 *   the `context` option; defaults to 2 when omitted.
 * @returns {*} Whatever jsdiff.createTwoFilesPatch returns.
 */
diffTxt = (left, right, options={}) => jsdiff.createTwoFilesPatch(
  left.name || "left",
  right.name || "right",
  "" + left,
  "" + right,
  undefined,
  undefined,
  // `??` instead of `||`: an explicit `context: 0` must not be replaced by the default.
  { context: options.context ?? 2 }
)
Insert cell
// updatedCodeWithSnippetRetagged2 = retagUpdate(t.codeWithSnippetDelimited, t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, ''), t.delimiter)
Insert cell
string = `{
"1": "line in sales\\_file",
"2": 6,
"3": 6,
"4": 1
}`
Insert cell
JSON.parse(string)
Insert cell
// Prompt asking a model to judge whether a student's placement of the LEFT
// delimiter is correct. Only `t.delimiter` is read; the "student" file shown
// is a fixed shopping-cart sample baked into the template.
retagging_interactive = t => {
  const mark = t.delimiter
  return `A student is trying to tag the new file. They have added the left "${mark}" as follows:

<UPDATED_WITH_LEFT>
const cart = [];

function addItem(item) {
cart.push(item);
}

function removeItem(itemName) {
const index = cart.findIndex(item => item.name === itemName);
if (index !== -1) {
cart.splice(index, 1);
}
}

function calculateTotalPrice() {
let totalPrice = 0;
for (let item of cart) {
totalPrice += item.price * item.quantity; // Updated to multiply by the quantity
}
return totalPrice;
}

function viewCart() {
for (let item of cart) {
console.log(\`Item: \${item.name}, Price: \${item.price}, Quantity: \${item.quantity}\`); // Updated to display the quantity
}${mark}
}

// Test the functionality
const item1 = { name: 'Apple', price: 0.5, quantity: 2 }; // Added quantity property
const item2 = { name: 'Banana', price: 0.25, quantity: 3 }; // Added quantity property

addItem(item1);
addItem(item2);
viewCart(); // Output: Item: Apple, Price: 0.5, Quantity: 2
// Item: Banana, Price: 0.25, Quantity: 3

removeItem('Apple');
viewCart(); // Output: Item: Banana, Price: 0.25, Quantity: 3

console.log(\`Total Price: \${calculateTotalPrice()}\`); // Output: Total Price: 0.75
</UPDATED_WITH_LEFT>

Is this position correct? If so, respond with CORRECT. If not, state that the correct position for the left "${mark}" is BEFORE/AFTER the position the student selected. You must select one of these options. Do not reprint any code.`
}
Insert cell
viewof GEMINI_KEY = secret("Save Gemini API key", {description: "This is a Gemini API token whose value is only available to your notebooks.", submit: "Save Gemini API key"})
Insert cell
// prompt_breakdown5(t)
Insert cell
//getFirstChoice(await getChatCompletion('gpt-4-turbo-preview', [{role: 'user', content: prompt_breakdown5(t)}]))
Insert cell
{
// Notebook block cell: builds the prompt with prompt_breakdown5(t), sends it
// through model.generateContent, and yields the response text as the cell value.
// NOTE(review): `model` is not defined in this cell — the line that would
// create it from `genai` is commented out below; presumably it is defined in
// another cell. Confirm before running.
//const model = genai.getGenerativeModel({ model: "gemini-pro" });

const prompt = prompt_breakdown5(t)//"How are you today?";
// const image = {
// inlineData: {
// data: base64EncodedImage /* see JavaScript quickstart for details */,
// mimeType: "image/png",
// },
// };
const result = await model.generateContent(prompt);
// console.log(result.response.text());
return result.response.text()
}
Insert cell
genai = new (await import('https://unpkg.com/@google/generative-ai@0.2.1/dist/index.mjs?module')).GoogleGenerativeAI(GEMINI_KEY)
Insert cell
copy(prompt_breakdown2(t))
Insert cell
{
const response = await fetch('https://api.deepinfra.com/v1/openai/chat/completions', {
method: 'POST',
body: JSON.stringify({
model: "mistralai/Mixtral-8x7B-Instruct-v0.1",
messages: [{role: "user", content: "Hello"}],
max_tokens: 20,
}),
headers: {
"Content-Type": "application/json",
authorization: `Bearer ${DEEPINFRA_KEY}`,
}
});
const data = await response.json();
return [data.choices[0].message.content, data.usage.prompt_tokens, data.usage.completion_tokens]
}
Insert cell
// Sends `prompt` to Mixtral as a single user message via askMX.
// NOTE: currently stubbed out — the early `return undefined` makes the askMX
// call below unreachable, so the promise always resolves to undefined (the
// `mxOut` cell then falls back to `mxOutCache` through its `||`).
// Remove the first return to re-enable live calls.
async function mixtralRetag(prompt) {
return undefined
return askMX([{role: "user", content: prompt}])
}
Insert cell
prompt_breakdown(t)
Insert cell
copy(prompt_breakdown(t))
Insert cell
retagging_interactive(t)
Insert cell
gptOut = gptOutCache || getFirstChoice(await getChatCompletion('gpt-4-turbo-preview', [{role: 'user', content: prompt_breakdown9(t)}]))
Insert cell
gptOutCache = `1) row >= 0 && row < rows && col >= 0 && col < cols
2) 16
3) 16
4) 1`
Insert cell
gptRetaggingJSONString = gptRetaggingJSONStringCache || (await getRetaggingJSONGPT(gptOut)).trim()
Insert cell
gptRetaggingJSONStringCache = `{
"1": "row >= 0 && row < rows && col >= 0 && col < cols",
"2": 16,
"3": 16,
"4": 1
}`
Insert cell
gptRetaggingJSON = JSON.parse(gptRetaggingJSONString)
Insert cell
computeUpdatedCodeWithSnippetRetagged({
code:t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, ''),
snippet:gptRetaggingJSON[1],
lineStart:gptRetaggingJSON[2],
lineEnd:gptRetaggingJSON[3],
nthOccurrence:gptRetaggingJSON[4],
delimiterStart:t.delimiter,
delimiterEnd:t.delimiter})
Insert cell
mxOut = await mixtralRetag(prompt_breakdown2(t)) || mxOutCache
Insert cell
mxOutCache = ` 1) The snippet marked with "★" is an expression that retrieves the \`price\` property of the \`item\` object. Its type is a number, specifically a decimal. This expression calculates the price of an item in the cart.

2) \`totalPrice += item.price;\`

3) The snippet could be located in the \`calculateTotalPrice\` function since that function calculates the total price of the cart, or it could be located in the \`viewCart\` function since that function displays the item's price. However, the most likely location is the \`calculateTotalPrice\` function since the snippet calculates the item's price.

4) Here is the updated code for the \`calculateTotalPrice\` function:

14: function calculateTotalPrice() {
15: let totalPrice = 0;
16: for (let item of cart) {
17: totalPrice += item.price * item.quantity; // Updated to multiply by the quantity
18: }
19: return totalPrice;
20: }

5) Here is the updated code for the \`calculateTotalPrice\` function with the part that should be marked:

14: function calculateTotalPrice() {
15: let totalPrice = 0;
16: for (let item of cart) {
17: totalPrice += ★item.price★ * item.quantity; // Updated to multiply by the quantity
18: }
19: return totalPrice;
20: }

6) \`item.price\`

7) The line number that 6 starts on is 17.

8) The line number that 6 ends on is 17.`
Insert cell
copy(mxOut)
Insert cell
copy(retagPrompt(mxOut))
Insert cell
mxJSONString = await getRetaggingJSONMixtral(mxOut) || mxJSONStringCache
Insert cell
mxJSONStringCache = ` {
"1": "The snippet marked with '★' is an expression that extracts the \`price\` property of an item object and adds it to a running total. Its type is a number.",
"2": "totalPrice += ★item.price★;",
"3": "The snippet could be located in the \`calculateTotalPrice\` function, as it involves calculating the total price of all items in the cart.",
"4": "function calculateTotalPrice() {\\n let totalPrice = 0;\\n for (let item of cart) {\\n totalPrice += item.price * item.quantity; // Updated to multiply by the quantity\\n }\\n return totalPrice;\\n}",
"5": "function calculateTotalPrice() {\\n let totalPrice = 0;\\n for (let item of cart) {\\n totalPrice += ★item.price★ * item.quantity; // Updated to multiply by the quantity\\n }\\n return totalPrice;\\n}",
"6": "item.price",
"7": 17,
"8": 17
}`
Insert cell
JSON.parse(mxJSONString)[4]
Insert cell
import {fuzzyFindStartAndEndWithNormalizedWhitespace, findStartAndEndNormalized} from "0be16c76c2df9027"
Insert cell
sectionStartEnd = fuzzyFindStartAndEndWithNormalizedWhitespace(t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, ''), JSON.parse(mxJSONString)[4], {interIns: 500})
Insert cell
sectionStartEnd2 = findStartAndEndNormalized(t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, ''), JSON.parse(mxJSONString)[4])
Insert cell
Insert cell
sectionStartEnd3 = JSON.parse(mxJSONString)[7]
Insert cell
sectionString = updatedCodeWithoutDelimiters.split('\n').slice(JSON.parse(gptRetaggingJSONString)[2] - 1, JSON.parse(gptRetaggingJSONString)[3]).join('\n')//updatedCodeWithoutDelimiters.split('\n').slice(JSON.parse(mxJSONString)[7] - 1, JSON.parse(mxJSONString)[8]).join('\n')
Insert cell
JSON.parse(mxJSONString)[6]
Insert cell
snippetIdxInSection = findStartAndEndNormalized(sectionString, JSON.parse(gptRetaggingJSONString)[1])//findStartAndEndNormalized(sectionString, JSON.parse(mxJSONString)[6])
Insert cell
/(?<=totalPrice \+= )\w+\.(?=price \* item\.quantity)/.exec(t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, ''))
Insert cell
JSON.parse(mxJSONString)[6]
Insert cell
snippetStartEnd = fuzzyFindStartAndEndWithNormalizedWhitespace(t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, '').slice(sectionStartEnd.start, sectionStartEnd.end), JSON.parse(mxJSONString)[6])
Insert cell
sectionString.slice(snippetIdxInSection.start, snippetIdxInSection.end)
Insert cell
lenUpToSection = updatedCodeWithoutDelimiters.split('\n').slice(0, JSON.parse(gptRetaggingJSONString)[2] - 1).map(s=>s + '\n').join('').length//updatedCodeWithoutDelimiters.split('\n').slice(0, JSON.parse(mxJSONString)[7] - 1).map(s=>s + '\n').join('').length
Insert cell
leftIdx = lenUpToSection + snippetIdxInSection.start
Insert cell
rightIdx = leftIdx + snippetIdxInSection.end - snippetIdxInSection.start
Insert cell
updatedCodeWithSnippetRetagged = updatedCodeWithoutDelimiters.slice(0, leftIdx) + t.delimiter + updatedCodeWithoutDelimiters.slice(leftIdx, rightIdx) + t.delimiter + updatedCodeWithoutDelimiters.slice(rightIdx, updatedCodeWithoutDelimiters.length)
Insert cell
findStartAndEndNormalized('abc', 'bdbc', 1)
Insert cell
/**
 * Re-inserts delimiters around `snippet` inside `code`, using the model's
 * claimed line range as a search window.
 *
 * Search strategy, stopping at the first success:
 *   1. look for the whole snippet inside lines [lineStart, lineEnd];
 *   2. widen the window by one line on each side and retry;
 *   3. locate the snippet's first line anywhere in the window and its last
 *      line on the window's last or second-to-last line, and tag the span
 *      between them.
 *
 * Relies on findStartAndEndNormalized(haystack, needle, nthOccurrence), which
 * this function consumes as returning {start, end} offsets into `haystack`
 * with `start === -1` when nothing matches.
 *
 * @param {object} args
 * @param {string} args.code           Updated code without delimiters.
 * @param {string} args.snippet        Text that should be wrapped.
 * @param {number} args.lineStart      1-indexed first line of the window.
 * @param {number} args.lineEnd        1-indexed last line of the window (inclusive).
 * @param {number} args.nthOccurrence  Forwarded to findStartAndEndNormalized.
 * @param {string} args.delimiterStart Inserted before the match.
 * @param {string} args.delimiterEnd   Inserted after the match.
 * @returns {string|{error: string}} The tagged code, or
 *   `{error: 'No retagging found.'}` when no strategy locates the snippet.
 */
computeUpdatedCodeWithSnippetRetagged = ({code, snippet, lineStart, lineEnd, nthOccurrence, delimiterStart, delimiterEnd}) => {
  const codeLines = code.split('\n')
  // Text of the 1-indexed, inclusive line range [first, last].
  const sectionOf = (first, last) => codeLines.slice(first - 1, last).join('\n')
  // Character offset in `code` at which 1-indexed line `first` begins.
  const offsetOfLine = first => codeLines.slice(0, first - 1).map(s => s + '\n').join('').length

  let firstLine = lineStart
  let lastLine = lineEnd
  let sectionString = sectionOf(firstLine, lastLine)
  let lenUpToSection = offsetOfLine(firstLine)
  let snippetIdxInSection = findStartAndEndNormalized(sectionString, snippet, nthOccurrence)
  console.log('first', sectionString, snippet, nthOccurrence, snippetIdxInSection)

  if (snippetIdxInSection.start === -1) {
    // Widen by one line each way. Lines are 1-indexed, so clamp at 1: a value
    // of 0 would turn into slice(-1, ...) and select from the END of the file.
    firstLine = Math.max(1, firstLine - 1)
    lastLine = Math.min(lastLine + 1, codeLines.length)
    sectionString = sectionOf(firstLine, lastLine)
    lenUpToSection = offsetOfLine(firstLine)
    snippetIdxInSection = findStartAndEndNormalized(sectionString, snippet, nthOccurrence)
    console.log('second', sectionString, snippet, nthOccurrence, snippetIdxInSection)
  }

  if (snippetIdxInSection.start === -1) {
    const snippetLines = snippet.split('\n')
    const snippetFirstLine = snippetLines[0]
    const snippetLastLine = snippetLines[snippetLines.length - 1]
    const sectionLines = sectionString.split('\n')
    console.log('third')
    console.log(snippetFirstLine, snippetLastLine)

    // The snippet's last line must sit on the window's last line or, failing
    // that, on the line before it (when such a line exists).
    let lastLineIdx = -1
    let lastLineMatch = null
    for (const idx of [sectionLines.length - 1, sectionLines.length - 2]) {
      if (idx < 0) continue
      const match = findStartAndEndNormalized(sectionLines[idx], snippetLastLine, nthOccurrence)
      console.log(match)
      if (match.start >= 0) {
        lastLineIdx = idx
        lastLineMatch = match
        break
      }
    }
    if (lastLineIdx === -1) {
      return {error: 'No retagging found.'}
    }

    const firstLineMatch = findStartAndEndNormalized(sectionString, snippetFirstLine, nthOccurrence)
    // Offset of the matched line within the section, counting the newline
    // that terminates every preceding line.
    const sectionLenUpToLastLine = sectionLines.slice(0, lastLineIdx).map(s => s + '\n').join('').length
    const end = sectionLenUpToLastLine + lastLineMatch.end
    if (firstLineMatch.start === -1 || end < firstLineMatch.start) {
      // Without a usable start the delimiters would land at bogus positions.
      return {error: 'No retagging found.'}
    }
    snippetIdxInSection = {start: firstLineMatch.start, end}
    console.log('third', sectionString, snippet, nthOccurrence, snippetIdxInSection)
  }

  const leftIdx = lenUpToSection + snippetIdxInSection.start
  const rightIdx = leftIdx + snippetIdxInSection.end - snippetIdxInSection.start
  return code.slice(0, leftIdx) + delimiterStart + code.slice(leftIdx, rightIdx) + delimiterEnd + code.slice(rightIdx, code.length)
}
Insert cell
[1, 2, 3].slice(0,-1)
Insert cell
diff(t.codeWithSnippetDelimited, t.updatedCodeWithSnippetDelimited)
Insert cell
import {findOriginalPositions} from "0be16c76c2df9027"
Insert cell
// Prompt that asks a model for a single-character Y/N verdict on whether the
// given response text contains answers to all of problems 1-8.
validatePrompt = retagging => [
  'Consider the following text submitted by a student as a homework response:',
  '',
  '<TEXT>',
  `${retagging}`,
  '</TEXT>',
  '',
  'Does the TEXT include answers for ALL of problems 1-8 (Y), or has it been cut off before 8 (N)? Respond with Y/N. ONLY RESPOND WITH A SINGLE CHARACTER. DO NOT COMMENT ON YOUR ANSWER.',
].join('\n')
//The final line of your response should have the word COMPLETE if 1-8 are present, or INCOMPLETE if they are not all present.
Insert cell
copy(validatePrompt(mxOutCache))
Insert cell
// Prompt asking a model to convert a free-text retagging answer into the
// four-key JSON object {1, 2, 3, 4}.
retagPromptGPT = retagging => [
  'Convert the following response into a JSON object:',
  '',
  '<RESPONSE>',
  `${retagging}`,
  '</RESPONSE>',
  '',
  'The object must look like: {1: <code>, 2: <number>, 3: <number>, 4: <number>}',
  '',
  'The answer to 1 should be a code string only, without markdown formatting or extra notes.',
].join('\n')
Insert cell
copy(retagPromptGPT(gptOut))
Insert cell
// Prompt asking a model to convert an eight-part free-text retagging answer
// into JSON, listing the expected type of each numbered field. The prompt
// ends with a trailing newline.
retagPrompt = retagging => {
  const fieldTypes = ['text', 'code', 'text', 'code', 'code', 'code', 'number', 'number']
  return [
    'Convert the following response into a JSON object:',
    '',
    '<RESPONSE>',
    `${retagging}`,
    '</RESPONSE>',
    '',
    'The answers to 2, 4, 5, and 6 are processed as code only, without markdown formatting or extra notes. Extract the code for these. The values for each key should be strings. Remove any markdown formatting from code. The fields should have the following types:',
    ...fieldTypes.map((kind, i) => `${i + 1}: ${kind}`),
    '',
  ].join('\n')
}

// matching the following type:

// type response = { error: string } | { 1: string, 2: string, 3: string, 4: string, 5: string, 6: string, 7: string, 8: string }
Insert cell
// Asks askMX to turn a free-text retagging answer into the four-key JSON
// described by retagPromptGPT. Resolves to whatever askMX resolves to.
async function getRetaggingJSONGPT(retagging) {
  const systemMessage = {
    role: 'system',
    content: 'You are a text parser designed to output JSON.',
  }
  const userMessage = {
    role: 'user',
    content: retagPromptGPT(retagging),
  }
  return await askMX([systemMessage, userMessage])
}
Insert cell
// Would ask askMX to convert an eight-part retagging answer into JSON using
// retagPrompt.
// NOTE: currently stubbed out — the early `return undefined` makes everything
// after it unreachable, so the promise always resolves to undefined (the
// `mxJSONString` cell then falls back to `mxJSONStringCache` through its `||`).
// Remove the first return to re-enable live calls.
async function getRetaggingJSONMixtral(retagging) {
return undefined
const result = await askMX([
{ role: 'system',
content: 'You are a text parser designed to output JSON.'},
{ role: 'user',
content: retagPrompt(retagging)
}
])
// 8: text
// 9: code
return result
}
Insert cell
showUpdateDiff(t)
Insert cell
prompt_breakdown(t)
Insert cell
//retag1Out = retag1('gpt-3.5-turbo-0125', t)
Insert cell
// retag1Out1 = retag1('gpt-3.5-turbo-0613', t)
Insert cell
// retag1Out2 = retag1('gpt-4-turbo-preview', t)
Insert cell
resultJSON1 = getRetaggingJSON('gpt-3.5-turbo-0125', retag1Out)
Insert cell
JSON.parse(resultJSON1)
Insert cell
/**
 * Asks `model` (through `ask`) to convert a free-text retagging answer into a
 * JSON object with string fields 1-6, or `{ error: ... }` when a field is
 * missing from the answer.
 *
 * @param {string} model     Model identifier forwarded to `ask`.
 * @param {string} retagging Raw model answer to convert.
 * @returns {Promise<*>} Whatever `ask` resolves to, UNPARSED — callers run
 *   JSON.parse on it themselves.
 */
async function getRetaggingJSON(model, retagging) {
  const messages = [
    { role: 'system',
      content: 'You are a helpful assistant designed to output JSON.'},
    { role: 'user',
      content: `Convert the following response into a JSON object.
<RESPONSE>
${retagging}
</RESPONSE>

The answers to 2, 4, 5, and 6 must be code only, without markdown formatting or extra notes. Extract the code for these. The values for each key should be strings. Remove any markdown formatting from code. The fields should have the following types:
1: text
2: code
3: text
4: code
5: code
6: code

If there appears to be a missing field in the response, do not guess or fill out any fields. Instead return an object with a single top-level error key, like { error: "Missing field: <field>" }.
`}
  ]
  // The unreachable JSON.parse that used to follow this return was removed;
  // the function has always handed back the raw result.
  return await ask(model, messages)
}
Insert cell
// Six-step retagging prompt: shows the original file with the snippet
// delimited, then the updated file with delimiters stripped, and walks the
// model through describing, locating and reprinting the snippet.
// The prompt ends with a trailing newline.
prompt_breakdown = t => {
  const mark = t.delimiter
  const originalFile = t.codeWithSnippetDelimited
  const updatedFile = t.updatedCodeWithSnippetDelimited.replaceAll(mark, '')
  return `Complete the following 6 tasks in order. Do not look ahead as you complete them.

Consider the following file:

<INPUT>
${originalFile}
</INPUT>

1) A snippet has been marked with "${mark}". Briefly describe it: what kind of expression is it? What is its type? What does it do? Do not overexplain, but give enough context that a reader could locate the snippet in this code based on your description. Distinguish it from other parts of the file

2) Reprint this snippet.

Next, consider the following updated file:

<UPDATED>
${updatedFile}
</UPDATED>

The same snippet appears in the updated file. We will now locate it.

3) Describe possible sections the snippet could be said to be located in. It is possible the snippet has not changed, or that it has been refactored. Pick the most correct choice.

4) Fully reprint the updated code of the chosen section.

5) Print the part of that text that should be highlighted.

6) Print the part of (5) that should be highlighted without any context or "${mark}" characters. Anything that was inside the "${mark}" should still be present, and anything that was outside should be absent.

Now respond with each of the answers to 1-6.
`
}

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
// Eight-step retagging prompt. Like prompt_breakdown, but both files are shown
// with 1-indexed "N:" line prefixes and the model is also asked for the start
// and end line numbers of the relocated snippet.
prompt_breakdown2 = t => {
  const mark = t.delimiter
  const withLineNumbers = text => text.split('\n').map((line, idx) => `${idx + 1}:${line}`).join('\n')
  const originalFile = withLineNumbers(t.codeWithSnippetDelimited)
  const updatedFile = withLineNumbers(t.updatedCodeWithSnippetDelimited.replaceAll(mark, ''))
  return `Complete the following 8 tasks in order. Do not look ahead as you complete them.

Consider the following file:

<INPUT>
${originalFile}
</INPUT>

1) A snippet has been marked with "${mark}". Briefly describe it: what kind of expression is it? What is its type? What does it do? Do not overexplain, but give enough context that a reader could locate the snippet in this code based on your description. Distinguish it from other parts of the file

2) Reprint this snippet.

Next, consider the following updated file:

<UPDATED>
${updatedFile}
</UPDATED>

The same snippet appears in the updated file. We will now locate it.

3) Describe possible sections the snippet could be said to be located in. It is possible the snippet has not changed, or that it has been refactored. Pick the most correct choice.

4) Fully reprint the updated code of the chosen section.

5) Print the part of that text that should be marked in the updated code. Make sure the marking is as close as possible to the original intent.

6) Print the part of (5) that should be highlighted without any context or "${mark}" characters. Anything that was inside the "${mark}" should still be present, and anything that was outside should be absent.

7) Print the line number that 6 starts on.

8) Print the line number that 6 ends on.

Now respond with each of the answers to 1-8.`
}

// 8) Counting from zero, if we searched for the string from (7) in the updated file, which number occurrence would this be? You can write out any reasoning here.

// 9) Print just the number of the index from (8).`
// 4) That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${t.delimiter}") to remind yourself of it.

// 5) Print just the text between the "${t.delimiter}" in the old snippet. Describe again what this text does.

// 6) Print just that text.

// 7) Print text that does the same thing in the new section. Look for perfectly equivalent code. Describe what it does carefully.

// 8) Reprint only the equivalent text that does the same thing in the new section.

// 9) Reprint only the equivalent text that does the same thing in the new section, refining your answer if possible.`
Insert cell
// Three-step prompt that shows both files up front and asks the model to
// reprint the ENTIRE updated file with the snippet re-delimited.
prompt_smarter = t => {
  const mark = t.delimiter
  const originalFile = t.codeWithSnippetDelimited
  const updatedFile = t.updatedCodeWithSnippetDelimited.replaceAll(mark, '')
  return `Complete the following tasks in order. Do not look ahead as you complete them.

Consider the following file:

<INPUT>
${originalFile}
</INPUT>

Next, consider the following updated file:

<UPDATED>
${updatedFile}
</UPDATED>

1) A snippet has been marked in the input with "${mark}". Briefly describe it: what kind of expression is it? What is its type? What does it do? Do not overexplain, but give enough context that a reader could locate the snippet in this code based on your description.

The same snippet appears in the updated file. We will now locate it.

2) Describe possible sections the snippet could be said to be located in. It is possible the snippet has not changed, or that it has been refactored. Pick ONLY the single most correct choice.

3) Fully reprint the updated code with the new snippet marked with "${mark}" as in the input file. You must include every character that was in the original file or you will risk a system outage.`
}
Insert cell
copy(prompt_smarter(t))
Insert cell
// Runs the six-step prompt_breakdown prompt for `test` against `model` as a
// single user message and resolves to getFirstChoice of the completion.
// (An earlier few-shot, JSON-mode variant built from train.slice(8) examples
// used to live here as commented-out code.)
async function retag1(model, test) {
  const messages = [{role: 'user', content: prompt_breakdown(test)}]
  const completion = await getChatCompletion(model, messages)
  return getFirstChoice(completion)
}
Insert cell
// Runs the six-step prompt_breakdown prompt for `test` against the fixed
// 'gpt-3.5-turbo' model and resolves to getFirstChoice of the completion.
// (An earlier few-shot, JSON-mode variant built from train.slice(8) examples
// used to live here as commented-out code.)
async function retag(test) {
  const messages = [{role: 'user', content: prompt_breakdown(test)}]
  const completion = await getChatCompletion('gpt-3.5-turbo', messages)
  return getFirstChoice(completion)
}
Insert cell
train[1]
Insert cell
// train0naive = retag(train[1])
Insert cell
md`${train0naive}`
Insert cell
// result = ask('gpt-3.5-turbo', [
// {role: 'system', content: 'You are a helpful assistant designed to output JSON.'},
// {role: 'user',
// content: `Convert the following response into a JSON object.
// <RESPONSE>
// ${train0naive}
// </RESPONSE>

// The answers to 5, 6, and 7 must be code only. Extract the code for these.
// 1: text
// 2: code
// 3: text
// 4: text
// 5: code
// 6: code
// 7: code
// `}
// ])
Insert cell
train[1].updatedCodeWithSnippetDelimited.replaceAll(train[1].delimiter, '').indexOf("javascript")
Insert cell
resultJSON = JSON.parse(result)
Insert cell
// computed = {
// const updatedCode = train[1].updatedCodeWithSnippetDelimited.replaceAll(train[1].delimiter, '')
// const section = resultJSON[5]
// const highlight = resultJSON[7]
// return computeUpdatedSnippet(updatedCode, section, highlight)
// }
Insert cell
// computedTrain0 = computeUpdatedSnippet(train[0])
Insert cell
// NOTE(review): named "Train0" but computed from train[1]. computeUpdatedSnippet currently
// begins with `return 42`, so this presumably resolves to 42 rather than retagged code —
// confirm before relying on the cells below (42 has no .slice).
computedTrain0 = computeUpdatedSnippet(train[1])
Insert cell
train[2].updatedCodeWithSnippetDelimited
Insert cell
// First 100 characters of the computed result.
computedTrain0.slice(0, 100)
Insert cell
// NOTE(review): the result computed from train[1] is diffed against train[2]'s expected
// output — confirm the index is intended.
diff(computedTrain0, train[2].updatedCodeWithSnippetDelimited)
Insert cell
// Hand the JSON-encoded result to the imported `copy` helper.
copy(JSON.stringify(computedTrain0))
Insert cell
Fuse = require('fuse.js')
Insert cell
// NOTE(review): computeUpdatedSnippetSmarter currently begins with `return 42`, so this
// presumably resolves to 42 and `.response` below is undefined — confirm.
smarter_train1 = computeUpdatedSnippetSmarter(train[1])
Insert cell
// Compare the extracted response with train[1]'s expected delimited output.
diff(smarter_train1.response, train[1].updatedCodeWithSnippetDelimited)
Insert cell
train[7].updatedCodeWithSnippetDelimited
Insert cell
train[3].updatedCodeWithSnippetDelimited
Insert cell
smarter_train1
Insert cell
// Two-step retagging pipeline, currently disabled.
// As written below the early return: (1) send prompt_smarter(test) to `model` and take the
// first choice, (2) ask `model` again to extract "part 3" of that answer as a JSON object of
// the form {response: <code>}, then parse and return it.
// NOTE(review): the unconditional `return 42` makes everything after it unreachable, so the
// function currently always resolves to 42.
async function computeUpdatedSnippetSmarter(test) {
return 42
// const updatedCode = test.updatedCodeWithSnippetDelimited.replaceAll(test.delimiter, '')

const model = 'gpt-4-turbo-preview'
// Local helper; shadows the notebook-level `retag` inside this function.
async function retag(test) {
return getFirstChoice(await getChatCompletion(model, [{role: 'user', content: prompt_smarter(test)}]))
}

const retagging = await retag(test)
console.log('retagging', retagging)
// Second model call: extraction only. The prompt hard-codes "★" as the marker.
const result = await ask(model, [
{ role: 'system',
content: 'You are a helpful assistant designed to output JSON, NOT MARKDOWN.'},
{ role: 'user',
content: `I just want the code from part 3 of the response, INCLUDING the ★s. Extract ALL of the code from part 3, copying exactly. Disregard the other parts of the response. Respond with an object in the format {response: <all code for part 3 goes here>}. Response should be a raw string WITHOUT MARKDOWN FORMATTING. UNDER NO CIRCUMSTANCES should you start your response with three backticks. YOUR RESPONSE WILL BE CONSUMED BY AN API ENDPOINT, DO NOT MARKDOWN FORMAT ANY PART OF IT.
<RESPONSE>
${retagging}
</RESPONSE>`}
])
console.log('result', result)

// JSON.parse is synchronous, so the `await` here has no effect beyond a microtask tick.
// A malformed response makes JSON.parse throw; nothing here catches it.
const resultJSON = await JSON.parse(result)

return resultJSON
}
Insert cell
// Retagging pipeline based on prompt_breakdown, currently disabled.
// As written below the early return: strip the delimiters from the updated code, ask `model`
// for a breakdown, convert that breakdown to JSON with a second call, then locate the section
// and the highlight inside the updated code and re-insert test.delimiter around the highlight.
// NOTE(review): the unconditional `return 42` makes everything after it unreachable, so the
// function currently always resolves to 42.
async function computeUpdatedSnippet(test) {
return 42
const updatedCode = test.updatedCodeWithSnippetDelimited.replaceAll(test.delimiter, '')

const model = 'gpt-3.5-turbo'
// Local helper; shadows the notebook-level `retag` inside this function.
async function retag(test) {
return getFirstChoice(await getChatCompletion(model, [{role: 'user', content: prompt_breakdown(test)}]))
}

// Local helper; shadows the notebook-level `ask` inside this function.
async function ask(model, messages) {
return getFirstChoice(await getChatCompletion(model, messages))
}

// Get retagging response, then parse it into JSON (could probably do in one shot with more time...)
const retagging = await retag(test)
console.log('retagging', retagging)
// NOTE(review): the prompt below says answers "4, 6, 7, and 9" must be code but only lists
// items 1-6, while the code later reads keys 4, 6 and 9 — the numbering looks out of sync.
const result = await ask(model, [
{ role: 'system',
content: 'You are a helpful assistant designed to output JSON.'},
{ role: 'user',
content: `Convert the following response into a JSON object.
<RESPONSE>
${retagging}
</RESPONSE>

The answers to 4, 6, 7, and 9 must be code only. Extract the code for these.
1: text
2: code
3: text
4: code
5: code
6: code`}
])
// 8: text
// 9: code
// JSON.parse throws on a malformed response; nothing here catches it.
const resultJSON = JSON.parse(result)
console.log('resultJSON', resultJSON)
const section = resultJSON[4].replaceAll('\r', '') // get rid of carriage returns
const highlight = resultJSON[6]
// The exact indexOf lookup is commented out, so sectionStart always starts at -1 and the
// fuzzy search below always runs.
let sectionStart = -1 // updatedCode.indexOf(section)
console.log('ss1', sectionStart)
if (sectionStart === -1) { // The section isn't easily searchable
console.log(updatedCode, sectionStart)
// fuzzyFindIndex is imported from another notebook; presumably it returns -1 when nothing
// matches (the check further down relies on that) — confirm.
sectionStart = await fuzzyFindIndex(updatedCode, section)
}
console.log('ss2', sectionStart)
// NOTE(review): sectionEnd and highlightStart are computed before the -1 check below, so
// they are derived from a possibly invalid sectionStart; they are only used on the success path.
const sectionEnd = sectionStart + section.length
const highlightStart = await fuzzyFindIndex(section, highlight)
console.log('hs1', highlightStart)

if (sectionStart === -1) {
// we will just have to bail out and try using highlight only.
// NOTE(review): key 9 is not among the items listed in the prompt above, and a -1 result
// from findNthOccurrence is not handled before slicing.
const index = resultJSON[9]
const globalHighlightStart = findNthOccurrence(updatedCode, highlight, index)
const globalHighlightEnd = globalHighlightStart + highlight.length
return updatedCode.slice(0, globalHighlightStart) + test.delimiter + highlight + test.delimiter + updatedCode.slice(globalHighlightEnd)
}
// Success path: rebuild the file as prefix + section-with-delimited-highlight + suffix.
// NOTE(review): highlightStart is not checked for a failed lookup — confirm fuzzyFindIndex's contract.
const highlightEnd = highlightStart + highlight.length
return updatedCode.slice(0, sectionStart) + section.slice(0, highlightStart) + test.delimiter + highlight + test.delimiter + section.slice(highlightEnd) + updatedCode.slice(sectionEnd)
}
Insert cell
import {fuzzyFindIndex} from "0be16c76c2df9027"
Insert cell
/**
 * Finds where the n-th occurrence (1-based) of `subString` starts in `string`.
 * Each search resumes one character past the previous hit, so overlapping
 * occurrences are counted.
 *
 * @param {string} string - text to search.
 * @param {string} subString - text to look for.
 * @param {number} n - which occurrence to find; values below 1 yield -1.
 * @returns {number} start index of the n-th occurrence, or -1 if there are fewer than n.
 */
function findNthOccurrence(string, subString, n) {
  let position = -1;
  let seen = 0;
  while (seen < n) {
    position = string.indexOf(subString, position + 1);
    if (position < 0) {
      return -1;
    }
    seen += 1;
  }
  return position;
}
Insert cell
/**
 * Requests a chat completion and returns whatever getFirstChoice extracts from it.
 *
 * @param {string} model - model identifier forwarded to getChatCompletion.
 * @param {Array<object>} messages - chat messages forwarded to getChatCompletion.
 * @returns {Promise<*>} the value produced by getFirstChoice.
 */
async function ask(model, messages) {
  const completion = await getChatCompletion(model, messages);
  return getFirstChoice(completion);
}
Insert cell
// First turn: shows the delimited original file and asks for a description of the marked section.
prompt1 = (example) => [
  'Consider the following file:',
  '',
  '<INPUT>',
  `${example.codeWithSnippetDelimited}`,
  '</INPUT>',
  '',
  `Describe the section highlighted with "${example.delimiter}".`,
].join('\n')
Insert cell
// Second turn: shows the updated file (delimiters stripped) and asks the model to re-attach
// the marker around the same snippet.
// The marker named in the instructions is t.delimiter rather than a hard-coded "★", so the
// request matches the delimiter stripped from the updated file and used by the other prompts.
// Output is unchanged when the delimiter is "★".
prompt2 = (t) => {
  const marker = t.delimiter
  const updatedCode = t.updatedCodeWithSnippetDelimited.replaceAll(marker, '')
  return `Now consider the following file:

<UPDATED>
${updatedCode}
</UPDATED>

Attach "${marker}" to the exact same snippet in this file. It is possible the snippet has not changed, or that it has been refactored. Change no other parts of the input. Respond with only the entire UPDATED section, with the "${marker}" attached in the place you identified. Your response must include the entire UPDATED section and two "${marker}" marking the snippet, and should not differ in any other way from the UPDATED section above.`
}
Insert cell
// Single-shot prompt: original delimited file, the snippet's type and description, the updated
// file with delimiters stripped, then a request to reprint UPDATED with the snippet re-marked.
combo = (example) => {
  const promptLines = [
    'Consider the following file:',
    '',
    '<INPUT>',
    `${example.codeWithSnippetDelimited}`,
    '</INPUT>',
    '',
    `A snippet has been marked with "${example.delimiter}".`,
    '',
    '<SNIPPET DESCRIPTION>',
    `(${example.snippetType})`,
    `${example.snippetDescription}`,
    '</SNIPPET DESCRIPTION>',
    '',
    'Now consider the following updated file:',
    '',
    '<UPDATED>',
    `${example.updatedCodeWithSnippetDelimited.replaceAll(example.delimiter, '')}`,
    '</UPDATED>',
    '',
    'Print a region of UPDATED that is most similar to the original snippet.',
    '',
    `Then, reprint the full text of UPDATED with the snippet identified in the same way using "${example.delimiter}".`,
    '', // the prompt ends with a trailing newline
  ];
  return promptLines.join('\n');
}
Insert cell
// Step 1 of the multi-turn flow: show the delimited original file and ask for a brief
// description of the marked snippet.
prompt01 = (example) => [
  'Consider the following file:',
  '',
  '<INPUT>',
  `${example.codeWithSnippetDelimited}`,
  '</INPUT>',
  '',
  `A snippet has been marked with "${example.delimiter}". Briefly describe it: what kind of expression is it? What is its type? What does it do? Do not overexplain, but give enough context that a reader could locate the snippet in this code based on your description.`,
].join('\n')
Insert cell
// Step 2 of the multi-turn flow: show the updated file with delimiters stripped and ask for
// the section containing the snippet.
prompt02 = (example) => {
  const updatedCode = example.updatedCodeWithSnippetDelimited.replaceAll(example.delimiter, '');
  return [
    'Now consider the following updated file:',
    '',
    '<UPDATED>',
    `${updatedCode}`,
    '</UPDATED>',
    '',
    'The same snippet appears in this file. It is possible the snippet has not changed, or that it has been refactored. Reprint the section where it appears.',
  ].join('\n');
}
Insert cell
// Step 3 of the multi-turn flow: ask the model to reprint the old delimited snippet.
prompt03 = (example) => {
  const marker = example.delimiter;
  return `That is the section of the new snippet. Now reprint the old snippet (the code surrounded by "${marker}") to remind yourself of it.`;
}
Insert cell
// Step 4 of the multi-turn flow: ask for only the text between the delimiters in the old snippet.
prompt04 = (example) => {
  const marker = example.delimiter;
  return `Print just the text between the "${marker}" in the old snippet.`;
}
Insert cell
// Step 5 of the multi-turn flow: fixed instruction; the argument is accepted but not used.
prompt05 = (_example) => 'Print the text that is like this in the new section.'
Insert cell
import {copy, asyncCopy} from "@ryanseddon/copy"
Insert cell
Insert cell
// Preview of few-shot material: for every training example from index 7 onward, build a
// user message (delimited original + updated code with delimiters stripped) and an assistant
// message (snippet description + delimited updated code), both JSON-encoded, then flatten
// the pairs and join only their `content` strings with newlines.
train.slice(7).map(t => ([
{role: 'user',
content: JSON.stringify({
codeWithSnippetDelimited: t.codeWithSnippetDelimited,
updatedCode: t.updatedCodeWithSnippetDelimited.replaceAll(t.delimiter, '')})},
{role: 'assistant',
content: JSON.stringify({
snippetDescription: t.snippetDescription,
updatedCodeWithSnippetDelimited: t.updatedCodeWithSnippetDelimited
})} ])).flat().map(o => o.content).join('\n')
Insert cell
// The training examples used by the preview above.
train.slice(7)
Insert cell
// Rewrites the first delimiter in the delimited code as "<1>" and the next one as "</1>".
// Later delimiters, if any, are left untouched.
starsToTags = (test) => {
  const withOpeningTag = test.codeWithSnippetDelimited.replace(test.delimiter, '<1>');
  return withOpeningTag.replace(test.delimiter, '</1>');
}
Insert cell
// Diff the naive retagging output against train[0]'s expected (updated, delimited) code.
// NOTE(review): the only visible definition of train0naive is commented out, and that
// commented-out cell calls retag(train[1]) rather than train[0] — confirm both before use.
diff(train[0].updatedCodeWithSnippetDelimited, train0naive)
Insert cell
// Same comparison against train[0]'s original delimited code.
diff(train[0].codeWithSnippetDelimited, train0naive)
Insert cell
import {diff} from "@jobleonard/diff-tool"
Insert cell
// Annotation-updating prompt that represents highlights with numbered tags (<1> </1>).
// Takes the old annotated file and the updated file; returns the prompt text, which ends
// with a trailing newline.
highlight = (old_annotated_file, updated_file) => {
  const promptLines = [
    'Consider the task of annotation updating.',
    '',
    'You will be shown an old annotated file with a section highlighted and a newer version of the file without the highlight applied. Your task will be to copy the highlight to the updated version of the file. In this case, highlights are represented by numbered opening and closing tags, like <1> </1>. Your response should be only the updated file with highlights included.',
    '',
    'OLD ANNOTATED FILE:',
    '',
    `${old_annotated_file}`,
    '',
    'END OLD ANNOTATED FILE',
    '',
    'UPDATED FILE:',
    '',
    `${updated_file}`,
    '',
    'END UPDATED FILE',
    '',
    'Write your answer below. Again, your response should be only the updated file with highlights included. Do not include "END UPDATED FILE" or similar in your response.',
    '', // the prompt ends with a trailing newline
  ];
  return promptLines.join('\n');
}
Insert cell

// Annotation-updating prompt that represents highlights with a pair of "★" characters.
// Takes the old annotated file and the updated file; returns the prompt text, which ends
// with a trailing newline.
prompt = (old_annotated_file, updated_file) => {
  const promptLines = [
    'Consider the task of annotation updating.',
    '',
    'You will be shown an old annotated file with a section highlighted and a newer version of the file without the highlight applied. Your task will be to copy the highlight to the updated version of the file. In this case, highlights are represented by 2 star characters, like so: ★highlighted text★. Your response should be only the updated file with highlights included.',
    '',
    'OLD ANNOTATED FILE:',
    '',
    `${old_annotated_file}`,
    '',
    'END OLD ANNOTATED FILE',
    '',
    'UPDATED FILE:',
    '',
    `${updated_file}`,
    '',
    'END UPDATED FILE',
    '',
    'Write your answer below. Again, your response should be only the updated file with highlights included. Do not include "END UPDATED FILE" or similar in your response. Your response should be a character-for-character match with the UPDATED FILE, except for the additional 2 characters for highlights.',
    '', // the prompt ends with a trailing newline
  ];
  return promptLines.join('\n');
}
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more