Public
Edited
Dec 8, 2022
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
{
// Reset cell: loads a fixed annotated sample into the `annT2` input, then points
// the `newT2` input and both mutable mirrors back at their starting values.
// The <n>…</n> markers inside the template are annotation tags (the pattern the
// merge cells match with /<\/?\d+>/), not HTML.
viewof annT2.value = `
<1>const newT2rev = <2>newT2.split('').reverse()</2>.join('')</1>
<3>const isOpen = t => t.tag[0][1] !== '/'</3>;
const pairMapping = newTags.filter(isOpen)
.map(<4>t1 => ({open: t1,
close: newTags
.find(<6>(t2, j) => <7>t2.tag[0].slice(2,-1)</7> === t1.tag[0].slice(1,-1)</6>)})</4> )// ...
`
// Start the "new text" input from the baseline text.
viewof newT2.value = oldT2
// Re-seed the mutable copies that the incremental merge cell reads and updates.
mutable mutT2 = oldT2
mutable mutAnnT2 = annT2
}
Insert cell
diff = require('diff')
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
d = {
const out = new diff.Diff(true)
out.tokenize = function(value) {
// tokenize usage described at https://github.com/kpdecker/jsdiff/issues/63
// word tokenize example at https://github.com/kpdecker/jsdiff/blob/3b654c2ed7d5262ed9946de841ad8dae990286c7/src/diff/word.js#L34
const extendedWordChars = /^[a-zA-Z\u{C0}-\u{FF}\u{D8}-\u{F6}\u{F8}-\u{2C6}\u{2C8}-\u{2D7}\u{2DE}-\u{2FF}\u{1E00}-\u{1EFF}]+$/u;
// All whitespace symbols except newline group into one token, each newline - in separate token
// NOTE this can be tuned
let tokens = value.split(/([^\S\r\n]+|[()[\]{}'"\r\n\.]|\b)/);//value.split(/([^\S\r\n]+|[()[\]{}'"\r\n\.]|\b)/);//value.split(/(.)/);////value.split(/(.)/);//value.split(/([^\s]+|[()[\]{}'"\.]|\b)/);//
//Join the boundary splits that we do not consider to be boundaries. This is primarily the extended Latin character set.
// for (let i = 0; i < tokens.length - 1; i++) {
// // If we have an empty string in the next field and we have only word chars before and after, merge
// if (!tokens[i + 1] && tokens[i + 2]
// && extendedWordChars.test(tokens[i])
// && extendedWordChars.test(tokens[i + 2])) {
// tokens[i] += tokens[i + 2];
// tokens.splice(i + 1, 2);
// i--;
// }
// }
return tokens;
};
return out
}
Insert cell
// Second Diff instance; unlike `d`, no tokenize override is installed on it here.
// NOTE(review): no visible cell references `lineDiff` — confirm it is still needed.
lineDiff = new diff.Diff(true)
Insert cell
newT2
Insert cell
diff.diffLines(oldT2, newT2, {newlineIsToken: true})
Insert cell
// Returns a copy of each change object with an extra `newline` property.
// `newline` is truthy only for an added change that starts right after a line
// break (or at the very beginning) and is followed by a line break (its own
// trailing "\n", the next change starting with "\n", or the end of the list).
// For changes that are not added, `newline` is whatever falsy value `added` holds.
addIsNewline = ds => ds.map((change, i) => {
  const before = ds[i - 1]
  const after = ds[i + 1]
  let newline = change.added
  if (newline) {
    // Must begin a line: nothing precedes it, or the previous chunk ends in "\n".
    newline = !before || before.value?.endsWith('\n')
  }
  if (newline) {
    // Must also end a line, either itself or via what follows.
    newline = change.value.endsWith('\n') || (!after || after.value?.startsWith('\n'))
  }
  return {...change, newline}
})
Insert cell
// addIsNewlineP = ps => {
// const result = []
// for (let i = 0; i < ps.length; i++) {
// const newline = ps[i].added && (!ps[i-1] || ps[i-1]?.[0]?.endsWith('\n')) && (ps[i][0].endsWith('\n') || (!ps[i+1] || ps[i+1]?.[0].startsWith('\n')))
// result.push({...ps[i], newline})
// }
// return result
// }
Insert cell
''.endsWith
Insert cell
diffs2Patches(addIsNewline(d.diff(oldT2, newT2)))
Insert cell
d.diff(oldT2, newT2)
Insert cell
d.diff(oldT2, annT2)
Insert cell
diffs2Patches(d.diff(oldT, newT1))
Insert cell
// Converts a jsdiff change list into positional patches against the OLD text.
//
// ds: array of change objects ({value, added?, removed?, newline?}).
// Returns one patch per added/removed change, shaped
//   {0: text, newline, index, added, removed, drop: false}
// where `index` is
//   * for additions: the old-text offset the text is inserted at;
//   * for removals: the old-text offset just past the removed range
//     (removals are indexed by the range's final index).
// Unchanged chunks only advance the offset and produce no patch.
diffs2Patches = ds => {
  // Make sure added stuff comes first for adjacent patches: a removal directly
  // followed by an addition is swapped. This is done as an explicit pass because
  // "adjacent" is not an ordering a sort comparator can express consistently.
  const ordered = [...ds]
  for (let i = 0; i < ordered.length - 1; i++) {
    if (ordered[i].removed && ordered[i + 1].added) {
      const removal = ordered[i]
      ordered[i] = ordered[i + 1]
      ordered[i + 1] = removal
      i++ // the swapped pair is settled; do not re-examine the removal
    }
  }

  const result = []
  let offset = 0 // position in the old text reached so far
  for (const change of ordered) {
    // Added text does not exist in the old string, so it consumes nothing.
    const consumed = change.added ? 0 : change.value.length
    if (change.added || change.removed) {
      result.push({
        0: change.value,
        newline: change.newline,
        index: change.removed ? offset + consumed : offset,
        added: change.added,
        removed: change.removed,
        drop: false,
      })
    }
    offset += consumed
  }
  return result
}
Insert cell
"abcdefGhi.Lmn".split(/(.)/)
Insert cell
patches(oldT, newT1)
Insert cell
Insert cell
diffs2Patches(d.diff(oldT, newT))
Insert cell
patches(oldT, newT1)
Insert cell
merge(oldT, patches(oldT, annT), patches(oldT, newT))
Insert cell
merge(oldT, patches2(oldAnn), patches2(oldNew))
Insert cell
mutable mutT2 = oldT2
Insert cell
mutable mutAnnT2 = annT2
Insert cell
//merge(mutable mutT2, patches(mutable mutT2, mutable mutAnnT2), patches(mutable mutT2, newT2))
Insert cell
{
// Incremental re-merge: carry the annotations from the stored annotated text onto
// the current `newT2`, using the stored plain text as the common base.
mutable mutAnnT2 = merge2(mutable mutT2, mutable mutAnnT2, newT2)
// Advance the stored base only after the merge above has read the previous one.
mutable mutT2 = newT2
}
Insert cell
patches(oldT2, annT2)
Insert cell
patches(oldT2, newT2)
Insert cell
oldT.length
Insert cell
// Diffs `old` against `current` with the custom differ `d`, flags whole-line
// additions, and converts the result into positional patches.
patches = (old, current) => {
  const changes = d.diff(old, current)
  const flagged = addIsNewline(changes)
  return diffs2Patches(flagged)
}
Insert cell
// Applies one patch `d` to string `s` and returns the new string.
// d[0] is the patch text and d.index its position:
//   * d.added truthy: the text is inserted at d.index;
//   * otherwise: the d[0].length characters ending at d.index are cut out.
applyDelta = (s, d) => {
  const payload = d[0]
  const span = payload.length
  const tail = s.slice(d.index)
  return d.added
    ? s.slice(0, d.index) + payload + tail
    : s.slice(0, d.index - span) + tail
}
Insert cell
{
const t0 = performance.now()
const out = merge2(oldT2, annT2, newT2)
const t1 = performance.now()
return [t1 - t0, out]
}
Insert cell
// Moves annotation tags around in the string according to two rules:
//   * a closing tag </n> is never left directly after a newline, whitespace or an
//     opening paren/bracket/brace — it is shifted left past such characters;
//   * an opening tag <n> is never left directly before a newline, whitespace or a
//     closing paren/bracket/brace — it is shifted right past such characters.
// Returns the rearranged string. A tag that reaches the very start or end of the
// string simply stops there.
clean = s => {
  const skippableBeforeClose = /[\r\n\s\(\[\{]/
  const skippableAfterOpen = /[\r\n\s\)\]\}]/
  let text = s

  // Closing tags, front to back: a shift only rewrites characters to the left of
  // the tag, so the recorded indexes of later tags stay valid.
  for (const tag of [...text.matchAll(/<\/\d+>/g)]) {
    let idx = tag.index
    while (idx > 0 && skippableBeforeClose.test(text[idx - 1])) {
      text = text.slice(0, idx - 1) + tag[0] + text[idx - 1] + text.slice(idx + tag[0].length)
      idx -= 1
    }
  }

  // Opening tags, back to front, for the mirrored reason. They are located only
  // after the closing-tag pass so their indexes reflect the updated text.
  const openingTags = [...text.matchAll(/<\d+>/g)].sort((a, b) => b.index - a.index)
  for (const tag of openingTags) {
    const len = tag[0].length
    let idx = tag.index
    while (idx + len < text.length && skippableAfterOpen.test(text[idx + len])) {
      text = text.slice(0, idx) + text[idx + len] + tag[0] + text.slice(idx + len + 1)
      idx += 1
    }
  }
  return text
}
Insert cell
merge2(oldT2, annT2, newT2)
Insert cell
s = merge1(oldT2, annT2, newT2)
Insert cell
{
function swap(s, idx1, idx2) {
const copy = s.split('')
copy[idx1] = s[idx2]
copy[idx2] = s[idx1]
return copy.join('')
}
let s1 = s
for (const tag of s1.matchAll(/<\/\d+>/g)) {
let idx = tag.index
while (s1[idx-1].match(/[\r\n\s\(\[\{]/)) {
s1 = s1.slice(0, idx-1) + tag[0] + s1[idx-1] + s1.slice(idx + tag[0].length)
idx = idx-1
}
}
return s1
}
Insert cell
// Three-way merge of base `s`, annotated text `ann` and edited text `newT`,
// followed by the tag-placement cleanup pass.
merge2 = (s, ann, newT) => {
  const merged = merge1(s, ann, newT)
  return clean(merged)
}
Insert cell
// Diffs both variants against the shared base `s` and merges the two patch lists.
merge1 = (s, s1, s2) => {
  const left = patches(s, s1)
  const right = patches(s, s2)
  return merge(s, left, right)
}
Insert cell
// Merges two patch lists that were both computed against base string `s` and
// returns the patched string.
//   s   - base text both patch lists refer to
//   ds1 - first patch list (tagged `left: true` below)
//   ds2 - second patch list (tagged `right: true` below)
// Each patch is read as {0: text, index, added, removed, newline}.
// Every step is logged to the console.
merge = (s, ds1, ds2) => {
// Go from back to front, applying removals first when there are conflicts
console.log('start merge', s, ds1, ds2)
// Comparator: higher index first; ties are broken by patch kind.
const order = (a, b) => {
// NOTE: this local `diff` shadows the notebook's `diff` cell inside the comparator.
const diff = a.index - b.index
if (diff) {return -diff } // reverse overall order
// Two additions at the same index: a newline-flagged addition sorts after the
// other one; otherwise the only non-zero result is 1, returned when `a` is a
// left patch, `b` is not, and the second character of a's text is not '/'.
if (a.added && b.added) {
console.log(a, b, a.left ? b.left ? 0 : (a[0][1] === '/' ? 0 : 1) : 0)
return a.newline ? 1 : b.newline ? -1 // make sure added lines end up applied last
: a.left ? b.left ? 0 : (a[0][1] === '/' ? 0 : 1) : 0}
// NOTE(review): for two removals this returns -1 whichever way round they are
// passed, so the resulting order depends on the engine's sort implementation.
if (a.removed === b.removed) {return -1} // TODO preserve order within removals/additions
if (a.removed) {return -1} // Apply removals first
return 1 // Flip if order is a:removed, b:added
}
// Tag each patch with the side it came from, then sort both lists together.
const sorted = [...ds1.map(d => ({...d, left: true})), ...ds2.map(d => ({...d, right: true}))].sort(order)
console.log(sorted)

// Move annotations that occur in a removed section back to the start of the section.
// `removeStartIdx` holds the start offset of the most recent removal seen while
// walking the sorted list; it is cleared once a patch sits exactly at that offset.
const reindexed = sorted.reduce(([acc, removeStartIdx], d) => {
let index = d.index;
if (removeStartIdx === d.index) {
removeStartIdx = undefined
}
if (d.removed) {
console.log('HERE', d)
// Removal indexes point at the end of the removed range, so subtract its length.
removeStartIdx = d.index - d[0].length
}
else if (removeStartIdx !== undefined && removeStartIdx < d.index) {
index = removeStartIdx
}
return [[...acc, {...d, index}], removeStartIdx]
}, [[], undefined])[0].sort(order)

console.log(reindexed)
// Apply the patches one after another, in the order established above.
return reindexed.reduce((acc, d) => {
const out = applyDelta(acc, d)
console.log(d[0], d.index, out)
return out
}, s)
}
Insert cell
distance = require('jaro-winkler')
Insert cell
oldT2
Insert cell
newT2
Insert cell
annT2
Insert cell
merge2(oldT2, annT2, newT2)
Insert cell
merge3(annT2, newT2)
Insert cell
merge4(annT2, newT2)[0]
Insert cell
//clean(out[0])
Insert cell
out = {
const t0 = performance.now()
const out = doubleSided && merge5(annT2, newT2)
return out
}
Insert cell
// Re-places the annotation tags of `annT2` into the edited text `newT2` by
// similarity scoring (via `distance`) instead of diffing.
//
// For every tag and every candidate position i in newT2 the score is
//   0.3 * similarity of the N characters to the right
// + 0.3 * similarity of the N characters to the left
// + 0.2 * similarity of the 3 characters to the right
// + 0.2 * similarity of the 3 characters to the left
// + 0.4 * best similarity of the tagged range (open..close) to any range in
//         newT2 that starts (opening tag) or ends (closing tag) at i.
// Each tag goes to its best-scoring position.
//
// Returns [annotatedNewText, fullmapping] where fullmapping is a list of
// [tagText, [bestIndex, bestScore]]; returns a one-element message array when
// newT2 is longer than 500 characters (the range scoring is quadratic in length).
merge5 = (annT2, newT2) => {
  if (newT2.length > 500) return ['aborting to avoid performance issue']
  // try "looking left and right", since this algo ostensibly likes matching prefixes
  // also assess range fit
  const tagRE = /<\/?\d+>/g
  const N = 20 // one-sided context length
  const M = 3 // context for range matching
  const nSpaces = n => ' '.repeat(n)
  const context = (s, i, k) => (s + nSpaces(k)).slice(i, i + k)
  const pad2sides = (s, k) => nSpaces(k) + s + nSpaces(k)
  const reverse = s => s.split('').reverse().join('')
  const isOpen = t => t.tag[0][1] !== '/'
  const tagId = text => Number(text.match(/\d+/)[0]) // handles multi-digit ids

  const oldT2 = annT2.replaceAll(tagRE, '')
  const oldT2rev = reverse(oldT2)
  const newT2rev = reverse(newT2)
  const n = newT2.length
  const positions = Array.from({length: n}, (_, i) => i)

  // Rebase every tag's index onto the tag-free old text and record its context.
  let stripped = 0
  const newTags = [...annT2.matchAll(tagRE)].map((match, idx) => {
    const tag = {...match, index: match.index - stripped}
    stripped += match[0].length
    return {
      contextR: context(oldT2, tag.index, N),
      contextL: context(oldT2rev, oldT2.length - tag.index, N),
      tag,
      idx,
    }
  })

  // One n-by-n matrix per opening tag: entry [i][j] = [[i, j], similarity of the
  // old tagged range to newT2's range i..j] (-1 where j < i). An opening tag
  // without a closing partner gets no matrix.
  // NOTE(review): both texts are padded by M on each side but sliced with
  // unpadded indexes, so the compared window sits M characters to the left of
  // the tagged range — confirm this is the intended range context.
  const oldPadded = pad2sides(oldT2, M)
  const newPadded = pad2sides(newT2, M)
  const openTags = newTags.filter(isOpen)
  const pairMapping = openTags.map(open => {
    const id = tagId(open.tag[0])
    const close = newTags.find(t2 => !isOpen(t2) && tagId(t2.tag[0]) === id)
    if (!close) return null
    const oldRange = oldPadded.slice(open.tag.index, close.tag.index)
    return positions.map(i =>
      positions.map(j => [[i, j], j < i ? -1 : distance(oldRange, newPadded.slice(i, j))]))
  })

  const fullmapping1 = newTags.map(t => {
    const id = tagId(t.tag[0])
    const matrix = pairMapping[openTags.findIndex(o => tagId(o.tag[0]) === id)]
    // Best range score with i as the range start (opening tag: row i) or as the
    // range end (closing tag: column i). Unpaired tags contribute nothing.
    const bestRangeScore = i => {
      if (!matrix) return 0
      let best = -1
      for (let k = 0; k < n; k++) {
        const score = isOpen(t) ? matrix[i][k][1] : matrix[k][i][1]
        if (score > best) best = score
      }
      return best
    }
    return [t.tag[0], positions.map(i => (
      distance(t.contextR, context(newT2, i, N)) * .3 +
      distance(t.contextL, context(newT2rev, n - i, N)) * .3 +
      distance(t.contextR.slice(0, 3), context(newT2, i, 3)) * .2 +
      distance(t.contextL.slice(0, 3), context(newT2rev, n - i, 3)) * .2 +
      bestRangeScore(i) * .4
    ))]
  })

  // [tagText, [index of first maximum, maximum]]
  const fullmapping = fullmapping1.map(([t, scores]) =>
    [t, scores.reduce(([i, max], v, j) => [v > max ? j : i, v > max ? v : max], [-1, null])])

  // Insert from the highest index down so earlier insertions do not shift later
  // ones. At equal indexes the lower tag id is inserted first and therefore ends
  // up to the right of the higher id.
  const out = fullmapping
    .map(([t, [index]]) => [t, index])
    .sort((a, b) => (b[1] - a[1]) || (tagId(a[0]) - tagId(b[0])))
    .reduce((acc, [t, i]) => acc.slice(0, i) + t + acc.slice(i), newT2)
  return [out, fullmapping]
}
Insert cell
// Re-places the annotation tags of `annT2` into the edited text `newT2`: each tag
// goes to the position whose trimmed N-character right context plus trimmed
// N-character left context is most similar (per `distance`) to the tag's
// contexts in the old text.
//
// Returns [annotatedNewText, fullmapping] where fullmapping is a list of
// [tagText, [bestIndex, bestScore]]; returns a one-element message array when
// newT2 is longer than 100 characters.
merge4 = (annT2, newT2) => { // try "looking left and right", since this algo likes matching prefixes
  if (newT2.length > 100) return ['aborting to avoid performance issue']
  const tagRE = /<\/?\d+>/g
  const N = 30
  // Trimmed window of up to k characters starting at i (padding would be
  // trimmed away again, so none is added).
  const context = (s, i, k) => s.slice(i, i + k).trim()
  const reverse = s => s.split('').reverse().join('')
  const tagId = text => Number(text.match(/\d+/)[0]) // handles multi-digit ids

  const oldT2 = annT2.replaceAll(tagRE, '')
  const oldT2rev = reverse(oldT2)
  const newT2rev = reverse(newT2)

  // Rebase every tag's index onto the tag-free old text and record its context.
  let stripped = 0
  const newTags = [...annT2.matchAll(tagRE)].map(match => {
    const tag = {...match, index: match.index - stripped}
    stripped += match[0].length
    return {
      contextR: context(oldT2, tag.index, N),
      contextL: context(oldT2rev, oldT2.length - tag.index, N),
      tag,
    }
  })

  // [tagText, [index of first maximum, maximum]]
  const fullmapping = newTags.map(t => [
    t.tag[0],
    Array.from({length: newT2.length}, (_, i) =>
      distance(t.contextR, context(newT2, i, N)) +
      distance(t.contextL, context(newT2rev, newT2.length - i, N)))
      .reduce(([i, max], v, j) => [v > max ? j : i, v > max ? v : max], [-1, null]),
  ])

  // Insert from the highest index down so earlier insertions do not shift later
  // ones. At equal indexes the lower tag id is inserted first and therefore ends
  // up to the right of the higher id.
  const out = fullmapping
    .map(([t, [index]]) => [t, index])
    .sort((a, b) => (b[1] - a[1]) || (tagId(a[0]) - tagId(b[0])))
    .reduce((acc, [t, i]) => acc.slice(0, i) + t + acc.slice(i), newT2)
  return [out, fullmapping]
}
Insert cell
// Re-places the annotation tags of `annT2` into the edited text `newT2`: each tag
// goes to the position whose surrounding window (N characters on either side,
// trimmed) is most similar (per `distance`) to the tag's window in the old text.
//
// Returns the annotated new text, or a one-element message array when newT2 is
// longer than 1000 characters.
merge3 = (annT2, newT2) => {
  if (newT2.length > 1000) return ['aborting to avoid performance issue']
  const tagRE = /<\/?\d+>/g
  const N = 20
  // Trimmed window covering k characters before and k characters after position i.
  const context = (s, i, k) => s.slice(Math.max(0, i - k), i + k).trim()
  const tagId = text => Number(text.match(/\d+/)[0]) // handles multi-digit ids
  const oldText = annT2.replaceAll(tagRE, '')

  // Rebase every tag's index onto the tag-free old text and record its context.
  let stripped = 0
  const newTags = [...annT2.matchAll(tagRE)].map(match => {
    const tag = {...match, index: match.index - stripped}
    stripped += match[0].length
    return {context: context(oldText, tag.index, N), tag}
  })

  // [tagText, index of the first best-scoring position]
  const mapping = newTags.map(t => [
    t.tag[0],
    Array.from({length: newT2.length}, (_, i) => distance(t.context, context(newT2, i, N)))
      .reduce(([i, max], v, j) => [v > max ? j : i, v > max ? v : max], [-1, null])[0],
  ])

  // Insert from the highest index down so earlier insertions do not shift later
  // ones. At equal indexes the lower tag id is inserted first and therefore ends
  // up to the right of the higher id.
  const out = mapping
    .sort((a, b) => (b[1] - a[1]) || (tagId(a[0]) - tagId(b[0])))
    .reduce((acc, [t, i]) => acc.slice(0, i) + t + acc.slice(i), newT2)
  return out
}
Insert cell
Insert cell
// Sample edit script in inline-diff markup: `[-…-]` wraps removed text and
// `{+…+}` wraps added text (the two marker shapes patches2 matches).
oldNew = 'abcdefGhi.Lmn([-opQrs-]{+A + B+})'
Insert cell
patches2(oldAnn)
Insert cell
patches2(oldAnn)
Insert cell
// Sample annotation script in the same inline-diff markup: every annotation tag
// (<n> / </n>) is written as an addition `{+…+}` around otherwise unchanged text.
oldAnn = '{+<3><1>+}abcdefGhi{+</1>+}.Lmn({+<2>+}opQrs{+</2>+}){+</3>+}'
Insert cell
patches2(oldNew)
Insert cell
m = import('https://unpkg.com/three-way-merge@0.1.0/dist/modules/src/index.js?module')
Insert cell
patches2(oldAnn)
Insert cell
applyDelta(oldT, patches2(oldAnn)[0])
Insert cell
applyDelta(oldT, patches2(oldNew)[0])
Insert cell
patches2(oldNew)
Insert cell
patches2(oldAnn)[0][0]
Insert cell
// Parses inline-diff markup — `{+added+}` and `[-removed-]` — into patches
// against the OLD text, in the shape applyDelta and merge consume.
//
// Each patch is the regex match object with these fields set:
//   0        - the added/removed text without its markers
//   added    - true for `{+…+}`
//   removed  - true for `[-…-]`
//   newIndex - offset of the marker in the marked-up string
//   index    - offset in the old text: the insertion point for additions, the
//              END of the removed range for removals (applyDelta cuts the
//              d[0].length characters that end at d.index)
patches2 = s => {
  // Each marker kind must close with its own delimiter, so content may contain
  // stray brackets/braces and line breaks.
  const markup = /\{\+[\s\S]*?\+\}|\[-[\s\S]*?-\]/g
  const MARKER_CHARS = 4 // two opening plus two closing marker characters
  let extra = 0 // characters seen so far that do not exist in the old text
  return [...s.matchAll(markup)].map(o => {
    const text = o[0].slice(2, -2)
    const added = o[0][1] === '+'
    const start = o.index - extra
    // Added text is absent from the old string; removed text is part of it, so
    // only its markers count as extra.
    extra += added ? o[0].length : MARKER_CHARS
    return {
      ...o,
      0: text,
      added,
      removed: !added,
      newIndex: o.index,
      index: added ? start : start + text.length,
    }
  })
}
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more