Public
Edited
Oct 6, 2022
Insert cell
Insert cell
Insert cell
// This is where you can SET the GUEST and HOST texts etc:
// if you change anything be sure to RUN THE CELL before clicking ‘generate’
cfg = {
let cfg = {}; // new Config(limage, theimage)
cfg.guest = theimage;
cfg.host = commentcestSliced;
//
if (enTexts.includes(cfg.guest)) cfg.sylls = hyphen;
else cfg.sylls = hyphfr;
//
cfg.useSubsequentHosts = false;
cfg.numberParagraphs = false;
cfg.exactMatches = true; // allow exact matches (e.g. if using same language in guest and host)
cfg.alwaysSplit = false; // force multiple word hosts for all guests except single-letter guests
cfg.produceDictionary =
!cfg.useSubsequentHosts && config(cfg, "guest") == limage ? true : false;
//
cfg.htagOpen = "<em>";
cfg.htagClose = "</em>";
cfg.maxKsplits = 5;
cfg.guestColor = "darkred"; // darkred good guest highlighting with ..
cfg.hostColor = "black"; // ... orange; or lightgray/lightslategray/lightsteelblue
cfg.paraTag = `<p class="fade">`; // must use double-quotes for id specification
cfg.guestWordDelimiter = " "; // ` ${cfg.htagOpen}/${cfg.htagClose} `; // "&nbsp; " or " / " or "<br>"

return cfg;
}
Insert cell
displayObj
Insert cell
<div id="display">${displayObj.htmlStr}</div>
Insert cell
<div id="dictionary">${dStr}</div>
Insert cell
Insert cell
Insert cell
// Builds the display HTML for the configured guest text: for every guest token it looks
// for host token(s) that contain it and appends the highlighted host text to htmlStr.
// Strategies are tried in this order, each ending the iteration on success:
//   1. shared proper names (sharedCaps), 2. exact single-letter matches,
//   3. one host token containing the whole guest token, 4. the hyphenation syllables,
//   5. every k-way split of the token for k = 1 .. maxKsplits.
// Returns { htmlStr, unhosted, unhostedSyls, sortedDictionary, phrases }.
// NOTE(review): the `cfg` parameter shadows the notebook-level cfg only inside this
// function; conTokenFor, highlight and guestSpc read the notebook-level cfg.
display = (cfg) => {
// pNum is 1 when paragraphs are numbered, otherwise 0 (0 keeps numbering switched off below)
let pNum = config(cfg, "numberParagraphs") ? 1 : 0;
let htmlStr = config(cfg, "paraTag") + (pNum > 0 ? `*${pNum}.* ` : ``); // using * for <em> tags
// dictionary (guest -> host string) and phrases are only collected when configured
let dictionary, phrases;
if (config(cfg, "produceDictionary")) {
dictionary = new Map();
phrases = new Set();
} else {
dictionary = undefined;
phrases = undefined;
}
let kpermutations;
// guest tokens / syllables for which no host was found (de-duplicated into Sets at the end)
let unhosted = [];
let unhostedSyls = [];
// index in hostTokens from which the next search starts
let hostIdx = 0;
let lastpNum = 0;
// placeholder value; tkObj is reassigned inside the loop
let tkObj = { idx: 0, guestToken: "To Begin" };
loop_through_guestTokens: for (let guestToken of config(cfg, "guest")) {
// handle space breaks as in supply texts
// console.log(pNum);
// pStr is only used for the console log: the paragraph number, shown once per paragraph
let pStr = "";
if (pNum == lastpNum) {
pStr = "";
} else {
lastpNum = pNum;
pStr = pNum + ". ";
}

console.log(pStr, `${guestToken}`);

// reset hostIdx if generating stable per-guest host phrases:
if (!config(cfg, "useSubsequentHosts")) {
hostIdx = 0;
}

// a "\n" token starts a new paragraph
if (guestToken === `\n`) {
htmlStr += config(cfg, "paraTag") + (pNum > 0 ? `*${++pNum}.* ` : ``); // using * for <em> tags
continue loop_through_guestTokens;
}

// sharedCaps terms (shared proper names) are always 'found'
if (sharedCaps.has(guestToken)) {
tkObj = {};
tkObj.idx = searchFromExact(guestToken, hostTokens, hostIdx);
tkObj.guestToken = guestToken;
if (tkObj.idx > -1) {
// only bump hostIdx if we actually found the shared PN
tkObj.hostToken = hostTokens[tkObj.idx];
hostIdx = (tkObj.idx + 1) % hostTokens.length;
} else {
tkObj.hostToken = "[shared PN not in host]";
}
// the whole guest token is displayed as highlighted text (no host letters around it)
tkObj.displayStr =
`${config(cfg, "htagOpen")}` +
guestToken +
`${config(cfg, "htagClose")}`;
tkObj.caps = -1;
htmlStr += guestSpc(tkObj.displayStr);
report("shrdPN", [tkObj], dictionary);
continue loop_through_guestTokens;
} else tkObj = undefined;

// single letter tokens: case-sensitive exact match against a whole host token
if (guestToken.length === 1 && config(cfg, "exactMatches")) {
let idx = searchFrom(guestToken, hostTokens, hostIdx, true);
if (idx > -1) {
tkObj = {};
tkObj.idx = idx;
tkObj.guestToken = guestToken;
tkObj.hostToken = hostTokens[tkObj.idx];
tkObj.displayStr =
`${config(cfg, "htagOpen")}` +
guestToken +
`${config(cfg, "htagClose")}`;
tkObj.caps = -1;
htmlStr += guestSpc(tkObj.displayStr);
report("oneLtr", [tkObj], dictionary);
hostIdx = (idx + 1) % hostTokens.length;
continue loop_through_guestTokens;
} else tkObj = undefined;
}

// uppercase treatment
// caps flag: -1 = no capital to restore, 99 = all caps,
// otherwise the index of the single character to re-capitalize (0 or 2)
let caps = -1;
if (/[A-ZÉ]/.test(guestToken)) {
if (guestToken === guestToken.toUpperCase() && guestToken.length > 1) {
// these are all caps guestTokens, flagged with caps = 99 and changed to lower case
caps = 99;
} else if (guestToken.length > 0) {
// mixed-case, non-empty guestToken (sharedCaps terms were already handled above)
if (guestToken[0] !== guestToken[0].toLowerCase()) {
// i.e. capitalized words, including "I", flagged with caps = 0 (index of the capital)
caps = 0;
}
// French cases of "l’<oneCap>..."
if (
guestToken.length > 2 &&
guestToken[2] !== guestToken[2].toLowerCase()
) {
// capitalized after "l’"
caps = 2;
}
}
guestToken = guestToken.toLowerCase();
}
// end uppercase treatment

// with alwaysSplit on, tokens of two or more characters skip the whole-token search
if (!config(cfg, "alwaysSplit") || guestToken.length < 2) {
// search hostTokens for first token containing all of the guestToken
tkObj = conTokenFor(guestToken, caps, hostIdx);
// tkObj will be undefined if not found
if (tkObj) {
htmlStr += guestSpc(tkObj.displayStr);
hostIdx = (tkObj.idx + 1) % hostTokens.length;
report("allFnd", [tkObj], dictionary);
continue loop_through_guestTokens;
}
} else tkObj = undefined;
// entire guestToken is not in any hostToken

// single-letter words should have been found
if (guestToken.length < 2) {
unhosted.push(guestToken);
console.log(`Single-letter token, ${guestToken} unfound at: ${hostIdx}`);
continue loop_through_guestTokens;
}

// try hyphenated tokens first
// capture caps parameter (which is -1 on first match)
let tempCaps = caps;
let partsArray = [];
let guestSyls = config(cfg, "sylls").hyphenate(guestToken);
// special cases: fixed syllable lists for multi-word guest tokens
if (guestToken === "de rigueur") guestSyls = ["de", "ri", "gueur"];
if (guestToken === "à la") guestSyls = ["à", "la"];
if (guestToken === "in extremis") guestSyls = ["in", "ex", "trem", "is"];
if (guestToken === "en route") guestSyls = ["en", "route"];
// currently a no-op: the body below contains only comments
if (config(cfg, "sylls") === hyphen) {
if (
guestSyls.filter((s) => s.includes("k") || s.includes("w")).length > 0
) {
// TODO ???
// console.log(pNum + ": " + guestSyls);
}
}
if (guestSyls.length > 1) {
let displayStr = "";
let foundAll = true;
// let oneCap = guestObj.oneCap;
// NOTE(review): the loop does not stop at the first unfound syllable, and hostIdx
// advances made for earlier syllables are kept even when the attempt fails
for (let sylIdx = 0; sylIdx < guestSyls.length; sylIdx++) {
// inner guestToken shadows the outer one: it is the current syllable
let guestToken = guestSyls[sylIdx];
tkObj = conTokenFor(guestToken, caps, hostIdx);
if (!tkObj) {
foundAll = false;
unhostedSyls.push(guestSyls[sylIdx]);
// capture caps parameter (which becomes -1 on first match)
caps = tempCaps;
partsArray = [];
} else {
partsArray.push(tkObj);
hostIdx = (tkObj.idx + 1) % hostTokens.length;
caps = tkObj.caps;
displayStr += tkObj.displayStr + "&nbsp;";
}
}
if (foundAll) {
htmlStr += guestSpc(displayStr);
// tkObj is the last syllable's match here, so tkObj.idx is its host index
report("Hyph> ", partsArray, dictionary, phrases, tkObj.idx);
continue loop_through_guestTokens;
} else tkObj = undefined;
}

// new (better) code for generating any number of split subTokens
for (let k = 1; k < config(cfg, "maxKsplits") + 1; ++k) {
// a token needs at least k + 1 characters to be cut k times; otherwise give up
if (guestToken.length < k + 1) {
unhosted.push(guestToken);
continue loop_through_guestTokens;
}
// generates array of all for k splits from 1 to cfg.maxKsplits
kpermutations = permutedSplits(guestToken, k);
// console.log(pNum + ". kpermutations: " + kpermutations);
for (let idx = 0; idx < kpermutations.length; ++idx) {
guestSyls = kpermutations[idx].slice();
// checking that the slice yields two strings:
if (guestSyls.length > 1) {
let displayStr = "";
let foundAll = true;
partsArray = [];
// capture caps parameter (which becomes -1 on first match)
caps = tempCaps;
// same matching loop as for the hyphenation syllables, minus the unhostedSyls bookkeeping
for (let sylIdx = 0; sylIdx < guestSyls.length; sylIdx++) {
let guestToken = guestSyls[sylIdx];
tkObj = conTokenFor(guestToken, caps, hostIdx);
if (!tkObj) {
foundAll = false;
caps = tempCaps;
partsArray = [];
} else {
partsArray.push(tkObj);
hostIdx = (tkObj.idx + 1) % hostTokens.length;
caps = tkObj.caps;
displayStr += tkObj.displayStr + "&nbsp;";
}
} // end syls loop
if (foundAll) {
htmlStr += guestSpc(displayStr);
report("kPerm> ", partsArray, dictionary, phrases, tkObj.idx);
continue loop_through_guestTokens;
} else tkObj = undefined;
} // at least one syllable
} // end permutations loop
}

// no strategy produced a host for this token
unhosted.push(guestToken);
}

// collecting guestToken's that have not been hosted
unhosted = new Set(unhosted);
unhostedSyls = new Set(unhostedSyls);
let sortedDictionary = undefined;
if (dictionary) {
sortedDictionary = new Map();
// sorting is based on the previous sorted liConcordance
// (keys of liConcordance that are missing from dictionary are stored with value undefined)
for (const [key, value] of liConcordance) {
sortedDictionary.set(key, dictionary.get(key));
}
}
return { htmlStr, unhosted, unhostedSyls, sortedDictionary, phrases };
}
Insert cell
// Logs one guest→host match and, when a dictionary is supplied, records it.
//   type       – label printed in the console log (e.g. "allFnd", "Hyph> ")
//   partsArray – token objects ({ guestToken, displayStr, … }) making up the match
//   dictionary – optional Map; the first host string found for each guest is stored
//   phrases    – optional Set; receives "guest: phrase (count)" when the host words
//                occupy consecutive positions in hostTokens
//   idx        – host index of the LAST part (only read when phrases is given)
report = (type, partsArray, dictionary, phrases, idx) => {
  let host = "";
  let guest = "";
  for (const tkObj of partsArray) {
    guest += tkObj.guestToken;
    host += tkObj.displayStr + " ";
  }
  host = host.trim();

  // only the first host found for a guest is recorded
  if (dictionary && !dictionary.has(guest)) {
    const count = " (" + liConcordance.get(guest) + ")";
    dictionary.set(guest, host + count);

    if (phrases) {
      // Strip the highlight markers. Display strings are wrapped in the configured
      // htagOpen/htagClose; "*" is still removed for configurations that use it as the tag.
      const plainHost = host
        .replaceAll("*", "")
        .replaceAll(config(cfg, "htagOpen"), "")
        .replaceAll(config(cfg, "htagClose"), "");
      const phrase = norm(plainHost);
      const words = phrase.split(" ");

      // Walk backwards from the last part's host index: every earlier word must sit
      // immediately before the next one in hostTokens. Both sides are compared
      // accent- and case-insensitively, because a displayed word carries the guest's
      // letters (and capitals) in place of the host's.
      let cursor = idx; // local cursor, so the parameter is not reassigned
      let consecutive = true;
      for (let i = words.length - 2; i > -1; --i) {
        cursor = mod(cursor - 1, hostTokens.length);
        consecutive =
          norm(hostTokens[cursor]).toLowerCase() === words[i].toLowerCase();
        if (!consecutive) break;
      }
      if (consecutive) {
        phrases.add(`${guest}: ${phrase}${count}`);
      }
    }
  }
  console.log(type, host, partsArray);
}
Insert cell
Insert cell
// Finds a host token for guestToken, searching from hostIdx and wrapping around.
// With exactMatches on, a host token equal to the guest (per searchFrom) wins first;
// otherwise the first host token that contains the guest is used.
// Returns { idx, guestToken, hostToken, displayStr, caps } or undefined when nothing matches.
conTokenFor = (guestToken, caps, hostIdx) => {
  const allowExact = config(cfg, "exactMatches");

  // Builds the result object for the host token at position `at`.
  const hit = (at) => {
    const hostToken = hostTokens[at]; // the actual, non-normalized host token
    const marked = highlight(guestToken, hostToken, caps);
    return {
      idx: at,
      guestToken,
      hostToken,
      displayStr: marked.displayStr,
      caps: marked.caps
    };
  };

  if (allowExact) {
    // single letters and the edge cases "ba"/"ab" are matched case-sensitively
    const caseSensitive =
      guestToken.length === 1 || ["ba", "ab"].includes(guestToken);
    const exactIdx = searchFrom(guestToken, hostTokens, hostIdx, caseSensitive);
    if (exactIdx > -1) return hit(exactIdx);
  }

  // no exact match: look for a host that contains the guest
  const needle = norm(guestToken);
  const scan = (from, to) => {
    for (let at = from; at < to; at++) {
      const hay = norm(hostTokens[at]).toLowerCase();
      if (!hay.includes(needle)) continue;
      // when exact matches are disallowed, a host identical to the guest is skipped
      if (!allowExact && needle === hay) continue;
      return hit(at);
    }
    return undefined;
  };

  // first from the current position to the end, then from the start up to it
  return scan(hostIdx, hostTokens.length) || scan(0, hostIdx);
}
Insert cell
// Finishes one displayed guest word: trims it, drops a single trailing "&nbsp;"
// and appends the configured guest word delimiter.
// Non-string input yields a visible marker string instead.
guestSpc = (str) => {
  if (typeof str !== "string") return "something wrong guestSpc ";
  const withoutTrailingNbsp = str.trim().replace(/&nbsp;$/, "");
  return withoutTrailingNbsp + config(cfg, "guestWordDelimiter");
}
Insert cell
"this&nbsp;and that&nbsp;".replace(new RegExp("&nbsp;$"), "")
Insert cell
// Wraps the guest's letters inside hostToken with the configured highlight tags.
//   guestToken – (lower-cased) guest text to show inside the host
//   hostToken  – host token; matching ignores accents and case
//   caps       – 99: show the guest in all caps; 0 or more: upper-case the character
//                at that index once, then hand back -1; -1: leave the case alone
// Returns { displayStr, caps }. When the guest does not occur in the host, displayStr
// is the marker "something wrong highlight " and caps is returned unchanged, so that
// callers destructuring the result get a visible marker rather than a TypeError.
highlight = (guestToken, hostToken, caps) => {
  const guestTokenNorm = norm(guestToken).toLowerCase();
  const hostTokenNorm = norm(hostToken).toLowerCase();
  const tkIdx = hostTokenNorm.indexOf(guestTokenNorm);
  if (tkIdx === -1) {
    return { displayStr: "something wrong highlight ", caps };
  }

  // Remember how many host characters the guest replaces BEFORE changing its case:
  // toUpperCase() can lengthen a string (e.g. "ß" -> "SS").
  const replacedLength = guestToken.length;

  let shown = guestToken;
  if (caps === 99) {
    shown = guestToken.toUpperCase();
  } else if (caps > -1 && guestToken.length > caps) {
    shown =
      guestToken.slice(0, caps) +
      guestToken[caps].toUpperCase() +
      guestToken.slice(caps + 1);
    caps = -1; // the single capital has been used up
  }

  const displayStr =
    hostToken.slice(0, tkIdx) +
    `${config(cfg, "htagOpen")}` +
    shown +
    `${config(cfg, "htagClose")}` +
    hostToken.slice(tkIdx + replacedLength);
  return { displayStr, caps };
}
Insert cell
hostTokens = config(cfg, "host")
Insert cell
// Removes diacritics: decomposes to NFD, then drops the combining marks U+0300–U+036F.
// Apostrophes and hyphens are left untouched.
norm = (tk) => {
  const decomposed = tk.normalize("NFD");
  return decomposed.replace(/[\u0300-\u036f]/g, "");
}
Insert cell
// Returns every way of cutting str into numberOfSplits + 1 non-empty pieces,
// as an array of string arrays (built by resultStrs), ordered from the latest
// cut positions to the earliest.
// Returns undefined when str is not a string or is too short to be cut that often,
// and [] when numberOfSplits is less than 1.
//
// The cut positions are generated directly as strictly increasing combinations,
// instead of enumerating every tuple of positions and filtering afterwards.
permutedSplits = (str, numberOfSplits = 2) => {
  if (typeof str !== "string" || str.length < numberOfSplits + 1)
    return undefined;
  if (numberOfSplits < 1) return [];

  const lastCut = str.length - 1; // valid cut positions are 1 .. str.length - 1
  const combos = [];
  const current = [];

  // Extends `current` with cut positions >= firstCut, in lexicographic order.
  const extend = (firstCut) => {
    if (current.length === numberOfSplits) {
      combos.push(current.slice());
      return;
    }
    // leave room for the cuts that still have to follow this one
    const stillNeeded = numberOfSplits - current.length;
    for (let cut = firstCut; cut <= lastCut - stillNeeded + 1; ++cut) {
      current.push(cut);
      extend(cut + 1);
      current.pop();
    }
  };
  extend(1);

  // turn into strings
  return resultStrs(combos, str).reverse();
}
Insert cell
// Converts lists of cut positions into the pieces of str they delimit:
// a list of n positions yields n + 1 substrings (the last one runs to the end).
resultStrs = (splits, str) =>
  [...splits].map((cuts) => {
    const pieces = [];
    let start = 0;
    for (let i = 0; i <= cuts.length; ++i) {
      // cuts[cuts.length] is undefined, so the final slice runs to the end of str
      pieces.push(str.slice(start, cuts[i]));
      start = cuts[i];
    }
    return pieces;
  })
Insert cell
// Returns the index of the first element of array equal to item, ignoring accents
// (via norm) and, unless caseSensitive is true, ignoring case.
// The search starts at idx, runs to the end and then wraps to cover 0 .. idx - 1.
// Returns -1 when nothing matches.
//
// In case-insensitive mode BOTH sides are lower-cased, so an item that still carries
// a capital (e.g. "Ça") can match its lower-case host.
searchFrom = (item, array, idx, caseSensitive = false) => {
  // the comparison key for one token
  const fold = (text) => {
    const stripped = norm(text);
    return caseSensitive ? stripped : stripped.toLowerCase();
  };
  const target = fold(item);

  const scan = (from, to) => {
    for (let i = from; i < to; ++i) {
      if (fold(array[i]) === target) return i;
    }
    return -1;
  };

  const ahead = scan(idx, array.length);
  return ahead > -1 ? ahead : scan(0, idx);
}
Insert cell
// Strict-equality search for item in array: first from idx to the end,
// then, if nothing was found there, among the elements before idx.
searchFromExact = (item, array, idx) => {
  const ahead = array.indexOf(item, idx);
  if (ahead !== -1) return ahead;
  // Nothing at or after idx, so the first occurrence overall (if any) lies before idx.
  return array.indexOf(item);
}
Insert cell
// Cuts str in two at position sp: [everything before sp, everything from sp on].
splitAt = (str, sp) => {
  const head = str.slice(0, sp);
  const tail = str.slice(sp);
  return [head, tail];
}
Insert cell
import { liConcordance, sharedCaps } from "@shadoof/commenttis-prep"
Insert cell
sharedCaps
Insert cell
liConcordance
Insert cell
mutable dictionaryStr = ""
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
limage.reduce((a, b) => (a.length > b.length ? a : b)) // info: longest in l'image
Insert cell
limageTtl = ["l’", "image"]
Insert cell
theimageTtl = ["the", "image"]
Insert cell
<h3>CSS</h3>
<style>
#display {
color: ${cfg.hostColor}; /* orange is good for debugging */
font-variant-ligatures: no-common-ligatures;
}

.fade { /* was: p#fade */
cursor: pointer;
transition: color .7s ease-in-out; /* vendorless fallback */
-o-transition: color .7s ease-in-out; /* opera */
-ms-transition: color .7s ease-in-out; /* IE 10 */
-moz-transition: color .7s ease-in-out; /* Firefox */
-webkit-transition: color .7s ease-in-out; /*safari and chrome */
}
.fade:hover {
color: white;
}
em {
/* font-family: monospace; */
font-style: normal;
/* font-weight: bold; */
color: ${cfg.guestColor}; /* darkred is good for debugging with orange */
}
</style>
Insert cell
Insert cell
// Checked access to a configuration object.
//   config(cfg, prop)        – returns cfg[prop]
//   config(cfg, prop, value) – sets cfg[prop] = value and returns undefined
//                              (a value of undefined therefore means "read")
// Only properties that already exist on cfg itself are accepted; anything else is
// logged and raised as an Error, so that a mistyped property name fails loudly.
config = (cfg, prop, value) => {
  // called through Object.prototype so that prototype-less objects work too
  if (!Object.prototype.hasOwnProperty.call(cfg, prop)) {
    console.log("Error! undefined config property:", prop);
    throw new Error("undefined config property: " + prop);
  }
  if (value !== undefined) {
    cfg[prop] = value;
    return undefined;
  }
  return cfg[prop];
}
Insert cell
// Modulo that also handles negative numbers, usage: mod(num, modulus).
mod = (n, m) => {
  const remainder = n % m; // takes the sign of n
  return (remainder + m) % m;
}
Insert cell
Insert cell
RT = require("rita")
Insert cell
{
RT.tokenize("a_là"); // -> "a là"
RT.tokenize("a_la"); // -> "a la"
RT.tokenize("à_la"); // -> "à_la"
RT.tokenize("la_bas"); // -> "la bas"
RT.tokenize("lá_bas"); // -> "lá_bas"
}
Insert cell
Insert cell
Hypher = require("https://bundle.run/hypher@0.2.5") // import("https://cdn.skypack.dev/hypher@0.2.5?min")
Insert cell
english = import("https://cdn.skypack.dev/hyphenation.en-gb@0.2.1?min") // require("https://bundle.run/hyphenation.en-us@0.2.1")
Insert cell
french = import("https://cdn.skypack.dev/hyphenation.fr@0.2.1?min")
Insert cell
hyphen = new Hypher(english)
Insert cell
hyphfr = new Hypher(french)
Insert cell
hyphen.hyphenate("à la")
Insert cell
hyphfr.hyphenate("tentative")
Insert cell
Insert cell
{
var strLetters = "12";
var arrLetters = strLetters.split("");
return LoopIt(2, "", arrLetters);

function LoopIt(depth, baseString, arrLetters) {
var returnValue = "";
for (var i = 0; i < arrLetters.length; i++) {
let str = baseString + arrLetters[i];
if (depth == 1) {
returnValue += str; // baseString + arrLetters[i];
} else {
returnValue += LoopIt(
depth - 1,
str, // baseString + arrLetters[i],
arrLetters
);
}
}
return returnValue;
}
}
Insert cell
// Superseded predecessor of permutedSplits, kept for reference.
// Returns ALL orderings of the cut positions 1 .. str.length - 1 as arrays of numbers.
// numberOfSplits is only used for the length guard.
// Returns undefined when str is not a string or is shorter than numberOfSplits + 1.
oldpermutedSplits = (str, numberOfSplits = 2) => {
  if (typeof str !== "string" || str.length < numberOfSplits + 1)
    return undefined;
  const splits = [...Array(str.length).keys()].slice(1);
  const result = [];

  // Picks each remaining position in turn and recurses on the rest.
  const permute = (remaining, chosen = []) => {
    if (remaining.length === 0) {
      result.push(chosen);
      return;
    }
    remaining.forEach((position, i) => {
      const rest = [...remaining.slice(0, i), ...remaining.slice(i + 1)];
      permute(rest, [...chosen, position]);
    });
  };

  permute(splits);

  return result;
}
Insert cell
{
// Scratch cell: shows the lines of the two untokenized texts side by side,
// advancing by one line per keypress.
let htxt = RT.untokenize(howitis3).split("\n");
let ctxt = RT.untokenize(commentcest3).split("\n");
// presumably an element with id "m" is rendered elsewhere in the notebook — TODO confirm
let e = document.getElementById("m");
// starts at the hard-coded index 220; the loop is bounded by htxt only
for (let i = 220; i < htxt.length; ++i) {
// wait for the next keypress before showing line i
await readKey();
// 1-based line number, then the line from each text
e.innerHTML = `${i + 1}<br>${htxt[i]}<br><br>${ctxt[i]}`;
}
}
Insert cell
// Returns a promise that resolves with the next "keypress" event on window.
function readKey() {
  const listenOnce = { once: true }; // the listener removes itself after firing
  return new Promise((onKey) => {
    window.addEventListener("keypress", onKey, listenOnce);
  });
}
Insert cell
htmlForExport = main.innerHTML
Insert cell
markDownStringForExport = {
let str = main.innerHTML.replaceAll("<em>", "*");
str = str.replaceAll("</em>", "*");
str = str.replaceAll("<p>", "");
str = str.replaceAll(config(cfg, "paraTag"), "");
str = str.replaceAll("</p>", "\n\n");
str = str.replaceAll("&nbsp;", " ");
return str;
}
Insert cell
textForComparison = markDownStringForExport
? markDownStringForExport.replaceAll("*", "").replaceAll("\n\n", "\n")
: undefined
Insert cell
howitisSliced.includes("travelers")
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more