Public
Edited
Feb 12
Paused
3 forks
Importers
12 stars
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Demo cell: shows the result of clipStart as a Markdown blockquote. `str`,
// `width` and `ellipsis` are defined outside the visible source (presumably
// interactive notebook inputs — confirm).
md`> ${clipStart(str, width, defaultWidth, ellipsis)}`
Insert cell
// Demo cell: shows the result of clipMiddle as a Markdown blockquote. `str`,
// `width` and `ellipsis` are defined outside the visible source (presumably
// interactive notebook inputs — confirm).
md`> ${clipMiddle(str, width, defaultWidth, ellipsis)}` // not recommended with ellipsis=""
Insert cell
// Demo cell: shows the result of clipEnd as a Markdown blockquote. `str`,
// `width` and `ellipsis` are defined outside the visible source (presumably
// interactive notebook inputs — confirm).
md`> ${clipEnd(str, width, defaultWidth, ellipsis)}`
Insert cell
Insert cell
// U+00AD SOFT HYPHEN. lineBreaks treats it (like "-") as a place where a line
// may be broken, and lineWrap appends a visible "-" to a line that ends on one.
softHyphen = "\u00ad"
Insert cell
// Greedy line wrapping: words are appended to the current line until the next
// one would push the measured width past maxWidth, at which point the line is
// emitted and a new one begins. (The Knuth–Plass algorithm would give nicer
// results, at the cost of far more complexity.)
// https://en.wikipedia.org/wiki/Line_wrap_and_word_wrap
//
// input    – the string to wrap
// maxWidth – maximum line width, in the units returned by widthof
// widthof  – (text, start, end) => width of text.slice(start, end)
// Returns an array of line strings.
function lineWrap(input, maxWidth, widthof) {
  const wrapped = [];
  let start; // start index of the line being built; undefined between lines
  let end = 0; // end index of the last word accepted onto the current line
  for (const segment of lineBreaks(input)) {
    const [wordStart, wordEnd, required] = segment;

    // A new line starts at the first word seen after a reset; spaces between
    // lines have already been skipped by lineBreaks.
    if (start === undefined) {
      start = wordStart;
    }

    // Only a non-empty line can be broken; a single over-long word is kept.
    const overflows =
      end > start && widthof(input, start, wordEnd) > maxWidth;
    if (overflows) {
      // Breaking right after a soft hyphen makes the hyphen visible.
      const hyphen = input[end - 1] === softHyphen ? "-" : "";
      wrapped.push(input.slice(start, end) + hyphen);
      start = wordStart;
    }

    if (required) {
      // Mandatory break (newline or end of input): flush and reset.
      wrapped.push(input.slice(start, wordEnd));
      start = undefined;
    } else {
      // Otherwise the word now belongs to the current line.
      end = wordEnd;
    }
  }
  return wrapped;
}
Insert cell
// A rudimentary (and U.S.-centric) scanner for line-break opportunities. The
// official Unicode Line Breaking Algorithm would be far more comprehensive.
// https://unicode.org/reports/tr14/
//
// Yields [wordStart, wordEnd, required] triples of string indexes:
// - after a hyphen or soft hyphen: optional break, hyphen included in the word;
// - at a space: optional break, the run of spaces is skipped;
// - at "\r", "\n" or "\r\n": required break, the newline is skipped;
// - at the end of input: a final required break.
function* lineBreaks(input) {
  const length = input.length;
  let wordStart = 0;
  let cursor = 0;
  while (cursor < length) {
    const ch = input[cursor];
    if (ch === softHyphen || ch === "-") {
      cursor += 1;
      yield [wordStart, cursor, false];
      wordStart = cursor;
    } else if (ch === " ") {
      yield [wordStart, cursor, false];
      // Skip the whole run of spaces so the next word starts on a non-space.
      do {
        cursor += 1;
      } while (input[cursor] === " ");
      wordStart = cursor;
    } else if (ch === "\r" || ch === "\n") {
      // "\r\n" counts as a single newline.
      const skip = ch === "\r" && input[cursor + 1] === "\n" ? 2 : 1;
      yield [wordStart, cursor, true];
      cursor += skip;
      wordStart = cursor;
    } else {
      cursor += 1;
    }
  }
  yield [wordStart, cursor, true];
}
Insert cell
// Approximate per-character widths, keyed by a single character. Computed as
// round(measureText(text).width * 10) at 10px system-ui. defaultWidth looks
// characters up here; for characters that are not represented in this map, we’d
// ideally want to use a weighted average of what we expect to see. But since we
// don’t really know what that is, using “e” seems reasonable (defaultWidth
// falls back to the “e” entry, or to a fixed larger width for pictographs).
defaultWidthMap = ({
a: 56,
b: 63,
c: 57,
d: 63,
e: 58,
f: 37,
g: 62,
h: 60,
i: 26,
j: 26,
k: 55,
l: 26,
m: 88,
n: 60,
o: 60,
p: 62,
q: 62,
r: 39,
s: 54,
t: 38,
u: 60,
v: 55,
w: 79,
x: 54,
y: 55,
z: 55,
A: 69,
B: 67,
C: 73,
D: 74,
E: 61,
F: 58,
G: 76,
H: 75,
I: 28,
J: 55,
K: 67,
L: 58,
M: 89,
N: 75,
O: 78,
P: 65,
Q: 78,
R: 67,
S: 65,
T: 65,
U: 75,
V: 69,
W: 98,
X: 69,
Y: 67,
Z: 67,
0: 64,
1: 48,
2: 62,
3: 64,
4: 66,
5: 63,
6: 65,
7: 58,
8: 65,
9: 65,
" ": 29,
"!": 32,
'"': 49,
"'": 31,
"(": 39,
")": 39,
",": 31,
"-": 48,
".": 31,
"/": 32,
":": 31,
";": 31,
"?": 52,
"‘": 31,
"’": 31,
"“": 47,
"”": 47,
"…": 82
})
Insert cell
// Estimates the width of text.slice(start, end) by summing per-character
// widths from defaultWidthMap (a technique of Gregor Aisch). This is
// rudimentary and U.S.-centric: characters are assumed to be laid out
// independently, and full Unicode grapheme cluster breaking is not implemented.
// Iteration advances with readCharacter, so a surrogate pair (e.g. an emoji) is
// measured once. Characters missing from the map count as 120 when
// pictographic, and otherwise as a lowercase “e”.
// TODO Optimize this by noting that we often re-measure characters that were
// previously measured?
// http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
// https://exploringjs.com/impatient-js/ch_strings.html#atoms-of-text
function defaultWidth(text, start = 0, end = text.length) {
  let total = 0;
  let i = start;
  while (i < end) {
    const known = defaultWidthMap[text[i]];
    if (known != null) {
      total += known;
    } else {
      total += isPictographic(text, i) ? 120 : defaultWidthMap.e;
    }
    i = readCharacter(text, i);
  }
  return total;
}
Insert cell
// Width of text.slice(start, end) for monospaced text: 100 per character, or
// 200 for a pictographic character (emojis are typically not monospaced!).
// The string length (a count of UTF-16 code units) can’t be used directly
// because it doesn’t correspond to the number of visible characters, so we
// step through the text with readCharacter instead.
function monospaceWidth(text, start = 0, end = text.length) {
  let total = 0;
  let i = start;
  while (i < end) {
    total += isPictographic(text, i) ? 200 : 100;
    i = readCharacter(text, i);
  }
  return total;
}
Insert cell
// Returns a function that splits a text into an array of lines. Lines are
// wrapped to lineWidth (multiplied by 100 to match the widthof units) only
// when no textOverflow is set and lineWidth is finite; otherwise the text is
// split on newlines ("\r\n", "\r" or "\n") alone.
function splitter({ monospace, lineWidth, textOverflow }) {
  const wrapping = textOverflow == null && lineWidth != Infinity;
  if (wrapping) {
    const measure = monospace ? monospaceWidth : defaultWidth;
    const limit = lineWidth * 100;
    return (text) => lineWrap(text, limit, measure);
  }
  return (text) => text.split(/\r\n?|\n/g);
}
Insert cell
// Returns a function that clips a single line of text to lineWidth (multiplied
// by 100 to match the widthof units), according to textOverflow:
//   "clip-start" / "clip-end"       – cut without an ellipsis
//   "ellipsis-start" / "-middle" / "-end" – cut and insert "…"
// When textOverflow is null/undefined or lineWidth is Infinity, the returned
// function is the identity. Any other textOverflow value throws, rather than
// silently returning undefined and failing later at the call site.
function clipper({ monospace, lineWidth, textOverflow }) {
  if (textOverflow == null || lineWidth == Infinity) return (text) => text;
  const widthof = monospace ? monospaceWidth : defaultWidth;
  const maxWidth = lineWidth * 100;
  switch (textOverflow) {
    case "clip-start":
      return (text) => clipStart(text, maxWidth, widthof, "");
    case "clip-end":
      return (text) => clipEnd(text, maxWidth, widthof, "");
    case "ellipsis-start":
      return (text) => clipStart(text, maxWidth, widthof, "…");
    case "ellipsis-middle":
      return (text) => clipMiddle(text, maxWidth, widthof, "…");
    case "ellipsis-end":
      return (text) => clipEnd(text, maxWidth, widthof, "…");
    default:
      throw new Error(`invalid textOverflow: ${String(textOverflow)}`);
  }
}
Insert cell
// Finds where to cut the given text so that it fits the given width, as
// measured by widthof(text, start, end). Returns [index, error], where
// text.slice(0, index) fits within the width and error is the leftover space.
// Returns [-1, 0] if the whole text already fits. When a cut is needed, inset
// (in the same units as width and widthof) is reserved for a possible ellipsis
// character, and characters are removed from the end until the reservation
// fits or nothing is left.
function cut(text, width, widthof, inset) {
  const starts = []; // start indexes of the characters accepted so far
  const n = text.length;
  let used = 0; // width consumed by accepted characters
  let i = 0;
  while (i < n) {
    let j = readCharacter(text, i); // end of the character starting at i
    const charWidth = widthof(text, i, j);
    if (used + charWidth > width) {
      // This character doesn’t fit: reserve the inset, then back up one
      // character at a time while the reservation overflows.
      used += inset;
      while (used > width && i > 0) {
        j = i;
        i = starts.pop();
        used -= widthof(text, i, j);
      }
      return [i, width - used];
    }
    used += charWidth;
    starts.push(i);
    i = j;
  }
  return [-1, 0];
}
Insert cell
// Clips the end of the text so that it fits within width (as measured by
// widthof), appending the given ellipsis when anything was removed. Leading
// and trailing whitespace is ignored, and whitespace left dangling before the
// ellipsis is trimmed.
function clipEnd(text, width, widthof, ellipsis) {
  const trimmed = text.trim();
  const reserve = widthof(ellipsis); // room needed for the ellipsis
  const [index] = cut(trimmed, width, widthof, reserve);
  if (index < 0) return trimmed; // already fits
  return trimmed.slice(0, index).trimEnd() + ellipsis;
}
Insert cell
// Clips the middle of the text so that it fits within width (as measured by
// widthof), replacing the removed part with the given ellipsis. Leading and
// trailing whitespace is ignored. The head is cut to half the width with half
// of the ellipsis reserved; the tail starts after a second cut that accounts
// for whatever space the head left unused. Returns just the ellipsis when the
// second cut finds nothing to remove.
function clipMiddle(text, width, widthof, ellipsis) {
  const trimmed = text.trim();
  const total = widthof(trimmed);
  if (total <= width) return trimmed; // already fits
  const halfEllipsis = widthof(ellipsis) / 2;
  const [headEnd, slack] = cut(trimmed, width / 2, widthof, halfEllipsis);
  const [tailCut] = cut(
    trimmed,
    total - width / 2 - slack + halfEllipsis,
    widthof,
    -halfEllipsis
  ); // TODO read spaces?
  if (tailCut < 0) return ellipsis;
  const head = trimmed.slice(0, headEnd).trimEnd();
  const tail = trimmed.slice(readCharacter(trimmed, tailCut)).trimStart();
  return head + ellipsis + tail;
}
Insert cell
// Clips the start of the text so that it fits within width (as measured by
// widthof), prepending the given ellipsis when anything was removed. Leading
// and trailing whitespace is ignored, and whitespace left dangling after the
// ellipsis is trimmed. Returns just the ellipsis when the cut finds nothing to
// remove.
function clipStart(text, width, widthof, ellipsis) {
  const trimmed = text.trim();
  const total = widthof(trimmed);
  if (total <= width) return trimmed; // already fits
  const reserve = widthof(ellipsis);
  const excess = total - width + reserve; // width to drop from the front
  const [cutIndex] = cut(trimmed, excess, widthof, -reserve); // TODO read spaces?
  if (cutIndex < 0) return ellipsis;
  const tail = trimmed.slice(readCharacter(trimmed, cutIndex)).trimStart();
  return ellipsis + tail;
}
Insert cell
// Matches a run of one or more combining marks and/or emoji modifiers. The
// regex is sticky (y), so it only matches starting exactly at lastIndex;
// isCombiner sets lastIndex before testing, and readCharacter reads lastIndex
// afterwards to find the end of the run.
reCombiner = /[\p{Combining_Mark}\p{Emoji_Modifier}]+/uy
Insert cell
// Matches a single Extended_Pictographic code point. The regex is sticky (y),
// so it only matches starting exactly at lastIndex; isPictographic sets
// lastIndex before testing.
rePictographic = /\p{Extended_Pictographic}/uy
Insert cell
// Reads one “character” from the text starting at index i, and returns the
// index just past it. Ideally this would implement the Unicode text
// segmentation algorithm and its grapheme cluster boundaries; in practice it
// only recognizes UTF-16 surrogate pairs, trailing combining marks and emoji
// modifiers (such as skin color), and zero-width joiner (zwj) sequences.
// https://unicode.org/reports/tr29/
function readCharacter(text, i) {
  let next = i;
  for (;;) {
    // Consume one code point (two code units for a surrogate pair).
    next += isSurrogatePair(text, next) ? 2 : 1;
    // Consume any combining marks/modifiers attached to it; isCombiner leaves
    // the end of the matched run in reCombiner.lastIndex.
    if (isCombiner(text, next)) next = reCombiner.lastIndex;
    // A zero-width joiner glues the following character onto this one.
    if (!isZeroWidthJoiner(text, next)) return next;
    next += 1;
  }
}
Insert cell
// Returns true if the code unit at index i is 7-bit ASCII. Checking for this
// common case first lets callers skip the more expensive regex tests that
// involve Unicode property classes.
function isAscii(text, i) {
  const code = text.charCodeAt(i);
  return code < 0x80;
}
Insert cell
// Returns true if the code units at indexes i and i + 1 form a UTF-16
// surrogate pair: a high surrogate (0xD800–0xDBFF) followed by a low surrogate
// (0xDC00–0xDFFF).
function isSurrogatePair(text, i) {
  const lead = text.charCodeAt(i);
  const isHigh = lead >= 0xd800 && lead <= 0xdbff;
  if (!isHigh) return false;
  const trail = text.charCodeAt(i + 1);
  return trail >= 0xdc00 && trail <= 0xdfff;
}
Insert cell
// Returns true if the code unit at index i is U+200D ZERO WIDTH JOINER.
function isZeroWidthJoiner(text, i) {
  const ZERO_WIDTH_JOINER = 0x200d;
  const code = text.charCodeAt(i);
  return code === ZERO_WIDTH_JOINER;
}
Insert cell
// Returns true if a run of combining marks or emoji modifiers starts at index
// i. ASCII is rejected up front without touching the regex; otherwise the
// shared sticky regex reCombiner is positioned at i and tested, which leaves
// the end of the matched run in reCombiner.lastIndex on success.
function isCombiner(text, i) {
  if (isAscii(text, i)) return false;
  reCombiner.lastIndex = i;
  return reCombiner.test(text);
}
Insert cell
// Returns true if an Extended_Pictographic code point (such as an emoji)
// starts at index i. ASCII is rejected up front without touching the regex;
// otherwise the shared sticky regex rePictographic is positioned at i and
// tested.
function isPictographic(text, i) {
  if (isAscii(text, i)) return false;
  rePictographic.lastIndex = i;
  return rePictographic.test(text);
}
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more