Published
Edited
Jul 19, 2022
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
//#region Terminal parsers
mdParser = {
//#region Terminal parsers
/**
 * Catch-all terminal: turns any run of non-whitespace characters into a text token.
 * @returns a parser yielding `{ type: "text", value }` for the matched run.
 */
const fallbackTextParser = () => {
  const toTextToken = (matched) => ({ type: "text", value: matched });
  return P.regex(/[^\s]+/).map(toTextToken);
};
/**
 * Terminal for a single line feed.
 * @returns a parser whose result is always the constant token `{ type: "newline" }`.
 */
const newlineParser = () => {
  const lineFeed = /\n/;
  return P.regex(lineFeed).result({ type: "newline" });
};
/**
 * Terminal for runs of no-break spaces. They are emitted as ordinary text tokens so the
 * characters survive verbatim instead of being collapsed into a `whitespace` token.
 * This parser is listed ahead of `nonNewlineWhitespace` in every alternative list in this
 * file, so a run that starts with one of these codepoints is claimed here first.
 * @returns a parser yielding `{ type: "text", value }` for the matched run.
 */
const nonbreakingSpaceParser = () =>
  // According to https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes
  // the \s character class includes the following codepoints: [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]
  // We want to treat newlines and nonbreaking spaces specially. Out of the list above, U+00A0 (no-break space) and
  // U+202F (narrow no-break space) should be treated as non-breaking whitespace. U+FEFF (zero width no-break space)
  // was already matched here and is kept for backward compatibility.
  // Previously U+202F was missing from this class and was therefore tokenized as breakable whitespace.
  P.regex(/[\u00a0\u202f\ufeff]+/).map((val) => ({ type: "text", value: val }));
// The character class below is the MDN-documented \s set
// (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes)
// with "\n" left out on purpose: spaces and tabs must not swallow line breaks, so
// " \n" should turn into [{ type: "whitespace" }, { type: "newline" }]
/**
 * Terminal for a run of whitespace that contains no line feed.
 * @returns a parser whose result is always the constant token `{ type: "whitespace" }`.
 */
const nonNewlineWhitespaceParser = () => {
  const whitespaceRun = /[\r\t\f\v \u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+/;
  return P.regex(whitespaceRun).result({ type: "whitespace" });
};
/**
 * Terminal for a bare URL appearing directly in the text (matched with `urlRegex`).
 * @returns a parser yielding `{ type: "plainUrl", href }` where `href` is the matched string.
 */
const plainUrlParser = () => {
  const toPlainUrlToken = (matchedUrl) => ({
    type: "plainUrl",
    href: matchedUrl
  });
  return P.regex(urlRegex).map(toPlainUrlToken);
};
// URL pattern taken from https://urlregex.com
// Shape of the pattern, left to right:
//   - either a scheme of 3-9 letters followed by ":" and an optional "//", an optional "userinfo@" part and a host,
//     or a host introduced by "www." or by "userinfo@";
//   - then an optional tail made of a "/path", an optional "?" with query characters and an optional "#" with fragment characters.
// The literal carries no anchors and no flags.
// It is declared after plainUrlParser, which is fine because that parser only reads it when it is invoked.
const urlRegex = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w\-]*))?)/;
/**
 * Terminal for a word that may sit between square brackets: no "[", no "]", no whitespace.
 * @returns a parser yielding `{ type: "text", value }`.
 */
const nonBracketWordParser = () => {
  const wordWithoutBrackets = /[^\[\]\s]+/;
  return P.regex(wordWithoutBrackets).map((matched) => ({
    type: "text",
    value: matched
  }));
};
/**
 * Terminal for a word free of parentheses and whitespace.
 * @returns a parser yielding `{ type: "text", value }`.
 */
const nonParensWordParser = () => {
  const wordWithoutParens = /[^\(\)\s]+/;
  return P.regex(wordWithoutParens).map((matched) => ({
    type: "text",
    value: matched
  }));
};
/**
 * Terminal for a word with no parentheses, no whitespace and no "::" pair.
 * A lone ":" is accepted; the lookahead only rejects a colon that is directly followed by another colon.
 * @returns a parser yielding `{ type: "text", value }`.
 */
const nonDoubleColonOrParensWordParser = () => {
  const toTextToken = (matched) => ({ type: "text", value: matched });
  return P.regex(/([^\(\):\s]|:(?!:))+/).map(toTextToken);
};
/**
 * Terminal for a word containing neither "_" nor whitespace.
 * @returns a parser yielding `{ type: "text", value }`.
 */
const nonSingleUnderscoreWordParser = () => {
  const wordWithoutUnderscore = /[^_\s]+/;
  return P.regex(wordWithoutUnderscore).map((matched) => ({
    type: "text",
    value: matched
  }));
};
/**
 * Terminal for a word with no whitespace and no "**" pair.
 * A single "*" is accepted; the lookahead only rejects a star that is directly followed by another star.
 * @returns a parser yielding `{ type: "text", value }`.
 */
const nonDoubleStarWordParser = () => {
  const toTextToken = (matched) => ({ type: "text", value: matched });
  return P.regex(/([^*\s]|\*(?!\*))+/).map(toTextToken);
};
/**
 * Terminal that consumes text up to the next "*", "_" or whitespace character.
 * @returns a parser yielding `{ type: "text", value }`.
 */
const nonStylingCharactersParser = () => {
  const unstyledRun = /[^\s*_]+/;
  return P.regex(unstyledRun).map((matched) => ({
    type: "text",
    value: matched
  }));
};
/**
 * Terminal for the category segment of a detail-on-demand target (`hover::category::term`):
 * no whitespace, no parentheses and no "::" pair, so it stops at the separator.
 * @returns a parser yielding `{ type: "text", value }`.
 */
const dodCategoryParser = () => {
  const toTextToken = (matched) => ({ type: "text", value: matched });
  return P.regex(/([^\(\):\s]|:(?!:))+/).map(toTextToken);
};
/**
 * Terminal for the term segment of a detail-on-demand target (`hover::category::term`):
 * no whitespace, no parentheses and no "::" pair, so it stops before the closing ")".
 * @returns a parser yielding `{ type: "text", value }`.
 */
const dodTermParser = () => {
  const toTextToken = (matched) => ({ type: "text", value: matched });
  return P.regex(/([^\(\):\s]|:(?!:))+/).map(toTextToken);
};
//#endregion
//#region Higher level parsers
/**
 * One piece of the text between the square brackets of a markdown link.
 * Only whitespace, plain bold, plain italic and bracket-free words are offered as alternatives.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const markdownLinkContentParser = (r) => {
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.plainBold,
    r.plainItalic,
    r.nonBracketWord
  ];
  return P.alt(...alternatives);
};
/**
 * Parser for `[link text](url)`.
 * The link text is one or more `markdownLinkContent` pieces; the target must match `urlRegex`.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "markdownLink", children, href }`.
 */
const markdownLinkParser = (r) => {
  const openBracket = P.string("[");
  const linkText = ["children", r.markdownLinkContent.atLeast(1)];
  const bracketToParen = P.string("](");
  const target = ["href", P.regex(urlRegex)];
  const closeParen = P.string(")");

  const toLinkNode = ({ children, href }) => ({
    type: "markdownLink",
    children,
    href
  });

  return P.seqObj(
    openBracket,
    linkText,
    bracketToParen,
    target,
    closeParen
  ).map(toLinkNode);
};
/**
 * One piece of the text between the square brackets of a detail-on-demand reference.
 * Offers the same alternatives as markdown link text: whitespace, plain bold, plain italic
 * and bracket-free words.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const detailOnDemandContentParser = (r) => {
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.plainBold,
    r.plainItalic,
    r.nonBracketWord
  ];
  return P.alt(...alternatives);
};
/**
 * Parser for a detail-on-demand reference: `[label](hover::category::term)`.
 * The label is one or more `detailOnDemandContent` pieces. The category and term tokens are
 * unwrapped so the resulting node carries their plain string values.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "detailOnDemand", category, term, children }`.
 */
const detailOnDemandParser = (r) => {
  const openBracket = P.string("[");
  const label = ["children", r.detailOnDemandContent.atLeast(1)];
  const hoverPrefix = P.string("](hover::");
  const categoryPart = ["category", r.dodCategory];
  const separator = P.string("::");
  const termPart = ["term", r.dodTerm];
  const closeParen = P.string(")");

  const toDetailNode = ({ children, category, term }) => ({
    type: "detailOnDemand",
    category: category.value,
    term: term.value,
    children
  });

  return P.seqObj(
    openBracket,
    label,
    hoverPrefix,
    categoryPart,
    separator,
    termPart,
    closeParen
  ).map(toDetailNode);
};
/**
 * One piece of the content of a bold span that must not contain italics
 * (used for bold nested inside italic): whitespace, detail-on-demand, links, plain URLs
 * and words without "**".
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const boldWithoutItalicContentParser = (r) => {
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.detailOnDemand,
    r.markdownLink,
    r.plainUrl,
    r.nonDoubleStarWord
  ];
  return P.alt(...alternatives);
};
/**
 * Parser for `**...**` whose content may not contain italics.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "boldWithoutItalic", children }`.
 */
const boldWithoutItalicParser = (r) => {
  const opening = P.string("**");
  const content = ["children", r.boldWithoutItalicContent.atLeast(1)];
  const closing = P.string("**");
  const toNode = ({ children }) => ({ type: "boldWithoutItalic", children });
  return P.seqObj(opening, content, closing).map(toNode);
};
/**
 * One piece of the content of a top-level bold span: whitespace, nested italic (without bold),
 * detail-on-demand, links, plain URLs and unstyled text.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const boldContentParser = (r) => {
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.italicWithoutBold,
    r.detailOnDemand,
    r.markdownLink,
    r.plainUrl,
    r.nonStylingCharacters
  ];
  return P.alt(...alternatives);
};
/**
 * Parser for a top-level `**...**` span built from `boldContent` pieces.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "bold", children }`.
 */
const boldParser = (r) => {
  const opening = P.string("**");
  const content = ["children", r.boldContent.atLeast(1)];
  const closing = P.string("**");
  const toNode = ({ children }) => ({ type: "bold", children });
  return P.seqObj(opening, content, closing).map(toNode);
};
/**
 * One piece of the content of a plain bold span (bold with no nested constructs):
 * whitespace or a word without "**".
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const plainBoldContentParser = (r) => {
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.nonDoubleStarWord
  ];
  return P.alt(...alternatives);
};
/**
 * Parser for a `**...**` span containing only whitespace and words
 * (the bold variant used inside link and detail-on-demand labels).
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "plainBold", children }`.
 */
const plainBoldParser = (r) => {
  const opening = P.string("**");
  const content = ["children", r.plainBoldContent.atLeast(1)];
  const closing = P.string("**");
  const toNode = ({ children }) => ({ type: "plainBold", children });
  return P.seqObj(opening, content, closing).map(toNode);
};
/**
 * One piece of the content of an italic span that must not contain bold
 * (used for italic nested inside bold): whitespace, detail-on-demand, links, plain URLs
 * and unstyled text.
 * The alternatives list previously named `r.newline` twice; the second entry could never
 * match anything the first had not already consumed, so it has been dropped.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const italicWithoutBoldContentParser = (r) =>
  P.alt(
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.detailOnDemand,
    r.markdownLink,
    r.plainUrl,
    r.nonStylingCharacters
  );
/**
 * Parser for `_..._` whose content may not contain bold.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "italicWithoutBold", children }`.
 */
const italicWithoutBoldParser = (r) => {
  const opening = P.string("_");
  const content = ["children", r.italicWithoutBoldContent.atLeast(1)];
  const closing = P.string("_");
  const toNode = ({ children }) => ({ type: "italicWithoutBold", children });
  return P.seqObj(opening, content, closing).map(toNode);
};
/**
 * One piece of the content of a top-level italic span: whitespace, nested bold (without italic),
 * detail-on-demand, links, plain URLs and unstyled text.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const italicContentParser = (r) => {
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.boldWithoutItalic,
    r.detailOnDemand,
    r.markdownLink,
    r.plainUrl,
    r.nonStylingCharacters
  ];
  return P.alt(...alternatives);
};
/**
 * Parser for a top-level `_..._` span built from `italicContent` pieces.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "italic", children }`.
 */
const italicParser = (r) => {
  const opening = P.string("_");
  const content = ["children", r.italicContent.atLeast(1)];
  const closing = P.string("_");
  const toNode = ({ children }) => ({ type: "italic", children });
  return P.seqObj(opening, content, closing).map(toNode);
};
/**
 * One piece of the content of a plain italic span (italic with no nested constructs):
 * whitespace or a word without "_".
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a `P.alt` over the alternatives, in the order listed.
 */
const plainItalicContentParser = (r) => {
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.nonSingleUnderscoreWord
  ];
  return P.alt(...alternatives);
};
/**
 * Parser for a `_..._` span containing only whitespace and words
 * (the italic variant used inside link and detail-on-demand labels).
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "plainItalic", children }`.
 */
const plainItalicParser = (r) => {
  const opening = P.string("_");
  const content = ["children", r.plainItalicContent.atLeast(1)];
  const closing = P.string("_");
  const toNode = ({ children }) => ({ type: "plainItalic", children });
  return P.seqObj(opening, content, closing).map(toNode);
};
//#endregion
//#region Top level language construction
/**
 * Root rule of the grammar: one or more tokens wrapped in a `MarkdownRoot` node.
 * @param r the language object whose members are the named parsers of this grammar
 * @returns a parser yielding `{ type: "MarkdownRoot", children }`.
 */
const markdownParser = (r) => {
  // The order is crucial here: alternatives are attempted first to last.
  const alternatives = [
    r.newline,
    r.nonbreakingSpace,
    r.nonNewlineWhitespace,
    r.detailOnDemand,
    r.markdownLink,
    r.plainUrl,
    r.bold,
    r.italic,
    // Consume up to the next "*" or "_", if possible
    r.nonStylingCharacters,
    // Otherwise consume everything up to the next whitespace
    r.fallbackText
  ];
  const wrapInRoot = (tokens) => ({
    type: "MarkdownRoot",
    children: tokens
  });
  return P.alt(...alternatives).atLeast(1).map(wrapInRoot);
};
// Rule table handed to P.createLanguage: each key is the name under which the rule is
// reachable as `r.<name>` inside the parser factories above.

// The root rule and the parsers it lists directly, plus the plain bold/italic variants
// (those two are only referenced from link and detail-on-demand label content).
const primaryParts = {
  markdown: markdownParser,
  newline: newlineParser,
  nonbreakingSpace: nonbreakingSpaceParser,
  nonNewlineWhitespace: nonNewlineWhitespaceParser,
  detailOnDemand: detailOnDemandParser,
  markdownLink: markdownLinkParser,
  plainUrl: plainUrlParser,
  bold: boldParser,
  italic: italicParser,
  plainBold: plainBoldParser,
  plainItalic: plainItalicParser,
  fallbackText: fallbackTextParser
};

// Utility parsers - these are never tried on the top level because the text parsers
// there already cover everything else.
const utilityParts = {
  detailOnDemandContent: detailOnDemandContentParser,
  markdownLinkContent: markdownLinkContentParser,
  boldContent: boldContentParser,
  plainBoldContent: plainBoldContentParser,
  boldWithoutItalic: boldWithoutItalicParser,
  boldWithoutItalicContent: boldWithoutItalicContentParser,
  plainItalicContent: plainItalicContentParser,
  italicContent: italicContentParser,
  italicWithoutBold: italicWithoutBoldParser,
  italicWithoutBoldContent: italicWithoutBoldContentParser,
  nonBracketWord: nonBracketWordParser,
  nonParensWord: nonParensWordParser,
  nonDoubleColonOrParensWord: nonDoubleColonOrParensWordParser,
  nonDoubleStarWord: nonDoubleStarWordParser,
  nonStylingCharacters: nonStylingCharactersParser,
  nonSingleUnderscoreWord: nonSingleUnderscoreWordParser,
  dodCategory: dodCategoryParser,
  dodTerm: dodTermParser
};

// Spread keeps the keys in the same order as before: primary entries first, utilities after.
const languageParts = { ...primaryParts, ...utilityParts };
return P.createLanguage(languageParts);
//#endregion
//# sourceMappingURL=parser.js.map
}
Insert cell
// Builds a text token: { type: "text", value }.
text = (value) => {
  return { type: "text", value: value };
}
Insert cell
// Builds a group node wrapping the given children: { type: "group", children }.
group = (children) => {
  return { type: "group", children: children };
}
Insert cell
Insert cell
P = Parsimmon.default
Insert cell
Parsimmon = import("https://cdn.skypack.dev/parsimmon@1.18.1")
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more