Published
Edited
Feb 13, 2020
3 stars
Insert cell
Insert cell
class Parser {

// ====================================================================================
// == PARSING =======================================================================
// ====================================================================================

/**
* Parses and returns a complete expression, given the binding power
* of the 'above' binary operator.
*/
expr(rbp = 0) {
// Parse the left part of a binary expression (e.g. an literal integer or
// a parenthesized expression).
let left = this.nud()

// We just parsed an expression; the token stream should have advanced to either
// the end of the input or to a binary operator.
let token = this.currentToken()

// This is where the magic happens: if the binding power of the token is
// sufficiently high, we consider that the token 'wants' to parse the
// binary expression more than the above expression (with binding power
// 'rbp') so we proceed to the binary parsing right here.
while (left !== null && token !== null && rbp < token.bp) {
left = this.led(left)
token = this.currentToken()
}

// At this point we've either reached the end of the input, or an operator
// with a low binding power.
// Let's say token == '+' in the expression '2 * 3 + 1': we would currently
// be in the recursive call parsing the binary expression with '*' (rbp == 20)
// and we would have token.bp == 10.
// The while loop is therefore exited and left == '2 * 3' is returned to 'led'.
// 'led' returns as well, and the above call to expr with rbp == 0 continues.
// This time token.bp > rbp, so '+ 1' is parsed and the entire expression
// '(2 * 3) + 1' is returned.
return left
}

/**
* Parses and returns a primary expression (or nud, for "null denotation").
*/
nud() {
const token = this.advanceToken()

if (token === null)
return null

// A dispatch table could also be used here, which would look like this:
//
// for (const parser of unaryParsers) {
// if (parser.accepts(token))
// return parser.parse(this)
// }
// return null
//
// Or even:
//
// return unaryParsers[token.kind]?.parse(this) ?? null
//
if (token.kind === NUMBER)
return Expression.number(token.value)
if (token.kind === POS || token.kind === NEG)
return Expression.unary(token.kind, this.nud())

if (token.kind === LPAREN) {
const inner = this.expr(),
current = this.advanceToken()

if (current === null || current.kind !== RPAREN)
return null
else
return inner
}

return null
}

/**
* Parses and returns a binary expression (or led, for "left denotation"),
* given its left operand.
*/
led(left) {
// 'led' is only called when the current token wraps a binary
// operator (e.g. '+', '*'). We retrieve that token and advance
// the stream to the next one.
const token = this.advanceToken()

if ([ADD, SUB, MUL, DIV, POW].includes(token.kind))
return Expression.binary(token.kind, left, this.expr(token.abp))

if (token.kind === QUE) {
// A question mark introduces a ternary expression (i.e. 'cond ? ifTrue : ifFalse').
//
// First we parse the 'ifTrue' expression with a low binding power so
// that '? 2 * 3 : 4 / 5' only returns to us at ':' with the expression '2 * 3'.
const right = this.expr(token.abp)

// We might have reached the end of the input or a right parenthesis.
// That's an error.
if (this.advanceToken().kind !== COL)
return null

// We parse the rest of the expression into 'ifFalse' and return the ternary node.
return Expression.ternary(left, right, this.expr(token.abp))
}

return null
}


// ====================================================================================
// == CONSTRUCTOR ===================================================================
// ====================================================================================

constructor(tokens) {
this.tokens = tokens

// Two ways of processing tokens are shown:
// - The 'array' way, which uses a pre-computed array of tokens
// and keeps track of the current position using an integer.
// - The 'lazy' way, which uses a generator and can ask for
// tokens to be computed on-demand. It is slightly less intuitive
// to use, but here a lazy collection can be very useful.
if (Array.isArray(tokens)) {
this.position = 0
} else {
this.curr = tokens.next()
this.next = tokens.next()
}

console.clear()

if (trace) {
// Some tracing, open your console to find out how expressions
// are parsed behind the scenes!
traceParser(this)
}
}


// ====================================================================================
// == TOKENS HANDLING ===============================================================
// ====================================================================================

/**
* Returns the current token and advances to the next token.
*/
advanceToken() {
// Needlessly complicated logic to handle both iterators and arrays as input.
if (Array.isArray(this.tokens)) {
return this.position >= this.tokens.length ? null : this.tokens[this.position++]
} else {
const curr = this.curr

this.curr = this.next
this.next = this.tokens.next()

return curr.done ? null : curr.value
}
}

/**
* Returns the current token without advancing.
*/
currentToken() {
// Ditto.
if (Array.isArray(this.tokens)) {
return this.position >= this.tokens.length ? null : this.tokens[this.position]
} else {
return this.curr.done ? null : this.curr.value
}
}
}
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
[...Token.tokenize("(1 + 2) * 3 / 7-2")]
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more