2

GitHub - bitair-org/parser.js: Language recognition tool for JavaScript RTEs

 1 year ago
source link: https://github.com/bitair-org/parser.js
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

Intro

Parser.js is a text parser for JavaScript RTEs. Parser.js can be used for data extraction and creating transpilers and interpreters.

Technical facts

  • Implements a top-down recursive parser with both lookback and lookahead functionalities.
  • Uses JavaScript's RegExp object for tokenization.
  • Provides a fluent API for grammar creation.
  • Is pure JavaScript (library/framework free) and can be used in all JavaScript RTEs.
  • Bundled as an ES5 ESM library.
  • Written in TypeScript with the strictest ESNext config.
  • Uses no recursive functions (recursion is implemented with an explicit stack).
  • Returns a JSON object instead of a parse-tree.

Installation

npm i @bitair/parser.js

Usage

Import

import { r, Combinator, Quantifier, Assertion, EventType, Parser } from '@bitair/parser.js'

Sample calculator

import { r, Parser, Combinator } from '@bitair/parser.js'
import { equal } from 'node:assert'

// Matches an unsigned number: one or more digits with an optional decimal fraction.
const NUMBER = /[0-9]+(\.[0-9]+)?/
// Unary operators (sin, cos); the rule is named 'op'.
const soloOp = r(/sin|cos/).as('op')
// Binary operators (+, -, /, *, ^); this rule is also named 'op'.
const pairOp = r(/\+|-|\/|\*|\^/).as('op')
// An equation is either `a op b` or `op a`; the two alternatives are
// combined with Combinator.OrderedChoice.
const grammar = r(
  [r(NUMBER).as('a'), pairOp, r(NUMBER).as('b')],
  [soloOp, r(NUMBER).as('a')]
)
  .combine(Combinator.OrderedChoice)
  .as('equation')

// `skip` is the parser-wide skip pattern; here it matches whitespace
// (presumably what lets an input such as 'sin 0' parse).
const parser = new Parser(grammar, {
  skip: /\s*/
})

// Parse each sample input and check the value computed by `interpret`
// (declared further down in this sample).
let equation = parser.parse('2+2')
equal(interpret(equation), 4)
equation = parser.parse('2-2')
equal(interpret(equation), 0)
equation = parser.parse('2/2')
equal(interpret(equation), 1)
equation = parser.parse('2*2')
equal(interpret(equation), 4)
equation = parser.parse('2^2')
equal(interpret(equation), 4)
equation = parser.parse('sin 0')
equal(interpret(equation), 0)
equation = parser.parse('cos 0')
equal(interpret(equation), 1)

console.log('Done!')

/**
 * Evaluates a parsed equation.
 *
 * `a` and `b` are converted with `parseFloat`, and `op` selects the
 * operation. The unary operators (`sin`, `cos`) use only `a`. Any
 * operator not listed below yields `NaN`.
 */
function interpret({ a, b, op }) {
  const left = parseFloat(a)
  const right = parseFloat(b)

  // Binary operators
  if (op === '+') return left + right
  if (op === '-') return left - right
  if (op === '*') return left * right
  if (op === '/') return left / right
  if (op === '^') return left ** right

  // Unary operators
  if (op === 'sin') return Math.sin(left)
  if (op === 'cos') return Math.cos(left)

  return NaN
}

r(...subrules: RegExp | Callback | Rule | Array<RegExp | Callback | Rule>) : RuleFactory

Creates a rule from single or multiple subrules. Returns a RuleFactory instance for configuring the rule.
  • Example:

    // Parses `Key:Value` lines and transforms the parsed entries in a listener.
    const input = 'Name:Apple\nColor:Yellow'
    const expected = [
      { key: 'name', value: 'APPLE' },
      { key: 'color', value: 'YELLOW' }
    ]
    
    // A line ends at a newline or at the end of the input.
    const END_OF_LINE = /\n|$/
    const COLON = /:/
    // One or more letters followed by any number of letters or digits.
    const key = r(/[a-zA-Z]+/, /[a-zA-Z0-9]*/).as('key')
    // One or more characters; `.` does not match a newline, so this stops at the line end.
    const value = r(/.+/).as('value')
    // The sequence key, COLON, value, END_OF_LINE, quantified with '+' and named 'list'.
    const grammar = r(key, COLON, value, END_OF_LINE).quantify('+').as('list')
    
    // For a RuleSucceeded event of the 'list' rule, returns a new array with
    // lower-cased keys and upper-cased values (judging by `expected`, the
    // returned value becomes the parse result — confirm against the library).
    const listener = (event: ParserEvent) => {
      if (event.type === ParserEventType.RuleSucceeded) {
        // Transforming the parsed data
        const rule = event.data
        if (rule.name === 'list') {
          return rule.data.map(({ key, value }: { key: string; value: string }) => ({
            key: key.toLowerCase(),
            value: value.toUpperCase()
          }))
        }
      }
    }
    
    const parser = new Parser(grammar, { listener })
    const actual = parser.parse(input)
    assert.deepEqual(actual, expected)
  • ...subrules: Callback : A callback can be used for creating recursive rules and manual scanners.

    type Callback = (...args: any[]) => any
    • Example: Recursive rule

      // Parses nested brackets around a number into nested arrays.
      const input = '[[[[1]]]]'
      const expected = [[[[1]]]]
      
      // The callback `(): Rule => array` defers the reference to `array`,
      // which lets the rule refer to itself.
      const array = r([/\[/, (): Rule => array, /\]/], NUMBER)
        .combine(Combinator.OrderedChoice)
        .as('array')
      
      // For each succeeded 'array' rule: wrap a nested result in an array,
      // otherwise convert the matched text to a decimal integer.
      const listener = (event: ParserEvent) => {
        if (event.type === ParserEventType.RuleSucceeded) {
          const { name, data } = event.data
          if (name === 'array') return data.array ? [data.array] : parseInt(data, 10)
        }
      }
      
      const parser = new Parser(array, { listener })
      const actual = parser.parse(input)
      
      // `actual` and `expected` are distinct array objects, so they must be
      // compared structurally; `assert.equal` would compare them by reference.
      assert.deepEqual(actual, expected)
    • Example: Manual scanner

      // Reads a single-quoted string by hand, dropping the enclosing quotes
      // and unescaping \' to '.
      const input = "'It\\'s Ok!'"
      const expected = "It's Ok!"
      
      // Manual scanner: invoked with the parser as `this`; returns the text it collected.
      const scanner = function (this: IParser): string | undefined {
        let text = ''
        const parser = this
        // Read the input one character at a time until the lexer reports end of input.
        while (!parser.lexer.isEof) {
          // The character class matches any single character.
          const char = parser.lexer.scan(/[\w\W\s\S]/)
          if (char === "'") {
            // If the quotation mark has been escaped, replace the trailing \ with the quote itself
            if (text[text.length - 1] === '\\') text = text.slice(0, -1) + "'"
            // Otherwise the quotation mark is an enclosing mark and should be dropped
            continue
          }
          text += char
        }
        return text
      }
      
      // The callback passed to `r` returns the scanner function.
      const grammar = r(() => scanner).as('text')
      const parser = new Parser(grammar)
      const actual = parser.parse(input)
      
      assert.equal(actual, expected)

RuleFactory

Provides a method chaining instance for configuring a rule.
  • .combine(Combinator: Combinator) : RuleFactory Combines the subrules. When not configured, the subrules are combined sequentially.

    enum Combinator {
      Sequence,
      OrderedChoice
    }
  • .quantify(quantifier: Quantifier) : RuleFactory

    enum Quantifier {
      Optional, // ?
      ZeroOrMore, // *
      OneOrMany // +
    }
  • .capture(value: false): RuleFactory

    Used to drop the parsed data from the final output.
    
    • Example:

      // Parses a small program, keeping statements and dropping comments.
      const input = '#This is a comment!\nprint("Hello World!")#This is another comment\n'
      const expected = [{ statement: 'print("Hello World!")' }]
      
      // A comment runs from '#' to the end of the line; `.capture(false)`
      // drops it from the final output.
      const comment = r(/#[^\n]+(\n|$)/)
        .capture(false)
        .as('comment')
      // A statement is any run of characters up to a '#' or a newline.
      const statement = r(/[^#\n]+(\n|$|)/).as('statement')
      const grammar = r(comment, statement)
        .quantify(Quantifier.ZeroOrMore)
        .combine(Combinator.OrderedChoice)
        .as('program')
      
      const parser = new Parser(grammar)
      const actual = parser.parse(input)
      // `actual` and `expected` are distinct objects, so they must be compared
      // structurally; `assert.equal` would compare them by reference.
      assert.deepEqual(actual, expected)
  • .skip(pattern: RegExp): RuleFactory

    Skips tokens between subrules. (E.g.: skipping whitespace tokens)
    
  • .assert(assertion: Assertion) : Rule

    Asserts a rule without consuming the input. This function should be called as the last chain function.
    
    enum Assertion {
      Lookahead,
      NegativeLookahead
    }
  • .as(name: string) : Rule

    Configures and returns the rule. This function should be called as the last chain function.
    
    • name: string: name of the rule.

class Parser implements IParser

  • constructor (grammar: Rule, settings?: ParserSettings) : IParser

    Creates a reusable parser for parsing multiple inputs.
    
    • grammar: Rule: The top-level rule.

    • settings?: ParserSettings: Global parser settings

      type ParserSettings = Partial<{
        skip: RegExp
        listener: ParserListener
      }>
      • skip: RegExp: The default skip pattern, applied to rules that do not specify their own skip pattern.
      • listener: ParserListener: Used to extend the parsing process (e.g., transforming the parsed data).
      type ParserListener = (this: IParser, event: ParserEvent) => unknown
      type ParserEvent = RuleSucceededEvent | RuleFailedEvent
      type RuleSucceededEvent = {
        type: 0
        data: { name?: string; data: any }
      }
      type RuleFailedEvent = {
        type: 1
        data: { name?: string }
      }
/**
 * Parser surface made available as `this` to manual scanner callbacks and
 * to parser listeners.
 */
export interface IParser {
  /** Lexer through which the input is read (e.g. by a manual scanner). */
  lexer: ILexer
}

/** Low-level access to the input being parsed. */
export interface ILexer {
  /** Whether the end of the input has been reached. */
  isEof: boolean
  /** NOTE(review): presumably moves the current position by `length` characters — confirm. */
  jump: (length: number) => void
  /**
   * Matches `pattern` against the input and returns the matched text.
   * NOTE(review): presumably consumes the match and returns `undefined`
   * when nothing matches — confirm.
   */
  scan(pattern: RegExp): string | undefined
  /** NOTE(review): presumably matches `pattern` ahead of the current position without consuming — confirm. */
  lookahead(pattern: RegExp): string | undefined
  /** NOTE(review): presumably matches `pattern` against already-scanned input — confirm. */
  lookback(pattern: RegExp): string | undefined
}

License

GNU Lesser General Public License v3.0 or later


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK