// lib/parser/tokenRegex.js

const xRegExp = require("xregexp");

/**
 * Wraps a compiled regular expression and reports matches as token
 * descriptors of the form `{ type, value, start, end }`.
 *
 * Positions are derived from the wrapped expression's `lastIndex` after a
 * match, so they are only meaningful for expressions that advance
 * `lastIndex` (sticky `y` or global `g`). With such an expression `start`
 * and `end` are 1-based, inclusive offsets of the whole match.
 */
class TokenRegex {
  /**
   * @param {string} name - Token type reported in every match descriptor.
   * @param {string} regexLiteral - Pattern source handed to XRegExp.
   * @param {string} [flags] - Flags handed to XRegExp together with the pattern.
   */
  constructor(name, regexLiteral, flags) {
    this.name = name;
    this.expression = xRegExp(regexLiteral, flags);
  }

  /** @returns {number} The wrapped expression's current `lastIndex`. */
  get lastIndex() {
    return this.expression.lastIndex;
  }

  /** @param {number} index - New `lastIndex` for the wrapped expression. */
  set lastIndex(index) {
    this.expression.lastIndex = index;
  }

  /**
   * Rewinds the wrapped expression to the start of the input.
   *
   * @returns {TokenRegex} This instance, to allow chaining.
   */
  reset() {
    this.lastIndex = 0;

    return this;
  }

  /**
   * Runs the wrapped expression against `program`.
   *
   * @param {string} program - Text to match against.
   * @returns {{type: string, value: string, start: number, end: number}|null}
   *   A token descriptor, or `null` when the expression does not match.
   */
  exec(program) {
    const match = this.expression.exec(program);

    if (match === null) {
      return null;
    }

    const end = this.lastIndex;

    return {
      type: this.name,
      // Group 1 carries the token text; fall back to the whole match when the
      // pattern has no such group (or it did not participate).
      value: match[1] !== undefined ? match[1] : match[0],
      // Measure back from `end` by the WHOLE match: the capture can be shorter
      // than the consumed text (e.g. a string's surrounding quotes), and using
      // its length would place `start` inside the token.
      start: end - match[0].length + 1,
      end
    };
  }

  /**
   * Checks whether the expression matches without moving `lastIndex`.
   *
   * @param {string} program - Text to match against.
   * @returns {boolean} `true` when `exec` would produce a token.
   */
  test(program) {
    const savedIndex = this.lastIndex;
    const result = this.exec(program);
    // exec() may have advanced (or reset) lastIndex; put it back.
    this.lastIndex = savedIndex;

    return result !== null;
  }
}

/**
 * Skippable input, reported with token type "WHITE". One match is exactly one
 * of: a run of whitespace, a single-line comment introduced by `#` or `;`
 * (running to the end of the line), or a `/* ... *\/` comment (matched lazily
 * up to the first closing delimiter).
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` notes inside the pattern.
 */
const WHITES = new TokenRegex(
  "WHITE",
  `(\\s+|       # Spaces
                                [#;].*|     # Single-line comments
                                \\/\\*[^]*?\\*\\/  # Multiline comments
                               )`,
  `yx`
);

/**
 * Line terminator: an optional carriage return followed by a line feed.
 *
 * The pattern is written in free-spacing style (indentation plus `#` notes),
 * so it must be compiled with XRegExp's `x` flag; without it the notes and
 * padding are literal pattern text and a real line break can never match.
 *
 * NOTE(review): unlike the other matchers in this file, no `y` flag is added,
 * mirroring the original flag-less call, so this still searches anywhere in
 * the input and does not advance `lastIndex`. Confirm against the callers
 * whether sticky matching is wanted here.
 */
const NEWLINE = new TokenRegex(
  "NEWLINE",
  `(\\r?        # Carriage return (for compatibility)
                                 \\n         # Newline char
                                )`,
  "x"
);

/**
 * Numeric literal: optional `+`/`-` sign, optional leading digits, an optional
 * decimal point followed by at least one digit, and an optional exponent
 * (`e`/`E`, optional sign, digits). At least one digit is always required, and
 * a trailing `.` with no digit after it is not part of the match.
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` notes inside the pattern.
 */
const NUMBER = new TokenRegex(
  "NUMBER",
  `([-+]?       # Sign
                                \\d*          # Digits
                                \\.?\\d+      # Decimals
                                (?:[eE][-+]?\\d+)? # Exp notatio
                               )`,
  "yx"
);

/**
 * Double-quoted string literal. Group 1 (the token value) is the text between
 * the quotes; the quotes themselves are consumed but not captured.
 *
 * As written, the body may contain any character except `"` and backslash, so
 * a string containing a backslash (including an escaped quote) does not match.
 * NOTE(review): the inline note mentions escaped quotes — confirm whether
 * escape sequences are meant to be supported.
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` note inside the pattern.
 */
const STRING = new TokenRegex(
  "STRING",
  `"([^"\\\\]*  # Any character except 'escaped "'
                                )"`,
  "yx"
);

/**
 * Word token. Alternatives are tried in order: the literal `[]`, the literal
 * `:=`, or a run of one or more characters that are none of: whitespace,
 * `(`, `)`, `{`, `}`, `[`, `]`, `.`, `,`, `:` or `"`.
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` notes inside the pattern.
 */
const WORD = new TokenRegex(
  "WORD",
  `(\\[\\]|     # [] is a reserved word for arrays
                              :=|         # := is a reserved word for 'define'
                              [^\\s\\(\\)\\{\\}\\[\\]\\.,:"]+   # Avoid some chars
                              )`,
  "yx"
);

/**
 * Opening delimiter: exactly one of `(`, `{`, `[` or `.`.
 *
 * NOTE(review): the character class also accepts `.`, which the inline note
 * inside the pattern does not mention — confirm that this is intended.
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` note inside the pattern.
 */
const LP = new TokenRegex(
  "LP",
  `([\\(\\{\\[\\.]  # ({[ are synonyms
                           )`,
  "yx"
);

/**
 * Closing delimiter: exactly one of `)`, `}` or `]` (the inline note inside
 * the pattern spells this set as `}}]`, but the character class is what
 * counts).
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` note inside the pattern.
 */
const RP = new TokenRegex(
  "RP",
  `([\\)\\}\\]]     # }}] are synonyms
                          )`,
  "yx"
);

/**
 * Separator: either `,` or a `:` that is not immediately followed by `=`.
 * The `=` check is a negative lookahead, so only the `:` itself is consumed;
 * `:=` is left for the WORD pattern.
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` notes inside the pattern.
 */
const COMMA = new TokenRegex(
  "COMMA",
  `(,|          # comma
                               :(?!=)      # a : that isn't followed by a = (:= is word)
                              )`,
  "yx"
);

/**
 * Regex literal of the form `r/body/flags`. The body is matched lazily, so it
 * ends at the first `/` after `r/` (a body therefore cannot contain `/`), and
 * is followed by zero or more flag letters drawn from `nsxAgimuy`. The whole
 * literal, including the `r/` prefix and the flags, is the captured value.
 *
 * Flags: `y` anchors the match at `lastIndex`; `x` is XRegExp's free-spacing
 * mode, which is what allows the whitespace and `#` notes inside the pattern.
 */
const REGEX = new TokenRegex(
  "REGEX",
  `(r/[^]*?/       # body of the regex
                               [nsxAgimuy]*   # flags
                              )`,
  "yx"
);

// Public API: the ready-made token matchers defined above, plus the
// TokenRegex class itself so that callers can build their own matchers.
module.exports = {
  WHITES,
  NEWLINE,
  NUMBER,
  STRING,
  WORD,
  LP,
  RP,
  COMMA,
  REGEX,
  TokenRegex
};