// Recovered from a git patch whose line breaks had been collapsed:
//   commit 2c55e38822fcd2fc1a2e9cacaa85b6243de315b9
//   From: Tangent Wantwight, Wed, 5 Jun 2024 19:16:56 -0400
//   Subject: [PATCH] WIP replacement Notcl parser
//   Adds src/parser2.ts (new file, 155 insertions)

import {
  Command,
  ErrorResult,
  InterpolatedPiece,
  Script,
  SimplifyWord,
  Word,
} from "./words";

/**
 * Parse out a Notcl script into an easier-to-interpret representation.
 * No script is actually executed yet.
 *
 * @param code code to parse
 * @param offset source position of code, if embedded in a larger source document;
 *   it is added to every token position recorded in the result
 * @returns parsed list of commands, or error message on failure
 */
export function parse(
  code: string,
  offset = 0
): [true, Script] | [false, string] {
  try {
    const parser = new Parser(code, offset);
    const script = parser.parseScript();

    // TODO: report error with error position

    // parseScript() can stop before the end of the input (e.g. on "]"), so
    // make sure the whole text was consumed. lastIndex is relative to `code`,
    // not to the enclosing document, hence no offset here.
    if (parser.lastIndex !== code.length) {
      return [false, "Couldn't parse full script"];
    }

    return [true, script];
  } catch (ex) {
    return [false, String(ex)];
  }
}

// ---------------------------

// Parser for evaluating Notcl scripts

/** Every kind of token the parser knows about, including ones not yet produced. */
type TokenType =
  | "newline"
  | "whitespace"
  | "semicolon"
  | "{"
  | "}"
  | "["
  | "]"
  | "quote"
  | "backslash"
  | "comment"
  | "text"
  | "EOF"
  | "ERROR";

/** A token: its type, its text (or a message for "ERROR"), and its source position. */
type Token = [TokenType, string, number];

/**
 * Token patterns, tried in order at the current position.
 *
 * Every regex is sticky (`y`) so it only matches exactly at `lastIndex`, and
 * wraps the token text in capture group 1.
 */
const Tokens: [TokenType, RegExp][] = [
  ["newline", /(\n)/y],
  // Any run of whitespace that contains no newline.
  ["whitespace", /([^\S\n]+)/y],
  // Any run of characters other than whitespace, backslash, ";", "[" or "]".
  ["text", /([^\s\\;\[\]]+)/y],
];

/**
 * Accumulates a script under construction: pieces build up a word, words
 * build up a command, and commands build up the script.
 */
class WipScript {
  script: Command[] = [];
  wipCommand: Word[] = [];
  wipWord: InterpolatedPiece[] = [];
  // TODO: thing to fail {}a & ""a

  /** Append one piece to the word currently being built. */
  addWordPiece(piece: InterpolatedPiece) {
    this.wipWord.push(piece);
  }

  /** Close the current word, if it has any pieces, and add it to the current command. */
  finishWord() {
    if (this.wipWord.length > 0) {
      // NOTE(review): SimplifyWord is defined in ./words; presumably it
      // collapses the pieces into a single Word - confirm there.
      this.wipCommand.push(SimplifyWord(this.wipWord));
      this.wipWord = [];
    }
  }

  /** Close the current word and then the current command, skipping empty commands. */
  finishCommand() {
    this.finishWord();
    if (this.wipCommand.length > 0) {
      this.script.push(this.wipCommand);
      this.wipCommand = [];
    }
  }

  /** Close anything still in progress and return the accumulated script. */
  finishScript(): Script {
    this.finishCommand();
    return this.script;
  }
}

/**
 * Tokenizer plus parser over a single source string, with one token of
 * lookahead held in `next`.
 */
class Parser {
  /** Index into `text` just past the last token that was matched. */
  lastIndex = 0;
  /** The lookahead token: the most recent result of `advance()`. */
  next: Token;

  /**
   * @param text source text to tokenize
   * @param offset position of `text` within a larger document; added to
   *   every token position this parser reports
   */
  constructor(public text: string, public offset = 0) {
    this.next = this.advance();
  }

  /**
   * Read the token starting at `lastIndex`, store it in `next` and return it.
   *
   * Yields an "EOF" token at the end of the text. If no pattern matches,
   * yields an "ERROR" token and leaves `lastIndex` where it was.
   */
  advance(): Token {
    const startPos = this.lastIndex;
    const pos = this.offset + startPos;
    if (startPos === this.text.length) {
      return (this.next = ["EOF", "", pos]);
    }

    for (const [type, regex] of Tokens) {
      // The regexes are shared module-level objects, so always reset
      // lastIndex before use rather than trusting what a previous exec left.
      regex.lastIndex = startPos;
      const matches = regex.exec(this.text);
      if (matches) {
        this.lastIndex = regex.lastIndex;
        return (this.next = [type, matches[1], pos]);
      }
    }

    return (this.next = ["ERROR", "Token not matched", pos]);
  }

  /**
   * Consume tokens until "EOF" or "]" and return the commands read so far.
   *
   * @throws Error on any token type that is not handled yet, including "ERROR"
   */
  parseScript(): Script {
    const wip = new WipScript();

    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "text":
          wip.addWordPiece({ bare: chars, pos });
          break;

        case "whitespace":
          wip.finishWord();
          break;

        case "newline":
        case "semicolon":
          wip.finishCommand();
          break;

        case "EOF":
        case "]":
          // The terminating token is left in `next`, not consumed.
          return wip.finishScript();

        case "{":
        case "}":
        case "[":
        case "quote":
        case "backslash":
        case "comment":
        case "ERROR":
          throw new Error(`Unhandled case: ${type} (${chars})`);
      }

      this.advance();
    }
  }
}