WIP replacement Notcl parser

This commit is contained in:
Tangent Wantwight 2024-06-05 19:16:56 -04:00
parent 7c74c9e34f
commit 2c55e38822

155
src/parser2.ts Normal file
View file

@ -0,0 +1,155 @@
import {
Command,
ErrorResult,
InterpolatedPiece,
Script,
SimplifyWord,
Word,
} from "./words";
/**
 * Turn Notcl source text into a structured script without running any of it.
 *
 * A `Parser` is created over `code` and asked for a single script. The parse
 * only counts as successful when the parser consumed the entire input.
 *
 * @param code Notcl source text to parse
 * @param offset source position of `code` when it is embedded in a larger
 *   source document. NOTE: accepted for callers, but this function does not
 *   read it yet.
 * @returns `[true, script]` when all of `code` was parsed; otherwise
 *   `[false, message]`, where `message` is either a fixed "couldn't parse"
 *   notice or the stringified exception raised while parsing
 */
export function parse(
  code: string,
  offset = 0
): [true, Script] | [false, string] {
  try {
    const parser = new Parser(code);
    const parsed = parser.parseScript();
    // Success requires that nothing was left unread after the script ended.
    if (parser.lastIndex === code.length) {
      return [true, parsed];
    }
    // TODO: report error with error position
    return [false, "Couldn't parse full script"];
  } catch (failure) {
    // Anything thrown while constructing the parser or parsing becomes the
    // failure message instead of propagating to the caller.
    return [false, String(failure)];
  }
}
// ---------------------------
// Parser for evaluating Notcl scripts
/**
 * Every kind of lexical token the tokenizer can report.
 *
 * `"EOF"` and `"ERROR"` are synthetic: they do not correspond to matched
 * source text.
 */
type TokenType =
  // Separators between words and commands
  | "newline"
  | "whitespace"
  | "semicolon"
  // Grouping delimiters
  | "{"
  | "}"
  | "["
  | "]"
  // Other syntax
  | "quote"
  | "backslash"
  | "comment"
  // Ordinary word content
  | "text"
  // Synthetic tokens
  | "EOF"
  | "ERROR";

/**
 * A single token as a tuple:
 * - `type`: which kind of token this is
 * - `text`: the matched source text (a placeholder or message for the
 *   synthetic `"EOF"` / `"ERROR"` tokens)
 * - `pos`: index in the source text where the token starts
 */
type Token = [type: TokenType, text: string, pos: number];
/**
 * Tokenizer table, tried in order at the current scan position.
 *
 * Each pattern is sticky (`y` flag), so it only matches starting exactly at
 * the `lastIndex` assigned before `exec` is called. Capture group 1 holds the
 * token's text.
 *
 * Token types with no entry here are never produced by the tokenizer; input
 * that matches no entry is reported as an `"ERROR"` token by the parser.
 */
const Tokens: [TokenType, RegExp][] = [
  ["newline", /(\n)/y],
  // Horizontal whitespace only: any whitespace character except "\n".
  ["whitespace", /([^\S\n]+)/y],
  // ";" separates commands. The "text" pattern deliberately excludes it, so
  // without this entry any ";" in the input would fail to tokenize.
  ["semicolon", /(;)/y],
  // A run of characters that are not whitespace, "\", ";", "[" or "]".
  ["text", /([^\s\\;\[\]]+)/y],
];
/**
 * Accumulator for a script that is still being parsed.
 *
 * Pieces are gathered into the current word, words into the current command,
 * and commands into the script. Each `finish*` method closes its own level
 * and every level nested inside it. Empty words and empty commands are
 * dropped rather than recorded.
 */
class WipScript {
  /** Commands completed so far. */
  script: Command[] = [];
  /** Words completed so far for the command in progress. */
  wipCommand: Word[] = [];
  /** Pieces collected so far for the word in progress. */
  wipWord: InterpolatedPiece[] = [];

  // TODO: thing to fail {}a & ""a

  /** Append one piece to the word in progress. */
  addWordPiece(piece: InterpolatedPiece) {
    this.wipWord.push(piece);
  }

  /** Close the word in progress, if it has any pieces, and start a new one. */
  finishWord() {
    if (this.wipWord.length === 0) {
      return;
    }
    const word = SimplifyWord(this.wipWord);
    this.wipCommand.push(word);
    // Start a fresh array instead of clearing in place: the old one was just
    // handed to SimplifyWord.
    this.wipWord = [];
  }

  /** Close the word and then the command in progress, if it has any words. */
  finishCommand() {
    this.finishWord();
    if (this.wipCommand.length === 0) {
      return;
    }
    this.script.push(this.wipCommand);
    this.wipCommand = [];
  }

  /**
   * Close whatever is still in progress and hand back the collected commands.
   *
   * @returns the accumulated list of commands
   */
  finishScript(): Script {
    this.finishCommand();
    return this.script;
  }
}
/**
 * Tokenizer and script parser over a single source string, with one token of
 * lookahead held in `next`.
 */
class Parser {
  /** Index in `text` just past the most recently matched token. */
  lastIndex = 0;
  /** Lookahead token: whatever the latest `advance()` produced. */
  next: Token;

  /**
   * @param text source text to tokenize and parse
   */
  constructor(public text: string) {
    // Prime the lookahead so `next` is valid before any parsing starts.
    this.next = this.advance();
  }

  /**
   * Read the token that starts at `lastIndex`, store it in `next`, and
   * return it.
   *
   * @returns the newly read token
   */
  advance(): Token {
    const token = this.scanFrom(this.lastIndex);
    this.next = token;
    return token;
  }

  /**
   * Produce the token that begins at `start`.
   *
   * On a successful match, `lastIndex` moves past the matched text. The
   * `"EOF"` and `"ERROR"` results leave `lastIndex` where it was.
   */
  private scanFrom(start: number): Token {
    if (start === this.text.length) {
      return ["EOF", "<EOF>", start];
    }
    for (const [kind, pattern] of Tokens) {
      // Point the pattern at the scan position before every attempt.
      pattern.lastIndex = start;
      const found = pattern.exec(this.text);
      if (found === null) {
        continue;
      }
      this.lastIndex = pattern.lastIndex;
      return [kind, found[1], start];
    }
    return ["ERROR", "Token not matched", start];
  }

  /**
   * Parse commands starting from the current lookahead token until an
   * `"EOF"` or `"]"` token is reached.
   *
   * @returns the commands collected up to that point
   * @throws Error when the lookahead is any token type this parser does not
   *   handle yet (`{`, `}`, `[`, quote, backslash, comment) or an `"ERROR"`
   *   token
   */
  parseScript(): Script {
    const wip = new WipScript();
    for (;;) {
      const [type, chars, pos] = this.next;
      if (type === "EOF" || type === "]") {
        // The terminating token is left in `next`; no further advance.
        return wip.finishScript();
      }
      if (type === "text") {
        wip.addWordPiece({ bare: chars, pos });
      } else if (type === "whitespace") {
        wip.finishWord();
      } else if (type === "newline" || type === "semicolon") {
        wip.finishCommand();
      } else {
        throw new Error(`Unhandled case: ${type} (${chars})`);
      }
      this.advance();
    }
  }
}