WIP replacement Notcl parser
This commit is contained in:
parent
7c74c9e34f
commit
2c55e38822
1 changed files with 155 additions and 0 deletions
155
src/parser2.ts
Normal file
155
src/parser2.ts
Normal file
|
@ -0,0 +1,155 @@
|
|||
import {
|
||||
Command,
|
||||
ErrorResult,
|
||||
InterpolatedPiece,
|
||||
Script,
|
||||
SimplifyWord,
|
||||
Word,
|
||||
} from "./words";
|
||||
|
||||
/**
 * Convert Notcl source text into a script (a list of commands) that is easier
 * to interpret. Parsing only builds the data structure; nothing is executed.
 *
 * @param code Notcl source text to parse
 * @param offset source position of `code` when it is embedded in a larger
 *   source document. NOTE: this work-in-progress implementation accepts the
 *   argument but does not read it yet.
 * @returns `[true, script]` when the whole input was consumed, otherwise
 *   `[false, message]` where `message` describes the failure
 */
export function parse(
  code: string,
  offset = 0
): [true, Script] | [false, string] {
  try {
    const parser = new Parser(code);
    const commands = parser.parseScript();

    // TODO: report error with error position

    // Success requires the parser to have consumed every character of the input.
    const consumedEverything = parser.lastIndex === code.length;
    if (consumedEverything) {
      return [true, commands];
    }

    return [false, "Couldn't parse full script"];
  } catch (failure) {
    // Anything thrown while parsing is reported as text, not propagated.
    return [false, String(failure)];
  }
}
|
||||
|
||||
// ---------------------------
|
||||
|
||||
// Parser for Notcl scripts
|
||||
|
||||
/**
 * Every kind of token the parser distinguishes.
 *
 * Only "newline", "whitespace" and "text" currently have a matching rule in
 * `Tokens`; "EOF" and "ERROR" are produced directly by `Parser.advance`.
 */
type TokenType =
  // separators between words and commands
  | "newline"
  | "whitespace"
  | "semicolon"
  // bracketing, quoting and escaping
  | "{"
  | "}"
  | "["
  | "]"
  | "quote"
  | "backslash"
  // remaining content
  | "comment"
  | "text"
  // synthetic tokens
  | "EOF"
  | "ERROR";

/**
 * One token: its kind, its text (capture group 1 for rule-matched tokens),
 * and the index in the source text at which it starts.
 */
type Token = [type: TokenType, chars: string, pos: number];

/**
 * Tokenizer rules, tried in order at the current position.
 *
 * Each pattern is sticky (`y` flag), so it only matches exactly at the
 * `lastIndex` assigned before `exec`, and each wraps its match in capture
 * group 1.
 */
const Tokens: Array<[TokenType, RegExp]> = [
  // a single line feed
  ["newline", /(\n)/y],
  // a run of whitespace that contains no line feed
  ["whitespace", /([^\S\n]+)/y],
  // a run of characters that are not whitespace, backslash, semicolon or square brackets
  ["text", /([^\s\\;\[\]]+)/y],
];
|
||||
|
||||
/**
 * Accumulator for a script that is still being parsed.
 *
 * Pieces collect into the current word, finished words collect into the
 * current command, and finished commands collect into the script.
 */
class WipScript {
  /** Commands completed so far. */
  script: Command[] = [];
  /** Words of the command currently being assembled. */
  wipCommand: Word[] = [];
  /** Pieces of the word currently being assembled. */
  wipWord: InterpolatedPiece[] = [];
  // TODO: thing to fail {}a & ""a

  /** Append one piece to the word currently being assembled. */
  addWordPiece(piece: InterpolatedPiece): void {
    this.wipWord.push(piece);
  }

  /**
   * Close the current word, if it has any pieces: simplify it, add it to the
   * current command, and start a fresh word. Does nothing for an empty word.
   */
  finishWord(): void {
    if (this.wipWord.length === 0) {
      return;
    }

    const word = SimplifyWord(this.wipWord);
    this.wipCommand.push(word);
    this.wipWord = [];
  }

  /**
   * Close the current word and then the current command. A command with no
   * words is not added to the script.
   */
  finishCommand(): void {
    this.finishWord();

    if (this.wipCommand.length === 0) {
      return;
    }

    this.script.push(this.wipCommand);
    this.wipCommand = [];
  }

  /**
   * Close whatever is still in progress and hand back the accumulated script.
   *
   * @returns the list of completed commands
   */
  finishScript(): Script {
    this.finishCommand();
    return this.script;
  }
}
|
||||
|
||||
/**
 * Tokenizes `text` one token at a time and assembles the tokens into a script.
 *
 * `lastIndex` is the index of the first character not yet tokenized and
 * `next` is the most recently read token, which `parseScript` has yet to
 * process.
 */
class Parser {
  lastIndex: number = 0;
  next: Token;

  /**
   * @param text source text to tokenize; the first token is read immediately
   */
  constructor(public text: string) {
    this.next = this.advance();
  }

  /**
   * Read the token starting at `lastIndex`, store it in `next`, and return it.
   *
   * Yields an "EOF" token at the end of the text and an "ERROR" token when no
   * rule in `Tokens` matches. `lastIndex` only moves forward when a rule
   * matches.
   *
   * @returns the token that was just read
   */
  advance(): Token {
    const begin = this.lastIndex;

    // Assume failure; replaced below on end-of-input or on a rule match.
    let token: Token = ["ERROR", "Token not matched", begin];

    if (begin === this.text.length) {
      token = ["EOF", "<EOF>", begin];
    } else {
      for (const [kind, pattern] of Tokens) {
        // The patterns are shared and sticky, so aim each one at this position.
        pattern.lastIndex = begin;
        const hit = pattern.exec(this.text);
        if (hit !== null) {
          this.lastIndex = pattern.lastIndex;
          token = [kind, hit[1], begin];
          break;
        }
      }
    }

    this.next = token;
    return token;
  }

  /**
   * Consume tokens, starting with `next`, until "EOF" or "]" is reached, and
   * build the script they describe.
   *
   * @returns the commands parsed so far when the terminating token is reached
   * @throws Error for any token kind this parser does not handle yet,
   *   including "ERROR" tokens
   */
  parseScript(): Script {
    const builder = new WipScript();
    let token = this.next;

    for (;;) {
      const [kind, chars, pos] = token;

      if (kind === "text") {
        builder.addWordPiece({ bare: chars, pos });
      } else if (kind === "whitespace") {
        builder.finishWord();
      } else if (kind === "newline" || kind === "semicolon") {
        builder.finishCommand();
      } else if (kind === "EOF" || kind === "]") {
        // The terminating token is left in `next`; it is not advanced past.
        return builder.finishScript();
      } else {
        // "{", "}", "[", "quote", "backslash", "comment" and "ERROR"
        throw new Error(`Unhandled case: ${kind} (${chars})`);
      }

      token = this.advance();
    }
  }
}
|
Loading…
Reference in a new issue