From 2c55e38822fcd2fc1a2e9cacaa85b6243de315b9 Mon Sep 17 00:00:00 2001
From: Tangent Wantwight <tangent128@gmail.com>
Date: Wed, 5 Jun 2024 19:16:56 -0400
Subject: [PATCH] WIP replacement Notcl parser

---
 src/parser2.ts | 155 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 src/parser2.ts
diff --git a/src/parser2.ts b/src/parser2.ts
new file mode 100644
index 0000000..75f9842
--- /dev/null
+++ b/src/parser2.ts
@@ -0,0 +1,155 @@
+import {
+  Command,
+  ErrorResult,
+  InterpolatedPiece,
+  Script,
+  SimplifyWord,
+  Word,
+} from "./words";
+
+/**
+ * Parse Notcl source text into a Script without executing any of it.
+ * Exceptions thrown while parsing are caught and returned as the error message.
+ *
+ * @param code Notcl source to parse
+ * @param offset source position of code within a larger document (currently unused)
+ * @returns [true, script] on success, or [false, message] on failure
+ */
+export function parse(
+  code: string,
+  offset = 0
+): [true, Script] | [false, string] {
+  // TODO: report error with error position
+  let script: Script;
+  let consumed: number;
+  try {
+    const parser = new Parser(code);
+    script = parser.parseScript();
+    consumed = parser.lastIndex;
+  } catch (ex) {
+    return [false, String(ex)];
+  }
+  if (consumed === code.length) {
+    return [true, script];
+  }
+  return [false, "Couldn't parse full script"];
+}
+
+// ---------------------------
+
+// Parser for evaluating Notcl scripts
+// Token kinds; "EOF" and "ERROR" are produced by Parser.advance(), not by a Tokens rule.
+type TokenType =
+  | "newline"
+  | "whitespace"
+  | "semicolon"
+  | "{"
+  | "}"
+  | "["
+  | "]"
+  | "quote"
+  | "backslash"
+  | "comment"
+  | "text"
+  | "EOF"
+  | "ERROR";
+// A token as [type, text, start index in the source]; EOF/ERROR carry placeholder text.
+type Token = [TokenType, string, number];
+// Lexer rules, tried in order; each sticky (y) regex matches only at its lastIndex.
+const Tokens: [TokenType, RegExp][] = [
+  ["newline", /(\n)/y], // a single line feed
+  ["whitespace", /([^\S\n]+)/y], // a run of whitespace other than line feeds
+  ["text", /([^\s\\;\[\]]+)/y], // a run without whitespace, backslash, ";", "[" or "]"
+];
+
+class WipScript {
+  script: Command[] = [];
+  wipCommand: Word[] = [];
+  wipWord: InterpolatedPiece[] = [];
+  // TODO: thing to fail {}a & ""a
+
+  addWordPiece(piece: InterpolatedPiece) {
+    this.wipWord.push(piece);
+  }
+  // Close the word in progress, if it has any pieces, and append it to the command.
+  finishWord() {
+    if (this.wipWord.length === 0) return;
+    this.wipCommand.push(SimplifyWord(this.wipWord));
+    this.wipWord = [];
+  }
+  // Close the command in progress (finishing its last word first) and append it to the script.
+  finishCommand() {
+    this.finishWord();
+    if (this.wipCommand.length === 0) return;
+    this.script.push(this.wipCommand);
+    this.wipCommand = [];
+  }
+  finishScript(): Script {
+    this.finishCommand();
+    return this.script;
+  }
+}
+
+// Tokenizer + parser over one string: `next` is the lookahead token, `lastIndex` the scan position.
+class Parser {
+  lastIndex: number = 0;
+  next: Token;
+
+  constructor(public text: string) {
+    this.next = this.advance();
+  }
+
+  // Lex the token starting at lastIndex, store it in `next`, and return it.
+  // Yields "EOF" at end of input and "ERROR" (consuming nothing) when no rule matches.
+  advance(): Token {
+    const at = this.lastIndex;
+    let token: Token = ["ERROR", "Token not matched", at];
+    if (at === this.text.length) {
+      token = ["EOF", "<EOF>", at];
+    } else {
+      for (const [kind, rule] of Tokens) {
+        rule.lastIndex = at; // sticky regexes only match exactly here
+        const hit = rule.exec(this.text);
+        if (hit) {
+          this.lastIndex = rule.lastIndex;
+          token = [kind, hit[1], at];
+          break;
+        }
+      }
+    }
+    this.next = token;
+    return token;
+  }
+
+  // Fold tokens into a Script until "EOF" or a "]" (which is left unconsumed).
+  // Throws on token types this WIP parser does not handle yet.
+  parseScript(): Script {
+    const builder = new WipScript();
+    for (; ; this.advance()) {
+      const [type, chars, pos] = this.next;
+      switch (type) {
+        case "EOF":
+        case "]":
+          return builder.finishScript();
+        case "newline":
+        case "semicolon":
+          builder.finishCommand();
+          break;
+        case "whitespace":
+          builder.finishWord();
+          break;
+        case "text":
+          builder.addWordPiece({ bare: chars, pos });
+          break;
+        case "{":
+        case "}":
+        case "[":
+        case "quote":
+        case "backslash":
+        case "comment":
+        case "ERROR":
+          throw new Error(`Unhandled case: ${type} (${chars})`);
+      }
+    }
+  }
+}