Start a PEG-inspired approach to parsing

This commit is contained in:
Tangent Wantwight 2023-07-29 00:11:54 -04:00
parent 371ca2b512
commit 7b42a496c2

100
notcl.js
View file

@ -5,6 +5,94 @@
* @property {string} text
*/
/**
* A Pattern is a function that matches against a string starting at a given index.
*
* If it matches successfully, it returns some captured value, and the index following the match.
*
* @template T
* @typedef {(source: string, index: number) => ([T, number] | null)} Pattern
*/
/**
 * Builds a pattern that delegates matching to another pattern and, when that
 * pattern succeeds, runs its captured value through a conversion function.
 *
 * A failed match (null) is passed straight through; `map` is not invoked for it.
 * The index following the match is left exactly as the wrapped pattern reported it.
 *
 * @template T, U
 * @param {Pattern<T>} pattern - the pattern that performs the actual matching
 * @param {(value: T)=> U} map - converts the captured value of a successful match
 * @return {Pattern<U>} a pattern producing the converted value
 */
function MapPattern(pattern, map) {
  return (source, index) => {
    const result = pattern(source, index);
    if (!result) {
      return null;
    }
    return [map(result[0]), result[1]];
  };
}
/**
 * Creates a pattern matching a regex at exactly the requested index & returning any captures.
 *
 * Anchoring the match at `index` depends on sticky semantics (the //y modifier): only a
 * sticky regex starts matching at `lastIndex` and refuses to skip ahead. If the supplied
 * regex is not sticky, a sticky copy (same source, same remaining flags) is used instead,
 * so a missing `y` can no longer produce a match found elsewhere in the string or a stale
 * end index. A regex that is already sticky is used as-is.
 *
 * @param {RegExp} regex - expression to match; preferably already sticky
 * @return {Pattern<RegExpExecArray>} on a match, the exec() result and the index just past it
 */
function RegexPattern(regex) {
  // Without `y`, exec() either ignores lastIndex entirely (no `g`) or searches forward (`g`).
  const anchored = regex.sticky
    ? regex
    : new RegExp(regex.source, `${regex.flags}y`);
  return function (source, index) {
    anchored.lastIndex = index;
    const matches = anchored.exec(source);
    // After a successful sticky exec(), lastIndex is the position following the match.
    return matches ? [matches, anchored.lastIndex] : null;
  };
}
/**
 * Creates a pattern that tries several alternatives, in the order given, all at the
 * same starting index. The result of the first alternative that matches is returned
 * unchanged; later alternatives are not tried. If no alternative matches (or none
 * were supplied), the pattern fails with null.
 *
 * @template T
 * @param {...Pattern<T>} patterns - alternatives, highest priority first
 * @return {Pattern<T>}
 */
function Choose(...patterns) {
  return (source, index) => {
    for (let i = 0; i < patterns.length; i += 1) {
      const result = patterns[i](source, index);
      if (result) {
        return result;
      }
    }
    return null;
  };
}
/**
 * Creates a pattern that runs several patterns back to back: each one starts at the
 * index where the previous one finished. The whole sequence matches only if every
 * pattern matches; the first failure makes the sequence fail with null and the
 * remaining patterns are not run.
 *
 * On success the captured values are collected, in order, into an array, and the
 * returned index is the one following the last pattern's match.
 *
 * @template {unknown[]} T
 * @param {{[K in keyof T]: Pattern<T[K]>}} patterns
 * @return {Pattern<T>}
 */
function Sequence(...patterns) {
  return (source, index) => {
    const captured = /** @type {T} */ (/** @type {unknown} */ ([]));
    let cursor = index;
    for (let i = 0; i < patterns.length; i += 1) {
      const result = patterns[i](source, cursor);
      if (result == null) {
        return null;
      }
      captured.push(result[0]);
      cursor = result[1];
    }
    return [captured, cursor];
  };
}
/**
 * Matches a run of whitespace characters other than newline. The run may be empty,
 * so a position with no whitespace still yields a successful (zero-length) match.
 */
const InterCommandWhitespace = RegexPattern(/[^\S\n;]*/y);

/**
 * Matches a `#` comment through the end of its line, consuming the terminating
 * newline when there is one. A comment that runs to the end of the input without a
 * trailing newline is accepted as well.
 */
const CommentPattern = RegexPattern(/#.*(?:\n|$)/y);

/**
 * Matches the (possibly empty) run of non-newline whitespace that may precede a word.
 * Uses the same expression as InterCommandWhitespace, with its own regex object.
 */
const PreWordWhitespace = RegexPattern(/[^\S\n;]*/y);
/**
 * Matches one bare word: a non-empty run of characters that are neither whitespace
 * nor `;`. The matched text is wrapped in an object as its `text` property.
 */
const BasicWord = MapPattern(
  RegexPattern(/[^\s;]+/y),
  (match) => ({ text: match[0] })
);

/**
 * Matches a word together with any non-newline whitespace in front of it.
 * The whitespace capture is discarded; only the word object is produced.
 */
const WordPattern = MapPattern(
  Sequence(PreWordWhitespace, BasicWord),
  (parts) => parts[1]
);
/**
* Parse out a Notcl script into an easier-to-interpret representation.
* No script is actually executed yet.
@ -18,20 +106,18 @@ function parseNotcl(code) {
code = code.replace(/(?<!\\)((\\\\)*)\\\n/g, "$1");
/* Parse */
function nextWord(/* TODO: null/] terminator */) {
// Strip whitespace
code = code.replace(/^[^\S\n;]*/, "");
function nextWord(/* TODO: null/]/" terminator */) {
// TODO: handle all kinds of brace/substitution stuff
const word = code.match(/^[^\s;]+/);
const [word, nextIndex] = WordPattern(code, 0) ?? [null, 0];
if (word) {
code = code.substring(word[0].length);
return { text: word[0] };
code = code.substring(nextIndex);
return word;
} else {
return null;
}
}
function nextCommand(/* TODO: null/] terminator */) {
function nextCommand(/* TODO: null/]/" terminator */) {
const command = /** @type {Word[]} */ ([]);
while (true) {
// Strip whitespace