Start a PEG-inspired approach to parsing
This commit is contained in:
parent
371ca2b512
commit
7b42a496c2
1 changed file with 93 additions and 7 deletions
100
notcl.js
100
notcl.js
|
@ -5,6 +5,94 @@
|
|||
* @property {string} text
|
||||
*/
|
||||
|
||||
/**
|
||||
* A Pattern is a function that matches against a string starting at a given index.
|
||||
*
|
||||
* If it matches successfully, it returns some captured value, and the index following the match.
|
||||
*
|
||||
* @template T
|
||||
* @typedef {(source: string, index: number) => ([T, number] | null)} Pattern
|
||||
*/
|
||||
|
||||
/**
 * Builds a pattern that delegates matching to `pattern` and, when that
 * succeeds, passes the captured value through `map`.
 *
 * The index following the match is forwarded unchanged; a failed match
 * (`null`) is forwarded as `null` without calling `map`.
 *
 * @template T, U
 * @param {Pattern<T>} pattern
 * @param {(value: T)=> U} map
 * @return {Pattern<U>}
 */
function MapPattern(pattern, map) {
  return (source, index) => {
    const result = pattern(source, index);
    if (!result) {
      return null;
    }
    // result is [capturedValue, indexAfterMatch]; only the value is transformed.
    return [map(result[0]), result[1]];
  };
}
|
||||
|
||||
/**
 * Creates a pattern matching a regex & returning any captures.
 *
 * Matching must be anchored at the requested index, which requires the sticky
 * (//y) flag. A regex passed without it is not used directly: a sticky copy
 * with the same source and remaining flags is matched instead, so the pattern
 * never searches ahead of `index` or reports a wrong end index.
 *
 * @param {RegExp} regex
 * @return {Pattern<RegExpExecArray>}
 */
function RegexPattern(regex) {
  // Without the y flag, exec() either ignores lastIndex entirely (no g flag)
  // or scans forward from it (g flag); neither is an anchored match.
  const anchored = regex.sticky
    ? regex
    : new RegExp(regex.source, `${regex.flags}y`);

  return function (source, index) {
    // A sticky exec() starts exactly at lastIndex and, on success, leaves
    // lastIndex at the index following the match.
    anchored.lastIndex = index;
    const matches = anchored.exec(source);
    return matches ? [matches, anchored.lastIndex] : null;
  };
}
|
||||
|
||||
/**
 * Creates a pattern that tries each of the given patterns in order, all at the
 * same starting index, and returns the result of the first one that matches.
 * Returns `null` when none of them match (including when no patterns are given).
 *
 * @template T
 * @param {...Pattern<T>} patterns
 * @return {Pattern<T>}
 */
function Choose(...patterns) {
  return (source, index) => {
    for (let i = 0; i < patterns.length; i += 1) {
      const alternative = patterns[i];
      const result = alternative(source, index);
      if (result) {
        // First success wins; later alternatives are never attempted.
        return result;
      }
    }
    return null;
  };
}
|
||||
|
||||
/**
 * Creates a pattern that runs the given patterns one after another, each
 * starting where the previous one ended. It succeeds only if every pattern
 * matches, returning the captured values in order together with the index
 * following the last match. If any pattern fails, the result is `null`.
 *
 * @template {unknown[]} T
 * @param {{[K in keyof T]: Pattern<T[K]>}} patterns
 * @return {Pattern<T>}
 */
function Sequence(...patterns) {
  return (source, index) => {
    const captured = /** @type {T} */ (/** @type {unknown} */ ([]));
    // Position at which the next pattern will be attempted.
    let cursor = index;
    for (const step of patterns) {
      const result = step(source, cursor);
      if (result == null) {
        return null;
      }
      captured.push(result[0]);
      cursor = result[1];
    }
    return [captured, cursor];
  };
}
|
||||
|
||||
// Zero or more whitespace characters other than a newline; never consumes
// a newline or a `;`. Always matches (possibly the empty string).
const InterCommandWhitespace = RegexPattern(/[^\S\n;]*/y);

// A `#`, the rest of that line, and the newline that ends it. A `#` line that
// is not followed by a newline does not match.
const CommentPattern = RegexPattern(/#.*\n/y);

// Same character class as InterCommandWhitespace, kept as its own regex
// object for skipping the gap before a word.
const PreWordWhitespace = RegexPattern(/[^\S\n;]*/y);

// One or more characters that are neither whitespace nor `;`, wrapped as a
// `{ text }` object.
const BasicWord = MapPattern(RegexPattern(/[^\s;]+/y), (match) => ({
  text: match[0],
}));

// Optional leading whitespace followed by a word; only the word is kept.
const WordPattern = MapPattern(
  Sequence(PreWordWhitespace, BasicWord),
  (parts) => parts[1]
);
|
||||
|
||||
/**
|
||||
* Parse out a Notcl script into an easier-to-interpret representation.
|
||||
* No script is actually executed yet.
|
||||
|
@ -18,20 +106,18 @@ function parseNotcl(code) {
|
|||
code = code.replace(/(?<!\\)((\\\\)*)\\\n/g, "$1");
|
||||
|
||||
/* Parse */
|
||||
function nextWord(/* TODO: null/] terminator */) {
|
||||
// Strip whitespace
|
||||
code = code.replace(/^[^\S\n;]*/, "");
|
||||
function nextWord(/* TODO: null/]/" terminator */) {
|
||||
// TODO: handle all kinds of brace/substitution stuff
|
||||
const word = code.match(/^[^\s;]+/);
|
||||
const [word, nextIndex] = WordPattern(code, 0) ?? [null, 0];
|
||||
if (word) {
|
||||
code = code.substring(word[0].length);
|
||||
return { text: word[0] };
|
||||
code = code.substring(nextIndex);
|
||||
return word;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function nextCommand(/* TODO: null/] terminator */) {
|
||||
function nextCommand(/* TODO: null/]/" terminator */) {
|
||||
const command = /** @type {Word[]} */ ([]);
|
||||
while (true) {
|
||||
// Strip whitespace
|
||||
|
|
Loading…
Reference in a new issue