prototype-3x5/peg.js

223 lines
7.7 KiB
JavaScript

/**
* A Pattern is a function that matches against a string starting at a given index.
*
* If it matches successfully, it returns some captured value, and the index following the match.
*
* On success or failure, it returns the furthest point the pattern could make sense of, and a description of what was expected next at that point.
*
* For simple patterns, the "furthest point" may just be the following index; however, some more complex patterns might succeed,
* but consume less input than they would have been able to if some other expected symbol was found. Reporting
* the furthest a pattern could hypothetically have gotten can help generate better error messages if no valid parse tree is found.
*
* @template T
* @callback Peg.PatternCall
* @param {string} source - the string being parsed
* @param {number} index - the index in the string to begin matching from
* @returns {[[T, number] | null, number, string]} - [successValue, furthest symbol attempted, expected pattern]
*/
/**
* @template T
* @typedef {object} Peg.PatternExt
* @property {<U>(map: (value: T) => U) => Peg.Pattern<U>} map Creates a pattern that wraps another pattern, transforming the returned value on a match
* @property {string} expectLabel A human-readable annotation describing the pattern for error messages
* @property {(label: string) => Peg.Pattern<T>} expects Adds a human-readable annotation describing the pattern
*/
/**
* @template T
* @typedef {Peg.PatternCall<T> & Peg.PatternExt<T>} Peg.Pattern
*/
// Reuse a `Peg` namespace that is already registered on the global object, or start a new one.
// `globalThis` is the same object as `window` in a browser page, but it also exists where `window`
// does not, so evaluating this file there no longer throws a ReferenceError.
// `var` is kept (rather than `const`/`let`) because it tolerates being evaluated more than once in the same scope.
var Peg = globalThis.Peg ?? {};
/**
 * Turns a bare match function into a pattern by attaching the helper members directly onto it.
 *
 * The function that is passed in is decorated in place and returned; no copy is made.
 * Its initial `expectLabel` is the function's own `name`.
 *
 * @template T
 * @param {(source: string, index: number) => ([[T, number] | null, number, string])} matchFunc
 * @returns {Peg.Pattern<T>}
 */
Peg.WrapPattern = function (matchFunc) {
  const wrapped = /** @type {Peg.Pattern<T>} */ (matchFunc);

  wrapped.map = function (map) {
    const mapped = Peg.WrapPattern(function (source, index) {
      const [match, furthest, expected] = wrapped(source, index);
      if (!match) {
        return [null, furthest, expected];
      }
      // Only the captured value is transformed; the end index, furthest point and expectation pass through.
      const captured = map(match[0]);
      return [[captured, match[1]], furthest, expected];
    });
    // The mapped pattern takes the label the source pattern has at the moment map() is called.
    return mapped.expects(wrapped.expectLabel);
  };

  wrapped.expectLabel = wrapped.name;

  wrapped.expects = (label) => {
    // Relabels this same pattern and hands it back so calls can be chained.
    wrapped.expectLabel = label;
    return wrapped;
  };

  return wrapped;
};
/**
 * Builds a stand-in pattern that looks up the real pattern through an accessor every time it is run.
 *
 * Because the accessor is only called at match time, a pattern can refer to itself (or to a pattern
 * declared later) inside its own definition. The stand-in is labelled with the accessor's source text.
 *
 * @template T
 * @param {() => Peg.Pattern<T>} getPattern
 * @returns {Peg.Pattern<T>}
 */
Peg.Use = function (getPattern) {
  const proxy = Peg.WrapPattern(function (source, index) {
    // Resolve the target on every call; it may not have existed when Use() itself ran.
    const target = getPattern();
    return target(source, index);
  });
  return proxy.expects(String(getPattern));
};
/**
 * Creates a pattern matching a regex & returning any captures.
 *
 * A match must begin exactly at the requested index, which needs the sticky (`y`) flag. A regex that
 * already has it is used as given (its `lastIndex` is written on every attempt). A regex without it is
 * matched through a sticky copy with the same source and the same other flags, so it cannot report a
 * match that starts somewhere after the requested index.
 *
 * The pattern is labelled with the regex source.
 * @param {RegExp} regex
 * @return {Peg.Pattern<RegExpExecArray>}
 */
Peg.Regex = function (regex) {
  // Without `y`, exec() either ignores lastIndex (no `g`) or searches forward from it (`g`);
  // neither anchors the match at `index`.
  const anchored = regex.sticky ? regex : new RegExp(regex, `${regex.flags}y`);
  /** @type {Peg.Pattern<RegExpExecArray>} */
  const pattern = Peg.WrapPattern(function (source, index) {
    anchored.lastIndex = index;
    const matches = anchored.exec(source);
    // On success a sticky exec() leaves lastIndex just past the match, which is both the next index and the furthest point.
    return matches
      ? [[matches, anchored.lastIndex], anchored.lastIndex, pattern.expectLabel]
      : [null, index, pattern.expectLabel];
  }).expects(regex.source);
  return pattern;
};
/**
 * Creates a pattern that tries the given patterns, in order, until it finds one that matches at the current index.
 *
 * The default label joins the labels the alternatives have when Choose() is called, separated by " | ".
 *
 * Furthest-point reporting:
 * - If every alternative fails, the result carries the furthest point any of them reached. When none got
 *   past the starting index, the joined label is reported as the expectation.
 * - If an alternative matches, its own furthest point is reported, unless an earlier, rejected alternative
 *   got strictly further; in that case the rejected alternative's furthest point and expectation are
 *   reported alongside the successful value.
 * @template T
 * @param {...Peg.Pattern<T>} patterns
 * @return {Peg.Pattern<T>}
 */
Peg.Choose = function (...patterns) {
  const genericExpected = patterns
    .map((pattern) => pattern.expectLabel)
    .join(" | ");
  return Peg.WrapPattern(function (source, index) {
    let furthestFound = index;
    let furthestExpected = genericExpected;
    for (const pattern of patterns) {
      const [value, furthest, expected] = pattern(source, index);
      if (value) {
        // A rejected alternative may have made sense of more input than the one that matched;
        // dropping that would hide the most useful position for an error message.
        return furthest >= furthestFound
          ? [value, furthest, expected]
          : [value, furthestFound, furthestExpected];
      }
      if (furthest > furthestFound) {
        furthestFound = furthest;
        furthestExpected = expected;
      }
    }
    return [null, furthestFound, furthestExpected];
  }).expects(genericExpected);
};
/**
 * Creates a pattern that runs the given patterns one after another, each starting where the previous one
 * stopped, and captures a tuple of their values.
 *
 * For example, if A matches "a" and captures 1, while B matches "b" and captures null,
 * then `Sequence(A,B)` will match "ab" and capture [1, null].
 *
 * The sequence fails as soon as one member fails. Its default label is the first member's label, or
 * "(nothing)" when there is no first member.
 * @template {unknown[]} T
 * @param {{[K in keyof T]: Peg.Pattern<T[K]>}} patterns
 * @return {Peg.Pattern<T>}
 */
Peg.Sequence = function (...patterns) {
  const head = patterns[0];
  const genericExpected = head?.expectLabel ?? "(nothing)";

  const sequence = Peg.WrapPattern(function (source, index) {
    const captures = /** @type {T} */ (/** @type {unknown} */ ([]));
    let cursor = index;
    let reach = index;
    let reachLabel = genericExpected;

    for (let i = 0; i < patterns.length; i += 1) {
      const step = patterns[i];
      const [match, furthest, expected] = step(source, cursor);
      // `>=`: on a tie the most recent step's expectation replaces the earlier one.
      if (furthest >= reach) {
        reach = furthest;
        reachLabel = expected;
      }
      if (match == null) {
        return [null, reach, reachLabel];
      }
      captures.push(match[0]);
      cursor = match[1];
    }

    return [[captures, cursor], reach, reachLabel];
  });

  return sequence.expects(genericExpected);
};
/**
 * Creates a pattern that applies the given pattern repeatedly, each attempt starting where the previous
 * match ended, and captures an array of every value matched.
 *
 * The match only succeeds if at least {@link min} values were collected.
 *
 * Repetition stops at the first failed attempt. It also stops right after an attempt that matched without
 * consuming input (that value is still collected), which prevents an endless loop.
 *
 * Note that if the minimum run is zero, this pattern will always succeed, but might not consume any input.
 * @template {unknown} T
 * @param {number} min
 * @param {Peg.Pattern<T>} pattern
 * @return {Peg.Pattern<T[]>}
 */
Peg.AtLeast = function (min, pattern) {
  const repeated = Peg.WrapPattern(function (source, index) {
    const captures = /** @type {T[]} */ ([]);
    let cursor = index;
    let reach = index;
    let reachLabel = pattern.expectLabel;

    for (;;) {
      const [match, furthest, expected] = pattern(source, cursor);
      // Strictly further only: on a tie the expectation already recorded is kept.
      if (furthest > reach) {
        reach = furthest;
        reachLabel = expected;
      }
      if (match == null) {
        break;
      }
      captures.push(match[0]);
      const next = match[1];
      if (cursor == next) {
        // Matched without moving forward; another attempt would repeat forever.
        break;
      }
      cursor = next;
    }

    const outcome = captures.length >= min ? [captures, cursor] : null;
    return [outcome, reach, reachLabel];
  });

  return repeated.expects(pattern.expectLabel);
};
/**
 * Creates a pattern that succeeds only when the index equals the length of the source, i.e. at end of input.
 *
 * It captures `true`, consumes nothing, and is labelled "<eof>".
 * @return {Peg.Pattern<true>}
 */
Peg.End = () => {
  /** @type {Peg.Pattern<true>} */
  const end = Peg.WrapPattern(function End(source, index) {
    const atEnd = source.length == index;
    const outcome = atEnd ? [/** @type {true} */ (true), index] : null;
    // The label is read from the pattern at match time, so a later expects() call is reflected here.
    return [outcome, index, end.expectLabel];
  }).expects("<eof>");
  return end;
};
/**
 * Creates a pattern that never succeeds, but reports how far its wrapped pattern could match before failing.
 *
 * This is a hack, meant to improve error messages after an AtLeast(). Maybe this can be removed by patterns returning both success and failure
 *
 * Never consumes input. If the wrapped pattern fails, its failure (furthest point and expectation) is
 * passed through unchanged. If the wrapped pattern succeeds, the result is a failure at the starting
 * index, labelled with the wrapped pattern's label. Nothing is written to the console in either case.
 * @param {Peg.Pattern<unknown>} pattern
 * @return {Peg.Pattern<never>}
 */
Peg.Hint = function (pattern) {
  return /** @type {Peg.Pattern<never>} */ (
    Peg.WrapPattern(function (source, index) {
      const [value, furthest, expected] = pattern(source, index);
      // A successful inner match is deliberately reported as a zero-length failure: a hint must never
      // contribute a value or consume input.
      return value
        ? [null, index, pattern.expectLabel]
        : [value, furthest, expected];
    })
  ).expects(pattern.expectLabel);
};