// Source: prototype-3x5/src/peg.ts (TypeScript; listed as 198 lines, 6.4 KiB)

/**
 * A Pattern matches against a string starting at a given index, looking for a value with a particular format.
 *
 * The matching function is typed to receive the pattern as `this`, which lets it read
 * `this.expectLabel` when reporting what it expected to find.
 */
export class Pattern<out T> {
  constructor(
    /** The function that performs the match; call it as a method (`pattern.match(source, index)`). */
    public match: PatternFunc<T>,
    /** A human-readable annotation describing the pattern for error messages */
    public expectLabel: string = match.name
  ) {}

  /**
   * Creates a pattern that wraps another pattern, transforming the returned value on a match.
   *
   * The wrapper starts out with the same label as the wrapped pattern. If the wrapper is
   * later renamed with {@link Pattern.expects}, the wrapped matcher is run under the new
   * label, so `p.map(f).expects("x")` reports the same label as `p.expects("x").map(f)`.
   *
   * @param map - Mapping function; receives the captured value and the index the match started at
   * @returns A pattern that consumes the same input but captures the mapped value
   */
  public map<U>(map: (value: T, index: number) => U): Pattern<U> {
    const labelAtCreation = this.expectLabel;
    // Untouched label: delegate to the original pattern as-is. Otherwise run the original
    // matcher under the label the wrapper currently carries.
    const resolve = (label: string): Pattern<T> =>
      label === labelAtCreation ? this : this.expects(label);
    return new Pattern<U>(function (
      this: Pattern<U>,
      source: string,
      index: number
    ): [[U, number] | null, number, string] {
      const [value, furthest, expected] = resolve(this.expectLabel).match(
        source,
        index
      );
      return [
        value ? [map(value[0], index), value[1]] : null,
        furthest,
        expected,
      ];
    }, labelAtCreation);
  }

  /**
   * Adds a human-readable annotation describing the pattern.
   *
   * @param label - The label to report in place of the current one
   * @returns A new pattern sharing this pattern's matching function; this pattern is not modified
   */
  public expects(label: string): Pattern<T> {
    return new Pattern(this.match, label);
  }
}
/**
 * The signature of a pattern's matching function. It is invoked with the owning
 * {@link Pattern} as `this`.
 *
 * If the pattern matches successfully, it returns some captured value, and the index following the match.
 *
 * It may also return an error, if that error may have prevented the pattern from matching more than it did.
 *
 * Some more complex patterns might succeed, but consume less input than they would have been able to if some
 * other expected symbol was found. Reporting the furthest a pattern could hypothetically have gotten can help
 * generate better error messages if no valid parse tree is found.
 *
 * Parameters: `source` is the string being parsed; `index` is the index in the string to begin matching from.
 *
 * Returns `[successValue, furthest symbol attempted, expected pattern]`, where `successValue` is
 * `[captured value, index following the match]` or `null` when the pattern did not match.
 */
type PatternFunc<out T> = (
  this: Pattern<T>,
  source: string,
  index: number
) => [
  success: [value: T, next: number] | null,
  furthest: number,
  expected: string
];
/**
 * Builds a pattern that defers to whatever pattern an accessor function returns.
 *
 * The accessor is called on every match attempt rather than when `Use` is called, so a
 * pattern can refer to itself (or to a pattern declared later) by returning the `const`
 * it is being assigned to.
 *
 * The label of the resulting pattern is the accessor function converted to a string.
 *
 * @param getPattern - Accessor returning the pattern to delegate to
 */
export function Use<T>(getPattern: () => Pattern<T>): Pattern<T> {
  const label = String(getPattern);
  return new Pattern<T>((source, index) => {
    const target = getPattern();
    return target.match(source, index);
  }, label);
}
/**
 * Creates a pattern matching a regex & returning any captures.
 *
 * Matching must be anchored at the requested index, which requires the sticky (`y`) flag.
 * A regex that is already sticky is used as given (its `lastIndex` is overwritten on every
 * match attempt). A regex without the sticky flag is not used directly: a sticky copy with
 * the same source and flags is created once and used instead, so the match can neither
 * skip ahead in the input nor report a wrong end index.
 *
 * @param regex - The expression to match; its source text becomes the pattern's label
 * @returns A pattern capturing the `RegExpExecArray` produced by the match
 */
export function Regex(regex: RegExp): Pattern<RegExpExecArray> {
  const anchored = regex.sticky
    ? regex
    : new RegExp(regex.source, regex.flags + "y");
  return new Pattern<RegExpExecArray>(function (source, index) {
    // With the sticky flag, exec() only matches at exactly lastIndex and then
    // advances lastIndex to the end of the match.
    anchored.lastIndex = index;
    const matches = anchored.exec(source);
    return matches
      ? [[matches, anchored.lastIndex], -1, this.expectLabel]
      : [null, index, this.expectLabel];
  }, regex.source);
}
/**
 * Creates a pattern that tries the given patterns, in order, until it finds one that matches at the current index.
 *
 * Diagnostics: while trying alternatives, the furthest position reached by a failing
 * alternative (and what it expected there) is remembered. If no alternative matches, that
 * information is returned; when no alternative got past the starting index, the pattern's
 * own label is reported instead. If an alternative matches, the remembered information is
 * still returned whenever an earlier alternative got further than the successful one
 * reports, so a parent pattern can mention what would have allowed a longer match.
 *
 * @param patterns - The alternatives, in priority order
 * @returns A pattern capturing the value of the first alternative that matches; its default
 *   label is the alternatives' labels joined with " | "
 */
export function Choose<T>(...patterns: Pattern<T>[]): Pattern<T> {
  const genericExpected = patterns
    .map((pattern) => pattern.expectLabel)
    .join(" | ");
  return new Pattern(function (source, index) {
    let furthestFound = index;
    let furthestExpected = this.expectLabel;
    for (const pattern of patterns) {
      const [value, furthest, expected] = pattern.match(source, index);
      if (value) {
        // Only override the winner's diagnostics when a failed alternative really
        // advanced past the start and past whatever the winner itself reports.
        return furthestFound > index && furthestFound > furthest
          ? [value, furthestFound, furthestExpected]
          : [value, furthest, expected];
      }
      if (furthest > furthestFound) {
        furthestFound = furthest;
        furthestExpected = expected;
      }
    }
    return [null, furthestFound, furthestExpected];
  }, genericExpected);
}
/**
 * Creates a pattern that concatenates the given patterns, returning a tuple of their captured values.
 *
 * For example, if A matches "a" and captures 1, while B matches "b" and captures null,
 * then `Sequence(A,B)` will match "ab" and capture [1, null]
 *
 * Diagnostics: the furthest position reported by any element is tracked. When several
 * elements report the same furthest position, their expected labels are joined with " | ";
 * each distinct label is listed once. The tracking starts at the sequence's starting index
 * with the first element's label (or "(nothing)" for an empty sequence).
 *
 * @param patterns - The patterns to match one after another
 * @returns A pattern capturing the tuple of the elements' captured values; its default label
 *   is the first element's label
 */
export function Sequence<T extends unknown[]>(
  ...patterns: { [K in keyof T]: Pattern<T[K]> }
): Pattern<T> {
  const genericExpected = patterns[0]?.expectLabel ?? "(nothing)";
  return new Pattern(function (source, index) {
    const values: unknown[] = [];
    let furthestFound = index;
    // Labels expected at `furthestFound`, kept distinct so that the seed label is not
    // repeated when the first element itself reports at the starting index.
    let expectedLabels: string[] = [genericExpected];
    for (const pattern of patterns) {
      const [value, furthest, expected] = pattern.match(source, index);
      if (furthest > furthestFound) {
        furthestFound = furthest;
        expectedLabels = [expected];
      } else if (
        furthest === furthestFound &&
        !expectedLabels.includes(expected)
      ) {
        expectedLabels.push(expected);
      }
      if (value == null) {
        return [null, furthestFound, expectedLabels.join(" | ")];
      }
      values.push(value[0]);
      // The next element starts where this one stopped.
      index = value[1];
    }
    return [[values as T, index], furthestFound, expectedLabels.join(" | ")];
  }, genericExpected);
}
/**
 * Creates a pattern that matches the given pattern repeatedly, back to back, and captures
 * an array with the value of every repetition.
 *
 * The match only succeeds if at least {@link min} repetitions were found.
 *
 * A repetition that does not advance the index is still captured, but ends the run, which
 * prevents an endless loop on patterns that consume no input.
 *
 * Note that with a minimum of zero this pattern always succeeds, though it might not
 * consume any input.
 *
 * Diagnostics: the furthest position reported by any attempt that got past the starting
 * index is returned together with that attempt's expected label; otherwise the starting
 * index and this pattern's own label are returned.
 *
 * @param min - Minimum number of repetitions required for a successful match
 * @param pattern - The pattern to repeat; its label is used as the default label
 */
export function AtLeast<T>(min: number, pattern: Pattern<T>): Pattern<T[]> {
  return new Pattern(function (source, index) {
    const captured: T[] = [];
    let cursor = index;
    let reach = index;
    let reachLabel = this.expectLabel;
    for (;;) {
      const [hit, attemptReach, attemptLabel] = pattern.match(source, cursor);
      if (attemptReach > reach) {
        reach = attemptReach;
        reachLabel = attemptLabel;
      }
      if (hit == null) {
        break;
      }
      const [capture, next] = hit;
      captured.push(capture);
      // No progress: keep the capture but stop repeating.
      if (cursor == next) {
        break;
      }
      cursor = next;
    }
    return [captured.length >= min ? [captured, cursor] : null, reach, reachLabel];
  }, pattern.expectLabel);
}
/**
 * Creates a lookahead pattern: it succeeds exactly when the given pattern matches and
 * captures the same value, but reports the starting index as the index following the
 * match, so no input is consumed.
 *
 * The furthest position and expected label are passed through from the given pattern.
 *
 * @param pattern - The pattern to test for; its label is used as the default label
 */
export function Peek<T>(pattern: Pattern<T>): Pattern<T> {
  return new Pattern(function (source, index) {
    const [hit, furthest, expected] = pattern.match(source, index);
    if (!hit) {
      return [null, furthest, expected];
    }
    // Keep the captured value but rewind to where the match started.
    return [[hit[0], index], furthest, expected];
  }, pattern.expectLabel);
}
/**
 * Creates a pattern that matches only at the end of the input.
 *
 * It captures `true` and consumes nothing when the index equals the length of the source
 * string; otherwise it fails. In both cases the index it was asked to match at is reported
 * as the furthest position, together with the pattern's label ("<eof>" by default).
 */
export function End(): Pattern<true> {
  return new Pattern<true>(function End(source, index) {
    const atEnd = index == source.length;
    if (atEnd) {
      return [[true, index], index, this.expectLabel];
    }
    return [null, index, this.expectLabel];
  }, "<eof>");
}