prototype-3x5/src/parser.ts

162 lines
4 KiB
TypeScript

import { escapeHtml } from './helpers';
import { AtLeast, Choose, End, Pattern, Peek, Regex, Sequence, Use } from './peg';
import {
InterpolatedPiece, Script, ScriptPiece, SimplifyWord, TextWord, Word as WordType
} from './words';
/**
 * A `#` comment. The regex consumes `#` and then any run of characters that
 * are neither a backslash nor a newline, or a backslash followed by any single
 * character (`[^]` also matches a newline, so a backslash-newline keeps the
 * comment going). The matched text is discarded: the mapped value is always an
 * empty array.
 */
const Comment = Regex(/#([^\\\n]|\\[^])*/y)
  .expects("#")
  .map(() => {
    return [];
  });
/**
 * Whitespace that separates words on one line: one or more characters that are
 * whitespace but not a newline (the `;` in the class is never whitespace, so it
 * has no effect), or a backslash immediately followed by a newline.
 */
const PreWordWhitespace = Regex(/([^\S\n;]|\\\n)+/y).expects("whitespace");
/**
 * A backslash followed by exactly one character. `.` is used without the `s`
 * flag, so the escaped character cannot be a line terminator. The result is a
 * text piece holding only the escaped character; the backslash is dropped.
 */
const BackslashEscape = Sequence(
  Regex(/\\/y).expects("BACKSLASH"),
  Regex(/./y).expects("CHAR")
).map((parts) => {
  const escaped = parts[1];
  return { text: escaped[0] };
});
/**
 * A run of one or more characters that can appear literally in a bare word:
 * anything except whitespace, backslash, `;`, `[` and `]`. The `y` (sticky)
 * flag makes the match start exactly at the regex's `lastIndex`.
 */
const BARE_WORD_CHAR = /[^\s\\;\[\]]+/y;
// Declared here without a value; it is assigned further down in this file.
// Bracket only reaches it through the Use(() => ...) thunk below.
let BracketScript: Pattern<Script>;

/**
 * A bracketed sub-script: a literal `[` followed by whatever BracketScript
 * matches. The `[` match is ignored and the result is `{ script }`, where
 * `script` is BracketScript's result.
 */
const Bracket: Pattern<ScriptPiece> = Sequence(
  Regex(/\[/y).expects("["),
  Use(() => BracketScript)
)
  .expects("[")
  .map((parts) => ({ script: parts[1] }));
/**
 * An unquoted word. The leading zero-width regex is a negative lookahead that
 * refuses to start on `"` or `{`. After it come one or more pieces, each of
 * which is a backslash escape, a bracketed script, or a run of
 * BARE_WORD_CHAR characters (wrapped as `{ bare }`). The collected pieces and
 * the position argument are handed to SimplifyWord.
 */
const BareWord = Sequence(
  Regex(/(?!["{])/y),
  AtLeast(
    1,
    Choose<InterpolatedPiece>(
      BackslashEscape,
      Bracket,
      Regex(BARE_WORD_CHAR)
        .expects("CHAR")
        .map((match) => ({ bare: match[0] }))
    )
  )
).map((parts, pos) => SimplifyWord(parts[1], pos));
/**
 * A double-quoted word: an opening `"`, zero or more pieces, and a closing `"`.
 * A piece is a backslash escape, a bracketed script, or a run of characters
 * containing no `"`, backslash or `[` (wrapped as `{ text }`). Only the pieces
 * and the position argument are passed on to SimplifyWord; the quote matches
 * are ignored.
 */
const QuotedWord = Sequence(
  Regex(/"/y).expects('"'),
  AtLeast(
    0,
    Choose<InterpolatedPiece>(
      BackslashEscape,
      Bracket,
      Regex(/[^"\\\[]+/y)
        .expects("CHAR")
        .map((match) => ({ text: match[0] }))
    )
  ),
  Regex(/"/y).expects('"')
).map((parts, pos) => SimplifyWord(parts[1], pos));
/**
 * Interpolated text that runs until the End() pattern. Zero or more pieces
 * are tried in this order:
 *  - a backslash-newline plus any whitespace after it, replaced by one space;
 *  - a backslash escape;
 *  - a bracketed script;
 *  - a run of characters containing no backslash or `[` (wrapped as `{ text }`).
 * The pieces and the position argument are passed to SimplifyWord.
 */
export const TemplateBlock = Sequence(
  AtLeast(
    0,
    Choose<InterpolatedPiece>(
      Regex(/\\\n\s*/y)
        .map(() => {
          return { text: " " };
        })
        .expects("BACKSLASH"),
      BackslashEscape,
      Bracket,
      Regex(/[^\\\[]+/y)
        .expects("CHAR")
        .map((match) => ({ text: match[0] }))
    )
  ),
  End()
).map((parts, pos) => SimplifyWord(parts[0], pos));
/**
 * A brace-delimited literal: `{`, zero or more fragments, `}`. A fragment is
 *  - a nested Brace, whose text is wrapped in braces again so nesting is kept;
 *  - a backslash followed by any character (newline included), kept verbatim
 *    with its backslash;
 *  - a run of characters containing no backslash, `{` or `}`.
 * The result is the fragments joined together, without the outermost braces.
 */
const Brace: Pattern<string> = Sequence(
  Regex(/\{/y).expects("{"),
  AtLeast(
    0,
    Choose(
      Use(() => Brace)
        .expects("{")
        .map((inner) => "{" + inner + "}"),
      Regex(/\\[^]/y)
        .expects("BACKSLASH")
        .map((match) => match[0]),
      Regex(/[^\\{}]+/y)
        .expects("CHAR")
        .map((match) => match[0])
    )
  ),
  Regex(/\}/y).expects("}")
).map((parts) => parts[1].join(""));
/**
 * Any single word. The alternatives are listed in this order: a braced
 * literal (turned into a TextWord carrying its text and position), a quoted
 * word, then a bare word.
 */
export const WordPattern = Choose<WordType>(
  Brace.map((text, pos) => {
    const word = { text, pos } as TextWord;
    return word;
  }),
  QuotedWord,
  BareWord
);
/**
 * The end of a command: a single newline or `;`. The matched character is
 * not kept; the mapped value is always `true`.
 */
const CommandTerminator = Regex(/[\n;]/y)
  .expects("NEWLINE | ;")
  .map(() => {
    return true;
  });
/**
 * One command: a first word, then zero or more further words each preceded by
 * PreWordWhitespace, then optional trailing PreWordWhitespace. The result is
 * the list of words in source order; the whitespace matches are dropped.
 */
const Command = Sequence(
  WordPattern,
  AtLeast(
    0,
    Sequence(PreWordWhitespace, WordPattern).map((pair) => pair[1])
  ),
  AtLeast(0, PreWordWhitespace)
).map((parts) => [parts[0], ...parts[1]]);
/**
 * Builds a script pattern: any number of script items followed by
 * `endPattern`.
 *
 * An item is whitespace, a bare command terminator, a comment, or a command.
 * A comment or command must be followed by a command terminator or, checked
 * through Peek, by `endPattern`. Every item except a command maps to an empty
 * list, and empty lists are filtered out of the result, so only commands with
 * at least one word remain.
 *
 * @param endPattern pattern that closes the script
 * @returns pattern producing the list of commands (each a list of words)
 */
function scriptTmpl(endPattern: Pattern<unknown>) {
  // Built afresh for each use, as two separate pattern instances.
  const commandEnd = () => Choose(CommandTerminator, Peek(endPattern));

  const blankSpace = PreWordWhitespace.map(() => []);
  const emptyCommand = CommandTerminator.map(() => []);
  const commentLine = Sequence(Comment, commandEnd()).map(() => []);
  const commandLine = Sequence(Command, commandEnd()).map((parts) => parts[0]);

  return Sequence(
    AtLeast(0, Choose(blankSpace, emptyCommand, commentLine, commandLine)),
    endPattern
  ).map((parts) => parts[0].filter((words) => words.length > 0));
}
/** Top-level script pattern: a script closed by the End() pattern from ./peg. */
const Script = scriptTmpl(End());
// Bracketed sub-scripts are closed by a literal `]`. BracketScript is assigned
// only here; Bracket refers to it through a Use(() => BracketScript) thunk.
BracketScript = scriptTmpl(Regex(/\]/y).expects("]"));
/**
 * Grabs the text around a position on the same line. With `lastIndex` set to
 * that position, group 1 (inside a lookbehind) holds up to 50 non-newline
 * characters before it and group 2 up to 50 non-newline characters after it.
 * Both groups may be empty, so the pattern matches at any valid position.
 */
const ERROR_CONTEXT = /(?<=([^\n]{0,50}))([^\n]{0,50})/y;
/**
 * Turn Notcl source text into a parsed script. Nothing is executed here.
 *
 * On failure the returned message is an HTML `<pre>` block containing the
 * error position, up to 50 characters of same-line context on either side of
 * it, a `---^` marker under the failing column, and what was expected. The
 * context and the expectation text are passed through escapeHtml.
 *
 * @param code code to parse
 * @param offset source position of code, if embedded in a larger source
 *   document. NOTE(review): this value is not read anywhere in the function
 *   body; matching always starts at 0 and reported positions are relative to
 *   `code`.
 * @returns `[true, script]` on success, or `[false, message]` on failure
 */
export function parse(code: string, offset = 0): [true, Script] | [false, string] {
  const [parsed, errorPos, expected] = Script.match(code, 0);
  if (parsed) {
    return [true, parsed[0]];
  }

  // Pull the text surrounding the failure position for the error display.
  ERROR_CONTEXT.lastIndex = errorPos;
  const context = ERROR_CONTEXT.exec(code)!;
  const before = context[1];
  const after = context[2];

  const message = `<pre>Error at position ${errorPos}
${escapeHtml(before + after)}
${"-".repeat(before.length)}^
Expected: ${escapeHtml(expected)}</pre>`;
  return [false, message];
}