Disallow ] in top-level bare words, not just in command substitutions

Rationale: This differs from Tcl syntax, but it means a word can be recovered knowing only its location in the source, without needing the surrounding context.
This commit is contained in:
Tangent Wantwight 2023-11-20 16:45:13 -05:00
parent ec763f1029
commit 119933b9b6
1 changed file with 32 additions and 43 deletions

View File

@ -15,8 +15,7 @@ const BackslashEscape = Sequence(
Regex(/./y).expects("CHAR")
).map(([, [char]]) => ({ text: char }));
// One or more characters that are not whitespace, a backslash, ";" or "[".
// Note that "]" IS allowed by this class. The sticky (y) flag anchors the
// match at the regex's lastIndex.
const BARE_WORD_CHAR = /[^\s\\;\[]+/y;
// Same as the class above, but additionally excludes "]".
const BARE_BRACKET_WORD_CHAR = /[^\s\\;\[\]]+/y;
// One or more characters that are not whitespace, a backslash, ";", "[" or "]".
// NOTE(review): this shares its name with the first definition above; the two
// appear to be the before/after versions of the same constant in this diff.
const BARE_WORD_CHAR = /[^\s\\;\[\]]+/y;
// Declared without an initializer; it is assigned later from scriptTmpl(...)
// with a "]" end pattern.
let BracketScript: Pattern<Script>;
const Bracket: Pattern<ScriptPiece> = Sequence(
@ -26,21 +25,19 @@ const Bracket: Pattern<ScriptPiece> = Sequence(
.expects("[")
.map(([, script]) => ({ script }));
function bareWordTmpl(charRegex: RegExp) {
return Sequence(
Regex(/(?!["{])/y),
AtLeast(
1,
Choose<InterpolatedPiece>(
BackslashEscape,
Bracket,
Regex(charRegex)
.expects("CHAR")
.map(([text]) => ({ bare: text }))
)
const BareWord = Sequence(
Regex(/(?!["{])/y),
AtLeast(
1,
Choose<InterpolatedPiece>(
BackslashEscape,
Bracket,
Regex(BARE_WORD_CHAR)
.expects("CHAR")
.map(([text]) => ({ bare: text }))
)
).map(([, pieces], pos) => SimplifyWord(pieces, pos));
}
)
).map(([, pieces], pos) => SimplifyWord(pieces, pos));
const QuotedWord = Sequence(
Regex(/"/y).expects('"'),
@ -76,33 +73,26 @@ const Brace: Pattern<string> = Sequence(
Regex(/\}/y).expects("}")
).map(([, fragments]) => fragments.join(""));
/**
 * Builds a word pattern as a choice between a braced word, a quoted word and
 * a bare word.
 *
 * @param bareWordCharRegex - Regex forwarded to bareWordTmpl to build the
 *   bare-word alternative.
 * @returns A pattern yielding a WordType.
 */
function wordTmpl(bareWordCharRegex: RegExp): Pattern<WordType> {
return Choose<WordType>(
// Wrap the braced text together with the callback's `pos` argument as a TextWord.
Brace.map((text, pos) => ({ text, pos } as TextWord)),
QuotedWord,
bareWordTmpl(bareWordCharRegex)
);
}
// Exported word pattern, built with the character class that also excludes "]".
export const WordPattern = wordTmpl(BARE_BRACKET_WORD_CHAR);
/**
 * Exported word pattern: a choice between a braced word, a quoted word and a
 * bare word, yielding a WordType.
 */
export const WordPattern = Choose<WordType>(
// Wrap the braced text together with the callback's `pos` argument as a TextWord.
Brace.map((text, pos) => ({ text, pos } as TextWord)),
QuotedWord,
BareWord
);
// Matches a single newline or ";" at the current position (sticky regex).
// "NEWLINE | ;" is the label handed to expects(); the matched text is
// discarded and the pattern yields `true` instead.
const CommandTerminator = Regex(/[\n;]/y)
.expects("NEWLINE | ;")
.map(() => true);
/**
 * Builds a command pattern: one word, then zero or more further words each
 * preceded by PreWordWhitespace, then zero or more trailing PreWordWhitespace
 * matches.
 *
 * @param bareWordCharRegex - Regex forwarded to wordTmpl for the bare-word
 *   alternative.
 * @returns A pattern yielding the command's words as one array (the first
 *   word followed by the remaining words).
 */
function commandTmpl(bareWordCharRegex: RegExp) {
const word = wordTmpl(bareWordCharRegex);
return Sequence(
word,
AtLeast(
0,
// Keep only the word; the PreWordWhitespace match in front of it is dropped.
Sequence(PreWordWhitespace, word).map(([, word]) => word)
),
// Trailing PreWordWhitespace is consumed but not included in the result.
AtLeast(0, PreWordWhitespace)
).map(([word, moreWords]) => [word].concat(moreWords));
}
/**
 * Command pattern: one WordPattern, then zero or more further words each
 * preceded by PreWordWhitespace, then zero or more trailing PreWordWhitespace
 * matches. Yields the command's words as one array (the first word followed
 * by the remaining words).
 */
const Command = Sequence(
WordPattern,
AtLeast(
0,
// Keep only the word; the PreWordWhitespace match in front of it is dropped.
Sequence(PreWordWhitespace, WordPattern).map(([, word]) => word)
),
// Trailing PreWordWhitespace is consumed but not included in the result.
AtLeast(0, PreWordWhitespace)
).map(([word, moreWords]) => [word].concat(moreWords));
function scriptTmpl(bareWordCharRegex: RegExp, endPattern: Pattern<unknown>) {
function scriptTmpl(endPattern: Pattern<unknown>) {
return Sequence(
AtLeast(
0,
@ -112,18 +102,17 @@ function scriptTmpl(bareWordCharRegex: RegExp, endPattern: Pattern<unknown>) {
Sequence(Comment, Choose(CommandTerminator, Peek(endPattern))).map(
() => []
),
Sequence(
commandTmpl(bareWordCharRegex),
Choose(CommandTerminator, Peek(endPattern))
).map(([words]) => words)
Sequence(Command, Choose(CommandTerminator, Peek(endPattern))).map(
([words]) => words
)
)
),
endPattern
).map(([commands]) => commands.filter((command) => command.length > 0));
}
// Script pattern built with End() as the end pattern (two-argument form of scriptTmpl).
const Script = scriptTmpl(BARE_WORD_CHAR, End());
// Bracketed script pattern: same template, but ended by a literal "]".
BracketScript = scriptTmpl(BARE_BRACKET_WORD_CHAR, Regex(/\]/y).expects("]"));
// Script pattern built with End() as the end pattern (one-argument form of scriptTmpl).
// NOTE(review): this repeats the `Script` name from above; the two pairs of
// lines appear to be the before/after versions in this diff.
const Script = scriptTmpl(End());
// Bracketed script pattern: ended by a literal "]".
BracketScript = scriptTmpl(Regex(/\]/y).expects("]"));
// Sticky regex with two capture groups: up to 50 non-newline characters
// immediately before the match position (captured through a lookbehind), and
// up to 50 non-newline characters starting at the match position.
const ERROR_CONTEXT = /(?<=([^\n]{0,50}))([^\n]{0,50})/y;