Add concept of "enchanted" word

An enchanted word carries provenance: its text appeared directly in the source code exactly as written, rather than being the result of interpolation, quoting, or escapes.
This commit is contained in:
Tangent Wantwight 2023-08-06 23:18:13 -04:00
parent 37df46381b
commit 308f586dad
4 changed files with 99 additions and 46 deletions

View file

@ -6,7 +6,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = `
[
[
{
"text": "h1",
"enchanted": "h1",
},
{
"text": ""Hello,",
@ -17,13 +17,13 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = `
],
[
{
"text": "para",
"enchanted": "para",
},
{
"text": "[2",
},
{
"text": "+",
"enchanted": "+",
},
{
"text": "2]",
@ -31,7 +31,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = `
],
[
{
"text": "block",
"enchanted": "block",
},
{
"text": "
@ -41,10 +41,10 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = `
],
[
{
"text": "block",
"enchanted": "block",
},
{
"text": "-red",
"enchanted": "-red",
},
{
"text": ""Beware!"",
@ -52,61 +52,61 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = `
],
[
{
"text": "para",
"enchanted": "para",
},
{
"text": ""All",
},
{
"text": "text",
"enchanted": "text",
},
{
"text": "should",
"enchanted": "should",
},
{
"text": "be",
"enchanted": "be",
},
{
"text": "quoted,",
"enchanted": "quoted,",
},
{
"text": "it's",
"enchanted": "it's",
},
{
"text": "clearer",
"enchanted": "clearer",
},
{
"text": "that",
"enchanted": "that",
},
{
"text": "way.",
"enchanted": "way.",
},
{
"text": "&",
"enchanted": "&",
},
{
"text": "blockquotes",
"enchanted": "blockquotes",
},
{
"text": "already",
"enchanted": "already",
},
{
"text": "should",
"enchanted": "should",
},
{
"text": "contain",
"enchanted": "contain",
},
{
"text": "paragraphs.",
"enchanted": "paragraphs.",
},
{
"text": "(maybe",
"enchanted": "(maybe",
},
{
"text": "normalize",
"enchanted": "normalize",
},
{
"text": "nested",
"enchanted": "nested",
},
{
"text": "paragraphs)"",
@ -114,7 +114,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = `
],
[
{
"text": "block",
"enchanted": "block",
},
{
"text": "
@ -138,7 +138,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = `
],
[
{
"text": "para",
"enchanted": "para",
},
{
"text": "

View file

@ -6,61 +6,67 @@ describe("Parsing Notcl", () => {
expect(parse("")).toEqual([true, []]));
it("can parse a one-word command", () =>
expect(parse("a")).toEqual([true, [[{ text: "a" }]]]));
expect(parse("a")).toEqual([true, [[{ enchanted: "a" }]]]));
it("can parse a multi-word command", () =>
expect(parse("a b c")).toEqual([
true,
[[{ text: "a" }, { text: "b" }, { text: "c" }]],
[[{ enchanted: "a" }, { enchanted: "b" }, { enchanted: "c" }]],
]));
it("accepts newlines as command separators", () =>
expect(parse("a\nb")).toEqual([
true,
[[{ text: "a" }], [{ text: "b" }]],
[[{ enchanted: "a" }], [{ enchanted: "b" }]],
]));
it("does not split commands on folded newlines", () =>
expect(
parse(String.raw`a\
b`)
).toEqual([true, [[{ text: "a" }, { text: "b" }]]]));
).toEqual([true, [[{ enchanted: "a" }, { enchanted: "b" }]]]));
it("does split words on folded newlines", () =>
expect(
parse(String.raw`a\
b`)
).toEqual([true, [[{ text: "a" }, { text: "b" }]]]));
).toEqual([true, [[{ enchanted: "a" }, { enchanted: "b" }]]]));
it("does split commands on newlines with escaped backslashes", () =>
expect(
parse(String.raw`a\\
b`)
).toEqual([true, [[{ text: "a\\\\" }], [{ text: "b" }]]]));
).toEqual([true, [[{ text: "a\\\\" }], [{ enchanted: "b" }]]]));
it("does not split commands on folded newlines with escaped backslashes", () =>
expect(
parse(String.raw`a\\\
b`)
).toEqual([true, [[{ text: "a\\\\" }, { text: "b" }]]]));
).toEqual([true, [[{ text: "a\\\\" }, { enchanted: "b" }]]]));
it("accepts semicolons as command separators", () =>
expect(parse("a;b")).toEqual([true, [[{ text: "a" }], [{ text: "b" }]]]));
expect(parse("a;b")).toEqual([
true,
[[{ enchanted: "a" }], [{ enchanted: "b" }]],
]));
it("tolerates, and ignores, empty commands", () =>
expect(parse("a;;b\n\nc")).toEqual([
true,
[[{ text: "a" }], [{ text: "b" }], [{ text: "c" }]],
[[{ enchanted: "a" }], [{ enchanted: "b" }], [{ enchanted: "c" }]],
]));
test.each([[" a"], ["a "], ["a ;"], ["; a"]])(
"tolerates whitespace before and after commands {%s}",
(text) => expect(parse(text)).toEqual([true, [[{ text: "a" }]]])
(text) => expect(parse(text)).toEqual([true, [[{ enchanted: "a" }]]])
);
});
describe("Comments", () => {
it("ignores comments", () => expect(parse("#comment")).toEqual([true, []]));
it("does not treat # in argument position as a comment", () =>
expect(parse("a #1")).toEqual([true, [[{ text: "a" }, { text: "#1" }]]]));
expect(parse("a #1")).toEqual([
true,
[[{ enchanted: "a" }, { enchanted: "#1" }]],
]));
it("can have commands before a comment", () =>
expect(parse("a ;#comment")).toEqual([true, [[{ text: "a" }]]]));
expect(parse("a ;#comment")).toEqual([true, [[{ enchanted: "a" }]]]));
it("ignores the whole line after a comment", () =>
expect(parse("# comment ; not a command")).toEqual([true, []]));
@ -74,7 +80,7 @@ b`)
expect(
parse(String.raw`#a\\
b`)
).toEqual([true, [[{ text: "b" }]]]));
).toEqual([true, [[{ enchanted: "b" }]]]));
it("continues the comment through a folded newline with escaped backslashes", () =>
expect(
parse(String.raw`#a\\\

View file

@ -1,10 +1,12 @@
import { escapeHtml } from "./helpers";
import { AtLeast, Choose, End, Pattern, Regex, Sequence, Use } from "./peg";
import {
Word as WordType,
TextWord,
EnchantedWord as EnchantedWordType,
} from "./words";
export type Word = {
text: string;
};
export type Command = Word[];
export type Command = WordType[];
export type Script = Command[];
const Comment = Regex(/#[^\n]*/y)
@ -13,8 +15,12 @@ const Comment = Regex(/#[^\n]*/y)
// One or more whitespace characters other than a newline. `[^\S\n;]` is a double negative:
// "not non-whitespace" means whitespace, with `\n` (and `;`) additionally excluded.
// The `y` (sticky) flag anchors the match at the regex's current position.
const PreWordWhitespace = Regex(/[^\S\n;]+/y).expects("whitespace");
// A bare word made only of "plain" characters: one or more characters that are none of
// `]`, `[`, `}`, `{`, `$`, `\`, `"`, `;` or whitespace. The lookahead requires the run to be
// followed by whitespace, `;`, or the end of input, so a word that merely starts with plain
// characters but continues with a special one does not match here.
// The `y` (sticky) flag anchors the match at the regex's current position.
// The first element of the match (presumably the full matched text; confirm against the
// `Regex` helper in ./peg) becomes the `enchanted` value of the resulting word.
const EnchantedWord = Regex(/[^\]\[\}\{$\\";\s]+(?=[\s;]|$)/y)
.map(([enchanted]) => ({ enchanted } as EnchantedWordType))
.expects("ENCHANTED_WORD");
const BasicWord = Regex(/(?!\{)[^\s;]+/y)
.map(([word]) => ({ text: word }))
.map(([text]) => ({ text } as TextWord))
.expects("BASIC_WORD");
const Brace: Pattern<string> = Sequence(
@ -33,9 +39,10 @@ const Brace: Pattern<string> = Sequence(
Regex(/\}/y).expects("}")
).map(([_left, fragments, _right]) => fragments.join(""));
const Word = Choose(
const Word = Choose<WordType>(
EnchantedWord,
BasicWord,
Brace.map((text) => ({ text }))
Brace.map((text) => ({ text } as TextWord))
);
const CommandTerminator = Regex(/[\n;]/y)

40
src/words.ts Normal file
View file

@ -0,0 +1,40 @@
/**
 * A word whose value is text with provenance: this literal value appeared in the source
 * code, and was not the result of any backslash, variable, or command substitutions.
 *
 * This provides a level of intentionality that commands can use to distinguish switches:
 *
 * ```tcl
 * puts -stderr text ;# -stderr can be interpreted as a flag and is not part of the message to print
 * puts "-stderr" text ;# -stderr is not a flag, but is part of the message to print
 * puts $var text ;# The value of $var is part of the message to print, even if the value happens to be "-stderr"
 * ```
 */
export type EnchantedWord = {
// The word's text, exactly as it appeared in the source code.
enchanted: string;
};
/**
 * A word whose value is plain text, with no special provenance.
 *
 * Unlike an enchanted word, nothing is claimed about how the text came to be, so it carries
 * no signal that the author typed this exact value in the source code.
 */
export type TextWord = {
// The word's text value.
text: string;
};
/**
 * A word whose value is "safe" HTML using allowlisted elements/attributes/styles,
 * suitable for inclusion in HTML output with no escaping.
 *
 * NOTE(review): the type itself is just a string wrapper and cannot enforce the allowlist;
 * presumably whoever constructs an HtmlWord is responsible for guaranteeing it. Confirm.
 */
export type HtmlWord = {
// HTML markup intended to be emitted as-is, without further escaping.
html: string;
};
/**
 * A word whose value needs to be determined by evaluating some combination of variable and command
 * substitutions, and concatenating the results with any literal spans.
 *
 * NOTE(review): `pieces` is typed as `[]`, which is the empty tuple type, so as declared it
 * only admits an empty array. Presumably a placeholder until the piece types (literal spans,
 * variable substitutions, command substitutions) are defined. Confirm before relying on it.
 */
export type InterpolatedWord = {
// The parts to evaluate and concatenate; currently restricted to the empty tuple (see note above).
pieces: [];
};
/**
 * Any kind of word. The variants share no common tag field; each one is identified by the
 * single property it declares (`enchanted`, `text`, `html`, or `pieces`).
 */
export type Word = EnchantedWord | TextWord | HtmlWord | InterpolatedWord;