From 308f586dadab9da005185461918e820cad35d9ba Mon Sep 17 00:00:00 2001 From: Tangent Wantwight Date: Sun, 6 Aug 2023 23:18:13 -0400 Subject: [PATCH] Add concept of "enchanted" word An enchanted word has provenance of appearing directly in source code as-written, not the result of interpolations, quotes, or escapes. --- src/__snapshots__/notcl.test.ts.snap | 52 ++++++++++++++-------------- src/notcl.test.ts | 32 ++++++++++------- src/notcl.ts | 21 +++++++---- src/words.ts | 40 +++++++++++++++++++++ 4 files changed, 99 insertions(+), 46 deletions(-) create mode 100644 src/words.ts diff --git a/src/__snapshots__/notcl.test.ts.snap b/src/__snapshots__/notcl.test.ts.snap index d26de58..78822d7 100644 --- a/src/__snapshots__/notcl.test.ts.snap +++ b/src/__snapshots__/notcl.test.ts.snap @@ -6,7 +6,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ [ { - "text": "h1", + "enchanted": "h1", }, { "text": ""Hello,", @@ -17,13 +17,13 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` ], [ { - "text": "para", + "enchanted": "para", }, { "text": "[2", }, { - "text": "+", + "enchanted": "+", }, { "text": "2]", @@ -31,7 +31,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` ], [ { - "text": "block", + "enchanted": "block", }, { "text": " @@ -41,10 +41,10 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` ], [ { - "text": "block", + "enchanted": "block", }, { - "text": "-red", + "enchanted": "-red", }, { "text": ""Beware!"", @@ -52,61 +52,61 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` ], [ { - "text": "para", + "enchanted": "para", }, { "text": ""All", }, { - "text": "text", + "enchanted": "text", }, { - "text": "should", + "enchanted": "should", }, { - "text": "be", + "enchanted": "be", }, { - "text": "quoted,", + "enchanted": "quoted,", }, { - "text": "it's", + "enchanted": "it's", }, { - "text": "clearer", + "enchanted": "clearer", }, { - "text": "that", + "enchanted": "that", }, { - "text": "way.", + "enchanted": "way.", }, { - "text": "&", + "enchanted": "&", }, { - "text": "blockquotes", + "enchanted": "blockquotes", }, { - "text": "already", + "enchanted": "already", }, { - "text": "should", + "enchanted": "should", }, { - "text": "contain", + "enchanted": "contain", }, { - "text": "paragraphs.", + "enchanted": "paragraphs.", }, { - "text": "(maybe", + "enchanted": "(maybe", }, { - "text": "normalize", + "enchanted": "normalize", }, { - "text": "nested", + "enchanted": "nested", }, { "text": "paragraphs)"", @@ -114,7 +114,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` ], [ { - "text": "block", + "enchanted": "block", }, { "text": " @@ -138,7 +138,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` ], [ { - "text": "para", + "enchanted": "para", }, { "text": " diff --git a/src/notcl.test.ts b/src/notcl.test.ts index 23007dc..4c63eca 100644 --- a/src/notcl.test.ts +++ b/src/notcl.test.ts @@ -6,61 +6,67 @@ describe("Parsing Notcl", () => { expect(parse("")).toEqual([true, []])); it("can parse a one-word command", () => - expect(parse("a")).toEqual([true, [[{ text: "a" }]]])); + expect(parse("a")).toEqual([true, [[{ enchanted: "a" }]]])); it("can parse a multi-word command", () => expect(parse("a b c")).toEqual([ true, - [[{ text: "a" }, { text: "b" }, { text: "c" }]], + [[{ enchanted: "a" }, { enchanted: "b" }, { enchanted: "c" }]], ])); it("accepts newlines as command separators", () => expect(parse("a\nb")).toEqual([ true, - [[{ text: "a" }], [{ text: "b" }]], + [[{ enchanted: "a" }], [{ enchanted: "b" }]], ])); it("does not split commands on folded newlines", () => expect( parse(String.raw`a\ b`) - ).toEqual([true, [[{ text: "a" }, { text: "b" }]]])); + ).toEqual([true, [[{ enchanted: "a" }, { enchanted: "b" }]]])); it("does split words on folded newlines", () => expect( parse(String.raw`a\ b`) - ).toEqual([true, [[{ text: "a" }, { text: "b" }]]])); + ).toEqual([true, [[{ enchanted: "a" }, { enchanted: "b" }]]])); it("does split commands on newlines with escaped backslashes", () => expect( parse(String.raw`a\\ b`) - ).toEqual([true, [[{ text: "a\\\\" }], [{ text: "b" }]]])); + ).toEqual([true, [[{ text: "a\\\\" }], [{ enchanted: "b" }]]])); it("does not split commands on folded newlines with escaped backslashes", () => expect( parse(String.raw`a\\\ b`) - ).toEqual([true, [[{ text: "a\\\\" }, { text: "b" }]]])); + ).toEqual([true, [[{ text: "a\\\\" }, { enchanted: "b" }]]])); it("accepts semicolons as command separators", () => - expect(parse("a;b")).toEqual([true, [[{ text: "a" }], [{ text: "b" }]]])); + expect(parse("a;b")).toEqual([ + true, + [[{ enchanted: "a" }], [{ enchanted: "b" }]], + ])); it("tolerates, and ignores, empty commands", () => expect(parse("a;;b\n\nc")).toEqual([ true, - [[{ text: "a" }], [{ text: "b" }], [{ text: "c" }]], + [[{ enchanted: "a" }], [{ enchanted: "b" }], [{ enchanted: "c" }]], ])); test.each([[" a"], ["a "], ["a ;"], ["; a"]])( "tolerates whitespace before and after commands {%s}", - (text) => expect(parse(text)).toEqual([true, [[{ text: "a" }]]]) + (text) => expect(parse(text)).toEqual([true, [[{ enchanted: "a" }]]]) ); }); describe("Comments", () => { it("ignores comments", () => expect(parse("#comment")).toEqual([true, []])); it("does not treat # in argument position as a comment", () => - expect(parse("a #1")).toEqual([true, [[{ text: "a" }, { text: "#1" }]]])); + expect(parse("a #1")).toEqual([ + true, + [[{ enchanted: "a" }, { enchanted: "#1" }]], + ])); it("can have commands before a comment", () => - expect(parse("a ;#comment")).toEqual([true, [[{ text: "a" }]]])); + expect(parse("a ;#comment")).toEqual([true, [[{ enchanted: "a" }]]])); it("ignores the whole line after a comment", () => expect(parse("# comment ; not a command")).toEqual([true, []])); @@ -74,7 +80,7 @@ b`) expect( parse(String.raw`#a\\ b`) - ).toEqual([true, [[{ text: "b" }]]])); + ).toEqual([true, [[{ enchanted: "b" }]]])); it("continues the comment through a folded newline with escaped backslashes", () => expect( parse(String.raw`#a\\\ diff --git a/src/notcl.ts b/src/notcl.ts index dae9522..1efe060 100644 --- a/src/notcl.ts +++ b/src/notcl.ts @@ -1,10 +1,12 @@ import { escapeHtml } from "./helpers"; import { AtLeast, Choose, End, Pattern, Regex, Sequence, Use } from "./peg"; +import { + Word as WordType, + TextWord, + EnchantedWord as EnchantedWordType, +} from "./words"; -export type Word = { - text: string; -}; -export type Command = Word[]; +export type Command = WordType[]; export type Script = Command[]; const Comment = Regex(/#[^\n]*/y) @@ -13,8 +15,12 @@ const Comment = Regex(/#[^\n]*/y) const PreWordWhitespace = Regex(/[^\S\n;]+/y).expects("whitespace"); +const EnchantedWord = Regex(/[^\]\[\}\{$\\";\s]+(?=[\s;]|$)/y) + .map(([enchanted]) => ({ enchanted } as EnchantedWordType)) + .expects("ENCHANTED_WORD"); + const BasicWord = Regex(/(?!\{)[^\s;]+/y) - .map(([word]) => ({ text: word })) + .map(([text]) => ({ text } as TextWord)) .expects("BASIC_WORD"); const Brace: Pattern = Sequence( @@ -33,9 +39,10 @@ const Brace: Pattern = Sequence( Regex(/\}/y).expects("}") ).map(([_left, fragments, _right]) => fragments.join("")); -const Word = Choose( +const Word = Choose( + EnchantedWord, BasicWord, - Brace.map((text) => ({ text })) + Brace.map((text) => ({ text } as TextWord)) ); const CommandTerminator = Regex(/[\n;]/y) diff --git a/src/words.ts b/src/words.ts new file mode 100644 index 0000000..cc441da --- /dev/null +++ b/src/words.ts @@ -0,0 +1,40 @@ +/** + * A word whose value is text with provenance- this literal value appeared in the source + * code, and was not the result of any backslash, variable, or command substitutions. + * + * This provides a level of intentionality that commands can use to distinguish switches: + * + * ```tcl + * puts -stderr text ;# -stderr can be interpreted as a flag and is not part of the message to print + * puts "-stderr" text ;# -stderr is not a flag, but is part of the message to print + * puts $var text ;# The value of $var is part of the message to print, even if the value happens to be "-stderr" + * ``` + */ +export type EnchantedWord = { + enchanted: string; +}; + +/** + * A word whose value is plain text, with no special provenance. + */ +export type TextWord = { + text: string; +}; + +/** + * A word whose value is "safe" HTML using allowlisted elements/attributes/styles, + * suitable for inclusion in HTML output with no escaping. + */ +export type HtmlWord = { + html: string; +}; + +/** + * A word whose value needs to be determined by evaluating some combination of variable and command + * substitutions, and concatenating the results with any literal spans. + */ +export type InterpolatedWord = { + pieces: []; +}; + +export type Word = EnchantedWord | TextWord | HtmlWord | InterpolatedWord;