From d57f409a134dfa855767c8ce1e43f98870f4ef1c Mon Sep 17 00:00:00 2001 From: Tangent Wantwight Date: Sat, 18 Nov 2023 18:37:58 -0500 Subject: [PATCH] Add source-position information to bare words --- src/__snapshots__/parser.test.ts.snap | 11 +++ src/parser.test.ts | 126 +++++++++++++++++++------- src/parser.ts | 2 +- src/peg.ts | 8 +- 4 files changed, 113 insertions(+), 34 deletions(-) diff --git a/src/__snapshots__/parser.test.ts.snap b/src/__snapshots__/parser.test.ts.snap index 173dd18..dc0145e 100644 --- a/src/__snapshots__/parser.test.ts.snap +++ b/src/__snapshots__/parser.test.ts.snap @@ -7,6 +7,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "h1", + "pos": 5, }, { "text": "Hello, World!", @@ -15,6 +16,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "para", + "pos": 28, }, { "pieces": [ @@ -23,12 +25,15 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "2", + "pos": 34, }, { "bare": "+", + "pos": 36, }, { "bare": "2", + "pos": 38, }, ], ], @@ -39,6 +44,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "block", + "pos": 45, }, { "text": " @@ -49,9 +55,11 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "block", + "pos": 213, }, { "bare": "-red", + "pos": 219, }, { "text": "Beware!", @@ -60,6 +68,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "para", + "pos": 238, }, { "text": "All text should be quoted, it's clearer that way. & blockquotes already should contain paragraphs. (maybe normalize nested paragraphs)", @@ -68,6 +77,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "block", + "pos": 384, }, { "text": " @@ -92,6 +102,7 @@ exports[`Parsing Notcl Misc Big mess of markup 1`] = ` [ { "bare": "para", + "pos": 651, }, { "text": " diff --git a/src/parser.test.ts b/src/parser.test.ts index 41fc865..0119664 100644 --- a/src/parser.test.ts +++ b/src/parser.test.ts @@ -8,56 +8,97 @@ describe("Parsing Notcl", () => { it("can parse a multi-word command", () => expect(parse("a b c")).toEqual([ true, - [[{ bare: "a" }, { bare: "b" }, { bare: "c" }]], + [ + [ + { bare: "a", pos: 0 }, + { bare: "b", pos: 2 }, + { bare: "c", pos: 5 }, + ], + ], ])); it("accepts newlines as command separators", () => expect(parse("a\nb")).toEqual([ true, - [[{ bare: "a" }], [{ bare: "b" }]], + [[{ bare: "a", pos: 0 }], [{ bare: "b", pos: 2 }]], ])); it("does not split commands on folded newlines", () => expect( parse(String.raw`a\ b`) - ).toEqual([true, [[{ bare: "a" }, { bare: "b" }]]])); + ).toEqual([ + true, + [ + [ + { bare: "a", pos: 0 }, + { bare: "b", pos: 2 }, + ], + ], + ])); it("does split words on folded newlines", () => expect( parse(String.raw`a\ b`) - ).toEqual([true, [[{ bare: "a" }, { bare: "b" }]]])); + ).toEqual([ + true, + [ + [ + { bare: "a", pos: 0 }, + { bare: "b", pos: 2 }, + ], + ], + ])); it("does split commands on newlines with escaped backslashes", () => expect( parse(String.raw`a\\ - b`) - ).toEqual([true, [[{ text: "a\\" }], [{ bare: "b" }]]])); +b`) + ).toEqual([true, [[{ text: "a\\" }], [{ bare: "b", pos: 4 }]]])); it("does not split commands on folded newlines with escaped backslashes", () => expect( parse(String.raw`a\\\ b`) - ).toEqual([true, [[{ text: "a\\" }, { bare: "b" }]]])); + ).toEqual([true, [[{ text: "a\\" }, { bare: "b", pos: 4 }]]])); it("accepts semicolons as command separators", () => - expect(parse("a;b")).toEqual([true, [[{ bare: "a" }], [{ bare: "b" }]]])); + expect(parse("a;b")).toEqual([ + true, + [[{ bare: "a", pos: 0 }], [{ bare: "b", pos: 2 }]], + ])); it("tolerates, and ignores, empty commands", () => expect(parse("a;;b\n\nc")).toEqual([ true, - [[{ bare: "a" }], [{ bare: "b" }], [{ bare: "c" }]], + [ + [{ bare: "a", pos: 0 }], + [{ bare: "b", pos: 3 }], + [{ bare: "c", pos: 6 }], + ], ])); - test.each([[" a"], ["a "], ["a ;"], ["; a"]])( - "tolerates whitespace before and after commands {%s}", - (text) => expect(parse(text)).toEqual([true, [[{ bare: "a" }]]]) + test.each([ + [" a", 1], + ["a ", 0], + ["a ;", 0], + ["; a", 2], + ])("tolerates whitespace before and after commands {%s}", (text, pos) => + expect(parse(text)).toEqual([true, [[{ bare: "a", pos }]]]) ); }); describe("Comments", () => { it("ignores comments", () => expect(parse("#comment")).toEqual([true, []])); it("does not treat # in argument position as a comment", () => - expect(parse("a #1")).toEqual([true, [[{ bare: "a" }, { bare: "#1" }]]])); + expect(parse("a #1")).toEqual([ + true, + [ + [ + { bare: "a", pos: 0 }, + { bare: "#1", pos: 2 }, + ], + ], + ])); it("can have commands before a comment", () => - expect(parse("a ;#comment")).toEqual([true, [[{ bare: "a" }]]])); + expect(parse("a ;#comment")).toEqual([true, [[{ bare: "a", pos: 0 }]]])); it("ignores the whole line after a comment", () => expect(parse("# comment ; not a command")).toEqual([true, []])); @@ -70,8 +111,8 @@ b`) it("does not continue the comment through a newline with escaped backslashes", () => expect( parse(String.raw`#a\\ - b`) - ).toEqual([true, [[{ bare: "b" }]]])); +b`) + ).toEqual([true, [[{ bare: "b", pos: 5 }]]])); it("continues the comment through a folded newline with escaped backslashes", () => expect( parse(String.raw`#a\\\ @@ -81,9 +122,12 @@ b`) describe("interpolated words", () => { it("can parse a simple word", () => - expect(parse("a")).toEqual([true, [[{ bare: "a" }]]])); + expect(parse("a")).toEqual([true, [[{ bare: "a", pos: 0 }]]])); it("can parse a word with non-special punctuation", () => - expect(parse("-switch")).toEqual([true, [[{ bare: "-switch" }]]])); + expect(parse("-switch")).toEqual([ + true, + [[{ bare: "-switch", pos: 0 }]], + ])); it("accepts empty quotes", () => expect(parse('""')).toEqual([true, [[{ text: "" }]]])); @@ -104,9 +148,9 @@ b`) expect(parse("a\\ b")).toEqual([true, [[{ text: "a b" }]]])); it("treats a non-leading quote as a plain character", () => - expect(parse('a"')).toEqual([true, [[{ bare: 'a"' }]]])); + expect(parse('a"')).toEqual([true, [[{ bare: 'a"', pos: 0 }]]])); it("treats a non-leading brace as a plain character", () => - expect(parse("a{")).toEqual([true, [[{ bare: "a{" }]]])); + expect(parse("a{")).toEqual([true, [[{ bare: "a{", pos: 0 }]]])); it("treats an escaped quote as a plain character", () => expect(parse('\\"')).toEqual([true, [[{ text: '"' }]]])); it("treats an escaped brace as a plain character", () => @@ -127,7 +171,7 @@ b`) it("can parse one-word command interpolations", () => expect(parse("[a]")).toEqual([ true, - [[{ pieces: [{ script: [[{ bare: "a" }]] }] }]], + [[{ pieces: [{ script: [[{ bare: "a", pos: 1 }]] }] }]], ])); it("can parse multi-word command interpolations", () => expect(parse("[a b c]")).toEqual([ @@ -137,7 +181,13 @@ b`) { pieces: [ { - script: [[{ bare: "a" }, { bare: "b" }, { bare: "c" }]], + script: [ + [ + { bare: "a", pos: 1 }, + { bare: "b", pos: 3 }, + { bare: "c", pos: 5 }, + ], + ], }, ], }, @@ -152,7 +202,13 @@ b`) { pieces: [ { - script: [[{ bare: "a" }], [{ bare: "b" }, { bare: "c" }]], + script: [ + [{ bare: "a", pos: 1 }], + [ + { bare: "b", pos: 4 }, + { bare: "c", pos: 6 }, + ], + ], }, ], }, @@ -167,7 +223,9 @@ b`) { pieces: [ { - script: [[{ pieces: [{ script: [[{ bare: "a" }]] }] }]], + script: [ + [{ pieces: [{ script: [[{ bare: "a", pos: 2 }]] }] }], + ], }, ], }, @@ -181,7 +239,10 @@ b`) [ [ { - pieces: [{ script: [[{ bare: "a" }]] }, { bare: "b" }], + pieces: [ + { script: [[{ bare: "a", pos: 1 }]] }, + { bare: "b", pos: 3 }, + ], }, ], ], @@ -193,9 +254,9 @@ b`) [ { pieces: [ - { bare: "a" }, - { script: [[{ bare: "b" }]] }, - { bare: "c" }, + { bare: "a", pos: 0 }, + { script: [[{ bare: "b", pos: 2 }]] }, + { bare: "c", pos: 4 }, ], }, ], @@ -207,7 +268,10 @@ b`) [ [ { - pieces: [{ bare: "a" }, { script: [[{ bare: "b" }]] }], + pieces: [ + { bare: "a", pos: 0 }, + { script: [[{ bare: "b", pos: 2 }]] }, + ], }, ], ], @@ -219,8 +283,8 @@ b`) [ { pieces: [ - { script: [[{ bare: "a" }]] }, - { script: [[{ bare: "b" }]] }, + { script: [[{ bare: "a", pos: 1 }]] }, + { script: [[{ bare: "b", pos: 4 }]] }, ], }, ], diff --git a/src/parser.ts b/src/parser.ts index 67c1631..2c01a68 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -36,7 +36,7 @@ function bareWordTmpl(charRegex: RegExp) { Bracket, Regex(charRegex) .expects("CHAR") - .map(([text]) => ({ bare: text })) + .map(([text], index) => ({ bare: text, pos: index })) ) ) ).map(([, pieces]) => SimplifyWord(pieces)); diff --git a/src/peg.ts b/src/peg.ts index 0bc87f5..53b002a 100644 --- a/src/peg.ts +++ b/src/peg.ts @@ -13,10 +13,14 @@ export class Pattern { * * @param map - Mapping function */ - public map(map: (value: T) => U): Pattern { + public map(map: (value: T, index: number) => U): Pattern { return new Pattern((source, index) => { const [value, furthest, expected] = this.match(source, index); - return [value ? [map(value[0]), value[1]] : null, furthest, expected]; + return [ + value ? [map(value[0], index), value[1]] : null, + furthest, + expected, + ]; }, this.expectLabel); }