From 67ef250460e1b1567c2b2be1071f8bfedbf16f14 Mon Sep 17 00:00:00 2001 From: lieser Date: Wed, 1 Jan 2025 22:39:17 +0100 Subject: [PATCH] Support comments inside comments up to a recursion of 3 (#466) --- CHANGELOG.md | 1 + modules/rfcParser.mjs.js | 11 ++++++++--- test/unittest/arhParserSpec.mjs.js | 21 ++++++++++++++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 961776d0..eaf0d157 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file. ### Fixes - Fixed potential parsing error when extracting the received time from the last Received header (#455). +- Parsing now supports comments nested inside comments, up to a nesting depth of 3 (#466). ### Other diff --git a/modules/rfcParser.mjs.js b/modules/rfcParser.mjs.js index 580c05ae..e8699edb 100644 --- a/modules/rfcParser.mjs.js +++ b/modules/rfcParser.mjs.js @@ -1,7 +1,7 @@ /** * RegExp pattern for ABNF definitions in various RFCs. * - * Copyright (c) 2020-2023 Philippe Lieser + * Copyright (c) 2020-2025 Philippe Lieser * * This software is licensed under the terms of the MIT License. * @@ -43,8 +43,13 @@ export default class RfcParser { static get FWS_op() { return `${this.FWS}?`; } // Note: this is incomplete (obs-ctext is missing) static get ctext() { return "[!-'*-[\\]-~]"; } - // Note: this is incomplete (comment is missing) - static get ccontent() { return `(?:${this.ctext}|${this.quoted_pair})`; } + // Note: There is a recursion in ccontent/comment, which is not supported by the RegExp in JavaScript. + // We currently unroll it to support a depth of up to 3 comments. 
+ static get ccontent_2() { return `(?:${this.ctext}|${this.quoted_pair})`; } + static get comment_2() { return `\\((?:${this.FWS_op}${this.ccontent_2})*${this.FWS_op}\\)`; } + static get ccontent_1() { return `(?:${this.ctext}|${this.quoted_pair}|${this.comment_2})`; } + static get comment_1() { return `\\((?:${this.FWS_op}${this.ccontent_1})*${this.FWS_op}\\)`; } + static get ccontent() { return `(?:${this.ctext}|${this.quoted_pair}|${this.comment_1})`; } static get comment() { return `\\((?:${this.FWS_op}${this.ccontent})*${this.FWS_op}\\)`; } static get CFWS() { return `(?:(?:(?:${this.FWS_op}${this.comment})+${this.FWS_op})|${this.FWS})`; } // Note: helper only, not part of the RFC diff --git a/test/unittest/arhParserSpec.mjs.js b/test/unittest/arhParserSpec.mjs.js index f1d0003f..29aa0fa0 100644 --- a/test/unittest/arhParserSpec.mjs.js +++ b/test/unittest/arhParserSpec.mjs.js @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2023 Philippe Lieser + * Copyright (c) 2020-2023;2025 Philippe Lieser * * This software is licensed under the terms of the MIT License. 
* @@ -167,6 +167,15 @@ describe("ARH Parser [unittest]", function () { expect(res.resinfo[0]?.method).to.be.equal("unknown"); expect(res.resinfo[0]?.result).to.be.equal("foo"); }); + it("Comments in comments", function () { + const res = ArhParser.parse( + "Authentication-Results: example.com;\r\n" + + " dkim=pass (good (comment (another)) signature(s)) header.d=example.com\r\n"); + expect(res.authserv_id).to.be.equal("example.com"); + expect(res.resinfo.length).to.be.equal(1); + expect(res.resinfo[0]?.method).to.be.equal("dkim"); + expect(res.resinfo[0]?.result).to.be.equal("pass"); + }); }); describe("Relaxed parsing", function () { it("Trailing ;", function () { @@ -235,6 +244,16 @@ describe("ARH Parser [unittest]", function () { "Authentication-Results: example.com; dmarc=foo\r\n" )).to.throw(); }); + it("Comments with mismatching number of brackets", function () { + expect(() => ArhParser.parse( + "Authentication-Results: example.com;\r\n" + + " dkim=pass (good (comment (another))) signature(s)) header.d=example.com\r\n" + )).to.throw(); + expect(() => ArhParser.parse( + "Authentication-Results: example.com;\r\n" + + " dkim=pass ((good (comment (another)) signature(s)) header.d=example.com\r\n" + )).to.throw(); + }); }); describe("DKIM results", function () { it("AUID with local part", function () {