From f34522cb1cd312812d8f36bc2917907b931629a8 Mon Sep 17 00:00:00 2001 From: Dmutre <104783173+Dmutre@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:17:50 +0100 Subject: [PATCH 1/3] feat: a reference is declared, but not used in the document - added validation for unused references for txt and xml format - modified validator - added tool function for extracting from xml - added new validation function --- lib/helpers/utils.mjs | 30 +++++++ lib/index.mjs | 3 + lib/modules/sections.mjs | 66 +++++++++++++++ lib/parsers/txt.mjs | 39 ++++++++- tests/sections.test.js | 170 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 306 insertions(+), 2 deletions(-) create mode 100644 lib/helpers/utils.mjs diff --git a/lib/helpers/utils.mjs b/lib/helpers/utils.mjs new file mode 100644 index 0000000..4fbada9 --- /dev/null +++ b/lib/helpers/utils.mjs @@ -0,0 +1,30 @@ +/** + * Recursively extract all values of a specific tag and attribute from an XML document. + * + * This function traverses the XML document to find all elements matching the specified + * tag name and extracts the values of the specified attribute. + * + * @param {Object} node The root node of the XML document to traverse. + * @param {string} tagName The tag name to search for. + * @param {string} attributeName The attribute to extract values from. + * @param {Array} [extractedValues=[]] An array to accumulate found attribute values. + * @returns {Array} An array of attribute values from matching tags. + */ +export function extractRecursiveByTagAndAttribute (node, tagName, attributeName, extractedValues = []) { + if (node[tagName]) { + const tags = Array.isArray(node[tagName]) ? 
node[tagName] : [node[tagName]] + tags.forEach(tag => { + if (tag._attr && tag._attr[attributeName]) { + extractedValues.push(tag._attr[attributeName]) + } + }) + } + + Object.keys(node).forEach(key => { + if (typeof node[key] === 'object') { + extractRecursiveByTagAndAttribute(node[key], tagName, attributeName, extractedValues) + } + }) + + return extractedValues +} diff --git a/lib/index.mjs b/lib/index.mjs index 9bf450b..4bcb739 100644 --- a/lib/index.mjs +++ b/lib/index.mjs @@ -24,6 +24,7 @@ import { validateAuthorSection, validateIANAConsiderationsSection, validateIntroductionSection, + validateReferencesInText, validateReferencesSection, validateSecurityConsiderationsSection } from './modules/sections.mjs' @@ -134,6 +135,8 @@ export async function checkNits (raw, filename, { result.push(...await validateCategory(doc, { mode })) progressReport('Validating Version...') result.push(...await validateVersion(doc, { mode, offline })) + progressReport('Validating references in text...') + result.push(...await validateReferencesInText(doc, { mode })) // Run XML-only validations if (doc.type === 'xml') { diff --git a/lib/modules/sections.mjs b/lib/modules/sections.mjs index 583b400..ebe6587 100644 --- a/lib/modules/sections.mjs +++ b/lib/modules/sections.mjs @@ -3,6 +3,7 @@ import { MODES } from '../config/modes.mjs' import { XML_SCHEMA } from '../config/schema.mjs' import { find, get, has, isPlainObject } from 'lodash-es' import { findDescendantWith } from '../helpers/traversal.mjs' +import { extractRecursiveByTagAndAttribute } from '../helpers/utils.mjs' /** * Validate a document abstract section @@ -694,3 +695,68 @@ export async function validateIANAConsiderationsSection (doc, { mode = MODES.NOR return result } + +/** + * Validate that all references declared in the References section are used in the text. + * + * This function checks whether all references listed in the References section are actually + * mentioned in the document's text. 
If a reference is listed but not used, it generates + * a warning regardless of the validation mode. + * + * @param {Object} doc Document to validate + * @param {Object} [opts] Additional options + * @param {number} [opts.mode=0] Validation mode to use + * @returns {Array} List of warnings if references are not used; empty if valid + */ +export async function validateReferencesInText (doc, { mode = MODES.NORMAL } = {}) { + const result = [] + + switch (doc.type) { + case 'txt': { + const declaredReferences = [...doc.data.extractedElements.referenceSectionRfc, ...doc.data.extractedElements.referenceSectionDraftReferences] + const mentionedReferences = [...doc.data.extractedElements.nonReferenceSectionRfc, ...doc.data.extractedElements.nonReferenceSectionDraftReferences] + + // Find references declared but not used + const unusedReferences = declaredReferences.filter(ref => !mentionedReferences.includes(ref)) + + unusedReferences.forEach(ref => { + result.push(new ValidationWarning( + 'REFERENCE_NOT_USED', + `The reference RFC ${ref} is listed in the References section but is not mentioned in the document text.`, + { ref: 'https://authors.ietf.org/en/required-content#references' } + )) + }) + + break + } + + case 'xml': { + const referencesSections = doc.data.rfc.back.references.references + const definedReferences = [] + referencesSections.forEach(section => { + if (section.reference && Array.isArray(section.reference)) { + section.reference.forEach(ref => { + if (ref._attr && ref._attr.anchor) { + definedReferences.push(ref._attr.anchor) + } + }) + } + }) + const usedReferences = extractRecursiveByTagAndAttribute(doc.data.rfc, 'xref', 'target') + + const unusedReferences = definedReferences.filter(ref => !usedReferences.includes(ref)) + + unusedReferences.forEach(ref => { + result.push(new ValidationWarning( + 'REFERENCE_NOT_USED', + `The reference ${ref} is listed in the References section but is not mentioned in the document text.`, + { ref: 
'https://authors.ietf.org/en/required-content#references' } + )) + }) + + break + } + } + + return result +} diff --git a/lib/parsers/txt.mjs b/lib/parsers/txt.mjs index 8649844..17c3680 100644 --- a/lib/parsers/txt.mjs +++ b/lib/parsers/txt.mjs @@ -8,6 +8,8 @@ const LINE_VALUES_EXTRACT_RE = /^(?<left>.*)\s{2,}(?<right>.*)$/ const AUTHOR_NAME_RE = /^[a-z]\.\s[a-z]+$/i const DATE_RE = /^(?:(?<day>[0-9]{1,2})\s)?(?<month>[a-z]{3,})\s(?<year>[0-9]{4})$/i const SECTION_PATTERN = /^\d+\.\s+.+$/ +const RFC_REFERENCE_RE = /\bRFC\s?(\d+)\b|\[RFC(\d+)\]/gi +const NON_RFC_REFERENCE_RE = /\[(?!RFC\d+)[a-zA-Z0-9-.]+\]/gi // Author regexps const AUTHORS_OR_EDITORS_ADDRESSES_RE = /^(Authors?|Editors?)' Addresses$/i @@ -124,7 +126,13 @@ export async function parse (rawText, filename) { ipv4: [], ipv6: [], keywords2119: [], - boilerplate2119Keywords: [] + boilerplate2119Keywords: [], + obsoletesRfc: [], + updatesRfc: [], + nonReferenceSectionRfc: [], + referenceSectionRfc: [], + nonReferenceSectionDraftReferences: [], + referenceSectionDraftReferences: [] }, boilerplate: { rfc2119: BOILERPLATE_PATTERNS.rfc2119.test(normalizedText) || BOILERPLATE_PATTERNS.rfc2119_alt.test(normalizedText), @@ -140,6 +148,8 @@ export async function parse (rawText, filename) { let lineIdx = 0 let currentSection = null let inCodeBlock = false + let rfcMatch = null + let draftMatch = null try { const markers = { header: { start: 0, end: 0, lastAuthor: 0, closed: false }, @@ -205,6 +215,33 @@ export async function parse (rawText, filename) { }) } } + // Extract RFC references from the whole text except the reference section + while ((rfcMatch = RFC_REFERENCE_RE.exec(trimmedLine)) !== null) { + const rfcNumber = rfcMatch[1] || rfcMatch[2] + if (currentSection !== 'references') { + if (rfcNumber && !data.extractedElements.nonReferenceSectionRfc.includes(rfcNumber)) { + data.extractedElements.nonReferenceSectionRfc.push(rfcNumber) + } + } else { + if (rfcNumber && !data.extractedElements.referenceSectionRfc.includes(rfcNumber)) { +
data.extractedElements.referenceSectionRfc.push(rfcNumber) + } + } + } + + // Detect draft references + while ((draftMatch = NON_RFC_REFERENCE_RE.exec(trimmedLine)) !== null) { + const draftName = draftMatch[0] + if (currentSection !== 'references') { + if (!data.extractedElements.nonReferenceSectionDraftReferences.includes(draftName)) { + data.extractedElements.nonReferenceSectionDraftReferences.push(draftName) + } + } else { + if (!data.extractedElements.referenceSectionDraftReferences.includes(draftName)) { + data.extractedElements.referenceSectionDraftReferences.push(draftName) + } + } + } // Check for references if (/\[RFC2119\]/i.test(trimmedLine)) { diff --git a/tests/sections.test.js b/tests/sections.test.js index ef50ed1..305c4a0 100644 --- a/tests/sections.test.js +++ b/tests/sections.test.js @@ -7,7 +7,8 @@ import { validateSecurityConsiderationsSection, validateAuthorSection, validateReferencesSection, - validateIANAConsiderationsSection + validateIANAConsiderationsSection, + validateReferencesInText } from '../lib/modules/sections.mjs' import { baseTXTDoc, baseXMLDoc } from './fixtures/base-doc.mjs' import { cloneDeep, set, times } from 'lodash-es' @@ -886,3 +887,170 @@ describe('document should have a valid IANA considerations section', () => { }) }) }) + +describe('validateReferencesInText', () => { + describe('TXT documents', () => { + test('should return warnings for unused references in a TXT document', async () => { + const txtDoc = { + type: 'txt', + data: { + extractedElements: { + referenceSectionRfc: ['2119', '8174', '1234'], + referenceSectionDraftReferences: ['draft-ietf-abc-01'], + nonReferenceSectionRfc: ['2119'], + nonReferenceSectionDraftReferences: [] + } + } + } + + const result = await validateReferencesInText(txtDoc) + + expect(result).toEqual([ + new ValidationWarning( + 'REFERENCE_NOT_USED', + 'The reference RFC 8174 is listed in the References section but is not mentioned in the document text.', + { ref: 
'https://authors.ietf.org/en/required-content#references' } + ), + new ValidationWarning( + 'REFERENCE_NOT_USED', + 'The reference RFC 1234 is listed in the References section but is not mentioned in the document text.', + { ref: 'https://authors.ietf.org/en/required-content#references' } + ), + new ValidationWarning( + 'REFERENCE_NOT_USED', + 'The reference RFC draft-ietf-abc-01 is listed in the References section but is not mentioned in the document text.', + { ref: 'https://authors.ietf.org/en/required-content#references' } + ) + ]) + }) + + test('should return an empty array if all references are used in a TXT document', async () => { + const txtDoc = { + type: 'txt', + data: { + extractedElements: { + referenceSectionRfc: ['2119'], + referenceSectionDraftReferences: ['draft-ietf-abc-01'], + nonReferenceSectionRfc: ['2119'], + nonReferenceSectionDraftReferences: ['draft-ietf-abc-01'] + } + } + } + + const result = await validateReferencesInText(txtDoc) + + expect(result).toEqual([]) + }) + + test('should handle empty references in a TXT document', async () => { + const txtDoc = { + type: 'txt', + data: { + extractedElements: { + referenceSectionRfc: [], + referenceSectionDraftReferences: [], + nonReferenceSectionRfc: [], + nonReferenceSectionDraftReferences: [] + } + } + } + + const result = await validateReferencesInText(txtDoc) + + expect(result).toEqual([]) + }) + }) + + describe('XML documents', () => { + test('should return warnings for unused references in an XML document', async () => { + const xmlDoc = { + type: 'xml', + data: { + rfc: { + back: { + references: { + references: [ + { + reference: [ + { _attr: { anchor: 'RFC2119' } }, + { _attr: { anchor: 'RFC8174' } }, + { _attr: { anchor: 'RFC1234' } } + ] + } + ] + } + }, + xref: [ + { _attr: { target: 'RFC2119' } } + ] + } + } + } + + const result = await validateReferencesInText(xmlDoc) + + expect(result).toEqual([ + new ValidationWarning( + 'REFERENCE_NOT_USED', + 'The reference RFC8174 is listed in 
the References section but is not mentioned in the document text.', + { ref: 'https://authors.ietf.org/en/required-content#references' } + ), + new ValidationWarning( + 'REFERENCE_NOT_USED', + 'The reference RFC1234 is listed in the References section but is not mentioned in the document text.', + { ref: 'https://authors.ietf.org/en/required-content#references' } + ) + ]) + }) + + test('should return an empty array if all references are used in an XML document', async () => { + const xmlDoc = { + type: 'xml', + data: { + rfc: { + back: { + references: { + references: [ + { + reference: [ + { _attr: { anchor: 'RFC2119' } }, + { _attr: { anchor: 'RFC8174' } } + ] + } + ] + } + }, + xref: [ + { _attr: { target: 'RFC2119' } }, + { _attr: { target: 'RFC8174' } } + ] + } + } + } + + const result = await validateReferencesInText(xmlDoc) + + expect(result).toEqual([]) + }) + + test('should handle empty references in an XML document', async () => { + const xmlDoc = { + type: 'xml', + data: { + rfc: { + back: { + references: { + references: [] + } + }, + xref: [] + } + } + } + + const result = await validateReferencesInText(xmlDoc) + + expect(result).toEqual([]) + }) + }) +}) From d1ac3fa1f9fd42af203ea4366f940b515b57b829 Mon Sep 17 00:00:00 2001 From: Dmutre <104783173+Dmutre@users.noreply.github.com> Date: Wed, 5 Feb 2025 15:52:59 +0100 Subject: [PATCH 2/3] feat: tests for parser for parsing references --- tests/parser.test.js | 63 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests/parser.test.js b/tests/parser.test.js index 06acf8b..c6a8911 100644 --- a/tests/parser.test.js +++ b/tests/parser.test.js @@ -444,3 +444,66 @@ describe('Parsing similar to RFC2119 boilerplate text', () => { expect(result.data.boilerplate.similar2119boilerplate).toEqual(true) }) }) + +describe('Reference is declared, but not used in the document', () => { + test('Parsing declared but not used references', async () => { + const txt = ` + ${metaTXTBlock} 
+ ${tableOfContentsTXTBlock} + ${abstractWithReferencesTXTBlock} + ${introductionTXTBlock} + ${securityConsiderationsTXTBlock} + ${referenceTXTBlock} + ` + + const result = await parse(txt, 'txt') + expect(result.data.extractedElements.referenceSectionRfc).toContain('4360', '5701', '7153', '7432', '2345') + expect(result.data.extractedElements.referenceSectionDraftReferences).toContain('[Lalalala-Refere-Sponsor]', '[I-D.ietf-bess-evpn-igmp-mld-proxy]', '[I-D.ietf-bess-bgp-multicast-controller]', '[I-D.ietf-idr-legacy-rtc]') + }) + + test('Parsing references in text (only one reference)', async () => { + const txt = ` + ${metaTXTBlock} + ${tableOfContentsTXTBlock} + ${abstractWithReferencesTXTBlock} + ${introductionTXTBlock} + ${securityConsiderationsTXTBlock} + ${referenceTXTBlock} + ` + + const result = await parse(txt, 'txt') + expect(result.data.extractedElements.nonReferenceSectionDraftReferences).toContain('[1]') + expect(result.data.extractedElements.nonReferenceSectionRfc).toHaveLength(0) + }) + + test('Parsing references in text (multiple references)', async () => { + const txt = ` + ${metaTXTBlock} + ${tableOfContentsTXTBlock} + [RFC255], [RFC256], [RFC257], [RFC258] + ${abstractWithReferencesTXTBlock} + ${introductionTXTBlock} + [I-D.ietf-bess-evpn-igmp-mld-proxy], [I-D.ietf-bess-bgp-multicast-controller], [I-D.ietf-idr-legacy-rtc] + ${securityConsiderationsTXTBlock} + ${referenceTXTBlock} + ` + + const result = await parse(txt, 'txt') + expect(result.data.extractedElements.nonReferenceSectionDraftReferences).toContain('[1]', '[I-D.ietf-bess-evpn-igmp-mld-proxy]', '[I-D.ietf-bess-bgp-multicast-controller]', '[I-D.ietf-idr-legacy-rtc]') + expect(result.data.extractedElements.nonReferenceSectionRfc).toContain('255', '256', '257', '258') + }) + + test('Parsing text without reference section', async () => { + const txt = ` + ${metaTXTBlock} + ${tableOfContentsTXTBlock} + ${abstractWithReferencesTXTBlock} + ${introductionTXTBlock} + 
${securityConsiderationsTXTBlock} + ` + + const result = await parse(txt, 'txt') + expect(result.data.extractedElements.referenceSectionRfc).toHaveLength(0) + expect(result.data.extractedElements.referenceSectionDraftReferences).toHaveLength(0) + }) +}) From f11e6021511d538680eb0885cc993fbc405d6af3 Mon Sep 17 00:00:00 2001 From: Dmutre <104783173+Dmutre@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:31:06 +0100 Subject: [PATCH 3/3] feat: rewrote usedreference extracting --- lib/helpers/utils.mjs | 30 ------------------------------ lib/modules/sections.mjs | 17 ++++++++++++++--- 2 files changed, 14 insertions(+), 33 deletions(-) delete mode 100644 lib/helpers/utils.mjs diff --git a/lib/helpers/utils.mjs b/lib/helpers/utils.mjs deleted file mode 100644 index 4fbada9..0000000 --- a/lib/helpers/utils.mjs +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Recursively extract all values of a specific tag and attribute from an XML document. - * - * This function traverses the XML document to find all elements matching the specified - * tag name and extracts the values of the specified attribute. - * - * @param {Object} node The root node of the XML document to traverse. - * @param {string} tagName The tag name to search for. - * @param {string} attributeName The attribute to extract values from. - * @param {Array} [extractedValues=[]] An array to accumulate found attribute values. - * @returns {Array} An array of attribute values from matching tags. - */ -export function extractRecursiveByTagAndAttribute (node, tagName, attributeName, extractedValues = []) { - if (node[tagName]) { - const tags = Array.isArray(node[tagName]) ? 
node[tagName] : [node[tagName]] - tags.forEach(tag => { - if (tag._attr && tag._attr[attributeName]) { - extractedValues.push(tag._attr[attributeName]) - } - }) - } - - Object.keys(node).forEach(key => { - if (typeof node[key] === 'object') { - extractRecursiveByTagAndAttribute(node[key], tagName, attributeName, extractedValues) - } - }) - - return extractedValues -} diff --git a/lib/modules/sections.mjs b/lib/modules/sections.mjs index ebe6587..a4f6fd8 100644 --- a/lib/modules/sections.mjs +++ b/lib/modules/sections.mjs @@ -2,8 +2,7 @@ import { ValidationComment, ValidationError, ValidationWarning } from '../helper import { MODES } from '../config/modes.mjs' import { XML_SCHEMA } from '../config/schema.mjs' import { find, get, has, isPlainObject } from 'lodash-es' -import { findDescendantWith } from '../helpers/traversal.mjs' -import { extractRecursiveByTagAndAttribute } from '../helpers/utils.mjs' +import { findAllDescendantsWith, findDescendantWith } from '../helpers/traversal.mjs' /** * Validate a document abstract section @@ -742,7 +741,19 @@ export async function validateReferencesInText (doc, { mode = MODES.NORMAL } = { }) } }) - const usedReferences = extractRecursiveByTagAndAttribute(doc.data.rfc, 'xref', 'target') + const usedReferences = findAllDescendantsWith(doc.data.rfc, (value, key) => { + if (key !== 'xref') return false + + if (Array.isArray(value)) { + return value.some(item => item._attr?.target) + } + + return value._attr?.target + }).flatMap(match => + Array.isArray(match.value) + ? match.value.map(item => item._attr?.target).filter(Boolean) + : match.value._attr?.target ? [match.value._attr.target] : [] + ) const unusedReferences = definedReferences.filter(ref => !usedReferences.includes(ref))