diff --git a/lib/index.mjs b/lib/index.mjs
index 9bf450b..4bcb739 100644
--- a/lib/index.mjs
+++ b/lib/index.mjs
@@ -24,6 +24,7 @@ import {
   validateAuthorSection,
   validateIANAConsiderationsSection,
   validateIntroductionSection,
+  validateReferencesInText,
   validateReferencesSection,
   validateSecurityConsiderationsSection
 } from './modules/sections.mjs'
@@ -134,6 +135,8 @@ export async function checkNits (raw, filename, {
   result.push(...await validateCategory(doc, { mode }))
   progressReport('Validating Version...')
   result.push(...await validateVersion(doc, { mode, offline }))
+  progressReport('Validating references in text...')
+  result.push(...await validateReferencesInText(doc, { mode }))
 
   // Run XML-only validations
   if (doc.type === 'xml') {
diff --git a/lib/modules/sections.mjs b/lib/modules/sections.mjs
index 583b400..a4f6fd8 100644
--- a/lib/modules/sections.mjs
+++ b/lib/modules/sections.mjs
@@ -2,7 +2,7 @@ import { ValidationComment, ValidationError, ValidationWarning } from '../helper
 import { MODES } from '../config/modes.mjs'
 import { XML_SCHEMA } from '../config/schema.mjs'
 import { find, get, has, isPlainObject } from 'lodash-es'
-import { findDescendantWith } from '../helpers/traversal.mjs'
+import { findAllDescendantsWith, findDescendantWith } from '../helpers/traversal.mjs'
 
 /**
  * Validate a document abstract section
@@ -694,3 +694,83 @@ export async function validateIANAConsiderationsSection (doc, { mode = MODES.NOR
 
   return result
 }
+
+/**
+ * Validate that all references declared in the References section are used in the text.
+ *
+ * TXT documents: the RFC numbers and bracketed labels the parser collected inside the
+ * References section are compared with those it collected from the rest of the text.
+ * XML documents: the anchors of the <reference> elements found under <back> are compared
+ * with the targets of every <xref> element in the document.
+ * Each declared reference that is never mentioned produces a REFERENCE_NOT_USED warning,
+ * regardless of the validation mode.
+ *
+ * @param {Object} doc Document to validate
+ * @param {Object} [opts] Additional options
+ * @param {number} [opts.mode=0] Validation mode to use (does not affect the outcome)
+ * @returns {Array} List of warnings if references are not used; empty if valid
+ */
+export async function validateReferencesInText (doc, { mode = MODES.NORMAL } = {}) {
+  const result = []
+  // Wrap a lone value in an array; null/undefined become an empty array
+  const toArray = value => {
+    if (value === undefined || value === null) return []
+    return Array.isArray(value) ? value : [value]
+  }
+  const warnUnused = label => {
+    result.push(new ValidationWarning(
+      'REFERENCE_NOT_USED',
+      `The reference ${label} is listed in the References section but is not mentioned in the document text.`,
+      { ref: 'https://authors.ietf.org/en/required-content#references' }
+    ))
+  }
+
+  switch (doc.type) {
+    case 'txt': {
+      // Fall back to empty lists so a parse result lacking these fields does not throw
+      const {
+        referenceSectionRfc = [],
+        referenceSectionDraftReferences = [],
+        nonReferenceSectionRfc = [],
+        nonReferenceSectionDraftReferences = []
+      } = doc.data?.extractedElements ?? {}
+      const mentionedRfcs = new Set(nonReferenceSectionRfc)
+      const mentionedLabels = new Set(nonReferenceSectionDraftReferences)
+
+      // Only RFC numbers get the "RFC" prefix; other references are reported by their own label
+      referenceSectionRfc
+        .filter(rfcNumber => !mentionedRfcs.has(rfcNumber))
+        .forEach(rfcNumber => warnUnused(`RFC ${rfcNumber}`))
+      referenceSectionDraftReferences
+        .filter(label => !mentionedLabels.has(label))
+        .forEach(label => warnUnused(label))
+
+      break
+    }
+
+    case 'xml': {
+      // <back>/<references> may be missing, <references> may hold <reference> children
+      // directly or nest further <references>, and single children may not be arrays
+      const collectAnchors = node => toArray(node).flatMap(section => [
+        ...toArray(section?.reference).map(ref => ref?._attr?.anchor).filter(Boolean),
+        ...collectAnchors(section?.references)
+      ])
+      const definedReferences = collectAnchors(doc.data?.rfc?.back?.references)
+      if (definedReferences.length === 0) break
+
+      const xrefTargets = value => toArray(value).map(item => item?._attr?.target).filter(Boolean)
+      const usedReferences = new Set(
+        findAllDescendantsWith(doc.data.rfc, (value, key) => key === 'xref' && xrefTargets(value).length > 0)
+          .flatMap(match => xrefTargets(match.value))
+      )
+
+      definedReferences
+        .filter(anchor => !usedReferences.has(anchor))
+        .forEach(anchor => warnUnused(anchor))
+
+      break
+    }
+  }
+
+  return result
+}
diff --git a/lib/parsers/txt.mjs b/lib/parsers/txt.mjs
index 8649844..17c3680 100644
--- a/lib/parsers/txt.mjs
+++ b/lib/parsers/txt.mjs
@@ -8,6 +8,8 @@ const LINE_VALUES_EXTRACT_RE = /^(?.*)\s{2,}(?.*)$/
 const AUTHOR_NAME_RE = /^[a-z]\.\s[a-z]+$/i
 const DATE_RE = /^(?:(?[0-9]{1,2})\s)?(?[a-z]{3,})\s(?[0-9]{4})$/i
 const SECTION_PATTERN = /^\d+\.\s+.+$/
+const RFC_REFERENCE_RE = /\bRFC\s?(\d+)\b|\[RFC(\d+)\]/gi
+const NON_RFC_REFERENCE_RE = /\[(?!RFC\d+)[a-zA-Z0-9-.]+\]/gi
 
 // Author regexps
 const AUTHORS_OR_EDITORS_ADDRESSES_RE = /^(Authors?|Editors?)' Addresses$/i
@@ -124,7 +126,13 @@ export async function parse (rawText, filename) {
       ipv4: [],
       ipv6: [],
       keywords2119: [],
-      boilerplate2119Keywords: []
+      boilerplate2119Keywords: [],
+      obsoletesRfc: [],
+      updatesRfc: [],
+      nonReferenceSectionRfc: [],
+      referenceSectionRfc: [],
+      nonReferenceSectionDraftReferences: [],
+      referenceSectionDraftReferences: []
     },
     boilerplate: {
       rfc2119: BOILERPLATE_PATTERNS.rfc2119.test(normalizedText) || BOILERPLATE_PATTERNS.rfc2119_alt.test(normalizedText),
@@ -140,6 +148,8 @@ export async function parse (rawText, filename) {
   let lineIdx = 0
   let currentSection = null
   let inCodeBlock = false
+  let rfcMatch = null
+  let draftMatch = null
   try {
     const markers = {
       header: { start: 0, end: 0, lastAuthor: 0, closed: false },
@@ -205,6 +215,33 @@ export async function parse (rawText, filename) {
           })
         }
       }
+      // Extract RFC references from every line, keeping those found inside the references section in a separate list
+      while ((rfcMatch = RFC_REFERENCE_RE.exec(trimmedLine)) !== null) {
+        const rfcNumber = rfcMatch[1] || rfcMatch[2]
+        if (currentSection !== 'references') {
+          if (rfcNumber && !data.extractedElements.nonReferenceSectionRfc.includes(rfcNumber)) {
+            data.extractedElements.nonReferenceSectionRfc.push(rfcNumber)
+          }
+        } else {
+          if (rfcNumber && !data.extractedElements.referenceSectionRfc.includes(rfcNumber)) {
+            data.extractedElements.referenceSectionRfc.push(rfcNumber)
+          }
+        }
+      }
+
+      // Detect bracketed non-RFC references (e.g. drafts), split between the two lists the same way
+      while ((draftMatch = NON_RFC_REFERENCE_RE.exec(trimmedLine)) !== null) {
+        const draftName = draftMatch[0]
+        if (currentSection !== 'references') {
+          if (!data.extractedElements.nonReferenceSectionDraftReferences.includes(draftName)) {
+            data.extractedElements.nonReferenceSectionDraftReferences.push(draftName)
+          }
+        } else {
+          if (!data.extractedElements.referenceSectionDraftReferences.includes(draftName)) {
+            data.extractedElements.referenceSectionDraftReferences.push(draftName)
+          }
+        }
+      }
 
       // Check for references
       if (/\[RFC2119\]/i.test(trimmedLine)) {
diff --git a/tests/parser.test.js b/tests/parser.test.js
index 06acf8b..c6a8911 100644
--- a/tests/parser.test.js
+++ b/tests/parser.test.js
@@ -444,3 +444,66 @@ describe('Parsing similar to RFC2119 boilerplate text', () => {
     expect(result.data.boilerplate.similar2119boilerplate).toEqual(true)
   })
 })
+
+describe('Reference is declared, but not used in the document', () => {
+  test('Parsing declared but not used references', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${referenceTXTBlock}
+    `
+
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.referenceSectionRfc).toEqual(expect.arrayContaining(['4360', '5701', '7153', '7432', '2345']))
+    expect(result.data.extractedElements.referenceSectionDraftReferences).toEqual(expect.arrayContaining(['[Lalalala-Refere-Sponsor]', '[I-D.ietf-bess-evpn-igmp-mld-proxy]', '[I-D.ietf-bess-bgp-multicast-controller]', '[I-D.ietf-idr-legacy-rtc]']))
+  })
+
+  test('Parsing references in text (only one reference)', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${referenceTXTBlock}
+    `
+
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.nonReferenceSectionDraftReferences).toContain('[1]')
+    expect(result.data.extractedElements.nonReferenceSectionRfc).toHaveLength(0)
+  })
+
+  test('Parsing references in text (multiple references)', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      [RFC255], [RFC256], [RFC257], [RFC258]
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      [I-D.ietf-bess-evpn-igmp-mld-proxy], [I-D.ietf-bess-bgp-multicast-controller], [I-D.ietf-idr-legacy-rtc]
+      ${securityConsiderationsTXTBlock}
+      ${referenceTXTBlock}
+    `
+
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.nonReferenceSectionDraftReferences).toEqual(expect.arrayContaining(['[1]', '[I-D.ietf-bess-evpn-igmp-mld-proxy]', '[I-D.ietf-bess-bgp-multicast-controller]', '[I-D.ietf-idr-legacy-rtc]']))
+    expect(result.data.extractedElements.nonReferenceSectionRfc).toEqual(expect.arrayContaining(['255', '256', '257', '258']))
+  })
+
+  test('Parsing text without reference section', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+    `
+
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.referenceSectionRfc).toHaveLength(0)
+    expect(result.data.extractedElements.referenceSectionDraftReferences).toHaveLength(0)
+  })
+})
diff --git a/tests/sections.test.js b/tests/sections.test.js
index ef50ed1..305c4a0 100644
--- a/tests/sections.test.js
+++ b/tests/sections.test.js
@@ -7,7 +7,8 @@ import {
   validateSecurityConsiderationsSection,
   validateAuthorSection,
   validateReferencesSection,
-  validateIANAConsiderationsSection
+  validateIANAConsiderationsSection,
+  validateReferencesInText
 } from '../lib/modules/sections.mjs'
 import { baseTXTDoc, baseXMLDoc } from './fixtures/base-doc.mjs'
 import { cloneDeep, set, times } from 'lodash-es'
@@ -886,3 +887,178 @@ describe('document should have a valid IANA considerations section', () => {
     })
   })
 })
+
+describe('validateReferencesInText', () => {
+  describe('TXT documents', () => {
+    test('should return warnings for unused references in a TXT document', async () => {
+      const txtDoc = {
+        type: 'txt',
+        data: {
+          extractedElements: {
+            referenceSectionRfc: ['2119', '8174', '1234'],
+            referenceSectionDraftReferences: ['draft-ietf-abc-01'],
+            nonReferenceSectionRfc: ['2119'],
+            nonReferenceSectionDraftReferences: []
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(txtDoc)
+
+      expect(result).toEqual([
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC 8174 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ),
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC 1234 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ),
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference draft-ietf-abc-01 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        )
+      ])
+    })
+
+    test('should return an empty array if all references are used in a TXT document', async () => {
+      const txtDoc = {
+        type: 'txt',
+        data: {
+          extractedElements: {
+            referenceSectionRfc: ['2119'],
+            referenceSectionDraftReferences: ['draft-ietf-abc-01'],
+            nonReferenceSectionRfc: ['2119'],
+            nonReferenceSectionDraftReferences: ['draft-ietf-abc-01']
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(txtDoc)
+
+      expect(result).toEqual([])
+    })
+
+    test('should handle empty references in a TXT document', async () => {
+      const txtDoc = {
+        type: 'txt',
+        data: {
+          extractedElements: {
+            referenceSectionRfc: [],
+            referenceSectionDraftReferences: [],
+            nonReferenceSectionRfc: [],
+            nonReferenceSectionDraftReferences: []
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(txtDoc)
+
+      expect(result).toEqual([])
+    })
+  })
+
+  describe('XML documents', () => {
+    test('should return warnings for unused references in an XML document', async () => {
+      const xmlDoc = {
+        type: 'xml',
+        data: {
+          rfc: {
+            back: {
+              references: {
+                references: [
+                  {
+                    reference: [
+                      { _attr: { anchor: 'RFC2119' } },
+                      { _attr: { anchor: 'RFC8174' } },
+                      { _attr: { anchor: 'RFC1234' } }
+                    ]
+                  }
+                ]
+              }
+            },
+            xref: [
+              { _attr: { target: 'RFC2119' } }
+            ]
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(xmlDoc)
+
+      expect(result).toEqual([
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC8174 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ),
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC1234 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        )
+      ])
+    })
+
+    test('should return an empty array if all references are used in an XML document', async () => {
+      const xmlDoc = {
+        type: 'xml',
+        data: {
+          rfc: {
+            back: {
+              references: {
+                references: [
+                  {
+                    reference: [
+                      { _attr: { anchor: 'RFC2119' } },
+                      { _attr: { anchor: 'RFC8174' } }
+                    ]
+                  }
+                ]
+              }
+            },
+            xref: [
+              { _attr: { target: 'RFC2119' } },
+              { _attr: { target: 'RFC8174' } }
+            ]
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(xmlDoc)
+
+      expect(result).toEqual([])
+    })
+
+    test('should handle empty references in an XML document', async () => {
+      const xmlDoc = {
+        type: 'xml',
+        data: {
+          rfc: {
+            back: {
+              references: {
+                references: []
+              }
+            },
+            xref: []
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(xmlDoc)
+
+      expect(result).toEqual([])
+    })
+
+    test('should return an empty array if an XML document has no References section', async () => {
+      const xmlDoc = { type: 'xml', data: { rfc: {} } }
+
+      const result = await validateReferencesInText(xmlDoc)
+
+      expect(result).toEqual([])
+    })
+  })
+})