From f34522cb1cd312812d8f36bc2917907b931629a8 Mon Sep 17 00:00:00 2001
From: Dmutre <104783173+Dmutre@users.noreply.github.com>
Date: Wed, 8 Jan 2025 16:17:50 +0100
Subject: [PATCH 1/3] feat: a reference is declared, but not used in the
 document

- added validation for unused references for txt and xml format
- modified validator
- added tool function for extracting from xml
- added new validation function
---
 lib/helpers/utils.mjs    |  30 +++++++
 lib/index.mjs            |   3 +
 lib/modules/sections.mjs |  66 +++++++++++++++
 lib/parsers/txt.mjs      |  39 ++++++++-
 tests/sections.test.js   | 170 ++++++++++++++++++++++++++++++++++++++-
 5 files changed, 306 insertions(+), 2 deletions(-)
 create mode 100644 lib/helpers/utils.mjs
diff --git a/lib/helpers/utils.mjs b/lib/helpers/utils.mjs
new file mode 100644
index 0000000..4fbada9
--- /dev/null
+++ b/lib/helpers/utils.mjs
@@ -0,0 +1,30 @@
+/**
+ * Recursively extract all values of a specific tag and attribute from an XML document.
+ *
+ * This function traverses the XML document to find all elements matching the specified
+ * tag name and extracts the values of the specified attribute.
+ *
+ * @param {Object} node The root node of the XML document to traverse.
+ * @param {string} tagName The tag name to search for.
+ * @param {string} attributeName The attribute to extract values from.
+ * @param {Array<string>} [extractedValues=[]] An array to accumulate found attribute values.
+ * @returns {Array<string>} An array of attribute values from matching tags.
+ */
+export function extractRecursiveByTagAndAttribute (node, tagName, attributeName, extractedValues = []) {
+  if (node[tagName]) {
+    const tags = Array.isArray(node[tagName]) ? node[tagName] : [node[tagName]]
+    tags.forEach(tag => {
+      if (tag._attr && tag._attr[attributeName]) {
+        extractedValues.push(tag._attr[attributeName])
+      }
+    })
+  }
+
+  Object.keys(node).forEach(key => {
+    if (typeof node[key] === 'object') {
+      extractRecursiveByTagAndAttribute(node[key], tagName, attributeName, extractedValues)
+    }
+  })
+
+  return extractedValues
+}
diff --git a/lib/index.mjs b/lib/index.mjs
index 9bf450b..4bcb739 100644
--- a/lib/index.mjs
+++ b/lib/index.mjs
@@ -24,6 +24,7 @@ import {
   validateAuthorSection,
   validateIANAConsiderationsSection,
   validateIntroductionSection,
+  validateReferencesInText,
   validateReferencesSection,
   validateSecurityConsiderationsSection
 } from './modules/sections.mjs'
@@ -134,6 +135,8 @@ export async function checkNits (raw, filename, {
   result.push(...await validateCategory(doc, { mode }))
   progressReport('Validating Version...')
   result.push(...await validateVersion(doc, { mode, offline }))
+  progressReport('Validating references in text...')
+  result.push(...await validateReferencesInText(doc, { mode }))
 
   // Run XML-only validations
   if (doc.type === 'xml') {
diff --git a/lib/modules/sections.mjs b/lib/modules/sections.mjs
index 583b400..ebe6587 100644
--- a/lib/modules/sections.mjs
+++ b/lib/modules/sections.mjs
@@ -3,6 +3,7 @@ import { MODES } from '../config/modes.mjs'
 import { XML_SCHEMA } from '../config/schema.mjs'
 import { find, get, has, isPlainObject } from 'lodash-es'
 import { findDescendantWith } from '../helpers/traversal.mjs'
+import { extractRecursiveByTagAndAttribute } from '../helpers/utils.mjs'
 
 /**
  * Validate a document abstract section
@@ -694,3 +695,68 @@ export async function validateIANAConsiderationsSection (doc, { mode = MODES.NOR
 
   return result
 }
+
+/**
+ * Validate that all references declared in the References section are used in the text.
+ *
+ * Every reference listed in the References section must also be mentioned in the document's
+ * text; each one that is not produces a warning regardless of the validation mode. An XML
+ * document without a references section is treated as declaring no references.
+ *
+ * @param {Object} doc Document to validate
+ * @param {Object} [opts] Additional options
+ * @param {number} [opts.mode=0] Validation mode to use
+ * @returns {Array} List of warnings if references are not used; empty if valid
+ */
+export async function validateReferencesInText (doc, { mode = MODES.NORMAL } = {}) {
+  const result = []
+
+  switch (doc.type) {
+    case 'txt': {
+      const declaredReferences = [...doc.data.extractedElements.referenceSectionRfc, ...doc.data.extractedElements.referenceSectionDraftReferences]
+      const mentionedReferences = [...doc.data.extractedElements.nonReferenceSectionRfc, ...doc.data.extractedElements.nonReferenceSectionDraftReferences]
+
+      // Find references declared but not used
+      const unusedReferences = declaredReferences.filter(ref => !mentionedReferences.includes(ref))
+
+      unusedReferences.forEach(ref => {
+        result.push(new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          `The reference RFC ${ref} is listed in the References section but is not mentioned in the document text.`,
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ))
+      })
+
+      break
+    }
+    case 'xml': {
+      // Tolerate a missing <back>/<references> (or a single, non-array section) instead of throwing
+      const referencesSections = [].concat(get(doc, 'data.rfc.back.references.references') || [])
+      const definedReferences = []
+      referencesSections.forEach(section => {
+        if (section.reference && Array.isArray(section.reference)) {
+          section.reference.forEach(ref => {
+            if (ref._attr && ref._attr.anchor) {
+              definedReferences.push(ref._attr.anchor)
+            }
+          })
+        }
+      })
+      const usedReferences = extractRecursiveByTagAndAttribute(doc.data.rfc, 'xref', 'target')
+
+      const unusedReferences = definedReferences.filter(ref => !usedReferences.includes(ref))
+
+      unusedReferences.forEach(ref => {
+        result.push(new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          `The reference ${ref} is listed in the References section but is not mentioned in the document text.`,
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ))
+      })
+
+      break
+    }
+  }
+
+  return result
+}
diff --git a/lib/parsers/txt.mjs b/lib/parsers/txt.mjs
index 8649844..17c3680 100644
--- a/lib/parsers/txt.mjs
+++ b/lib/parsers/txt.mjs
@@ -8,6 +8,8 @@ const LINE_VALUES_EXTRACT_RE = /^(?<left>.*)\s{2,}(?<right>.*)$/
 const AUTHOR_NAME_RE = /^[a-z]\.\s[a-z]+$/i
 const DATE_RE = /^(?:(?<day>[0-9]{1,2})\s)?(?<month>[a-z]{3,})\s(?<year>[0-9]{4})$/i
 const SECTION_PATTERN = /^\d+\.\s+.+$/
+const RFC_REFERENCE_RE = /\bRFC\s?(\d+)\b|\[RFC(\d+)\]/gi
+const NON_RFC_REFERENCE_RE = /\[(?!RFC\d+)[a-zA-Z0-9-.]+\]/gi
 
 // Author regexps
 const AUTHORS_OR_EDITORS_ADDRESSES_RE = /^(Authors?|Editors?)' Addresses$/i
@@ -124,7 +126,13 @@ export async function parse (rawText, filename) {
       ipv4: [],
       ipv6: [],
       keywords2119: [],
-      boilerplate2119Keywords: []
+      boilerplate2119Keywords: [],
+      obsoletesRfc: [],
+      updatesRfc: [],
+      nonReferenceSectionRfc: [],
+      referenceSectionRfc: [],
+      nonReferenceSectionDraftReferences: [],
+      referenceSectionDraftReferences: []
     },
     boilerplate: {
       rfc2119: BOILERPLATE_PATTERNS.rfc2119.test(normalizedText) || BOILERPLATE_PATTERNS.rfc2119_alt.test(normalizedText),
@@ -140,6 +148,8 @@ export async function parse (rawText, filename) {
   let lineIdx = 0
   let currentSection = null
   let inCodeBlock = false
+  let rfcMatch = null
+  let draftMatch = null
   try {
     const markers = {
       header: { start: 0, end: 0, lastAuthor: 0, closed: false },
@@ -205,6 +215,33 @@ export async function parse (rawText, filename) {
           })
         }
       }
+      // Extract RFC references from the whole text, keeping those found in the References section in a separate list
+      while ((rfcMatch = RFC_REFERENCE_RE.exec(trimmedLine)) !== null) {
+        const rfcNumber = rfcMatch[1] || rfcMatch[2]
+        if (currentSection !== 'references') {
+          if (rfcNumber && !data.extractedElements.nonReferenceSectionRfc.includes(rfcNumber)) {
+            data.extractedElements.nonReferenceSectionRfc.push(rfcNumber)
+          }
+        } else {
+          if (rfcNumber && !data.extractedElements.referenceSectionRfc.includes(rfcNumber)) {
+            data.extractedElements.referenceSectionRfc.push(rfcNumber)
+          }
+        }
+      }
+
+      // Detect bracketed non-RFC citations (e.g. drafts), split by whether they occur in the References section
+      while ((draftMatch = NON_RFC_REFERENCE_RE.exec(trimmedLine)) !== null) {
+        const draftName = draftMatch[0]
+        if (currentSection !== 'references') {
+          if (!data.extractedElements.nonReferenceSectionDraftReferences.includes(draftName)) {
+            data.extractedElements.nonReferenceSectionDraftReferences.push(draftName)
+          }
+        } else {
+          if (!data.extractedElements.referenceSectionDraftReferences.includes(draftName)) {
+            data.extractedElements.referenceSectionDraftReferences.push(draftName)
+          }
+        }
+      }
 
       // Check for references
       if (/\[RFC2119\]/i.test(trimmedLine)) {
diff --git a/tests/sections.test.js b/tests/sections.test.js
index ef50ed1..305c4a0 100644
--- a/tests/sections.test.js
+++ b/tests/sections.test.js
@@ -7,7 +7,8 @@ import {
   validateSecurityConsiderationsSection,
   validateAuthorSection,
   validateReferencesSection,
-  validateIANAConsiderationsSection
+  validateIANAConsiderationsSection,
+  validateReferencesInText
 } from '../lib/modules/sections.mjs'
 import { baseTXTDoc, baseXMLDoc } from './fixtures/base-doc.mjs'
 import { cloneDeep, set, times } from 'lodash-es'
@@ -886,3 +887,170 @@ describe('document should have a valid IANA considerations section', () => {
     })
   })
 })
+
+describe('validateReferencesInText', () => {
+  describe('TXT documents', () => {
+    test('should return warnings for unused references in a TXT document', async () => {
+      const txtDoc = {
+        type: 'txt',
+        data: {
+          extractedElements: {
+            referenceSectionRfc: ['2119', '8174', '1234'],
+            referenceSectionDraftReferences: ['draft-ietf-abc-01'],
+            nonReferenceSectionRfc: ['2119'],
+            nonReferenceSectionDraftReferences: []
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(txtDoc)
+
+      expect(result).toEqual([
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC 8174 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ),
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC 1234 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ),
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC draft-ietf-abc-01 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        )
+      ])
+    })
+
+    test('should return an empty array if all references are used in a TXT document', async () => {
+      const txtDoc = {
+        type: 'txt',
+        data: {
+          extractedElements: {
+            referenceSectionRfc: ['2119'],
+            referenceSectionDraftReferences: ['draft-ietf-abc-01'],
+            nonReferenceSectionRfc: ['2119'],
+            nonReferenceSectionDraftReferences: ['draft-ietf-abc-01']
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(txtDoc)
+
+      expect(result).toEqual([])
+    })
+
+    test('should handle empty references in a TXT document', async () => {
+      const txtDoc = {
+        type: 'txt',
+        data: {
+          extractedElements: {
+            referenceSectionRfc: [],
+            referenceSectionDraftReferences: [],
+            nonReferenceSectionRfc: [],
+            nonReferenceSectionDraftReferences: []
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(txtDoc)
+
+      expect(result).toEqual([])
+    })
+  })
+
+  describe('XML documents', () => {
+    test('should return warnings for unused references in an XML document', async () => {
+      const xmlDoc = {
+        type: 'xml',
+        data: {
+          rfc: {
+            back: {
+              references: {
+                references: [
+                  {
+                    reference: [
+                      { _attr: { anchor: 'RFC2119' } },
+                      { _attr: { anchor: 'RFC8174' } },
+                      { _attr: { anchor: 'RFC1234' } }
+                    ]
+                  }
+                ]
+              }
+            },
+            xref: [
+              { _attr: { target: 'RFC2119' } }
+            ]
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(xmlDoc)
+
+      expect(result).toEqual([
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC8174 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        ),
+        new ValidationWarning(
+          'REFERENCE_NOT_USED',
+          'The reference RFC1234 is listed in the References section but is not mentioned in the document text.',
+          { ref: 'https://authors.ietf.org/en/required-content#references' }
+        )
+      ])
+    })
+
+    test('should return an empty array if all references are used in an XML document', async () => {
+      const xmlDoc = {
+        type: 'xml',
+        data: {
+          rfc: {
+            back: {
+              references: {
+                references: [
+                  {
+                    reference: [
+                      { _attr: { anchor: 'RFC2119' } },
+                      { _attr: { anchor: 'RFC8174' } }
+                    ]
+                  }
+                ]
+              }
+            },
+            xref: [
+              { _attr: { target: 'RFC2119' } },
+              { _attr: { target: 'RFC8174' } }
+            ]
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(xmlDoc)
+
+      expect(result).toEqual([])
+    })
+
+    test('should handle empty references in an XML document', async () => {
+      const xmlDoc = {
+        type: 'xml',
+        data: {
+          rfc: {
+            back: {
+              references: {
+                references: []
+              }
+            },
+            xref: []
+          }
+        }
+      }
+
+      const result = await validateReferencesInText(xmlDoc)
+
+      expect(result).toEqual([])
+    })
+  })
+})

From d1ac3fa1f9fd42af203ea4366f940b515b57b829 Mon Sep 17 00:00:00 2001
From: Dmutre <104783173+Dmutre@users.noreply.github.com>
Date: Wed, 5 Feb 2025 15:52:59 +0100
Subject: [PATCH 2/3] feat: tests for parser for parsing references

---
 tests/parser.test.js | 63 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/tests/parser.test.js b/tests/parser.test.js
index 06acf8b..c6a8911 100644
--- a/tests/parser.test.js
+++ b/tests/parser.test.js
@@ -444,3 +444,66 @@ describe('Parsing similar to RFC2119 boilerplate text', () => {
     expect(result.data.boilerplate.similar2119boilerplate).toEqual(true)
   })
 })
+
+describe('Reference is declared, but not used in the document', () => {
+  test('Parsing declared but not used references', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${referenceTXTBlock}
+    `
+    // toContain() only checks a single item, so multi-item checks use expect.arrayContaining
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.referenceSectionRfc).toEqual(expect.arrayContaining(['4360', '5701', '7153', '7432', '2345']))
+    expect(result.data.extractedElements.referenceSectionDraftReferences).toEqual(expect.arrayContaining(['[Lalalala-Refere-Sponsor]', '[I-D.ietf-bess-evpn-igmp-mld-proxy]', '[I-D.ietf-bess-bgp-multicast-controller]', '[I-D.ietf-idr-legacy-rtc]']))
+  })
+
+  test('Parsing references in text (only one reference)', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${referenceTXTBlock}
+    `
+
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.nonReferenceSectionDraftReferences).toContain('[1]')
+    expect(result.data.extractedElements.nonReferenceSectionRfc).toHaveLength(0)
+  })
+
+  test('Parsing references in text (multiple references)', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      [RFC255], [RFC256], [RFC257], [RFC258]
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      [I-D.ietf-bess-evpn-igmp-mld-proxy], [I-D.ietf-bess-bgp-multicast-controller], [I-D.ietf-idr-legacy-rtc]
+      ${securityConsiderationsTXTBlock}
+      ${referenceTXTBlock}
+    `
+
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.nonReferenceSectionDraftReferences).toEqual(expect.arrayContaining(['[1]', '[I-D.ietf-bess-evpn-igmp-mld-proxy]', '[I-D.ietf-bess-bgp-multicast-controller]', '[I-D.ietf-idr-legacy-rtc]']))
+    expect(result.data.extractedElements.nonReferenceSectionRfc).toEqual(expect.arrayContaining(['255', '256', '257', '258']))
+  })
+
+  test('Parsing text without reference section', async () => {
+    const txt = `
+      ${metaTXTBlock}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+    `
+
+    const result = await parse(txt, 'txt')
+    expect(result.data.extractedElements.referenceSectionRfc).toHaveLength(0)
+    expect(result.data.extractedElements.referenceSectionDraftReferences).toHaveLength(0)
+  })
+})

From f11e6021511d538680eb0885cc993fbc405d6af3 Mon Sep 17 00:00:00 2001
From: Dmutre <104783173+Dmutre@users.noreply.github.com>
Date: Mon, 17 Feb 2025 11:31:06 +0100
Subject: [PATCH 3/3] feat: rewrite used-reference extraction

---
 lib/helpers/utils.mjs    | 30 ------------------------------
 lib/modules/sections.mjs | 17 ++++++++++++++---
 2 files changed, 14 insertions(+), 33 deletions(-)
 delete mode 100644 lib/helpers/utils.mjs

diff --git a/lib/helpers/utils.mjs b/lib/helpers/utils.mjs
deleted file mode 100644
index 4fbada9..0000000
--- a/lib/helpers/utils.mjs
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Recursively extract all values of a specific tag and attribute from an XML document.
- *
- * This function traverses the XML document to find all elements matching the specified
- * tag name and extracts the values of the specified attribute.
- *
- * @param {Object} node The root node of the XML document to traverse.
- * @param {string} tagName The tag name to search for.
- * @param {string} attributeName The attribute to extract values from.
- * @param {Array<string>} [extractedValues=[]] An array to accumulate found attribute values.
- * @returns {Array<string>} An array of attribute values from matching tags.
- */
-export function extractRecursiveByTagAndAttribute (node, tagName, attributeName, extractedValues = []) {
-  if (node[tagName]) {
-    const tags = Array.isArray(node[tagName]) ? node[tagName] : [node[tagName]]
-    tags.forEach(tag => {
-      if (tag._attr && tag._attr[attributeName]) {
-        extractedValues.push(tag._attr[attributeName])
-      }
-    })
-  }
-
-  Object.keys(node).forEach(key => {
-    if (typeof node[key] === 'object') {
-      extractRecursiveByTagAndAttribute(node[key], tagName, attributeName, extractedValues)
-    }
-  })
-
-  return extractedValues
-}
diff --git a/lib/modules/sections.mjs b/lib/modules/sections.mjs
index ebe6587..a4f6fd8 100644
--- a/lib/modules/sections.mjs
+++ b/lib/modules/sections.mjs
@@ -2,8 +2,7 @@ import { ValidationComment, ValidationError, ValidationWarning } from '../helper
 import { MODES } from '../config/modes.mjs'
 import { XML_SCHEMA } from '../config/schema.mjs'
 import { find, get, has, isPlainObject } from 'lodash-es'
-import { findDescendantWith } from '../helpers/traversal.mjs'
-import { extractRecursiveByTagAndAttribute } from '../helpers/utils.mjs'
+import { findAllDescendantsWith, findDescendantWith } from '../helpers/traversal.mjs'
 
 /**
  * Validate a document abstract section
@@ -742,7 +741,19 @@ export async function validateReferencesInText (doc, { mode = MODES.NORMAL } = {
           })
         }
       })
-      const usedReferences = extractRecursiveByTagAndAttribute(doc.data.rfc, 'xref', 'target')
+      const usedReferences = findAllDescendantsWith(doc.data.rfc, (value, key) => {
+        if (key !== 'xref') return false
+
+        if (Array.isArray(value)) {
+          return value.some(item => item._attr?.target)
+        }
+
+        return value._attr?.target
+      }).flatMap(match =>
+        Array.isArray(match.value)
+          ? match.value.map(item => item._attr?.target).filter(Boolean)
+          : match.value._attr?.target ? [match.value._attr.target] : []
+      )
 
       const unusedReferences = definedReferences.filter(ref => !usedReferences.includes(ref))