From 9a0fcf6f37b76aa6dbd0fabb273306a7a9190fce Mon Sep 17 00:00:00 2001
From: Dmutre <104783173+Dmutre@users.noreply.github.com>
Date: Thu, 9 Jan 2025 17:26:57 +0100
Subject: [PATCH 1/2] feat: a reference appears to be a downref (noting if
 reference appears in the downref registry)

- added downref validation
- added utils and remote function
- added tests
- tested app
---
 lib/config/rfc-status-hierarchy.mjs |  57 +++++++++++++
 lib/index.mjs                       |   5 ++
 lib/modules/downref.mjs             | 121 ++++++++++++++++++++++++++++
 lib/parsers/txt.mjs                 |  34 +++++++-
 lib/remote/downref.mjs              |  59 ++++++++++++++
 tests/downref.test.js               |  88 ++++++++++++++++++++
 tests/fixtures/base-doc.mjs         |  27 ++++++-
 7 files changed, 388 insertions(+), 3 deletions(-)
 create mode 100644 lib/config/rfc-status-hierarchy.mjs
 create mode 100644 lib/modules/downref.mjs
 create mode 100644 lib/remote/downref.mjs
 create mode 100644 tests/downref.test.js

diff --git a/lib/config/rfc-status-hierarchy.mjs b/lib/config/rfc-status-hierarchy.mjs
new file mode 100644
index 0000000..596b2bb
--- /dev/null
+++ b/lib/config/rfc-status-hierarchy.mjs
@@ -0,0 +1,57 @@
+export const rfcStatusHierarchy = [ // highest weight first; lookups return the first entry whose regex matches
+  {
+    name: 'Internet Standard',
+    regex: /internet standard/i, // no `g` flag: test() on a global regex resumes at lastIndex and misses on reuse
+    weight: 7
+  },
+  {
+    name: 'Draft Standard',
+    regex: /draft standard/i,
+    weight: 6
+  },
+  {
+    name: 'Proposed Standard',
+    regex: /proposed standard/i,
+    weight: 5
+  },
+  {
+    name: 'Standards Track',
+    regex: /standards track/i,
+    weight: 5
+  },
+  {
+    name: 'Best Current Practice',
+    regex: /best current practice|bcp/i,
+    weight: 4
+  },
+  {
+    name: 'Informational',
+    regex: /informational/i,
+    weight: 3
+  },
+  {
+    name: 'Experimental',
+    regex: /experimental/i,
+    weight: 2
+  },
+  {
+    name: 'Historic',
+    regex: /historic/i,
+    weight: 1
+  }
+]
+
+/**
+ * Returns the weight of the first (i.e. highest-ranked) hierarchy entry matching the text.
+ *
+ * @param {string} statusText - The status text to check.
+ * @returns {number|null} - The weight of the status or null if not found.
+ */
+export function getStatusWeight (statusText) {
+  for (const { regex, weight } of rfcStatusHierarchy) {
+    // Reset lastIndex: a regex with the `g` flag resumes test() from its previous match position.
+    regex.lastIndex = 0
+    if (regex.test(statusText)) return weight
+  }
+  return null
+}
diff --git a/lib/index.mjs b/lib/index.mjs
index 9bf450b..41272be 100644
--- a/lib/index.mjs
+++ b/lib/index.mjs
@@ -45,6 +45,9 @@ import {
   validateLineLength,
   validateCodeComments
 } from './modules/txt.mjs'
+import {
+  validateDownrefs
+} from './modules/downref.mjs'
 
 /**
  * Check Nits
@@ -134,6 +137,8 @@ export async function checkNits (raw, filename, {
   result.push(...await validateCategory(doc, { mode }))
   progressReport('Validating Version...')
   result.push(...await validateVersion(doc, { mode, offline }))
+  progressReport('Validating downrefs in text...')
+  result.push(...await validateDownrefs(doc, { mode }))
 
   // Run XML-only validations
   if (doc.type === 'xml') {
diff --git a/lib/modules/downref.mjs b/lib/modules/downref.mjs
new file mode 100644
index 0000000..fc65896
--- /dev/null
+++ b/lib/modules/downref.mjs
@@ -0,0 +1,121 @@
+import { ValidationWarning, ValidationError } from '../helpers/error.mjs'
+import { checkReferencesInDownrefs } from '../remote/downref.mjs'
+import { MODES } from '../config/modes.mjs'
+import { findAllDescendantsWith } from '../helpers/traversal.mjs'
+
+/**
+ * Validate document references for RFCs and Drafts downrefs.
+ *
+ * References are read from the parsed txt or xml document, normalized to
+ * "RFC <number>" or an unversioned draft name, and looked up in the Downref
+ * Registry. Every hit is reported under the DOWNREF_DRAFT code: as an error in
+ * NORMAL mode and as a warning in FORGIVE_CHECKLIST mode. Other modes
+ * (including SUBMISSION), offline runs and unsupported document types yield
+ * an empty list without contacting the registry.
+ *
+ * @param {Object} doc - Document to validate
+ * @param {Object} [opts] - Additional options
+ * @param {number} [opts.mode=0] - Validation mode to use
+ * @param {boolean} [opts.offline=false] - Skip fetching remote data if true
+ * @returns {Array} - List of errors/warnings/comments
+ * @throws {Error} If the Downref Registry cannot be fetched
+ */
+export async function validateDownrefs (doc, { mode = MODES.NORMAL, offline = false } = {}) {
+  // Only NORMAL and FORGIVE_CHECKLIST report anything; resolve the severity up
+  // front so no registry request is made when the result would be discarded.
+  const Issue = mode === MODES.NORMAL
+    ? ValidationError
+    : mode === MODES.FORGIVE_CHECKLIST ? ValidationWarning : null
+
+  // The registry is fetched remotely, so there is nothing to check offline.
+  if (!Issue || offline) {
+    return []
+  }
+
+  // Collect the references in the normalized form used for the registry lookup.
+  let references
+  switch (doc.type) {
+    case 'txt': {
+      const { referenceSectionRfc, referenceSectionDraftReferences } = doc.data.extractedElements
+      references = [
+        ...referenceSectionRfc.map((rfcNumber) => `RFC ${rfcNumber}`),
+        ...normalizeDraftReferences(referenceSectionDraftReferences)
+      ]
+      break
+    }
+    case 'xml': {
+      const referencesSections = doc.data.rfc?.back?.references?.references
+      if (!referencesSections) return [] // no references section, nothing to look up
+      const definedReferences = findAllDescendantsWith(referencesSections, (value, key) => key === '_attr' && value.anchor)
+        .flatMap(match =>
+          Array.isArray(match.value.anchor)
+            ? match.value.anchor
+            : [match.value.anchor]
+        )
+        .filter(Boolean)
+      references = normalizeXmlReferences(definedReferences)
+      break
+    }
+    default: {
+      return []
+    }
+  }
+
+  // Avoid a network round trip when the document has no RFC/draft references.
+  if (references.length === 0) {
+    return []
+  }
+
+  const downrefMatches = await checkReferencesInDownrefs(references)
+
+  return downrefMatches.map((match) => {
+    // Matches are the normalized inputs: "RFC <number>" or a draft name.
+    const isRfc = /^RFC\s/i.test(match)
+    const label = isRfc ? match : `Draft ${match}`
+    // Datatracker paths have no spaces (the registry links to /doc/rfc<number>/).
+    const docName = match.replace(/\s+/g, '').toLowerCase()
+
+    return new Issue('DOWNREF_DRAFT', `${label} is listed in the Downref Registry.`, {
+      ref: `https://datatracker.ietf.org/doc/${docName}`
+    })
+  })
+}
+
+/**
+ * Strip the surrounding brackets and the trailing two-digit version from each
+ * reference, keeping only those that mention "draft" (case-insensitive).
+ *
+ * @param {Array} references - Array of textual references.
+ * @returns {Array} - Array of normalized references containing "draft".
+ */
+function normalizeDraftReferences (references) {
+  const drafts = []
+  for (const rawRef of references) {
+    const bare = rawRef.replace(/^\[|\]$/g, '').replace(/-\d{2}$/, '')
+    if (bare.toLowerCase().includes('draft')) {
+      drafts.push(bare)
+    }
+  }
+  return drafts
+}
+
+/**
+ * Map XML reference anchors to the names used for the registry lookup.
+ * Anchors that are neither RFCs nor drafts are dropped.
+ *
+ * @param {Array} references - Array of reference strings.
+ * @returns {Array} - Normalized references including only drafts and RFCs.
+ */
+function normalizeXmlReferences (references) {
+  return references.flatMap((anchor) => {
+    // "RFC1234" (any case) becomes "RFC 1234"
+    if (/^RFC\d+$/i.test(anchor)) {
+      return [`RFC ${anchor.match(/\d+/)[0]}`]
+    }
+    // anything mentioning "draft" loses its brackets and two-digit version suffix
+    if (/draft/i.test(anchor)) {
+      return [anchor.trim().replace(/^\[|\]$/g, '').replace(/-\d{2}$/, '')]
+    }
+
+    return []
+  })
+}
diff --git a/lib/parsers/txt.mjs b/lib/parsers/txt.mjs
index 8649844..a7df9de 100644
--- a/lib/parsers/txt.mjs
+++ b/lib/parsers/txt.mjs
@@ -2,6 +2,7 @@ import { ValidationError } from '../helpers/error.mjs'
 import { DateTime } from 'luxon'
 import { FQDN_RE } from '../modules/fqdn.mjs'
 import { IPV4_LOOSE_RE, IPV6_LOOSE_RE } from '../modules/ip.mjs'
+import { rfcStatusHierarchy } from '../config/rfc-status-hierarchy.mjs'
 
 // Regex patterns
 const LINE_VALUES_EXTRACT_RE = /^(?<left>.*)\s{2,}(?<right>.*)$/
@@ -280,7 +281,9 @@ export async function parse (rawText, filename) {
 
             // --> Intended status
             if (values.left.startsWith('Intended')) {
-              data.header.intendedStatus = values.left.split(':')?.[1]?.trim()
+              const rawIntendedStatus = values.left.split(':')?.[1]?.trim()
+              const cleanIntendedStatus = extractStatusName(rawIntendedStatus)
+              data.header.intendedStatus = cleanIntendedStatus || rawIntendedStatus
             }
 
             // --> Obsoletes
@@ -291,7 +294,9 @@ export async function parse (rawText, filename) {
 
             // --> Category
             if (values.left.startsWith('Category')) {
-              data.header.category = values.left.split(':')?.[1]?.trim()
+              const rawCategory = values.left.split(':')?.[1]?.trim()
+              const cleanCategory = extractStatusName(rawCategory)
+              data.header.category = cleanCategory || rawCategory
             }
 
             // --> ISSN
@@ -459,3 +464,28 @@ function hasBoilerplateMatch (text, ...regexGroups) {
   }
   return false
 }
+
+/**
+ * Extracts the clean status name from a given status text using predefined regular expressions.
+ *
+ * Iterates over rfcStatusHierarchy (entries of name, regex pattern and weight), tests the
+ * given status text against each regex in order, and returns the name of the first entry
+ * that matches.
+ *
+ * @param {string} statusText - The raw status text to be processed (e.g., "Standards Track Juniper Networks").
+ * @returns {string|null} - The clean name of the status (e.g., "Standards Track") if matched,
+ *                          or `null` if no matching status is found.
+ *
+ * Example:
+ * const rawStatus = "Standards Track Juniper Networks";
+ * const cleanStatus = extractStatusName(rawStatus);
+ * console.log(cleanStatus); // Output: "Standards Track"
+ */
+function extractStatusName (statusText) {
+  for (const { regex, name } of rfcStatusHierarchy) {
+    // Reset lastIndex: a regex with the `g` flag resumes test() from its previous match position.
+    regex.lastIndex = 0
+    if (regex.test(statusText)) return name
+  }
+  return null
+}
diff --git a/lib/remote/downref.mjs b/lib/remote/downref.mjs
new file mode 100644
index 0000000..a5f81d5
--- /dev/null
+++ b/lib/remote/downref.mjs
@@ -0,0 +1,59 @@
+const DOWNREF_REGISTRY_URL = 'https://datatracker.ietf.org/doc/downref/'
+let cachedDownrefRegistry = null
+
+/**
+ * Fetch and parse the Downref Registry HTML to extract references.
+ * Caches the result to avoid redundant network requests.
+ * @returns {Promise<Set<string>>} - A set of references from the Downref Registry.
+ * @throws {Error} If the request fails or the server answers with a non-2xx status.
+ */
+async function fetchDownrefRegistry () {
+  if (cachedDownrefRegistry) {
+    return cachedDownrefRegistry
+  }
+
+  try {
+    const response = await fetch(DOWNREF_REGISTRY_URL, { credentials: 'omit' })
+    // An error page would otherwise parse to an empty set and be cached as "no downrefs".
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}`)
+    }
+    const html = await response.text()
+    const references = new Set()
+    // RFC links are stored as "RFC <number>" (from the href), other links by their text.
+    for (const [, rfcNumber] of html.matchAll(/<a href="\/doc\/rfc(\d+)\/">([^<]+)<\/a>/g)) {
+      references.add(`RFC ${rfcNumber.trim()}`)
+    }
+    for (const [, linkText] of html.matchAll(/<a href="\/doc\/(?:rfc|draft-[^/]+)\/">([^<]+)<\/a>/g)) {
+      references.add(linkText.trim())
+    }
+    cachedDownrefRegistry = references
+    return references
+  } catch (err) {
+    throw new Error(`Failed to fetch Downref Registry: ${err.message}`, { cause: err })
+  }
+}
+
+/**
+ * Build the case-insensitive, word-delimited pattern used to look up a reference.
+ * @param {string} reference - Reference to search for.
+ * @returns {RegExp} - Pattern matching the reference literally.
+ */
+function buildReferencePattern (reference) {
+  const literal = reference.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+  return new RegExp(`\\b${literal}\\b`, 'i')
+}
+
+/**
+ * Look up references in the Downref Registry.
+ * @param {string[]} references - List of references to validate.
+ * @returns {Promise<string[]>} - A list of references found in the Downref Registry.
+ */
+export async function checkReferencesInDownrefs (references) {
+  const registryEntries = [...await fetchDownrefRegistry()]
+
+  return references.filter((candidate) => {
+    const pattern = buildReferencePattern(candidate)
+    return registryEntries.some((entry) => pattern.test(entry))
+  })
+}
diff --git a/tests/downref.test.js b/tests/downref.test.js
new file mode 100644
index 0000000..9da8306
--- /dev/null
+++ b/tests/downref.test.js
@@ -0,0 +1,88 @@
+import { describe, expect, test } from '@jest/globals'
+import { MODES } from '../lib/config/modes.mjs'
+import { toContainError, ValidationWarning, ValidationError } from '../lib/helpers/error.mjs'
+import { baseXMLDoc, baseTXTDoc } from './fixtures/base-doc.mjs'
+import { cloneDeep, set } from 'lodash-es'
+import { validateDownrefs } from '../lib/modules/downref.mjs'
+
+expect.extend({
+  toContainError
+})
+
+describe('validateDownrefs', () => { // NOTE(review): nothing is mocked in this file, so these tests appear to query the live Downref Registry - confirm
+  describe('TXT Document Type', () => {
+    test('valid references with no downrefs', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionRfc', ['4086', '8141'])
+      set(doc, 'data.extractedElements.referenceSectionDraftReferences', [
+        'draft-ietf-quic-http-34'
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toHaveLength(0)
+    })
+
+    test('invalid downref for a draft', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionDraftReferences', [
+        'draft-ietf-emu-aka-pfs-34' // the two-digit version suffix is stripped before the lookup
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError)
+    })
+
+    test('invalid downref for an RFC', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionRfc', ['952']) // looked up as "RFC 952"
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError) // RFC hits share the DOWNREF_DRAFT code
+    })
+
+    test('FORGIVE_CHECKLIST mode returns warnings', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionRfc', ['1094']) // the expected hit; the draft below is clean in the first test
+      set(doc, 'data.extractedElements.referenceSectionDraftReferences', [
+        'draft-ietf-quic-http-34'
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
+    })
+  })
+
+  describe('XML Document Type', () => {
+    test('valid XML references without downrefs', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        { reference: [{ _attr: { anchor: 'RFC4086' } }] },
+        { reference: [{ _attr: { anchor: 'RFC8141' } }] }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toHaveLength(0)
+    })
+
+    test('invalid XML downref for a draft', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        { reference: [{ _attr: { anchor: 'draft-ietf-emu-aka-pfs-34' } }] }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError)
+    })
+
+    test('FORGIVE_CHECKLIST mode returns warnings for XML', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        { reference: [{ _attr: { anchor: 'RFC4187' } }] }, // the expected hit; the draft below is clean in the first TXT test
+        { reference: [{ _attr: { anchor: 'draft-ietf-quic-http-34' } }] }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
+    })
+  })
+})
diff --git a/tests/fixtures/base-doc.mjs b/tests/fixtures/base-doc.mjs
index 81641ef..95210fe 100644
--- a/tests/fixtures/base-doc.mjs
+++ b/tests/fixtures/base-doc.mjs
@@ -10,8 +10,33 @@ export const baseTXTDoc = {
       source: null,
       expires: null
     },
+    content: {
+      abstract: null,
+      introduction: null,
+      securityConsiderations: null,
+      authorAddress: null,
+      references: null,
+      ianaConsiderations: null
+    },
     title: null,
-    slug: null
+    slug: null,
+    extractedElements: {
+      fqdnDomains: [],
+      ipv4: [],
+      ipv6: [],
+      keywords2119: [],
+      boilerplate2119Keywords: [],
+      obsoletesRfc: [],
+      updatesRfc: [],
+      nonReferenceSectionRfc: [],
+      referenceSectionRfc: [],
+      nonReferenceSectionDraftReferences: [],
+      referenceSectionDraftReferences: []
+    },
+    possibleIssues: {
+      inlineCode: [],
+      misspeled2119Keywords: []
+    }
   }
 }
 

From 2b6987357786d3f39205a99c3fc3a1eefa42cc74 Mon Sep 17 00:00:00 2001
From: Dmutre <104783173+Dmutre@users.noreply.github.com>
Date: Thu, 20 Feb 2025 14:19:05 +0100
Subject: [PATCH 2/2] feat: covered new parser features with tests, added new
 tests for function

---
 tests/downref.test.js | 47 +++++++++++++++++++++++++++++++++++
 tests/parser.test.js  | 58 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/tests/downref.test.js b/tests/downref.test.js
index 9da8306..156bb6c 100644
--- a/tests/downref.test.js
+++ b/tests/downref.test.js
@@ -84,5 +84,52 @@ describe('validateDownrefs', () => {
       const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
       expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
     })
+
+    test('valid XML references without downrefs (multiple references in a section)', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        {
+          reference: [
+            { _attr: { anchor: 'RFC2119' } },
+            { _attr: { anchor: 'RFC8174' } },
+            { _attr: { anchor: 'RFC1234' } }
+          ]
+        }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toHaveLength(0)
+    })
+
+    test('invalid XML downref when multiple references exist in a section', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        {
+          reference: [
+            { _attr: { anchor: 'RFC2119' } },
+            { _attr: { anchor: 'RFC8174' } },
+            { _attr: { anchor: 'draft-ietf-emu-aka-pfs-34' } } // This is a downref
+          ]
+        }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError)
+    })
+
+    test('FORGIVE_CHECKLIST mode returns warnings when multiple references exist', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        {
+          reference: [
+            { _attr: { anchor: 'RFC4187' } }, // This is the expected downref
+            { _attr: { anchor: 'draft-ietf-quic-http-34' } } // Not a downref: the TXT "no downrefs" test uses this same draft
+          ]
+        }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
+    })
   })
 })
diff --git a/tests/parser.test.js b/tests/parser.test.js
index 06acf8b..ebea046 100644
--- a/tests/parser.test.js
+++ b/tests/parser.test.js
@@ -444,3 +444,61 @@ describe('Parsing similar to RFC2119 boilerplate text', () => {
     expect(result.data.boilerplate.similar2119boilerplate).toEqual(true)
   })
 })
+
+describe('Parsing Category and Intended Status from document header', () => { // each test swaps the status line of metaTXTBlock and parses the result
+  test('Parses Category correctly', async () => {
+    const txt = `
+      ${metaTXTBlock.replace('Intended status: Standards Track', 'Category: Standards Track')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.category).toBe('Standards Track')
+  })
+
+  test('Parses Intended Status correctly', async () => {
+    const txt = `
+      ${metaTXTBlock.replace('Intended status: Standards Track', 'Intended status: Experimental')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.intendedStatus).toBe('Experimental')
+  })
+
+  test('Handles missing status or category', async () => { // NOTE(review): only `category` is asserted below; intendedStatus is not checked
+    const txt = `
+      ${metaTXTBlock.replace('Intended status: Standards Track', '')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.category).toBeUndefined()
+  })
+
+  test('Handles Unknown Intended status', async () => { // text matching no known status is expected back verbatim
+    const txt = `
+      ${metaTXTBlock.replace('Standards Track', 'Unknown')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.intendedStatus).toBe('Unknown')
+  })
+})