ietf-tools · Dmutre · Jan 9, 2025 · Feb 20, 2025 · rjsparks · Feb 21, 2025
@@ -0,0 +1,57 @@
/**
 * RFC status levels, listed from the highest weight to the lowest.
 *
 * Each entry has:
 * - `name`: canonical status name,
 * - `regex`: case-insensitive pattern that spots the status in free-form text,
 * - `weight`: numeric rank used to compare statuses ("Proposed Standard" and
 *   "Standards Track" share the same weight).
 *
 * The patterns intentionally do not use the `g` flag: a global RegExp keeps
 * `lastIndex` between `test()` calls, so reusing these shared objects would
 * make a repeated lookup of the same text fail.
 */
export const rfcStatusHierarchy = [
  {
    name: 'Internet Standard',
    regex: /internet standard/i,
    weight: 7
  },
  {
    name: 'Draft Standard',
    regex: /draft standard/i,
    weight: 6
  },
  {
    name: 'Proposed Standard',
    regex: /proposed standard/i,
    weight: 5
  },
  {
    name: 'Standards Track',
    regex: /standards track/i,
    weight: 5
  },
  {
    name: 'Best Current Practice',
    regex: /best current practice|bcp/i,
    weight: 4
  },
  {
    name: 'Informational',
    regex: /informational/i,
    weight: 3
  },
  {
    name: 'Experimental',
    regex: /experimental/i,
    weight: 2
  },
  {
    name: 'Historic',
    regex: /historic/i,
    weight: 1
  }
]

/**
 * Extracts the highest status weight based on RFC status hierarchy.
 *
 * @param {string} statusText - The status text to check.
 * @returns {number|null} - The weight of the status or null if not found
 *                          (also null when statusText is not a string).
 */
export function getStatusWeight (statusText) {
  if (typeof statusText !== 'string') {
    return null
  }

  // Entries are ordered by descending weight, so the first hit is the highest.
  const matched = rfcStatusHierarchy.find(({ regex }) => regex.test(statusText))
  return matched ? matched.weight : null
}
@@ -45,6 +45,9 @@ import {
   validateLineLength,
   validateCodeComments
 } from './modules/txt.mjs'
+import {
+  validateDownrefs
+} from './modules/downref.mjs'
 
 /**
  * Check Nits
@@ -134,6 +137,8 @@ export async function checkNits (raw, filename, {
   result.push(...await validateCategory(doc, { mode }))
   progressReport('Validating Version...')
   result.push(...await validateVersion(doc, { mode, offline }))
+  progressReport('Validating downrefs in text...')
+  result.push(...await validateDownrefs(doc, { mode }))
 
   // Run XML-only validations
   if (doc.type === 'xml') {

@@ -0,0 +1,121 @@
+import { ValidationWarning, ValidationError } from '../helpers/error.mjs'
+import { checkReferencesInDownrefs } from '../remote/downref.mjs'
+import { MODES } from '../config/modes.mjs'
+import { findAllDescendantsWith } from '../helpers/traversal.mjs'
+
/**
 * Validate document references for RFCs and Drafts downrefs.
 *
 * References are collected from the document (the extracted reference section
 * for text documents, reference anchors for XML documents), normalized, and
 * looked up in the Downref Registry. Every hit is reported as an error in
 * normal mode and as a warning in forgive-checklist mode; other modes
 * (including submission mode) report nothing.
 *
 * @param {Object} doc - Document to validate
 * @param {Object} [opts] - Additional options
 * @param {number} [opts.mode=MODES.NORMAL] - Validation mode to use
 * @param {boolean} [opts.offline=false] - Skip fetching remote data if true
 * @returns {Promise<Array>} - List of errors/warnings/comments
 */
export async function validateDownrefs (doc, { mode = MODES.NORMAL, offline = false } = {}) {
  const result = []

  // Decide up front whether anything can be reported, so that no remote
  // lookup is made for modes that stay silent or when offline is requested.
  const Finding = downrefFindingClass(mode)
  if (!Finding || offline) {
    return result
  }

  const candidates = collectDownrefCandidates(doc)
  if (candidates.length === 0) {
    return result
  }

  const downrefMatches = await checkReferencesInDownrefs(candidates)

  for (const match of downrefMatches) {
    // RFC hits look like "RFC 1234"; their document name is "rfc1234".
    const rfcNumber = /^RFC\s*(\d+)$/i.exec(String(match))?.[1]
    const label = rfcNumber ? match : `Draft ${match}`
    const docName = rfcNumber ? `rfc${rfcNumber}` : match

    result.push(new Finding('DOWNREF_DRAFT', `${label} is listed in the Downref Registry.`, {
      ref: `https://datatracker.ietf.org/doc/${docName}`
    }))
  }

  return result
}

/**
 * Pick the result class used to report a downref hit in the given mode.
 *
 * @param {number} mode - Validation mode
 * @returns {Function|null} - ValidationError, ValidationWarning, or null when the mode reports nothing
 */
function downrefFindingClass (mode) {
  switch (mode) {
    case MODES.NORMAL:
      return ValidationError
    case MODES.FORGIVE_CHECKLIST:
      return ValidationWarning
    default:
      return null
  }
}

/**
 * Collect the normalized RFC / draft references of a document.
 *
 * @param {Object} doc - Document to inspect
 * @returns {Array<string>} - References to look up (empty for unsupported or reference-less documents)
 */
function collectDownrefCandidates (doc) {
  switch (doc.type) {
    case 'txt': {
      const {
        referenceSectionRfc = [],
        referenceSectionDraftReferences = []
      } = doc.data?.extractedElements ?? {}

      return [
        ...referenceSectionRfc.map((rfcNumber) => `RFC ${rfcNumber}`),
        ...normalizeDraftReferences(referenceSectionDraftReferences)
      ]
    }
    case 'xml': {
      // A document may have no <back> or <references> element at all.
      const referencesSections = doc.data?.rfc?.back?.references?.references
      if (!referencesSections) {
        return []
      }

      const definedReferences = findAllDescendantsWith(referencesSections, (value, key) => key === '_attr' && value.anchor)
        // flatMap flattens array anchors one level and keeps scalar anchors as they are
        .flatMap((found) => found.value.anchor)
        .filter(Boolean)

      return normalizeXmlReferences(definedReferences)
    }
    default:
      return []
  }
}
+
/**
 * Normalize references by removing brackets, versions, and checking for drafts.
 *
 * Each reference is trimmed, stripped of one leading "[" / trailing "]" and
 * of a trailing two-digit version ("-NN"); only references whose text
 * contains "draft" (case-insensitive) are kept. Non-string entries are
 * ignored, and a missing list yields an empty result.
 *
 * @param {Array} references - Array of textual references.
 * @returns {Array} - Array of normalized references containing "draft".
 */
function normalizeDraftReferences (references) {
  if (!Array.isArray(references)) {
    return []
  }

  return references
    .filter((ref) => typeof ref === 'string')
    // Trim first so that the anchored bracket / version patterns can match.
    .map((ref) => ref.trim().replace(/^\[|\]$/g, '').replace(/-\d{2}$/, ''))
    .filter((ref) => ref.toLowerCase().includes('draft'))
}
+
/**
 * Normalize XML references to drafts and RFCs.
 *
 * - Anchors of the form "RFC<digits>" (case-insensitive) become "RFC <number>",
 *   with leading zeros removed so that a zero-padded anchor such as "RFC0793"
 *   yields "RFC 793".
 * - Anchors containing "draft" are stripped of one leading "[" / trailing "]"
 *   and of a trailing two-digit version ("-NN").
 * - Anything else (including non-string entries) is dropped.
 *
 * @param {Array} references - Array of reference strings.
 * @returns {Array} - Normalized references including only drafts and RFCs.
 */
function normalizeXmlReferences (references) {
  if (!Array.isArray(references)) {
    return []
  }

  const normalizedReferences = []

  for (const rawRef of references) {
    if (typeof rawRef !== 'string') {
      continue
    }

    const ref = rawRef.trim()
    const rfcDigits = /^RFC(\d+)$/i.exec(ref)?.[1]

    if (rfcDigits !== undefined) {
      // parseInt drops the zero padding ("0793" -> 793).
      normalizedReferences.push(`RFC ${Number.parseInt(rfcDigits, 10)}`)
    } else if (/draft/i.test(ref)) {
      normalizedReferences.push(ref.replace(/^\[|\]$/g, '').replace(/-\d{2}$/, ''))
    }
  }

  return normalizedReferences
}
@@ -2,6 +2,7 @@ import { ValidationError } from '../helpers/error.mjs'
 import { DateTime } from 'luxon'
 import { FQDN_RE } from '../modules/fqdn.mjs'
 import { IPV4_LOOSE_RE, IPV6_LOOSE_RE } from '../modules/ip.mjs'
+import { rfcStatusHierarchy } from '../config/rfc-status-hierarchy.mjs'
 
 // Regex patterns
 const LINE_VALUES_EXTRACT_RE = /^(?<left>.*)\s{2,}(?<right>.*)$/
@@ -280,7 +281,9 @@ export async function parse (rawText, filename) {
 
             // --> Intended status
             if (values.left.startsWith('Intended')) {
-              data.header.intendedStatus = values.left.split(':')?.[1]?.trim()
+              const rawIntendedStatus = values.left.split(':')?.[1]?.trim()
+              const cleanIntendedStatus = extractStatusName(rawIntendedStatus)
+              data.header.intendedStatus = cleanIntendedStatus || rawIntendedStatus
             }
 
             // --> Obsoletes
@@ -291,7 +294,9 @@ export async function parse (rawText, filename) {
 
             // --> Category
             if (values.left.startsWith('Category')) {
-              data.header.category = values.left.split(':')?.[1]?.trim()
+              const rawCategory = values.left.split(':')?.[1]?.trim()
+              const cleanCategory = extractStatusName(rawCategory)
+              data.header.category = cleanCategory || rawCategory
             }
 
             // --> ISSN
@@ -459,3 +464,28 @@ function hasBoilerplateMatch (text, ...regexGroups) {
   }
   return false
 }
+
/**
 * Extracts the clean status name from a given status text using predefined regular expressions.
 *
 * The entries of `rfcStatusHierarchy` (name, regex pattern, weight) are tried
 * in order and the name of the first entry whose pattern occurs in the text
 * is returned.
 *
 * @param {string} statusText - The raw status text to be processed (e.g., "Standards Track Juniper Networks").
 * @returns {string|null} - The clean name of the status (e.g., "Standards Track") if matched,
 *                          or `null` if no matching status is found or the input is not a string.
 *
 * Example:
 * const rawStatus = "Standards Track Juniper Networks";
 * const cleanStatus = extractStatusName(rawStatus);
 * console.log(cleanStatus); // Output: "Standards Track"
 */
function extractStatusName (statusText) {
  if (typeof statusText !== 'string') {
    return null
  }

  for (const status of rfcStatusHierarchy) {
    // String#search always scans from the start and leaves `lastIndex`
    // untouched, so a shared pattern carrying the `g` flag cannot make
    // repeated lookups of the same text fail the way `regex.test()` would.
    if (statusText.search(status.regex) !== -1) {
      return status.name
    }
  }
  return null
}
@@ -0,0 +1,59 @@
const DOWNREF_REGISTRY_URL = 'https://datatracker.ietf.org/doc/downref/'
let cachedDownrefRegistry = null

/**
 * Fetch and parse the Downref Registry HTML to extract references.
 * Caches the result to avoid redundant network requests.
 *
 * RFC links (`/doc/rfcNNNN/`) are recorded as "RFC NNNN"; draft links
 * (`/doc/draft-.../`) are recorded by their trimmed link text.
 *
 * @returns {Promise<Set<string>>} - A set of references from the Downref Registry.
 * @throws {Error} If the request fails or the server answers with a non-2xx
 *                 status; the underlying error is available as `cause`.
 */
async function fetchDownrefRegistry () {
  if (cachedDownrefRegistry) {
    return cachedDownrefRegistry
  }

  try {
    const response = await fetch(DOWNREF_REGISTRY_URL, { credentials: 'omit' })
    // Without this check an error page would be parsed and cached as an empty registry.
    if (!response.ok) {
      throw new Error(`unexpected HTTP status ${response.status}`)
    }

    const html = await response.text()
    const references = new Set()

    for (const [, rfcNumber] of html.matchAll(/<a href="\/doc\/rfc(\d+)\/">[^<]+<\/a>/g)) {
      references.add(`RFC ${rfcNumber}`)
    }

    for (const [, linkText] of html.matchAll(/<a href="\/doc\/draft-[^/]+\/">([^<]+)<\/a>/g)) {
      references.add(linkText.trim())
    }

    cachedDownrefRegistry = references
    return references
  } catch (err) {
    throw new Error(`Failed to fetch Downref Registry: ${err.message}`, { cause: err })
  }
}
+
/**
 * Validate references against the Downref Registry.
 *
 * A reference is considered found when it appears in a registry entry as a
 * complete document name (case-insensitive, optionally followed by a
 * two-digit "-NN" version). It must not be preceded or followed by another
 * word character or hyphen, so "draft-ietf-tls" does not match
 * "draft-ietf-tls-exported-authenticator" and "RFC 79" does not match "RFC 793".
 *
 * Empty or non-string references are ignored, and the registry is only
 * fetched when there is at least one reference to look up.
 *
 * @param {string[]} references - List of references to validate.
 * @returns {Promise<string[]>} - A list of references found in the Downref Registry
 *                                (the input strings, in input order).
 */
export async function checkReferencesInDownrefs (references) {
  const candidates = (Array.isArray(references) ? references : [])
    .filter((ref) => typeof ref === 'string' && ref.trim() !== '')

  if (candidates.length === 0) {
    return []
  }

  const registryEntries = [...await fetchDownrefRegistry()]

  return candidates.filter((ref) => {
    const escapedRef = ref.trim().replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    // Plain \b treats "-" as a boundary and would accept name prefixes;
    // the lookarounds require the whole hyphenated name to match.
    const refRegex = new RegExp(`(?<![\\w-])${escapedRef}(?:-\\d{2})?(?![\\w-])`, 'i')

    return registryEntries.some((downref) => refRegex.test(downref))
  })
}