From 9a0fcf6f37b76aa6dbd0fabb273306a7a9190fce Mon Sep 17 00:00:00 2001
From: Dmutre <104783173+Dmutre@users.noreply.github.com>
Date: Thu, 9 Jan 2025 17:26:57 +0100
Subject: [PATCH 1/2] feat: a reference appears to be a downref (noting if reference appears in the downref registry)

- added validation for downref validation
- added utils and remote function
- added tests
- tested app
---
 lib/config/rfc-status-hierarchy.mjs |  57 +++++++++++++
 lib/index.mjs                       |   5 ++
 lib/modules/downref.mjs             | 121 ++++++++++++++++++++++++++++
 lib/parsers/txt.mjs                 |  34 +++++++-
 lib/remote/downref.mjs              |  59 ++++++++++++++
 tests/downref.test.js               |  88 ++++++++++++++++++++
 tests/fixtures/base-doc.mjs         |  27 ++++++-
 7 files changed, 388 insertions(+), 3 deletions(-)
 create mode 100644 lib/config/rfc-status-hierarchy.mjs
 create mode 100644 lib/modules/downref.mjs
 create mode 100644 lib/remote/downref.mjs
 create mode 100644 tests/downref.test.js

diff --git a/lib/config/rfc-status-hierarchy.mjs b/lib/config/rfc-status-hierarchy.mjs
new file mode 100644
index 0000000..596b2bb
--- /dev/null
+++ b/lib/config/rfc-status-hierarchy.mjs
@@ -0,0 +1,57 @@
+export const rfcStatusHierarchy = [
+  {
+    name: 'Internet Standard',
+    regex: /internet standard/i,
+    weight: 7
+  },
+  {
+    name: 'Draft Standard',
+    regex: /draft standard/i,
+    weight: 6
+  },
+  {
+    name: 'Proposed Standard',
+    regex: /proposed standard/i,
+    weight: 5
+  },
+  {
+    name: 'Standards Track',
+    regex: /standards track/i,
+    weight: 5
+  },
+  {
+    name: 'Best Current Practice',
+    regex: /best current practice|bcp/i,
+    weight: 4
+  },
+  {
+    name: 'Informational',
+    regex: /informational/i,
+    weight: 3
+  },
+  {
+    name: 'Experimental',
+    regex: /experimental/i,
+    weight: 2
+  },
+  {
+    name: 'Historic',
+    regex: /historic/i,
+    weight: 1
+  }
+]
+
+/**
+ * Returns the weight of the first rfcStatusHierarchy entry whose pattern matches the text.
+ * The patterns carry no `g` flag, so test() keeps no lastIndex state between calls.
+ * @param {string} statusText - The status text to check.
+ * @returns {number|null} - The weight of the status or null if not found.
+ */
+export function getStatusWeight (statusText) {
+  for (const status of rfcStatusHierarchy) {
+    if (status.regex.test(statusText)) {
+      return status.weight
+    }
+  }
+  return null
+}
diff --git a/lib/index.mjs b/lib/index.mjs
index 9bf450b..41272be 100644
--- a/lib/index.mjs
+++ b/lib/index.mjs
@@ -45,6 +45,9 @@ import {
   validateLineLength,
   validateCodeComments
 } from './modules/txt.mjs'
+import {
+  validateDownrefs
+} from './modules/downref.mjs'
 
 /**
  * Check Nits
@@ -134,6 +137,8 @@ export async function checkNits (raw, filename, {
   result.push(...await validateCategory(doc, { mode }))
   progressReport('Validating Version...')
   result.push(...await validateVersion(doc, { mode, offline }))
+  progressReport('Validating downrefs in text...')
+  result.push(...await validateDownrefs(doc, { mode }))
 
   // Run XML-only validations
   if (doc.type === 'xml') {
diff --git a/lib/modules/downref.mjs b/lib/modules/downref.mjs
new file mode 100644
index 0000000..fc65896
--- /dev/null
+++ b/lib/modules/downref.mjs
@@ -0,0 +1,121 @@
+import { ValidationWarning, ValidationError } from '../helpers/error.mjs'
+import { checkReferencesInDownrefs } from '../remote/downref.mjs'
+import { MODES } from '../config/modes.mjs'
+import { findAllDescendantsWith } from '../helpers/traversal.mjs'
+
+/**
+ * Validate document references for RFCs and Drafts downrefs.
+ *
+ * @param {Object} doc - Document to validate
+ * @param {Object} [opts] - Additional options
+ * @param {number} [opts.mode=0] - Validation mode to use
+ * @param {boolean} [opts.offline=false] - Skip fetching remote data if true
+ * @returns {Promise<Array>} - List of errors/warnings/comments
+ */
+export async function validateDownrefs (doc, { mode = MODES.NORMAL, offline = false } = {}) {
+  const result = []
+
+  if (offline || mode === MODES.SUBMISSION) { // offline: the registry lookup below needs the network
+    return result
+  }
+
+  switch (doc.type) {
+    case 'txt': {
+      const { referenceSectionRfc, referenceSectionDraftReferences } = doc.data.extractedElements
+      const rfcs = referenceSectionRfc.map((rfcNumber) => `RFC ${rfcNumber}`)
+      const drafts = normalizeDraftReferences(referenceSectionDraftReferences)
+      const downrefMatches = await checkReferencesInDownrefs([...rfcs, ...drafts])
+
+      downrefMatches.forEach((match) => {
+        switch (mode) {
+          case MODES.NORMAL: {
+            result.push(new ValidationError('DOWNREF_DRAFT', `Reference ${match} is listed in the Downref Registry.`, {
+              ref: `https://datatracker.ietf.org/doc/${match.replace(/\s+/g, '').toLowerCase()}` // "RFC 952" -> "rfc952"
+            }))
+            break
+          }
+          case MODES.FORGIVE_CHECKLIST: {
+            result.push(new ValidationWarning('DOWNREF_DRAFT', `Reference ${match} is listed in the Downref Registry.`, {
+              ref: `https://datatracker.ietf.org/doc/${match.replace(/\s+/g, '').toLowerCase()}`
+            }))
+            break
+          }
+        }
+      })
+
+      break
+    }
+    case 'xml': {
+      const referencesSections = doc.data.rfc.back.references.references
+      const definedReferences = findAllDescendantsWith(referencesSections, (value, key) => key === '_attr' && value.anchor)
+        .flatMap(match =>
+          Array.isArray(match.value.anchor)
+            ? match.value.anchor
+            : [match.value.anchor]
+        )
+        .filter(Boolean)
+      const normalizedReferences = normalizeXmlReferences(definedReferences)
+
+      const downrefMatches = await checkReferencesInDownrefs(normalizedReferences)
+
+      downrefMatches.forEach((match) => {
+        switch (mode) {
+          case MODES.NORMAL: {
+            result.push(new ValidationError('DOWNREF_DRAFT', `Reference ${match} is listed in the Downref Registry.`, {
+              ref: `https://datatracker.ietf.org/doc/${match.replace(/\s+/g, '').toLowerCase()}` // "RFC 952" -> "rfc952"
+            }))
+            break
+          }
+          case MODES.FORGIVE_CHECKLIST: {
+            result.push(new ValidationWarning('DOWNREF_DRAFT', `Reference ${match} is listed in the Downref Registry.`, {
+              ref: `https://datatracker.ietf.org/doc/${match.replace(/\s+/g, '').toLowerCase()}`
+            }))
+            break
+          }
+        }
+      })
+      break
+    }
+  }
+
+  return result
+}
+
+/**
+ * Normalize references by removing brackets, versions, and checking for drafts.
+ *
+ * @param {Array} references - Array of textual references.
+ * @returns {Array} - Array of normalized references containing "draft".
+ */
+function normalizeDraftReferences (references) {
+  return references
+    .map((ref) => {
+      let normalized = ref.replace(/^\[|\]$/g, '')
+      normalized = normalized.replace(/-\d{2}$/, '')
+
+      return normalized
+    })
+    .filter((ref) => ref.toLowerCase().includes('draft'))
+}
+
+/**
+ * Normalize XML references to drafts and RFCs.
+ *
+ * @param {Array} references - Array of reference strings.
+ * @returns {Array} - Normalized references including only drafts and RFCs.
+ */
+function normalizeXmlReferences (references) {
+  const normalizedReferences = []
+
+  references.forEach((ref) => {
+    if (/^RFC\d+$/i.test(ref)) {
+      const rfcNumber = ref.match(/\d+/)[0]
+      normalizedReferences.push(`RFC ${rfcNumber}`)
+    } else if (/draft/i.test(ref)) {
+      const draftName = ref.trim().replace(/^\[|\]$/g, '').replace(/-\d{2}$/, '')
+      normalizedReferences.push(draftName)
+    }
+  })
+
+  return normalizedReferences
+}
diff --git a/lib/parsers/txt.mjs b/lib/parsers/txt.mjs
index 8649844..a7df9de 100644
--- a/lib/parsers/txt.mjs
+++ b/lib/parsers/txt.mjs
@@ -2,6 +2,7 @@ import { ValidationError } from '../helpers/error.mjs'
 import { DateTime } from 'luxon'
 import { FQDN_RE } from '../modules/fqdn.mjs'
 import { IPV4_LOOSE_RE, IPV6_LOOSE_RE } from '../modules/ip.mjs'
+import { rfcStatusHierarchy } from '../config/rfc-status-hierarchy.mjs'
 
 // Regex patterns
 const LINE_VALUES_EXTRACT_RE = /^(?<left>.*)\s{2,}(?<right>.*)$/
@@ -280,7 +281,9 @@ export async function parse (rawText, filename) {
 
         // --> Intended status
         if (values.left.startsWith('Intended')) {
-          data.header.intendedStatus = values.left.split(':')?.[1]?.trim()
+          const rawIntendedStatus = values.left.split(':')?.[1]?.trim()
+          const cleanIntendedStatus = extractStatusName(rawIntendedStatus)
+          data.header.intendedStatus = cleanIntendedStatus || rawIntendedStatus
         }
 
         // --> Obsoletes
@@ -291,7 +294,9 @@ export async function parse (rawText, filename) {
 
         // --> Category
         if (values.left.startsWith('Category')) {
-          data.header.category = values.left.split(':')?.[1]?.trim()
+          const rawCategory = values.left.split(':')?.[1]?.trim()
+          const cleanCategory = extractStatusName(rawCategory)
+          data.header.category = cleanCategory || rawCategory
         }
 
         // --> ISSN
@@ -459,3 +464,28 @@ function hasBoilerplateMatch (text, ...regexGroups) {
   }
   return false
 }
+
+/**
+ * Extracts the clean status name from a given status text using predefined regular expressions.
+ *
+ * This function iterates through an array of predefined RFC statuses, each containing
+ * a name, regex pattern, and weight. It tests the given status text against each regex
+ * and returns the corresponding clean status name if a match is found.
+ *
+ * @param {string} statusText - The raw status text to be processed (e.g., "Standards Track Juniper Networks").
+ * @returns {string|null} - The clean name of the status (e.g., "Standards Track") if matched,
+ *                          or `null` if no matching status is found.
+ *
+ * Example:
+ *   const rawStatus = "Standards Track Juniper Networks";
+ *   const cleanStatus = extractStatusName(rawStatus);
+ *   console.log(cleanStatus); // Output: "Standards Track"
+ */
+function extractStatusName (statusText) {
+  for (const status of rfcStatusHierarchy) {
+    if (String(statusText).search(status.regex) !== -1) { // search() ignores lastIndex, unlike test() on a shared regex
+      return status.name
+    }
+  }
+  return null
+}
diff --git a/lib/remote/downref.mjs b/lib/remote/downref.mjs
new file mode 100644
index 0000000..a5f81d5
--- /dev/null
+++ b/lib/remote/downref.mjs
@@ -0,0 +1,59 @@
+const DOWNREF_REGISTRY_URL = 'https://datatracker.ietf.org/doc/downref/'
+let cachedDownrefRegistry = null
+
+/**
+ * Fetch and parse the Downref Registry HTML to extract references.
+ * Caches the result to avoid redundant network requests.
+ * @returns {Promise<Set<string>>} - A set of references from the Downref Registry.
+ */
+async function fetchDownrefRegistry () {
+  if (cachedDownrefRegistry) {
+    return cachedDownrefRegistry
+  }
+
+  try {
+    const response = await fetch(DOWNREF_REGISTRY_URL, { credentials: 'omit' })
+    if (!response.ok) throw new Error(`HTTP ${response.status}`) // never parse or cache an error page
+    const html = await response.text()
+    const rfcRegex = /([^<]+)<\/a>/g // NOTE(review): identical to referenceRegex as seen here - confirm both patterns against upstream
+    const referenceRegex = /([^<]+)<\/a>/g
+    const references = new Set()
+    let match
+
+    while ((match = rfcRegex.exec(html)) !== null) {
+      references.add(`RFC ${match[1].trim()}`)
+    }
+    while ((match = referenceRegex.exec(html)) !== null) {
+      references.add(match[1].trim())
+    }
+
+    cachedDownrefRegistry = references
+    return references
+  } catch (err) {
+    throw new Error(`Failed to fetch Downref Registry: ${err.message}`, { cause: err })
+  }
+}
+
+/**
+ * Validate references against the Downref Registry.
+ * @param {string[]} references - List of references to validate.
+ * @returns {Promise<string[]>} - A list of references found in the Downref Registry.
+ */
+export async function checkReferencesInDownrefs (references) {
+  const downrefRegistry = await fetchDownrefRegistry()
+
+  const foundDownrefs = []
+
+  references.forEach(ref => {
+    const refRegex = new RegExp(`\\b${ref.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'i')
+
+    for (const downref of downrefRegistry) {
+      if (refRegex.test(downref)) {
+        foundDownrefs.push(ref)
+        break
+      }
+    }
+  })
+
+  return foundDownrefs
+}
diff --git a/tests/downref.test.js b/tests/downref.test.js
new file mode 100644
index 0000000..9da8306
--- /dev/null
+++ b/tests/downref.test.js
@@ -0,0 +1,88 @@
+import { describe, expect, test } from '@jest/globals'
+import { MODES } from '../lib/config/modes.mjs'
+import { toContainError, ValidationWarning, ValidationError } from '../lib/helpers/error.mjs'
+import { baseXMLDoc, baseTXTDoc } from './fixtures/base-doc.mjs'
+import { cloneDeep, set } from 'lodash-es'
+import { validateDownrefs } from '../lib/modules/downref.mjs'
+
+expect.extend({
+  toContainError
+})
+
+describe('validateDownrefs', () => {
+  describe('TXT Document Type', () => {
+    test('valid references with no downrefs', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionRfc', ['4086', '8141'])
+      set(doc, 'data.extractedElements.referenceSectionDraftReferences', [
+        'draft-ietf-quic-http-34'
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toHaveLength(0)
+    })
+
+    test('invalid downref for a draft', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionDraftReferences', [
+        'draft-ietf-emu-aka-pfs-34'
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError)
+    })
+
+    test('invalid downref for an RFC', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionRfc', ['952'])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError)
+    })
+
+    test('FORGIVE_CHECKLIST mode returns warnings', async () => {
+      const doc = cloneDeep(baseTXTDoc)
+      set(doc, 'data.extractedElements.referenceSectionRfc', ['1094'])
+      set(doc, 'data.extractedElements.referenceSectionDraftReferences', [
+        'draft-ietf-quic-http-34'
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
+    })
+  })
+
+  describe('XML Document Type', () => {
+    test('valid XML references without downrefs', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        { reference: [{ _attr: { anchor: 'RFC4086' } }] },
+        { reference: [{ _attr: { anchor: 'RFC8141' } }] }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toHaveLength(0)
+    })
+
+    test('invalid XML downref for a draft', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        { reference: [{ _attr: { anchor: 'draft-ietf-emu-aka-pfs-34' } }] }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError)
+    })
+
+    test('FORGIVE_CHECKLIST mode returns warnings for XML', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        { reference: [{ _attr: { anchor: 'RFC4187' } }] },
+        { reference: [{ _attr: { anchor: 'draft-ietf-quic-http-34' } }] }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
+    })
+  })
+})
diff --git a/tests/fixtures/base-doc.mjs b/tests/fixtures/base-doc.mjs
index 81641ef..95210fe 100644
--- a/tests/fixtures/base-doc.mjs
+++ b/tests/fixtures/base-doc.mjs
@@ -10,8 +10,33 @@ export const baseTXTDoc = {
       source: null,
       expires: null
     },
+    content: {
+      abstract: null,
+      introduction: null,
+      securityConsiderations: null,
+      authorAddress: null,
+      references: null,
+      ianaConsiderations: null
+    },
     title: null,
-    slug: null
+    slug: null,
+    extractedElements: {
+      fqdnDomains: [],
+      ipv4: [],
+      ipv6: [],
+      keywords2119: [],
+      boilerplate2119Keywords: [],
+      obsoletesRfc: [],
+      updatesRfc: [],
+      nonReferenceSectionRfc: [],
+      referenceSectionRfc: [],
+      nonReferenceSectionDraftReferences: [],
+      referenceSectionDraftReferences: []
+    },
+    possibleIssues: {
+      inlineCode: [],
+      misspeled2119Keywords: []
+    }
   }
 }
 

From 2b6987357786d3f39205a99c3fc3a1eefa42cc74 Mon Sep 17 00:00:00 2001
From: Dmutre <104783173+Dmutre@users.noreply.github.com>
Date: Thu, 20 Feb 2025 14:19:05 +0100
Subject: [PATCH 2/2] feat: covered new parser features with tests, added new tests for function

---
 tests/downref.test.js | 47 +++++++++++++++++++++++++++++++++++
 tests/parser.test.js  | 58 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/tests/downref.test.js b/tests/downref.test.js
index 9da8306..156bb6c 100644
--- a/tests/downref.test.js
+++ b/tests/downref.test.js
@@ -84,5 +84,52 @@ describe('validateDownrefs', () => {
       const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
       expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
     })
+
+    test('valid XML references without downrefs (multiple references in a section)', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        {
+          reference: [
+            { _attr: { anchor: 'RFC2119' } },
+            { _attr: { anchor: 'RFC8174' } },
+            { _attr: { anchor: 'RFC1234' } }
+          ]
+        }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toHaveLength(0)
+    })
+
+    test('invalid XML downref when multiple references exist in a section', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        {
+          reference: [
+            { _attr: { anchor: 'RFC2119' } },
+            { _attr: { anchor: 'RFC8174' } },
+            { _attr: { anchor: 'draft-ietf-emu-aka-pfs-34' } } // This is a downref
+          ]
+        }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.NORMAL })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationError)
+    })
+
+    test('FORGIVE_CHECKLIST mode returns warnings when multiple references exist', async () => {
+      const doc = cloneDeep(baseXMLDoc)
+      set(doc, 'data.rfc.back.references.references', [
+        {
+          reference: [
+            { _attr: { anchor: 'RFC4187' } }, // This is the downref; the draft below is expected to be clean
+            { _attr: { anchor: 'draft-ietf-quic-http-34' } }
+          ]
+        }
+      ])
+
+      const result = await validateDownrefs(doc, { mode: MODES.FORGIVE_CHECKLIST })
+      expect(result).toContainError('DOWNREF_DRAFT', ValidationWarning)
+    })
   })
 })
diff --git a/tests/parser.test.js b/tests/parser.test.js
index 06acf8b..ebea046 100644
--- a/tests/parser.test.js
+++ b/tests/parser.test.js
@@ -444,3 +444,61 @@ describe('Parsing similar to RFC2119 boilerplate text', () => {
     expect(result.data.boilerplate.similar2119boilerplate).toEqual(true)
   })
 })
+
+describe('Parsing Category and Intended Status from document header', () => {
+  test('Parses Category correctly', async () => {
+    const txt = `
+      ${metaTXTBlock.replace('Intended status: Standards Track', 'Category: Standards Track')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.category).toBe('Standards Track')
+  })
+
+  test('Parses Intended Status correctly', async () => {
+    const txt = `
+      ${metaTXTBlock.replace('Intended status: Standards Track', 'Intended status: Experimental')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.intendedStatus).toBe('Experimental')
+  })
+
+  test('Handles missing status or category', async () => {
+    const txt = `
+      ${metaTXTBlock.replace('Intended status: Standards Track', '')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.category).toBeUndefined()
+  })
+
+  test('Handles Unknown Intended status', async () => {
+    const txt = `
+      ${metaTXTBlock.replace('Standards Track', 'Unknown')}
+      ${tableOfContentsTXTBlock}
+      ${abstractWithReferencesTXTBlock}
+      ${introductionTXTBlock}
+      ${securityConsiderationsTXTBlock}
+      ${textWithRFC2119KeywordsTXTBlock}
+    `
+
+    const result = await parse(txt, 'test-document.txt')
+    expect(result.data.header.intendedStatus).toBe('Unknown')
+  })
+})