Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: a reference appears to be a downref (noting if reference appears in the downref registry) #80

Open
wants to merge 2 commits into
base: v3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions lib/config/rfc-status-hierarchy.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
 * RFC status levels, listed from the highest weight to the lowest.
 *
 * Each entry provides:
 * - `name`: the canonical status name,
 * - `regex`: a case-insensitive pattern that recognises the status inside free text,
 * - `weight`: a numeric rank (higher means more mature). "Proposed Standard" and
 *   "Standards Track" share the same weight.
 *
 * The patterns intentionally do NOT carry the `g` flag: a global regex remembers
 * `lastIndex` between `.test()` / `.exec()` calls, which makes repeated lookups on
 * these shared objects alternate between matching and not matching.
 */
export const rfcStatusHierarchy = [
  {
    name: 'Internet Standard',
    regex: /internet standard/i,
    weight: 7
  },
  {
    name: 'Draft Standard',
    regex: /draft standard/i,
    weight: 6
  },
  {
    name: 'Proposed Standard',
    regex: /proposed standard/i,
    weight: 5
  },
  {
    name: 'Standards Track',
    regex: /standards track/i,
    weight: 5
  },
  {
    name: 'Best Current Practice',
    regex: /best current practice|bcp/i,
    weight: 4
  },
  {
    name: 'Informational',
    regex: /informational/i,
    weight: 3
  },
  {
    name: 'Experimental',
    regex: /experimental/i,
    weight: 2
  },
  {
    name: 'Historic',
    regex: /historic/i,
    weight: 1
  }
]

/**
 * Returns the weight of the first entry of `rfcStatusHierarchy` whose pattern
 * matches the given status text. The hierarchy is walked in declaration order,
 * so the result is the highest weight only as long as that array stays ordered
 * from highest to lowest weight.
 *
 * @param {string} statusText - The status text to check.
 * @returns {number|null} - The weight of the matching status or null if none matches.
 */
export function getStatusWeight (statusText) {
  for (const status of rfcStatusHierarchy) {
    // The shared patterns may be global (`g`); such a regex keeps `lastIndex`
    // from the previous call, so start every test from the beginning.
    status.regex.lastIndex = 0
    if (status.regex.test(statusText)) {
      // Leave the shared regex in a clean state for the next consumer.
      status.regex.lastIndex = 0
      return status.weight
    }
  }
  return null
}
Comment on lines +50 to +57
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This, and the notion of the weights in the hierarchy, are not used?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh - I see - you've used it in later PRs.
I'll make my comments on the last one in the stack related to downrefs instead.

5 changes: 5 additions & 0 deletions lib/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ import {
validateLineLength,
validateCodeComments
} from './modules/txt.mjs'
import {
validateDownrefs
} from './modules/downref.mjs'

/**
* Check Nits
Expand Down Expand Up @@ -134,6 +137,8 @@ export async function checkNits (raw, filename, {
result.push(...await validateCategory(doc, { mode }))
progressReport('Validating Version...')
result.push(...await validateVersion(doc, { mode, offline }))
progressReport('Validating downrefs in text...')
result.push(...await validateDownrefs(doc, { mode }))

// Run XML-only validations
if (doc.type === 'xml') {
Expand Down
121 changes: 121 additions & 0 deletions lib/modules/downref.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import { ValidationWarning, ValidationError } from '../helpers/error.mjs'
import { checkReferencesInDownrefs } from '../remote/downref.mjs'
import { MODES } from '../config/modes.mjs'
import { findAllDescendantsWith } from '../helpers/traversal.mjs'

/**
 * Validate document references (RFCs and drafts) against the Downref Registry.
 *
 * Nothing is reported in submission mode. When `offline` is true the registry
 * lookup is skipped and an empty list is returned.
 *
 * @param {Object} doc - Document to validate
 * @param {Object} [opts] - Additional options
 * @param {number} [opts.mode=0] - Validation mode to use
 * @param {boolean} [opts.offline=false] - Skip fetching remote data if true
 * @returns {Promise<Array>} - List of errors/warnings/comments
 */
export async function validateDownrefs (doc, { mode = MODES.NORMAL, offline = false } = {}) {
  if (mode === MODES.SUBMISSION || offline) {
    return []
  }

  let references

  switch (doc.type) {
    case 'txt': {
      const {
        referenceSectionRfc = [],
        referenceSectionDraftReferences = []
      } = doc.data.extractedElements
      references = [
        ...referenceSectionRfc.map((rfcNumber) => `RFC ${rfcNumber}`),
        ...normalizeDraftReferences(referenceSectionDraftReferences)
      ]
      break
    }
    case 'xml': {
      // A document without a <back>/<references> section has nothing to check.
      const referencesSections = doc.data?.rfc?.back?.references?.references
      if (!referencesSections) {
        return []
      }
      const definedReferences = findAllDescendantsWith(referencesSections, (value, key) => key === '_attr' && value.anchor)
        .flatMap((found) =>
          Array.isArray(found.value.anchor)
            ? found.value.anchor
            : [found.value.anchor]
        )
        .filter(Boolean)
      references = normalizeXmlReferences(definedReferences)
      break
    }
    default:
      return []
  }

  const downrefMatches = await checkReferencesInDownrefs(references)

  return downrefMatches.flatMap((match) => buildDownrefNit(match, mode))
}

/**
 * Build the nit reported for a single Downref Registry match.
 *
 * RFC matches (e.g. "RFC 2119") are labelled as RFCs and linked through their
 * space-free datatracker name ("rfc2119"); anything else is treated as a draft name.
 *
 * @param {string} match - Reference found in the Downref Registry.
 * @param {number} mode - Validation mode in use.
 * @returns {Array} - Zero or one nit, depending on the mode.
 */
function buildDownrefNit (match, mode) {
  const rfcMatch = /^RFC\s*(\d+)$/i.exec(match)
  const label = rfcMatch ? `RFC ${rfcMatch[1]}` : `Draft ${match}`
  const docName = rfcMatch ? `rfc${rfcMatch[1]}` : match
  const message = `${label} is listed in the Downref Registry.`
  const details = { ref: `https://datatracker.ietf.org/doc/${docName}` }

  switch (mode) {
    case MODES.NORMAL:
      return [new ValidationError('DOWNREF_DRAFT', message, details)]
    case MODES.FORGIVE_CHECKLIST:
      return [new ValidationWarning('DOWNREF_DRAFT', message, details)]
    default:
      return []
  }
}

/**
 * Reduce textual citations to bare draft names.
 *
 * Every entry loses a leading "[" and a trailing "]", then a trailing two-digit
 * version suffix ("-NN"). Entries whose cleaned form does not contain "draft"
 * (case-insensitive) are discarded.
 *
 * @param {Array} references - Array of textual references.
 * @returns {Array} - Cleaned references that contain "draft", in input order.
 */
function normalizeDraftReferences (references) {
  const drafts = []

  for (const citation of references) {
    const withoutBrackets = citation.replace(/^\[|\]$/g, '')
    const withoutVersion = withoutBrackets.replace(/-\d{2}$/, '')

    if (withoutVersion.toLowerCase().includes('draft')) {
      drafts.push(withoutVersion)
    }
  }

  return drafts
}

/**
 * Normalize XML reference anchors to drafts and RFCs.
 *
 * - Anchors of the form "RFC<digits>" (case-insensitive) become "RFC <number>".
 *   Zero padding is removed ("RFC0793" -> "RFC 793") so the result lines up with
 *   unpadded RFC numbers.
 * - Anchors containing "draft" are trimmed, stripped of surrounding brackets and
 *   of a trailing two-digit version suffix ("-NN").
 * - Every other anchor is dropped.
 *
 * @param {Array} references - Array of reference strings.
 * @returns {Array} - Normalized references including only drafts and RFCs.
 */
function normalizeXmlReferences (references) {
  return references.flatMap((ref) => {
    const rfcMatch = /^RFC(\d+)$/i.exec(ref)
    if (rfcMatch) {
      // Keep a lone "0" intact while dropping padding zeros.
      const rfcNumber = rfcMatch[1].replace(/^0+(?=\d)/, '')
      return [`RFC ${rfcNumber}`]
    }

    if (/draft/i.test(ref)) {
      return [ref.trim().replace(/^\[|\]$/g, '').replace(/-\d{2}$/, '')]
    }

    return []
  })
}
34 changes: 32 additions & 2 deletions lib/parsers/txt.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { ValidationError } from '../helpers/error.mjs'
import { DateTime } from 'luxon'
import { FQDN_RE } from '../modules/fqdn.mjs'
import { IPV4_LOOSE_RE, IPV6_LOOSE_RE } from '../modules/ip.mjs'
import { rfcStatusHierarchy } from '../config/rfc-status-hierarchy.mjs'

// Regex patterns
const LINE_VALUES_EXTRACT_RE = /^(?<left>.*)\s{2,}(?<right>.*)$/
Expand Down Expand Up @@ -280,7 +281,9 @@ export async function parse (rawText, filename) {

// --> Intended status
if (values.left.startsWith('Intended')) {
data.header.intendedStatus = values.left.split(':')?.[1]?.trim()
const rawIntendedStatus = values.left.split(':')?.[1]?.trim()
const cleanIntendedStatus = extractStatusName(rawIntendedStatus)
data.header.intendedStatus = cleanIntendedStatus || rawIntendedStatus
}

// --> Obsoletes
Expand All @@ -291,7 +294,9 @@ export async function parse (rawText, filename) {

// --> Category
if (values.left.startsWith('Category')) {
data.header.category = values.left.split(':')?.[1]?.trim()
const rawCategory = values.left.split(':')?.[1]?.trim()
const cleanCategory = extractStatusName(rawCategory)
data.header.category = cleanCategory || rawCategory
}

// --> ISSN
Expand Down Expand Up @@ -459,3 +464,28 @@ function hasBoilerplateMatch (text, ...regexGroups) {
}
return false
}

/**
 * Extracts the clean status name from a given status text.
 *
 * The entries of `rfcStatusHierarchy` (each with a name, regex pattern and weight)
 * are tried in order; the name of the first entry whose pattern matches the text
 * is returned.
 *
 * @param {string} statusText - The raw status text to be processed (e.g., "Standards Track Juniper Networks").
 * @returns {string|null} - The clean name of the status (e.g., "Standards Track") if matched,
 * or `null` if no matching status is found.
 *
 * Example:
 * const rawStatus = "Standards Track Juniper Networks";
 * const cleanStatus = extractStatusName(rawStatus);
 * console.log(cleanStatus); // Output: "Standards Track"
 */
function extractStatusName (statusText) {
  for (const status of rfcStatusHierarchy) {
    // The shared patterns may be global (`g`); such a regex keeps `lastIndex`
    // from the previous call, so start every test from the beginning.
    status.regex.lastIndex = 0
    if (status.regex.test(statusText)) {
      // Leave the shared regex in a clean state for the next consumer.
      status.regex.lastIndex = 0
      return status.name
    }
  }
  return null
}
59 changes: 59 additions & 0 deletions lib/remote/downref.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
const DOWNREF_REGISTRY_URL = 'https://datatracker.ietf.org/doc/downref/'
// Module-level cache: filled by the first successful fetch and reused afterwards.
let cachedDownrefRegistry = null

/**
 * Fetch and parse the Downref Registry HTML to extract references.
 * Caches the result to avoid redundant network requests. A failed request
 * (network error or non-2xx response) is never cached, so a later call retries.
 *
 * @returns {Promise<Set<string>>} - A set of references from the Downref Registry.
 * @throws {Error} If the registry cannot be fetched; the underlying error is
 * available as `cause`.
 */
async function fetchDownrefRegistry () {
  if (cachedDownrefRegistry) {
    return cachedDownrefRegistry
  }

  let html
  try {
    const response = await fetch(DOWNREF_REGISTRY_URL, { credentials: 'omit' })
    // Without this check an error page would be parsed into an empty registry
    // and cached, silently disabling the downref check.
    if (!response.ok) {
      throw new Error(`unexpected HTTP status ${response.status}`)
    }
    html = await response.text()
  } catch (err) {
    throw new Error(`Failed to fetch Downref Registry: ${err.message}`, { cause: err })
  }

  const references = new Set()

  // Links to RFC documents are recorded by number, as "RFC <number>".
  for (const [, rfcNumber] of html.matchAll(/<a href="\/doc\/rfc(\d+)\/">([^<]+)<\/a>/g)) {
    references.add(`RFC ${rfcNumber.trim()}`)
  }

  // Links to drafts are recorded by their link text.
  // NOTE(review): the bare `rfc` alternative only matches the literal path
  // "/doc/rfc/", never "/doc/rfc1234/" - confirm whether `rfc\d+` was intended.
  for (const [, linkText] of html.matchAll(/<a href="\/doc\/(?:rfc|draft-[^/]+)\/">([^<]+)<\/a>/g)) {
    references.add(linkText.trim())
  }

  cachedDownrefRegistry = references
  return references
}

/**
 * Validate references against the Downref Registry.
 *
 * A reference is reported when a registry entry contains it (case-insensitive)
 * as a complete document name, optionally followed by a two-digit version
 * suffix ("-NN"). Hyphens count as part of the name, so "draft-ietf-foo" does
 * not match the entry "draft-ietf-foo-bar". Empty or non-string references are
 * ignored, and the registry is not fetched when there is nothing to check.
 *
 * @param {string[]} references - List of references to validate.
 * @returns {Promise<string[]>} - A list of references found in the Downref Registry.
 */
export async function checkReferencesInDownrefs (references) {
  if (!Array.isArray(references) || references.length === 0) {
    return []
  }

  const downrefRegistry = [...await fetchDownrefRegistry()]

  return references.filter((ref) => {
    if (typeof ref !== 'string' || ref.trim() === '') {
      return false
    }

    const escapedRef = ref.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    // `\b` would treat "-" as a boundary and accept longer draft names that
    // merely start with `ref`; require a non-name character on both sides instead.
    const refRegex = new RegExp(`(?<![\\w-])${escapedRef}(?:-\\d{2})?(?![\\w-])`, 'i')

    return downrefRegistry.some((downref) => refRegex.test(downref))
  })
}
Loading
Loading