Skip to content

Commit

Permalink
small refactors
Browse files Browse the repository at this point in the history
  • Loading branch information
masylum committed Jul 1, 2024
1 parent 3865292 commit 920d6b8
Show file tree
Hide file tree
Showing 12 changed files with 1,059 additions and 146 deletions.
30 changes: 30 additions & 0 deletions src/byline.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import type { Cheerio, Element } from 'cheerio'
import { getInnerText } from './textUtils.js'

// Case-insensitive pattern matching any of the substrings "byline", "author",
// "dateline", "writtenby" or "p-author". extractByline tests its `matchString`
// argument against it (presumably a node's id and class, per the name — confirm
// against callers). No `g` flag, so `.test()` carries no state between calls.
const MATCH_ID_AND_CLASS = /byline|author|dateline|writtenby|p-author/i

/**
 * Return the byline text carried by a node, or `undefined` when the node does
 * not look like a byline.
 *
 * The node's raw `text()` must first pass `isValidByline`. After that, any one
 * of the following signals is sufficient (evaluated in this order):
 *  - the `rel` attribute equals `author`
 *  - the `itemprop` attribute contains `author`
 *  - `matchString` matches `MATCH_ID_AND_CLASS`
 *
 * @param $node - Cheerio selection to inspect.
 * @param matchString - String tested against the byline pattern (presumably
 *   the node's class and id — confirm against callers).
 * @returns The result of `getInnerText($node)` on a match, otherwise `undefined`.
 */
export function extractByline($node: Cheerio<Element>, matchString: string) {
  const innerText = getInnerText($node)
  // Validation runs on the raw text(), not on the getInnerText() result.
  if (!isValidByline($node.text())) return undefined

  const looksLikeByline =
    $node.attr('rel') === 'author' ||
    $node.attr('itemprop')?.includes('author') ||
    MATCH_ID_AND_CLASS.test(matchString)

  return looksLikeByline ? innerText : undefined
}

/**
 * Decide whether a string is plausible as a byline.
 *
 * Surrounding whitespace is ignored; what remains must be non-empty and
 * shorter than 100 characters.
 *
 * @param byline - Candidate byline text.
 * @returns `true` when the trimmed text has between 1 and 99 characters.
 */
export function isValidByline(byline: string) {
  const trimmed = byline.trim()
  if (trimmed.length === 0) return false
  return trimmed.length < 100
}
Loading

0 comments on commit 920d6b8

Please sign in to comment.