Skip to content

Commit

Permalink
fix: ensure favicon detected in markup has the expected content-type
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Jun 17, 2024
1 parent dba98fa commit dd0c2ac
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 32 deletions.
77 changes: 45 additions & 32 deletions packages/metascraper-logo-favicon/src/index.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
'use strict'

const { logo, parseUrl, normalizeUrl, toRule } = require('@metascraper/helpers')
const { isEmpty, first, toNumber, chain, orderBy } = require('lodash')
const reachableUrl = require('reachable-url')
const memoize = require('@keyvhq/memoize')

const {
logo,
parseUrl,
normalizeUrl,
toRule,
extension
} = require('@metascraper/helpers')

// Pairs of `[file extension, content-type values accepted for that extension]`.
// Consulted when validating favicon URLs found in markup (looked up by the
// URL's extension) and mapped over to build the `/favicon.<ext>` providers.
const ALLOWED_EXTENSION_CONTENT_TYPES = [
['ico', ['image/vnd.microsoft.icon', 'image/x-icon']],
['png', ['image/png']]
]

// Matches a `<digits>x<digits>` token such as `120x116`.
const SIZE_REGEX_BY_X = /\d+x\d+/

// Rule built from the `logo` helper via `toRule`.
// NOTE(review): both come from @metascraper/helpers; their exact semantics are
// not visible here.
const toLogo = toRule(logo)

/**
 * Check whether a response `content-type` header matches one of the allowed
 * MIME types.
 *
 * Matching is by substring, so trailing parameters such as `; charset=utf-8`
 * are tolerated. It is also case-insensitive, because media type names are not
 * case-sensitive (`Image/PNG` is the same type as `image/png`).
 *
 * @param {string|undefined} contentType - Raw `content-type` header value.
 * @param {string[]} contentTypes - Allowed MIME types.
 * @returns {boolean} `true` only when a header is present and matches an
 *   allowed type; `false` otherwise (including a missing or empty header).
 */
const isValidContenType = (contentType, contentTypes) => {
  // Always answer with a real boolean so callers never leak `undefined`/`''`.
  if (!contentType) return false
  const normalized = String(contentType).toLowerCase()
  return contentTypes.some(ct => normalized.includes(ct.toLowerCase()))
}

const toSize = (input, url) => {
if (isEmpty(input)) return

Expand Down Expand Up @@ -85,10 +101,21 @@ const sizeSelectors = [
// Resolve to the URL of the first candidate in `domNodeSizes` that is
// reachable and, when its file extension has an entry in
// ALLOWED_EXTENSION_CONTENT_TYPES, is served with one of the content-types
// allowed for that extension. Resolves to `undefined` when nothing qualifies.
// Candidates are probed one at a time, in iteration order.
const firstReachable = async (domNodeSizes, gotOpts) => {
for (const { url } of domNodeSizes) {
const response = await reachableUrl(url, gotOpts)
// NOTE(review): the next two lines look like the removed side of this diff
// (the old "return as soon as reachable" behaviour); the lines after them
// appear to be their replacement — confirm against the repository.
if (reachableUrl.isReachable(response)) {
return response.url
if (!reachableUrl.isReachable(response)) continue // Skip to the next iteration if not reachable
const contentType = response.headers['content-type']

// Look up the `[ext, contentTypes]` entry for this URL's extension; the
// result is `undefined` when the extension is not in the list.
const urlExtension = extension(url)
const contentTypes = ALLOWED_EXTENSION_CONTENT_TYPES.find(
([ext]) => ext === urlExtension
)

// Only listed extensions are validated; a listed extension served with an
// unexpected content-type is skipped.
if (contentTypes && !isValidContenType(contentType, contentTypes[1])) {
continue
}

return response.url
}
return undefined
}

const pickBiggerSize = async (sizes, { gotOpts } = {}) => {
Expand All @@ -109,22 +136,16 @@ const pickBiggerSize = async (sizes, { gotOpts } = {}) => {
// Order a collection by its `size.priority` field, highest priority first.
pickBiggerSize.sortBySize = collection => {
  const sortKeys = ['size.priority']
  const sortOrders = ['desc']
  return orderBy(collection, sortKeys, sortOrders)
}

// NOTE(review): this object-parameter variant appears to be the removed side
// of the diff; an array-parameter `createFavicon` follows it — confirm against
// the repository.
//
// Builds a provider for `/favicon.<ext>`: it resolves to the response URL only
// when the `content-type` header contains one of `contentTypes` AND the
// response is reachable; otherwise it resolves to `undefined`.
const createFavicon =
({ ext, contentTypes }) =>
async (url, { gotOpts } = {}) => {
const faviconUrl = logo(`/favicon.${ext}`, { url })
if (!faviconUrl) return undefined

const response = await reachableUrl(faviconUrl, gotOpts)
const contentType = response.headers['content-type']

// Substring match, so header parameters after the MIME type are tolerated.
const isValidContenType =
contentType && contentTypes.some(ct => contentType.includes(ct))

return isValidContenType && reachableUrl.isReachable(response)
? response.url
: undefined
}
/**
 * Build a provider that probes the conventional `/favicon.<ext>` location.
 *
 * @param {[string, string[]]} entry - An `[ext, contentTypes]` pair: the file
 *   extension to probe and the `content-type` values accepted for it.
 * @returns {(url: string, opts?: { gotOpts?: object }) => Promise<string|undefined>}
 *   Async provider resolving to the favicon URL, or `undefined` when no
 *   favicon URL could be built, it is unreachable, or it is served with an
 *   unexpected content-type.
 */
const createFavicon = ([ext, contentTypes]) => {
  return async (url, { gotOpts } = {}) => {
    // `logo` comes from @metascraper/helpers; presumably it resolves the path
    // against `url` and yields a falsy value when it cannot — TODO confirm.
    const faviconUrl = logo(`/favicon.${ext}`, { url })
    if (!faviconUrl) return undefined
    const response = await reachableUrl(faviconUrl, gotOpts)
    if (!reachableUrl.isReachable(response)) return undefined
    const contentType = response.headers['content-type']
    // Resolve to `undefined` on a content-type mismatch, like every other miss
    // path, instead of leaking the falsy result of the validation.
    return isValidContenType(contentType, contentTypes)
      ? response.url
      : undefined
  }
}

const google = async (url, { gotOpts } = {}) => {
const response = await reachableUrl(google.url(url), gotOpts)
Expand All @@ -136,19 +157,11 @@ google.url = (url, size = 128) =>

const createGetLogo = ({ withGoogle, withFavicon, gotOpts, keyvOpts }) => {
const getLogo = async url => {
const providers = [
withFavicon &&
createFavicon({
ext: 'png',
contentTypes: ['image/png']
}),
withFavicon &&
createFavicon({
ext: 'ico',
contentTypes: ['image/vnd.microsoft.icon', 'image/x-icon']
}),
withGoogle && google
].filter(Boolean)
const providers = ALLOWED_EXTENSION_CONTENT_TYPES.map(
ext => withFavicon && createFavicon(ext)
)
.concat(withGoogle && google)
.filter(Boolean)

for (const provider of providers) {
const logoUrl = await provider(url, { gotOpts })
Expand Down
67 changes: 67 additions & 0 deletions packages/metascraper-logo-favicon/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ const { readFile } = require('fs/promises')
const { resolve } = require('path')
const test = require('ava')

const { runServer } = require('./helpers')

// Build a metascraper instance whose only rule bundle is this package,
// configured with `opts`.
const createMetascraper = opts => {
  const metascraper = require('metascraper')
  const logoFaviconRules = require('..')(opts)
  return metascraper([logoFaviconRules])
}

const createHtml = meta =>
Expand Down Expand Up @@ -251,3 +253,68 @@ test('avoid wrong data URI', async t => {
const metadata = await metascraper({ url, html })
t.is(metadata.logo, 'https://www.adobe.com/favicon.ico')
})

// A `.ico` favicon declared in markup but served as SVG must be rejected.
test("favicon.ico detected in HTML markup can't be random content-type", async t => {
  const respondWithSvg = async ({ res }) => {
    res.setHeader('content-type', 'image/svg+xml')
    res.end('<svg></svg>')
  }
  // `runServer` is a helper from ./helpers (not shown); the URL it resolves to
  // is used as the page URL.
  const url = await runServer(t, respondWithSvg)
  const html =
    '<link rel="icon" href="/favicon.ico" type="image/x-icon" sizes="120x116">'

  const { logo } = await createMetascraper()({ url, html })
  t.is(logo, null)
})

// A `.ico` favicon served as `image/x-icon` is accepted.
test('favicon.ico detected in HTML markup can be `image/x-icon` content-type', async t => {
  const respondWithIcon = async ({ res }) => {
    res.setHeader('content-type', 'image/x-icon')
    res.end()
  }
  // `runServer` is a helper from ./helpers (not shown); the URL it resolves to
  // is used as the page URL.
  const url = await runServer(t, respondWithIcon)
  const html =
    '<link rel="icon" href="/favicon.ico" type="image/x-icon" sizes="120x116">'

  const { logo } = await createMetascraper()({ url, html })
  t.is(logo, `${url}favicon.ico`)
})

// A `.ico` favicon served as `image/vnd.microsoft.icon` is accepted.
test('favicon.ico detected in HTML markup can be `image/vnd.microsoft.icon` content-type', async t => {
  const respondWithIcon = async ({ res }) => {
    res.setHeader('content-type', 'image/vnd.microsoft.icon')
    res.end()
  }
  // `runServer` is a helper from ./helpers (not shown); the URL it resolves to
  // is used as the page URL.
  const url = await runServer(t, respondWithIcon)
  const html =
    '<link rel="icon" href="/favicon.ico" type="image/x-icon" sizes="120x116">'

  const { logo } = await createMetascraper()({ url, html })
  t.is(logo, `${url}favicon.ico`)
})

// A `.png` favicon declared in markup but served as SVG must be rejected.
// Registered with plain `test` (not `test.only`) so the rest of the suite in
// this file still runs.
test("favicon.png detected in HTML markup can't be random content-type", async t => {
  // `runServer` is a helper from ./helpers (not shown); the URL it resolves to
  // is used as the page URL.
  const url = await runServer(t, async ({ res }) => {
    res.setHeader('content-type', 'image/svg+xml')
    res.end('<svg></svg>')
  })

  const html =
    '<link rel="icon" href="/favicon.png" type="image/x-icon" sizes="120x116">'
  const metascraper = createMetascraper()
  const metadata = await metascraper({ url, html })
  t.is(metadata.logo, null)
})

// A `.png` favicon served as `image/png` is accepted.
test('favicon.png detected in HTML markup can be `image/png` content-type', async t => {
  const respondWithPng = async ({ res }) => {
    res.setHeader('content-type', 'image/png')
    res.end()
  }
  // `runServer` is a helper from ./helpers (not shown); the URL it resolves to
  // is used as the page URL.
  const url = await runServer(t, respondWithPng)
  const html =
    '<link rel="icon" href="/favicon.png" type="image/x-icon" sizes="120x116">'

  const { logo } = await createMetascraper()({ url, html })
  t.is(logo, `${url}favicon.png`)
})

0 comments on commit dd0c2ac

Please sign in to comment.