From 175624266243cb52c1331cb563f90c49aea29055 Mon Sep 17 00:00:00 2001
From: Andvari <31068367+dzx-dzx@users.noreply.github.com>
Date: Fri, 3 May 2024 21:20:02 +0800
Subject: [PATCH] feat(route/apnews): Support rss parsing. (#15440)

* feat(route/apnews): Support rss parsing.

* Update lib/routes/apnews/topics.ts

Co-authored-by: Tony

* Update lib/routes/apnews/rss.ts

Co-authored-by: Tony

* Update rss.ts

---------

---
 lib/routes/apnews/rss.ts    | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/routes/apnews/topics.ts | 18 ++----------------
 lib/routes/apnews/utils.ts  | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 16 deletions(-)
 create mode 100644 lib/routes/apnews/rss.ts
 create mode 100644 lib/routes/apnews/utils.ts

diff --git a/lib/routes/apnews/rss.ts b/lib/routes/apnews/rss.ts
new file mode 100644
index 00000000000000..dc600eec5235b4
--- /dev/null
+++ b/lib/routes/apnews/rss.ts
@@ -0,0 +1,52 @@
+import { Route } from '@/types';
+import parser from '@/utils/rss-parser';
+import { fetchArticle } from './utils';
+const HOME_PAGE = 'https://apnews.com';
+
+export const route: Route = {
+    path: '/rss/:rss?',
+    categories: ['traditional-media'],
+    example: '/apnews/rss/business',
+    parameters: { rss: 'Route name from the first segment of the corresponding site, or `index` for the front page(default).' },
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['apnews.com/:rss'],
+            target: '/rss/:rss',
+        },
+    ],
+    name: 'RSS',
+    maintainers: ['zoenglinghou', 'mjysci', 'TonyRL'],
+    handler,
+};
+
+/**
+ * Builds the feed for the `/rss/:rss?` route from an apnews.com RSS document.
+ *
+ * Reads the optional `rss` route parameter (defaulting to `index`), parses
+ * `https://apnews.com/<rss>.rss`, and runs every feed entry through `fetchArticle`.
+ *
+ * @param ctx - Request context; only `ctx.req.param()` is read here.
+ * @returns The parsed feed's own fields, plus `item` holding the processed entries.
+ */
+async function handler(ctx) {
+    const { rss = 'index' } = ctx.req.param();
+    const url = `${HOME_PAGE}/${rss}.rss`;
+    const res = await parser.parseURL(url);
+
+    const items = await Promise.all(res.items.map((item) => fetchArticle(item)));
+
+    return {
+        // Spread the parsed feed `res`, not the `rss` route parameter: spreading a
+        // string only produces per-character index keys and drops the feed metadata.
+        ...res,
+        item: items,
+    };
+}
diff --git a/lib/routes/apnews/topics.ts b/lib/routes/apnews/topics.ts
index e0c64c18469932..283c8e0837f5fb 100644
--- a/lib/routes/apnews/topics.ts
+++ b/lib/routes/apnews/topics.ts
@@ -1,8 +1,7 @@
 import { Route } from '@/types';
-import cache from '@/utils/cache';
 import got from '@/utils/got';
 import { load } from 'cheerio';
-import timezone from '@/utils/timezone';
+import { fetchArticle } from './utils';
 const HOME_PAGE = 'https://apnews.com';
 
 export const route: Route = {
@@ -44,20 +43,7 @@ async function handler(ctx) {
                 link: $(e).find('a').attr('href'),
             }))
             .filter((e) => typeof e.link === 'string')
-            .map((item) =>
-                cache.tryGet(item.link, async () => {
-                    const { data: response } = await got(item.link);
-                    const $ = load(response);
-                    $('div.Enhancement').remove();
-                    return Object.assign(item, {
-                        pubDate: timezone(new Date($("meta[property='article:published_time']").attr('content')), 0),
-                        updated: timezone(new Date($("meta[property='article:modified_time']").attr('content')), 0),
-                        description: $('div.RichTextStoryBody').html(),
-                        category: $("meta[property='article:section']").attr('content'),
-                        guid: $("meta[name='brightspot.contentId']").attr('content'),
-                    });
-                })
-            )
+            .map((item) => fetchArticle(item))
     );
 
     return {
diff --git a/lib/routes/apnews/utils.ts b/lib/routes/apnews/utils.ts
new file mode 100644
index 00000000000000..4e8b74eba610c1
--- /dev/null
+++ b/lib/routes/apnews/utils.ts
@@ -0,0 +1,32 @@
+import cache from '@/utils/cache';
+import ofetch from '@/utils/ofetch';
+import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
+import { load } from 'cheerio';
+
+/**
+ * Enriches a feed item with data scraped from its article page.
+ *
+ * Fetches `item.link`, strips `div.Enhancement` nodes, then assigns `pubDate`, `updated`,
+ * `description`, `category` and `guid` onto `item` (the object is mutated in place).
+ * The work is wrapped in `cache.tryGet`, keyed by `item.link`.
+ *
+ * @param item - Feed item with a `link` property pointing at the article page.
+ * @returns The result of `cache.tryGet` for this link.
+ *   NOTE(review): presumably a promise of the enriched item - confirm against `@/utils/cache`.
+ */
+export function fetchArticle(item) {
+    return cache.tryGet(item.link, async () => {
+        const data = await ofetch(item.link);
+        const $ = load(data);
+        // Removed before the story body is read, so any such nodes inside it are left out of `description`.
+        $('div.Enhancement').remove();
+        return Object.assign(item, {
+            pubDate: timezone(parseDate($("meta[property='article:published_time']").attr('content')), 0),
+            updated: timezone(parseDate($("meta[property='article:modified_time']").attr('content')), 0),
+            description: $('div.RichTextStoryBody').html(),
+            category: $("meta[property='article:section']").attr('content'),
+            guid: $("meta[name='brightspot.contentId']").attr('content'),
+        });
+    });
+}