Skip to content

Commit

Permalink
feat(route/apnews): Support rss parsing. (#15440)
Browse files Browse the repository at this point in the history
* feat(route/apnews): Support rss parsing.

* Update lib/routes/apnews/topics.ts

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Update lib/routes/apnews/rss.ts

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Update rss.ts

---------
  • Loading branch information
dzx-dzx authored May 3, 2024
1 parent 98ceba2 commit 1756242
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 16 deletions.
41 changes: 41 additions & 0 deletions lib/routes/apnews/rss.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { Route } from '@/types';
import parser from '@/utils/rss-parser';
import { fetchArticle } from './utils';
const HOME_PAGE = 'https://apnews.com';

/**
 * Route definition for the AP News RSS endpoint.
 *
 * Declares the path pattern, documentation metadata, feature flags and radar
 * rule for the route, and wires requests to `handler`.
 */
export const route: Route = {
// The `:rss` segment is optional; `handler` falls back to `index` when it is omitted.
path: '/rss/:rss?',
categories: ['traditional-media'],
example: '/apnews/rss/business',
parameters: { rss: 'Route name from the first segment of the corresponding site, or `index` for the front page(default).' },
// Every feature flag is disabled for this route.
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
// Radar rule pairing the site URL pattern with this route's target path.
radar: [
{
source: ['apnews.com/:rss'],
target: '/rss/:rss',
},
],
name: 'RSS',
maintainers: ['zoenglinghou', 'mjysci', 'TonyRL'],
handler,
};

/**
 * Fetches an AP News RSS feed and returns it with every entry passed through
 * `fetchArticle`.
 *
 * @param ctx - Request context; `ctx.req.param()` supplies the optional `rss`
 *   route segment, which defaults to `index`.
 * @returns The parsed feed's title, link and description together with the
 *   processed entries under `item`.
 */
async function handler(ctx) {
    const { rss = 'index' } = ctx.req.param();
    const url = `${HOME_PAGE}/${rss}.rss`;
    const feed = await parser.parseURL(url);

    const items = await Promise.all(feed.items.map((item) => fetchArticle(item)));

    // Feed metadata must come from the parsed feed. The previous `...rss` spread
    // the route parameter string, which only produced indexed character keys
    // (`0`, `1`, ...) and dropped the title, link and description.
    return {
        title: feed.title,
        link: feed.link,
        description: feed.description,
        item: items,
    };
}
18 changes: 2 additions & 16 deletions lib/routes/apnews/topics.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio';
import timezone from '@/utils/timezone';
import { fetchArticle } from './utils';
const HOME_PAGE = 'https://apnews.com';

export const route: Route = {
Expand Down Expand Up @@ -44,20 +43,7 @@ async function handler(ctx) {
link: $(e).find('a').attr('href'),
}))
.filter((e) => typeof e.link === 'string')
.map((item) =>
cache.tryGet(item.link, async () => {
const { data: response } = await got(item.link);
const $ = load(response);
$('div.Enhancement').remove();
return Object.assign(item, {
pubDate: timezone(new Date($("meta[property='article:published_time']").attr('content')), 0),
updated: timezone(new Date($("meta[property='article:modified_time']").attr('content')), 0),
description: $('div.RichTextStoryBody').html(),
category: $("meta[property='article:section']").attr('content'),
guid: $("meta[name='brightspot.contentId']").attr('content'),
});
})
)
.map((item) => fetchArticle(item))
);

return {
Expand Down
20 changes: 20 additions & 0 deletions lib/routes/apnews/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import cache from '@/utils/cache';
import ofetch from '@/utils/ofetch';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';
import { load } from 'cheerio';

/**
 * Loads the page at `item.link` and merges article metadata read from it
 * into `item`. The work is wrapped in `cache.tryGet`, keyed by `item.link`.
 *
 * @param item - Feed entry; its `link` is both the cache key and the URL fetched.
 * @returns The result of `cache.tryGet`, whose callback resolves to the same
 *   `item` object extended with `pubDate`, `updated`, `description`,
 *   `category` and `guid`.
 */
export function fetchArticle(item) {
    const enrich = async () => {
        const html = await ofetch(item.link);
        const $ = load(html);

        // Drop `div.Enhancement` nodes before the story body is serialised.
        $('div.Enhancement').remove();

        // Reads the `content` attribute of the first element matching `selector`.
        const contentOf = (selector) => $(selector).attr('content');

        const pubDate = timezone(parseDate(contentOf("meta[property='article:published_time']")), 0);
        const updated = timezone(parseDate(contentOf("meta[property='article:modified_time']")), 0);
        const description = $('div.RichTextStoryBody').html();
        const category = contentOf("meta[property='article:section']");
        const guid = contentOf("meta[name='brightspot.contentId']");

        // `item` is extended in place and returned, so callers get the same object back.
        return Object.assign(item, { pubDate, updated, description, category, guid });
    };

    return cache.tryGet(item.link, enrich);
}

0 comments on commit 1756242

Please sign in to comment.