From cc4c5d9bda1e8878f234853a6039b0fc93a8d7b4 Mon Sep 17 00:00:00 2001 From: Yun Du <27944418+Muyun99@users.noreply.github.com> Date: Thu, 26 Dec 2024 07:46:20 +0800 Subject: [PATCH] feat(route): add query keyword parse of cool paper (#17894) * [update] update cool paper * [add] add author of cool paper * [add] add rss of cool paper query topic * [update] update * [update] update * [update] update * [update] update * [update] update * trigger GitHub actions * [update] update * [update] update --- lib/routes/papers/index.ts | 6 +- lib/routes/papers/query.ts | 112 +++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 lib/routes/papers/query.ts diff --git a/lib/routes/papers/index.ts b/lib/routes/papers/index.ts index 3f0f0b0f8201ec..41413d70b20f6a 100644 --- a/lib/routes/papers/index.ts +++ b/lib/routes/papers/index.ts @@ -17,7 +17,7 @@ export const handler = async (ctx) => { const rootUrl = 'https://papers.cool'; const currentUrl = new URL(category, rootUrl).href; - const feedUrl = new URL(`${category}/feed`, rootUrl).href; + const feedUrl = new URL(`arxiv/${category}/feed`, rootUrl).href; const site = category.split(/\//)[0]; const apiKimiUrl = new URL(`${site}/kimi?paper=`, rootUrl).href; @@ -76,7 +76,7 @@ export const handler = async (ctx) => { }; export const route: Route = { - path: '/:category{.+}?', + path: '/arxiv/:category{.+}?', name: 'Topic', url: 'papers.cool', maintainers: ['nczitzk', 'Muyun99'], @@ -84,7 +84,7 @@ export const route: Route = { example: '/papers/arxiv/cs.AI', parameters: { category: 'Category, arXiv Artificial Intelligence (cs.AI) by default' }, description: `:::tip - If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI),where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part \`https://papers.cool/\` to the end, and use it as the parameter to fill in. 
Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI).
+    If you subscribe to [arXiv Artificial Intelligence (cs.AI)](https://papers.cool/arxiv/cs.AI), where the URL is \`https://papers.cool/arxiv/cs.AI\`, extract the part \`https://papers.cool/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/papers/arxiv/cs.AI\`](https://rsshub.app/papers/arxiv/cs.AI).
     :::
 
     | Category | id |
diff --git a/lib/routes/papers/query.ts b/lib/routes/papers/query.ts
new file mode 100644
index 00000000000000..94f582a9264081
--- /dev/null
+++ b/lib/routes/papers/query.ts
@@ -0,0 +1,135 @@
+import { Route } from '@/types';
+import { getCurrentPath } from '@/utils/helpers';
+const __dirname = getCurrentPath(import.meta.url);
+
+import { parseDate } from '@/utils/parse-date';
+import { art } from '@/utils/render';
+import path from 'node:path';
+import parser from '@/utils/rss-parser';
+
+// Keyword search is requested under the `arxiv/` path of papers.cool, so the site is fixed.
+const site = 'arxiv';
+// Maximum number of feed items returned when no usable `limit` query is given.
+const defaultLimit = 150;
+
+// Builders for the direct PDF link of a paper, keyed by source site.
+const pdfUrlGenerators = {
+    arxiv: (id: string) => `https://arxiv.org/pdf/${id}.pdf`,
+};
+
+/**
+ * Builds a feed of arXiv papers matching a search keyword on papers.cool.
+ *
+ * Reads the `keyword` route parameter (default `Detection`) and the optional
+ * `limit` query parameter (default 150), fetches the site's search RSS feed and
+ * renders each entry's description from a template given its PDF, site and Kimi links.
+ *
+ * @param ctx - Request context providing `req.param()` and `req.query()`.
+ * @returns Feed data: title, description, link, items, image and language.
+ */
+export const handler = async (ctx) => {
+    const { keyword = 'Detection' } = ctx.req.param();
+    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
+    // A missing or non-numeric limit would otherwise become NaN and empty the feed.
+    const limit = Number.isNaN(requestedLimit) ? defaultLimit : requestedLimit;
+
+    const rootUrl = 'https://papers.cool';
+    // Percent-encode the keyword so characters such as `&`, `#` or spaces cannot corrupt the query string.
+    const query = encodeURIComponent(keyword);
+    const currentUrl = new URL(`${site}/search?highlight=1&query=${query}&sort=0`, rootUrl).href;
+    const feedUrl = new URL(`${site}/search/feed?query=${query}`, rootUrl).href;
+
+    const apiKimiUrl = new URL(`${site}/kimi?paper=`, rootUrl).href;
+    const feed = await parser.parseURL(feedUrl);
+
+    const language = 'en';
+
+    const items = feed.items.slice(0, limit).map((item) => {
+        const title = item.title;
+        const guid = item.guid;
+
+        // The paper id is the last path segment of the item link.
+        const id = item.link?.split(/\//).pop() ?? '';
+        // Append the id to the `paper=` query; resolving it as a relative URL would replace `kimi?paper=` entirely.
+        const kimiUrl = `${apiKimiUrl}${encodeURIComponent(id)}`;
+        const pdfUrl = pdfUrlGenerators[site](id);
+
+        const authorString = item.author;
+        const description = art(path.join(__dirname, 'templates/description.art'), {
+            pdfUrl,
+            siteUrl: item.link,
+            kimiUrl,
+            authorString,
+            summary: item.summary,
+        });
+
+        return {
+            title,
+            description,
+            pubDate: parseDate(item.pubDate ?? ''),
+            link: item.link,
+            category: item.categories,
+            author: authorString,
+            doi: `${site}${id}`,
+            guid,
+            id: guid,
+            content: {
+                html: description,
+                text: item.content,
+            },
+            language,
+            enclosure_url: pdfUrl,
+            enclosure_type: 'application/pdf',
+            enclosure_title: title,
+        };
+    });
+
+    return {
+        title: feed.title,
+        description: feed.description,
+        link: currentUrl,
+        item: items,
+        allowEmpty: true,
+        image: feed.image?.url,
+        language: feed.language,
+    };
+};
+
+/** Route metadata for `/papers/query/:keyword`. */
+export const route: Route = {
+    path: '/query/:keyword{.+}?',
+    name: 'Query',
+    url: 'papers.cool',
+    maintainers: ['Muyun99'],
+    handler,
+    example: '/papers/query/Detection',
+    parameters: { keyword: 'Keyword to search for papers, e.g., Detection, Segmentation, etc.' },
+    description: `:::tip
+    If you subscribe to [arXiv papers queried by Detection](https://papers.cool/arxiv/search?highlight=1&query=Detection), where the URL is \`https://papers.cool/arxiv/search?highlight=1&query=Detection\`, take the value of the \`query\` parameter and use it as the keyword. Therefore, the route will be [\`/papers/query/Detection\`](https://rsshub.app/papers/query/Detection).
+    :::
+
+    | Query                                | keyword      |
+    | ------------------------------------ | ------------ |
+    | arXiv papers queried by Detection    | Detection    |
+    | arXiv papers queried by Segmentation | Segmentation |
+    `,
+    categories: ['journal'],
+
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportRadar: true,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: true,
+    },
+    radar: [
+        {
+            title: 'arXiv Paper queried by Keyword',
+            // NOTE(review): radar sources are assumed to match on host and path only, so the keyword is read from the page URL's query string — confirm against the radar docs.
+            source: ['papers.cool/arxiv/search'],
+            target: (_params, url) => `/papers/query/${new URL(url).searchParams.get('query') ?? ''}`,
+        },
+    ],
+};