From ea6555f4b190afd9eeeecec95329670420418ba2 Mon Sep 17 00:00:00 2001
From: Semibyte <85426571+1837634311@users.noreply.github.com>
Date: Wed, 25 Dec 2024 11:59:47 +0800
Subject: [PATCH] feat(route): add Deepseek news route (#17967)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(route): add Deepseek news route

* Fix news.ts

* Add cache

* Fix news.ts

移除 async

* Update news.ts
---
 lib/routes/deepseek/namespace.ts |  8 +++
 lib/routes/deepseek/news.ts      | 95 ++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 lib/routes/deepseek/namespace.ts
 create mode 100644 lib/routes/deepseek/news.ts

diff --git a/lib/routes/deepseek/namespace.ts b/lib/routes/deepseek/namespace.ts
new file mode 100644
index 00000000000000..9f9807f30b12a4
--- /dev/null
+++ b/lib/routes/deepseek/namespace.ts
@@ -0,0 +1,8 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: 'Deepseek',
+    url: 'api-docs.deepseek.com',
+    description: 'Deepseek API 文档。',
+    lang: 'zh-CN',
+};
diff --git a/lib/routes/deepseek/news.ts b/lib/routes/deepseek/news.ts
new file mode 100644
index 00000000000000..c9861b39bf8f31
--- /dev/null
+++ b/lib/routes/deepseek/news.ts
@@ -0,0 +1,95 @@
+import type { Route, Data, DataItem } from '@/types';
+import ofetch from '@/utils/ofetch';
+import { load } from 'cheerio';
+import cache from '@/utils/cache';
+
+const ROOT_URL = 'https://api-docs.deepseek.com/zh-cn';
+
+// CSS selectors for the sidebar news entries, the article body, and the heading inside that body.
+const NEWS_LIST_SELECTOR = 'ul.menu__list > li:nth-child(2) ul > li.theme-doc-sidebar-item-link';
+const ARTICLE_CONTENT_SELECTOR = '.theme-doc-markdown > div > div';
+const ARTICLE_TITLE_SELECTOR = ARTICLE_CONTENT_SELECTOR + ' > h1';
+
+// Fetch a page and load its HTML into cheerio.
+const fetchPageContent = async (url: string) => {
+    const response = await ofetch(url);
+    return load(response);
+};
+
+// Extract the article title and body HTML from a loaded article page.
+const extractArticleInfo = ($article: cheerio.Root, pageURL: string) => {
+    const contentElement = $article(ARTICLE_CONTENT_SELECTOR);
+    const titleElement = $article(ARTICLE_TITLE_SELECTOR);
+    const title = titleElement.text();
+    titleElement.remove(); // drop the heading so it is not repeated inside the body HTML
+    const content = contentElement.html();
+    return { title, content, pageURL };
+};
+
+// Convert a date string to a UTC date string; yields undefined when the
+// input is missing or unparseable rather than the literal 'Invalid Date'.
+const parseDateString = (dateString?: string) => {
+    const pubDate = new Date(dateString ?? '');
+    return Number.isNaN(pubDate.getTime()) ? undefined : pubDate.toUTCString();
+};
+
+// Build one feed item from a sidebar entry. The date is taken from the last
+// space-separated token of the link text; the article page is fetched
+// through the cache, keyed by its absolute URL.
+const createDataItem = (item: cheerio.Element, $: cheerio.Root): Promise<DataItem> => {
+    const $anchor = $(item).find('a');
+    const link = $anchor.attr('href');
+    const dateString = $anchor.text().trim().split(' ').at(-1);
+    const pageURL = new URL(link || '', ROOT_URL).href;
+
+    return cache.tryGet(pageURL, async () => {
+        const $article = await fetchPageContent(pageURL);
+        const { title, content } = extractArticleInfo($article, pageURL);
+
+        return {
+            title,
+            link: pageURL,
+            pubDate: parseDateString(dateString),
+            description: content || undefined,
+        };
+    });
+};
+
+// Route handler: read the news entries from the docs sidebar and resolve
+// each one into a feed item, fetching the article pages concurrently.
+const handler = async (): Promise<Data> => {
+    const $ = await fetchPageContent(ROOT_URL);
+    const newsList = $(NEWS_LIST_SELECTOR);
+
+    const items: DataItem[] = await Promise.all(newsList.toArray().map((li) => createDataItem(li, $)));
+
+    return {
+        title: 'DeepSeek 新闻',
+        link: ROOT_URL,
+        item: items,
+        allowEmpty: true,
+    };
+};
+
+export const route: Route = {
+    path: '/news',
+    categories: ['programming'],
+    example: '/deepseek/news',
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['api-docs.deepseek.com'],
+            target: '/news',
+        },
+    ],
+    name: '新闻',
+    maintainers: ['1837634311'],
+    handler,
+};