Skip to content

Commit

Permalink
fix(route/ruankao): Remove font presetting and other details. (#18160)
Browse files Browse the repository at this point in the history
* fix: remove font presetting and other details.

* fix: add anti-crawler tag.

* fix: migrate jsdom to cheerio.

* fix: fix news-router name.
  • Loading branch information
PrinOrange authored Jan 22, 2025
1 parent 6b3af0b commit 90c6432
Showing 1 changed file with 26 additions and 10 deletions.
36 changes: 26 additions & 10 deletions lib/routes/ruankao/news.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
import { DataItem, Route } from '@/types';
import type { DataItem, Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import { load } from 'cheerio';

const BASE_URL = 'https://www.ruankao.org.cn/index/work';

/**
 * Strips every `font-family` declaration from an HTML fragment so the feed
 * reader's own typography is used instead of the fonts preset by the source page.
 *
 * Both inline `style="…"` attributes and embedded `<style>` elements are cleaned.
 * An inline `style` attribute that becomes empty after cleaning is removed entirely.
 *
 * @param html - HTML fragment to clean; defaults to an empty string.
 * @returns The cleaned fragment, serialised back to an HTML string.
 */
const removeFontPresetting = (html: string = ''): string => {
    // Parse as a fragment (third argument `false`): in the default document mode the
    // serialised output would be wrapped in <html><head></head><body>…</body></html>
    // and any <style> elements would be hoisted into <head>.
    const $ = load(html, null, false);

    // Matches one `font-family` declaration. The value stops at `;` or at `}` so that
    // a final declaration without a trailing semicolon inside a stylesheet rule does
    // not swallow the closing brace and the rules that follow it. Whitespace before
    // the colon is tolerated.
    const fontFamilyDeclaration = /font-family\s*:[^;}]*;?/gi;

    $('[style]').each((_, element) => {
        const style = $(element).attr('style') || '';
        const cleanedStyle = style.replaceAll(fontFamilyDeclaration, '').trim();
        // Passing null removes the attribute rather than leaving `style=""` behind.
        $(element).attr('style', cleanedStyle || null);
    });

    $('style').each((_, styleElement) => {
        const cssText = $(styleElement).html() || '';
        const cleanedCssText = cssText.replaceAll(fontFamilyDeclaration, '');
        $(styleElement).html(cleanedCssText);
    });

    return $.html();
};

const handler: Route['handler'] = async () => {
// Fetch the index page
const { data: listResponse } = await got(BASE_URL);
Expand All @@ -30,8 +45,8 @@ const handler: Route['handler'] = async () => {
});

return {
title: '计算机职业技术资格考试(软考)最新动态',
description: '计算机职业技术资格考试(软考)网站最新动态和消息推送',
title: '计算机职业技术资格考试(软考)动态',
description: '计算机职业技术资格考试(软考)消息推送',
link: BASE_URL,
image: 'https://bm.ruankao.org.cn/asset/image/public/logo.png',
item: (await Promise.all(
Expand All @@ -40,7 +55,7 @@ const handler: Route['handler'] = async () => {
const CONTENT_SELECTOR = '#contentTxt';
const { data: contentResponse } = await got(item.link);
const contentPage = load(contentResponse);
const content = contentPage(CONTENT_SELECTOR).html() || '';
const content = removeFontPresetting(contentPage(CONTENT_SELECTOR).html() || '');
return {
title: item.title,
pubDate: item.date,
Expand All @@ -52,36 +67,37 @@ const handler: Route['handler'] = async () => {
image: 'https://bm.ruankao.org.cn/asset/image/public/logo.png',
content,
updated: item.date,
language: 'zh-cn',
language: 'zh-CN',
};
})
)
)) as DataItem[],
allowEmpty: true,
language: 'zh-cn',
language: 'zh-CN',
feedLink: 'https://rsshub.app/ruankao/news',
id: 'https://rsshub.app/ruankao/news',
};
};

export const route: Route = {
path: '/news',
name: '软考最新动态',
name: '软考动态',
description: '**注意:** 官方网站限制了国外网络请求,可能需要通过部署在中国大陆内的 RSSHub 实例访问。',
maintainers: ['PrinOrange'],
handler,
categories: ['study'],
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
antiCrawler: true,
supportBT: false,
supportPodcast: false,
supportScihub: false,
supportRadar: true,
},
radar: [
{
title: '软考新闻动态',
title: '计算机职业技术资格考试(软考)动态',
source: ['www.ruankao.org.cn/index/work', 'www.ruankao.org.cn'],
target: `/news`,
},
Expand Down

0 comments on commit 90c6432

Please sign in to comment.