Skip to content

Commit c68fba6

Browse files
authored
fix(route/phoronix): adapt to the new web path (#13946)
Signed-off-by: Rongrong <i@rong.moe>
1 parent faa249f commit c68fba6

File tree

5 files changed

+183
-54
lines changed

5 files changed

+183
-54
lines changed

lib/v2/phoronix/index.js

Lines changed: 177 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,185 @@
11
const parser = require('@/utils/rss-parser');
22
const cheerio = require('cheerio');
33
const got = require('@/utils/got');
4+
const dayjs = require('dayjs');
5+
dayjs.extend(require('dayjs/plugin/utc'));
6+
dayjs.extend(require('dayjs/plugin/timezone'));
7+
8+
const redirectCacheKey = 'phoronix:redirect';
9+
const webArticlesCacheKey = 'phoronix:web-articles';
10+
const articleCacheKey = 'phoronix:articles';
411

512
const baseUrl = 'https://www.phoronix.com';
13+
const rssUrl = `${baseUrl}/rss.php`;
614

7-
module.exports = async (ctx) => {
8-
const { page, queryOrItem } = ctx.params;
9-
const rssUrl = new URL('/rss.php', baseUrl);
10-
rssUrl.searchParams.set('page', page);
15+
/**
 * Fetch the site RSS feed and reshape it into the route's feed object.
 *
 * @returns {Promise<object>} Feed metadata, with the parsed RSS entries under `item`.
 */
const feedFetch = async () => {
    const { title, link, description, items, language } = await parser.parseURL(rssUrl);

    // The same artwork is used for both the icon and the image.
    const siteIcon = 'https://www.phoronix.com/android-chrome-192x192.png';

    // Copied from the web page metadata
    const category = [
        'Linux Hardware Reviews',
        'Linux hardware benchmarks',
        'Linux Hardware',
        'Linux benchmarking',
        'Desktop Linux',
        'GNU/Linux benchmarks',
        'Open Source AMD',
        'Linux How To',
        'X.Org drivers',
        'Ubuntu hardware',
        'Phoronix Test Suite',
    ];

    return {
        title,
        link,
        description,
        item: items,
        language,
        icon: siteIcon,
        image: siteIcon,
        logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
        category,
    };
};
42+
43+
/**
 * Build a feed skeleton from the HTTP response of a listing page.
 *
 * @param {object} response - Response whose `body` is the page HTML and whose `url`
 *     is used as the feed link.
 * @returns {object} Feed metadata. `item` holds one `{ link }` stub for every unique
 *     href under `#main` that starts with `/review/` or `/news/`; `category` is the
 *     keywords meta content split on ', ', or an empty array when it is absent.
 */
const webFetchCb = (response) => {
    const $ = cheerio.load(response.body);

    // De-duplicate the raw hrefs first, then keep only relative article links.
    const uniqueHrefs = new Set(
        $('#main a')
            .toArray()
            .map((anchor) => anchor.attribs.href)
    );
    const item = [...uniqueHrefs]
        .filter((href) => href && (href.startsWith('/review/') || href.startsWith('/news/')))
        .map((href) => ({ link: `${baseUrl}${href}` }));

    // Guard against a page without a keywords meta tag: `attr` then returns
    // undefined and calling `.split` on it would throw.
    const keywords = $('meta[name="keywords"]').attr('content');

    return {
        title: $('title').text(),
        link: response.url,
        description: $('meta[name="Description"]').attr('content'),
        item,
        language: 'en-us',
        icon: 'https://www.phoronix.com/android-chrome-192x192.png',
        image: 'https://www.phoronix.com/android-chrome-192x192.png',
        logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
        category: keywords ? keywords.split(', ') : [],
    };
};
65+
66+
/**
 * Fetch a listing page and turn it into a feed skeleton, going through the cache.
 *
 * @param {object} ctx - Request context; its `cache.tryGet` is used for caching.
 * @param {string} url - Listing page URL.
 * @returns {Promise<object|string>} The feed skeleton, or the sentinel string '404'
 *     when the request fails with an HTTP 404.
 */
const webFetch = (ctx, url) => {
    const loadListing = async () => {
        try {
            const response = await got(url);
            return webFetchCb(response);
        } catch (err) {
            const isNotFound = err.name === 'HTTPError' && err.response.statusCode === 404;
            if (!isNotFound) {
                throw err;
            }
            // A missing page is reported as a sentinel so the caller can fall back.
            return '404';
        }
    };

    return ctx.cache.tryGet(`${webArticlesCacheKey}:${url}`, loadListing);
};
77+
78+
/**
 * Resolve a legacy `scan.php` URL to the URL it ends up at and return that page's feed.
 *
 * The resolved URL is cached per legacy URL. When the legacy URL had to be requested
 * to resolve it, that response is parsed directly and stored in the web-articles
 * cache, so the page is not requested a second time.
 *
 * @param {object} ctx - Request context providing `cache.tryGet` and `cache.set`.
 * @param {string} page - Value of the `page` query parameter.
 * @param {string} [queryOrItem] - Sent as `item` when `page` is 'category', otherwise as `q`.
 * @returns {Promise<object|string>} The feed skeleton, or whatever `webFetch` yields
 *     for the resolved URL.
 */
const legacyFetch = async (ctx, page, queryOrItem) => {
    const legacyUrl = new URL('/scan.php', baseUrl);
    legacyUrl.searchParams.set('page', page);
    if (queryOrItem) {
        legacyUrl.searchParams.set(page === 'category' ? 'item' : 'q', queryOrItem);
    }
    const legacyHref = legacyUrl.toString();

    // `response` stays undefined when the resolved URL was served from the cache.
    let response;
    const webUrl = await ctx.cache.tryGet(`${redirectCacheKey}:${legacyHref}`, async () => {
        response = await got(legacyHref);
        return response.url;
    });

    if (!response) {
        return webFetch(ctx, webUrl);
    }

    const feed = webFetchCb(response);
    // Await the write so a cache failure surfaces here instead of becoming an
    // unhandled rejection, and so the entry exists before the feed is returned.
    await ctx.cache.set(`${webArticlesCacheKey}:${webUrl}`, feed);
    return feed;
};
20101

21-
const items = await Promise.all(
22-
feed.items.map((item) =>
23-
ctx.cache.tryGet(item.link, async () => {
102+
/**
 * Fetch the feed for a category (and optional topic) from its web path first,
 * falling back to the legacy lookup when the web path reports '404'.
 *
 * @param {object} ctx - Request context passed through to the fetch helpers.
 * @param {string} category - First path segment.
 * @param {string} [topic] - Optional second path segment.
 * @returns {Promise<object|string>} Result of the web fetch, or of the legacy fetch on '404'.
 */
const tryFetch = async (ctx, category, topic) => {
    const topicSuffix = topic ? `/${topic}` : '';
    const webUrl = `${baseUrl}/${category}${topicSuffix}`;

    const webFeed = await webFetch(ctx, webUrl);
    if (webFeed !== '404') {
        return webFeed;
    }
    return legacyFetch(ctx, category, topic);
};
110+
111+
module.exports = async (ctx) => {
112+
const { category, topic } = ctx.params;
113+
let feed;
114+
switch (category) {
115+
case 'category':
116+
case 'news_topic':
117+
feed = await legacyFetch(ctx, category, topic);
118+
break;
119+
case 'rss':
120+
feed = await feedFetch();
121+
break;
122+
default:
123+
feed = category ? await tryFetch(ctx, category, topic) : await feedFetch();
124+
break;
125+
}
126+
127+
feed.item = await Promise.all(
128+
feed.item.map((item) =>
129+
ctx.cache.tryGet(`${articleCacheKey}:${item.link}`, async () => {
24130
const response = await got(item.link);
25131
const html = response.body;
26132
const $ = cheerio.load(html);
27133
const content = $('.content');
28134

29135
// Author
30136
const authorSelector = $('.author > a');
31-
// thel last 2 are the category and comments
137+
// the last 2 are the category and comments
32138
const author = authorSelector
33139
.slice(0, authorSelector.length - 2)
34140
.toArray()
35141
.map((e) => $(e).text());
142+
const category = [];
143+
if (item.link.includes('/news/')) {
144+
category.push('News');
145+
} else if (item.link.includes('/review/')) {
146+
category.push('Review');
147+
}
148+
const categorySelector = authorSelector.eq(-2);
149+
if (categorySelector.length) {
150+
category.push(categorySelector.text());
151+
}
152+
let pubDate;
153+
if (!item.pubDate) {
154+
// the text next to the category is the date
155+
let pubDateReadable = categorySelector.length && categorySelector[0].nextSibling?.nodeValue;
156+
if (pubDateReadable) {
157+
pubDateReadable = pubDateReadable.replace(/on|at|\./g, '').trim();
158+
if (/\d{4}$/.test(pubDateReadable)) {
159+
// Only date, no time
160+
// Michael Larabel lives in Indiana, USA, so we assume TZ=America/Indiana/Indianapolis
161+
// https://www.phoronix.com/review/phoronix_office_2014
162+
// Here we use the trick to take daylight saving into account.
163+
pubDate = dayjs
164+
// If we don't append "UTC" at the end,
165+
// dayjs.utc() may still parse the date in the platform (local) timezone.
166+
// E.g., if the platform timezone is UTC+8, then:
167+
// > dayjs.utc('2 Dec 2023').toString()
168+
// 'Fri, 01 Dec 2023 16:00:00 GMT'
169+
// > dayjs.utc('2 Dec 2023 UTC').toString()
170+
// 'Sat, 02 Dec 2023 00:00:00 GMT'
171+
// Append "UTC" at the end to explicitly prohibit the weird behavior.
172+
.utc(`${pubDateReadable} 08:00 UTC`)
173+
.tz('America/Indiana/Indianapolis', true);
174+
} else {
175+
// date, time, and timezone (including daylight saving)
176+
pubDate = dayjs(pubDateReadable);
177+
}
178+
if (!pubDate.isValid()) {
179+
pubDate = pubDateReadable;
180+
}
181+
}
182+
}
36183

37184
// Maybe it's paginated
38185
const links = $('.pagination > a')
@@ -55,53 +202,35 @@ module.exports = async (ctx) => {
55202
content.append(pages);
56203
}
57204

58-
// Summary
59-
const summary = $('.content > p:nth-child(1)');
60-
61-
// High res images
62-
content.find('img').each((_, img) => {
63-
if (img.attribs.src.endsWith('_med')) {
64-
img.attribs.src = img.attribs.src.replace('_med', '_show');
205+
const images = content.find('img');
206+
// Remove topic image
207+
const topicImage = images.first();
208+
if (topicImage.attr('src')?.startsWith('/assets/categories/')) {
209+
const topicImageContainer = topicImage.parent();
210+
if (!topicImageContainer.text().trim()) {
211+
topicImageContainer.remove();
212+
} else {
213+
topicImage.remove();
65214
}
215+
}
216+
// High-res images
217+
images.each((_, img) => {
218+
img.attribs.src = img.attribs.src.replace(/_med$/, '');
66219
});
67220

68221
return {
69-
title: item.title,
70-
id: item.guid,
71-
pubDate: item.pubDate,
222+
title: item.title || $('article h1').text(),
223+
pubDate: item.pubDate || pubDate,
72224
author: author.join(', '),
73225
link: item.link,
74-
summary: summary.html(),
226+
summary: $('meta[name="twitter:description"]').attr('content'),
75227
description: content.html(),
76-
icon: 'https://www.phoronix.com/android-chrome-192x192.png',
77-
logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
228+
image: $('meta[name="twitter:image"]').attr('content'),
229+
category: item.category || category,
78230
};
79231
})
80232
)
81233
);
82234

83-
ctx.state.data = {
84-
title: feed.title,
85-
link: feed.link,
86-
description: feed.description,
87-
item: items,
88-
language: feed.language,
89-
icon: 'https://www.phoronix.com/android-chrome-192x192.png',
90-
image: 'https://www.phoronix.com/android-chrome-192x192.png',
91-
logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
92-
// Copied from thier web page metadata
93-
category: [
94-
'Linux Hardware Reviews',
95-
'Linux hardware benchmarks',
96-
'Linux Hardware',
97-
'Linux benchmarking',
98-
'Desktop Linux',
99-
'GNU/Linux benchmarks',
100-
'Open Source AMD',
101-
'Linux How To',
102-
'X.Org drivers',
103-
'Ubuntu hardware',
104-
'Phoronix Test Suite',
105-
],
106-
};
235+
ctx.state.data = feed;
107236
};

lib/v2/phoronix/maintainer.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module.exports = {
2-
'/:page/:queryOrItem?': ['oppliate'],
2+
'/:category?/:topic?': ['oppliate', 'Rongronggg9'],
33
};

lib/v2/phoronix/radar.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ module.exports = {
33
_name: 'Phoronix',
44
'.': [
55
{
6-
title: '新闻与评测',
6+
title: 'News & Reviews',
77
docs: 'https://docs.rsshub.app/routes/new-media#phoronix',
8-
source: ['/*'],
9-
target: '/phoronix/news',
8+
source: ['/:category?/:topic?'],
9+
target: '/phoronix/:category?/:topic?',
1010
},
1111
],
1212
},

lib/v2/phoronix/router.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module.exports = (router) => {
2-
router.get('/:page/:queryOrItem?', require('./index'));
2+
router.get('/:category?/:topic?', require('./index'));
33
};

website/docs/routes/new-media.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1567,7 +1567,7 @@ This route provides a flexible plan with full text content to subscribe specific
15671567

15681568
### News & Reviews {#phoronix-news-reviews}
15691569

1570-
<Route author="oppliate" example="/phoronix/news_topic/Intel" path="/phoronix/:page/:queryOrItem?" paramsDesc={['Page name', 'For `category` it corresponds to `item`, for other pages it\'s `q`. You may find available parameters from their navigator links. E.g. to subscribe to the category page `https://www.phoronix.com/scan.php?page=category&item=Computers`, fill in the path `/phoronix/category/Computers`']} radar="1"/>
1570+
<Route author="oppliate Rongronggg9" example="/phoronix/linux/KDE" path="/phoronix/:category?/:topic?" paramsDesc={['Category', 'Topic. You may find available parameters from their navigator links. E.g. to subscribe to `https://www.phoronix.com/reviews/Operating+Systems`, fill in the path `/phoronix/reviews/Operating+Systems`']} radar="1"/>
15711571

15721572
## PMCAFF {#pmcaff}
15731573

0 commit comments

Comments
 (0)