Skip to content

Commit

Permalink
Merge pull request #97 from DIYgod/master
Browse files Browse the repository at this point in the history
[pull] master from diygod:master
  • Loading branch information
pull[bot] authored Jan 15, 2024
2 parents 196c2c1 + 450bdb3 commit a9e37c6
Show file tree
Hide file tree
Showing 49 changed files with 806 additions and 106 deletions.
25 changes: 23 additions & 2 deletions lib/middleware/access-control.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,27 @@ const reject = (ctx) => {
throw Error('Authentication failed. Access denied.');
};

// Dotted-quad IPv4 address such as `192.168.1.10`. The pattern only checks the
// shape; the 0-255 range of each octet is verified by `isValidIpv4`.
const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
// IPv4 CIDR block such as `192.168.1.0/24`. Anchored at both ends so that an
// allow/deny list entry which merely *contains* something CIDR-shaped (for
// example a User-Agent fragment like `Foo/1.2.3.4/5`) is not treated as a CIDR.
const cidrPattern = /^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\/(\d{1,2})$/;

/**
 * Checks that a string is a dotted-quad IPv4 address whose octets are all <= 255.
 *
 * @param {string} ip - Candidate address.
 * @returns {boolean} `true` when `ip` is a well-formed IPv4 address.
 */
const isValidIpv4 = (ip) => ipv4Pattern.test(ip) && ip.split('.').every((part) => Number.parseInt(part, 10) <= 255);

/**
 * Converts a dotted-quad IPv4 address into its 32-character binary string.
 *
 * @param {string} ip - IPv4 address; callers are expected to validate it first.
 * @returns {string} The four octets, each zero-padded to 8 bits, concatenated.
 */
const ipv4ToBitsring = (ip) =>
    ip
        .split('.')
        .map((part) => Number.parseInt(part, 10).toString(2).padStart(8, '0'))
        .join('');

/**
 * Tests whether an IPv4 address belongs to a CIDR block.
 *
 * @param {string} cidr - CIDR notation, e.g. `10.0.0.0/8`.
 * @param {string} ip - IPv4 address to test.
 * @returns {boolean} `true` when the first `prefix` bits of both addresses are
 *     equal; `false` when they differ or when either argument is malformed
 *     (not CIDR-shaped, octet above 255, or prefix length above 32).
 */
const ipInCidr = (cidr, ip) => {
    const cidrMatch = cidr.match(cidrPattern);
    if (!cidrMatch || !isValidIpv4(cidrMatch[1]) || !isValidIpv4(ip)) {
        return false;
    }
    const prefixLength = Number.parseInt(cidrMatch[2], 10);
    if (prefixLength > 32) {
        return false;
    }
    // A /0 prefix compares two empty strings and therefore matches every address.
    return ipv4ToBitsring(cidrMatch[1]).slice(0, prefixLength) === ipv4ToBitsring(ip).slice(0, prefixLength);
};

module.exports = async (ctx, next) => {
const ip = ctx.ips[0] || ctx.ip;
const requestPath = ctx.request.path;
Expand Down Expand Up @@ -39,13 +60,13 @@ module.exports = async (ctx, next) => {
}

if (config.allowlist) {
if (config.allowlist.find((item) => ip.includes(item) || requestPath.includes(item) || requestUA.includes(item))) {
if (config.allowlist.find((item) => ip.includes(item) || ipInCidr(item, ip) || requestPath.includes(item) || requestUA.includes(item))) {
return grant();
}
}

if (config.denylist) {
if (!config.denylist.find((item) => ip.includes(item) || requestPath.includes(item) || requestUA.includes(item))) {
if (!config.denylist.find((item) => ip.includes(item) || ipInCidr(item, ip) || requestPath.includes(item) || requestUA.includes(item))) {
return grant();
}
}
Expand Down
24 changes: 3 additions & 21 deletions lib/v2/agirls/index.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,15 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { CookieJar } = require('tough-cookie');
const { baseUrl, parseArticle } = require('./utils');

module.exports = async (ctx) => {
const cookieJar = new CookieJar();
const { category = '' } = ctx.params;
const link = `${baseUrl}/posts${category ? `/${category}` : ''}`;

const response = await got({
url: link,
headers: {
Referer: baseUrl,
},
cookieJar,
});
const response = await got(link);

const $ = cheerio.load(response.data);

let items = $('.ag-post-item__link')
const list = $('.ag-post-item__link')
.toArray()
.map((item) => {
item = $(item);
Expand All @@ -28,7 +19,7 @@ module.exports = async (ctx) => {
};
});

items = await Promise.all(items.map((item) => ctx.cache.tryGet(item.link, () => parseArticle(item, link, cookieJar))));
const items = await Promise.all(list.map((item) => ctx.cache.tryGet(item.link, () => parseArticle(item))));

ctx.state.data = {
title: $('head title').text().trim(),
Expand All @@ -37,13 +28,4 @@ module.exports = async (ctx) => {
item: items,
language: $('html').attr('lang'),
};

ctx.state.json = {
title: $('head title').text().trim(),
link,
description: $('head meta[name=description]').attr('content'),
item: items,
language: $('html').attr('lang'),
cookieJar,
};
};
1 change: 1 addition & 0 deletions lib/v2/agirls/maintainer.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Maps each route path pattern of this namespace to its list of maintainers.
const maintainers = {
    '/topic/:topic': ['TonyRL'],
    '/topic_list': ['TonyRL'],
    '/:category?': ['TonyRL'],
};

module.exports = maintainers;
10 changes: 8 additions & 2 deletions lib/v2/agirls/radar.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,22 @@ module.exports = {
agirls: [
{
title: '分類',
docs: 'https://docs.rsshub.app/routes/new-media##dian-ta-shao-nu',
docs: 'https://docs.rsshub.app/routes/new-media#dian-ta-shao-nv',
source: ['/posts/:category'],
target: '/agirls/:category',
},
{
title: '精選主題列表',
docs: 'https://docs.rsshub.app/routes/new-media##dian-ta-shao-nu',
docs: 'https://docs.rsshub.app/routes/new-media#dian-ta-shao-nv',
source: ['/', '/topic'],
target: '/agirls/topic_list',
},
{
title: '精选主题',
docs: 'https://docs.rsshub.app/routes/new-media#dian-ta-shao-nv',
source: ['/topic/:topic'],
target: '/agirls/topic/:topic',
},
],
},
};
3 changes: 2 additions & 1 deletion lib/v2/agirls/router.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
module.exports = function (router) {
module.exports = (router) => {
router.get('/topic/:topic', require('./topic'));
router.get('/topic_list', require('./topic_list'));
router.get('/:category?', require('./index'));
};
31 changes: 31 additions & 0 deletions lib/v2/agirls/topic.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { baseUrl, parseArticle } = require('./utils');

module.exports = async (ctx) => {
const { topic } = ctx.params;
const link = `${baseUrl}/topic/${topic}`;
const response = await got(link);

const $ = cheerio.load(response.data);
const ldJson = JSON.parse($('script[type="application/ld+json"]').text());
const list = $('.ag-post-item__link')
.toArray()
.map((item) => {
item = $(item);
return {
title: item.text().trim(),
link: `${baseUrl}${item.attr('href')}`,
};
});

const items = await Promise.all(list.map((item) => ctx.cache.tryGet(item.link, () => parseArticle(item))));

ctx.state.data = {
title: $('head title').text().trim(),
link,
description: ldJson['@graph'][0].description,
item: items,
language: $('html').attr('lang'),
};
};
15 changes: 1 addition & 14 deletions lib/v2/agirls/topic_list.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,7 @@ module.exports = async (ctx) => {
const category = 'topic';
const link = `${baseUrl}/${category}`;

const response = await got({
url: `${baseUrl}/${category}`,
headers: {
Referer: baseUrl,
},
});
const response = await got(`${baseUrl}/${category}`);

const $ = cheerio.load(response.data);

Expand All @@ -33,12 +28,4 @@ module.exports = async (ctx) => {
item: items,
language: $('html').attr('lang'),
};

ctx.state.json = {
title: $('head title').text().trim(),
link,
description: $('head meta[name=description]').attr('content'),
item: items,
language: $('html').attr('lang'),
};
};
10 changes: 2 additions & 8 deletions lib/v2/agirls/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,8 @@ const { parseDate } = require('@/utils/parse-date');

const baseUrl = 'https://agirls.aotter.net';

const parseArticle = async (item, referer, cookieJar) => {
const detailResponse = await got({
url: item.link,
headers: {
Referer: referer,
},
cookieJar,
});
const parseArticle = async (item) => {
const detailResponse = await got(item.link);
const content = cheerio.load(detailResponse.data);

item.category = [
Expand Down
139 changes: 139 additions & 0 deletions lib/v2/hket/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');
const path = require('path');
const { art } = require('@/utils/render');

// Site lookup table: the key is the leading four characters of a category
// code, the value holds the base URL of the site that serves that category.
const urlMap = {
    srac: { baseUrl: 'https://china.hket.com' },
    sran: { baseUrl: 'https://inews.hket.com' },
    srat: { baseUrl: 'https://topick.hket.com' },
    sraw: { baseUrl: 'https://wealth.hket.com' },
};

module.exports = async (ctx) => {
const { category = 'sran001' } = ctx.params;
const baseUrl = urlMap[category.substring(0, 4)].baseUrl;

const { data: response } = await got(`${baseUrl}/${category}`);

const $ = cheerio.load(response);

const list = $('div.listing-title > a')
.toArray()
.map((item) => {
item = $(item);
return {
title: item.text().trim(),
link: item.attr('href').startsWith('/')
? // remove tracking parameters
baseUrl + item.attr('href').split('?')[0].substring(0, item.attr('href').lastIndexOf('/'))
: item.attr('href').split('?')[0].substring(0, item.attr('href').lastIndexOf('/')),
};
});

const items = await Promise.all(
list.map((item) =>
ctx.cache.tryGet(item.link, async () => {
if (item.link.startsWith('https://invest.hket.com/') || item.link.startsWith('https://ps.hket.com/')) {
let data;

if (item.link.startsWith('https://invest.hket.com/')) {
data = await got.post('https://invest.hket.com/content-api-middleware/content', {
headers: {
referer: item.link,
},
json: {
id: item.link.split('/').pop(),
channel: 'invest',
},
});
} else {
data = await got('https://data02.hket.com/content', {
headers: {
referer: item.link,
},
searchParams: {
id: item.link.split('/').pop(),
channel: 'epc',
},
});
}
data = data.data;

item.pubDate = timezone(parseDate(data.displayDate), +8);
item.updated = timezone(parseDate(data.lastModifiedDate), +8);
item.author = data.authors?.map((e) => e.name).join(', ');
item.description = data.content.full || data.content.partial;
item.category = data.contentTags?.map((e) => e.name);

return item;
}

const { data: response } = await got(item.link);
const $ = cheerio.load(response);

item.category = $('.contentTags-container > .hotkey-container-wrapper > .hotkey-container > a')
.toArray()
.map((e) => $(e).text().trim());

// remove unwanted elements
$('source').remove();
$('p.article-detail_caption, .article-extend-button, span.click-to-enlarge').remove();
$('.loyalty-promotion-container, .relatedContents-container, .article-details-center-sharing-btn, .article-detail_login').remove();
$('.gallery-related-container, .contentTags-container').remove();
$('.listing-widget-126, div.template-default.hket-row.no-padding.detail-widget').remove();

// remove ads
$('.ad_MobileMain, .adunit, .native-ad').remove();

$('span').each((_, e) => {
if ($(e).text().startsWith('+')) {
$(e).remove();
}
});

// fix lazyload image and caption
$('img').each((_, e) => {
e = $(e);
e.replaceWith(
art(path.join(__dirname, 'templates/image.art'), {
alt: e.attr('data-alt'),
src: e.attr('data-src') ?? e.attr('src'),
})
);
});

item.description = $('div.article-detail-body-container').html();
item.pubDate = timezone(parseDate($('.article-details-info-container_date, .publish-date-time').text().trim()), +8);

return item;
})
)
);

ctx.state.data = {
title: $('head meta[name=title]').attr('content').trim(),
link: baseUrl + '/' + category,
description: $('head meta[name=description]').attr('content').trim(),
item: items,
language: 'zh-hk',
};

ctx.state.json = {
title: $('head meta[name=title]').attr('content').trim(),
link: baseUrl + '/' + category,
description: $('head meta[name=description]').attr('content').trim(),
item: items,
language: 'zh-hk',
};
};
3 changes: 3 additions & 0 deletions lib/v2/hket/maintainer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Maps each route path pattern of this namespace to its list of maintainers.
const maintainers = { '/:category?': ['TonyRL'] };

module.exports = maintainers;
45 changes: 45 additions & 0 deletions lib/v2/hket/radar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
module.exports = {
'hket.com': {
_name: '香港经济日报',
china: [
{
title: '新闻',
docs: 'https://docs.rsshub.app/routes/traditional-media#xiang-gang-jing-ji-ri-bao',
source: ['/:category/*'],
target: '/hket/:category',
},
],
inews: [
{
title: '新闻',
docs: 'https://docs.rsshub.app/routes/traditional-media#xiang-gang-jing-ji-ri-bao',
source: ['/:category/*'],
target: '/hket/:category',
},
],
topick: [
{
title: '新闻',
docs: 'https://docs.rsshub.app/routes/traditional-media#xiang-gang-jing-ji-ri-bao',
source: ['/:category/*'],
target: '/hket/:category',
},
],
wealth: [
{
title: '新闻',
docs: 'https://docs.rsshub.app/routes/traditional-media#xiang-gang-jing-ji-ri-bao',
source: ['/:category/*'],
target: '/hket/:category',
},
],
www: [
{
title: '新闻',
docs: 'https://docs.rsshub.app/routes/traditional-media#xiang-gang-jing-ji-ri-bao',
source: ['/'],
target: '/hket',
},
],
},
};
Loading

0 comments on commit a9e37c6

Please sign in to comment.