Skip to content

Commit

Permalink
Avoid .md files being picked up as a duplicate URL
Browse files Browse the repository at this point in the history
  • Loading branch information
arkon committed Sep 15, 2023
1 parent 3251844 commit f627d05
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion dist/index.js

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion src/util/urls.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ describe('urlsFromIssueBody', () => {
['foo https://github.com/tachiyomiorg bar', []],
['foo user-images.githubusercontent.com/something bar', []],
['foo www.gist.github.com/something bar', []],
[
'foo https://github.com/tachiyomiorg/tachiyomi-extensions/blob/master/REMOVED_SOURCES.md something',
[],
],
] as const
).forEach(([body, expectedUrls]) => {
expect(urlsFromIssueBody(body)).toStrictEqual(expectedUrls);
Expand All @@ -65,7 +69,7 @@ describe('cleanUrl', () => {
'https://www.tachiyomi.org/subpath?foo=bar#hash',
'tachiyomi.org/subpath?foo=bar#hash',
],
['http://google.com/', 'google.com/'],
['http://google.com/', 'google.com'],
['https://GITHUB.com', 'github.com'],
] as const
).forEach(([url, expectedUrl]) => {
Expand Down
12 changes: 9 additions & 3 deletions src/util/urls.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
/**
 * Matches URL-like tokens in free text: an optional `http://` or `https://`
 * scheme, one or more dot-terminated labels (word characters and `-`), a final
 * label of 2-18 letters, and an optional trailing slash.
 *
 * Global and case-insensitive. Anything after the first slash is not part of
 * the match.
 */
const URL_REGEX = /(?:https?:\/\/)?(?:[-\w]+\.)+[a-z]{2,18}\/?/gi;

/** Cleaned URLs that `urlsFromIssueBody` always drops (exact match). */
const EXCLUSION_LIST = [
  'tachiyomi.org',
  'github.com',
  'user-images.githubusercontent.com',
  'gist.github.com',
];

/**
 * Suffixes that disqualify a cleaned URL in `urlsFromIssueBody`.
 *
 * Despite the name, this also holds file name extensions: a token such as
 * `REMOVED_SOURCES.md` matches `URL_REGEX` but is a file, not a site.
 */
const EXCLUDED_DOMAINS = ['.md'];

/**
 * Extracts the distinct, normalized URLs mentioned in an issue body.
 *
 * Every `URL_REGEX` match is normalized with `cleanUrl`. A match is dropped
 * when its cleaned form is listed in `EXCLUSION_LIST` or ends with one of the
 * `EXCLUDED_DOMAINS` suffixes (for example a `.md` file name).
 *
 * @param body - Raw text of the issue body.
 * @returns The remaining cleaned URLs, de-duplicated, in order of first appearance.
 */
export function urlsFromIssueBody(body: string): string[] {
  const urls = Array.from(body.matchAll(URL_REGEX))
    .map((match) => cleanUrl(match[0]))
    .filter((url) => !EXCLUSION_LIST.includes(url))
    .filter((url) => !EXCLUDED_DOMAINS.some((suffix) => url.endsWith(suffix)));
  // A Set preserves insertion order, so duplicates collapse onto their first occurrence.
  return Array.from(new Set(urls));
}

/**
 * Normalizes a URL so that equivalent spellings compare equal.
 *
 * The input is lower-cased, a leading `http://` or `https://` scheme and a
 * leading `www.` are removed, and a single trailing slash is dropped.
 *
 * @param url - URL (or bare host) as written by the user.
 * @returns The normalized form, e.g. `http://Google.com/` becomes `google.com`.
 */
export function cleanUrl(url: string): string {
  return (
    url
      // Lower-case first so an upper-case scheme or `WWW.` is stripped too.
      .toLowerCase()
      // Anchored on purpose: an unanchored global pattern would also delete
      // `www.` (or a scheme) from the middle of the string.
      .replace(/^(https?:\/\/)?(www\.)?/, '')
      .replace(/\/$/, '')
  );
}

0 comments on commit f627d05

Please sign in to comment.