Skip to content

Commit 9cc1004

Browse files
authored
fix: correctly handle skip_prefix logic for site_alts (#1092)
Fixes #1091
1 parent cdf0b50 commit 9cc1004

File tree

2 files changed

+33
-10
lines changed

2 files changed

+33
-10
lines changed

app/utils/results.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import warnings
1313

1414
SKIP_ARGS = ['ref_src', 'utm']
15-
SKIP_PREFIX = ['//www.', '//mobile.', '//m.', 'www.', 'mobile.', 'm.']
15+
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
1616
GOOG_STATIC = 'www.gstatic.com'
1717
G_M_LOGO_URL = 'https://www.gstatic.com/m/images/icons/googleg.gif'
1818
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
@@ -152,11 +152,12 @@ def get_first_link(soup: BeautifulSoup) -> str:
152152
return ''
153153

154154

155-
def get_site_alt(link: str) -> str:
155+
def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
156156
"""Returns an alternative to a particular site, if one is configured
157157
158158
Args:
159-
link: A string result URL to check against the SITE_ALTS map
159+
link: A string result URL to check against the site_alts map
160+
site_alts: A map of site alternatives to replace with. Defaults to SITE_ALTS.
160161
161162
Returns:
162163
str: An updated (or ignored) result link
@@ -178,9 +179,9 @@ def get_site_alt(link: str) -> str:
178179
# "https://medium.com/..." should match, but "philomedium.com" should not)
179180
hostcomp = f'{parsed_link.scheme}://{hostname}'
180181

181-
for site_key in SITE_ALTS.keys():
182+
for site_key in site_alts.keys():
182183
site_alt = f'{parsed_link.scheme}://{site_key}'
183-
if not hostname or site_alt not in hostcomp or not SITE_ALTS[site_key]:
184+
if not hostname or site_alt not in hostcomp or not site_alts[site_key]:
184185
continue
185186

186187
# Wikipedia -> Wikiless replacements require the subdomain (if it's
@@ -193,9 +194,8 @@ def get_site_alt(link: str) -> str:
193194
elif 'medium' in hostname and len(subdomain) > 0:
194195
hostname = f'{subdomain}.{hostname}'
195196

196-
parsed_alt = urlparse.urlparse(SITE_ALTS[site_key])
197-
link = link.replace(hostname, SITE_ALTS[site_key]) + params
198-
197+
parsed_alt = urlparse.urlparse(site_alts[site_key])
198+
link = link.replace(hostname, site_alts[site_key]) + params
199199
# If a scheme is specified in the alternative, this results in a
200200
# replaced link that looks like "https://http://altservice.tld".
201201
# In this case, we can remove the original scheme from the result
@@ -205,9 +205,12 @@ def get_site_alt(link: str) -> str:
205205

206206
for prefix in SKIP_PREFIX:
207207
if parsed_alt.scheme:
208-
link = link.replace(prefix, '')
208+
# If a scheme is specified, remove everything before the
209+
# first occurrence of it
210+
link = f'{parsed_alt.scheme}{link.split(parsed_alt.scheme, 1)[-1]}'
209211
else:
210-
link = link.replace(prefix, '//')
212+
# Otherwise, replace the first occurrence of the prefix
213+
link = link.replace(prefix, '//', 1)
211214
break
212215

213216
return link

test/test_results.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from app.filter import Filter
33
from app.models.config import Config
44
from app.models.endpoint import Endpoint
5+
from app.utils import results
56
from app.utils.session import generate_key
67
from datetime import datetime
78
from dateutil.parser import ParserError, parse
@@ -136,3 +137,22 @@ def test_leading_slash_search(client):
136137
continue
137138

138139
assert link['href'].startswith(f'{Endpoint.search}')
140+
141+
142+
def test_site_alt_prefix_skip():
143+
# Ensure prefixes are skipped correctly for site alts
144+
145+
# default site_alts (farside.link)
146+
assert results.get_site_alt(link = 'https://www.reddit.com') == 'https://farside.link/libreddit'
147+
assert results.get_site_alt(link = 'https://www.twitter.com') == 'https://farside.link/nitter'
148+
assert results.get_site_alt(link = 'https://www.youtube.com') == 'https://farside.link/invidious'
149+
150+
test_site_alts = {
151+
'reddit.com': 'reddit.endswithmobile.domain',
152+
'twitter.com': 'https://twitter.endswithm.domain',
153+
'youtube.com': 'http://yt.endswithwww.domain',
154+
}
155+
# Domains with part of SKIP_PREFIX in them
156+
assert results.get_site_alt(link = 'https://www.reddit.com', site_alts = test_site_alts) == 'https://reddit.endswithmobile.domain'
157+
assert results.get_site_alt(link = 'https://www.twitter.com', site_alts = test_site_alts) == 'https://twitter.endswithm.domain'
158+
assert results.get_site_alt(link = 'https://www.youtube.com', site_alts = test_site_alts) == 'http://yt.endswithwww.domain'

0 commit comments

Comments
 (0)