
Commit 058643b

Merge pull request #157 from nanos/use-xxhash

Try to use xxHash to hash robots cache file names

2 parents: 8ac868f + 939e775

2 files changed: 3 additions, 2 deletions
find_posts.py (2 additions, 2 deletions)

@@ -16,7 +16,7 @@
 import defusedxml.ElementTree as ET
 import urllib.robotparser
 from urllib.parse import urlparse
-import hashlib
+import xxhash
 
 logger = logging.getLogger("FediFetcher")
 robotParser = urllib.robotparser.RobotFileParser()
@@ -1076,7 +1076,7 @@ def get_paginated_mastodon(url, max, headers = {}, timeout = 0, max_tries = 5):
     return result
 
 def get_robots_txt_cache_path(robots_url):
-    hash = hashlib.sha256(robots_url.encode('utf-8'))
+    hash = xxhash.xxh128(robots_url.encode('utf-8'))
     return os.path.join(arguments.state_dir, f'robots-{hash.hexdigest()}.txt')
 
 def get_cached_robots(robots_url):
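
For context, a minimal, self-contained sketch of what the cache-path helper does after this commit. It is illustrative only: state_dir stands in for the real script's arguments.state_dir, and the example URL is made up.

    import os
    import xxhash

    def get_robots_txt_cache_path(robots_url, state_dir):
        # xxh128 yields a short (32 hex chars), stable, filesystem-safe name
        # for each host's cached robots.txt; the previous sha256 digest was
        # 64 hex chars long.
        digest = xxhash.xxh128(robots_url.encode('utf-8')).hexdigest()
        return os.path.join(state_dir, f'robots-{digest}.txt')

    # Example (hypothetical values):
    #   get_robots_txt_cache_path('https://example.social/robots.txt', 'artifacts')
    #   -> 'artifacts/robots-<32-hex-char digest>.txt'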

requirements.txt (1 addition, 0 deletions)

@@ -12,3 +12,4 @@ requests==2.32.0
 six==1.16.0
 smmap==5.0.0
 urllib3==1.26.19
+xxhash==3.4.1
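
A quick way to see the practical effect of the new xxhash==3.4.1 dependency (an illustrative snippet, not part of the commit): xxh128 hex digests are half the length of the sha256 digests used previously, so the cached robots.txt file names get shorter.

    import hashlib
    import xxhash

    url = 'https://example.social/robots.txt'  # made-up URL for illustration
    print(hashlib.sha256(url.encode('utf-8')).hexdigest())  # 64 hex characters
    print(xxhash.xxh128(url.encode('utf-8')).hexdigest())   # 32 hex characters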
