Skip to content

Commit

Permalink
Merge pull request #345 from HebaruSan/fix/archiveorg-timeouts
Browse files Browse the repository at this point in the history
Stop attempting archive.org download counts after timeout
  • Loading branch information
HebaruSan authored Oct 19, 2024
2 parents 093ce5e + 73f661a commit 9831903
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
29 changes: 20 additions & 9 deletions netkan/netkan/download_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from datetime import date

import requests
from requests.exceptions import ConnectTimeout

from .utils import repo_file_add_or_changed, legacy_read_text
from .repos import CkanMetaRepo
Expand Down Expand Up @@ -183,6 +184,7 @@ class InternetArchiveBatchedQuery:

# Start with an empty batch and no connection timeout recorded.
def __init__(self) -> None:
# Maps a CKAN identifier to the archive.org identifier queried for it
# (get_result iterates these pairs to attribute counts).
self.ids: Dict[str, str] = {}
# Becomes True once get_result hits a ConnectTimeout; get_result then
# returns early without contacting archive.org again.
self.connect_timed_out = False

def empty(self) -> bool:
    """Return True when no identifiers have been added to this batch."""
    # An empty dict is falsy, so truthiness replaces the explicit len() check
    return not self.ids
Expand All @@ -196,15 +198,24 @@ def add(self, ckan: Ckan) -> None:
def get_result(self, counts: Optional[Dict[str, int]] = None) -> Dict[str, int]:
    """
    Add archive.org all-time download counts for the batched ids to counts.

    counts: optional mapping of CKAN identifier -> download count to
            accumulate into; a new dict is created when omitted.

    Returns the same mapping, with the 'all_time' value of each identifier
    found in the response added to its entry. Identifiers missing from the
    response are logged and skipped.

    If the request raises ConnectTimeout, the error is logged, counts is
    returned unchanged, and this instance skips the request on every later
    call (the flag is not reset anywhere in this method).
    """
    if counts is None:
        counts = {}
    if self.connect_timed_out:
        # An earlier call already timed out; don't wait on another request
        return counts
    try:
        # Only the network call can raise ConnectTimeout, so keep the try minimal
        result = requests.get(self.IARCHIVE_API + ','.join(self.ids.values()),
                              timeout=60).json()
    except ConnectTimeout as exc:
        # Cleanly turn off archive.org counting while the downtime continues
        logging.error('Failed to get counts from archive.org',
                      exc_info=exc)
        self.connect_timed_out = True
        return counts
    for ckan_ident, ia_ident in self.ids.items():
        try:
            counts[ckan_ident] = counts.get(ckan_ident, 0) + result[ia_ident]['all_time']
        except KeyError as exc:
            logging.error('InternetArchive id not found in downloads result: %s',
                          ia_ident, exc_info=exc)
    return counts


class SourceForgeQuerier:
Expand Down
2 changes: 1 addition & 1 deletion netkan/netkan/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def version(self) -> Version:
def download(self) -> str:
    """
    Return the primary download URL from the raw metadata.

    When the 'download' value is a list, its first element is returned if
    the list is non-empty and that element is a string; otherwise '' is
    returned. A non-list value is returned as-is.
    """
    raw_download = self._raw.get('download')
    if isinstance(raw_download, list):
        # Check emptiness BEFORE indexing: an empty list must yield '',
        # not raise IndexError
        if raw_download and isinstance(raw_download[0], str):
            return raw_download[0]
        return ''
    return raw_download

# Provide all downloads with alternate property in case we need them,
Expand Down

0 comments on commit 9831903

Please sign in to comment.