Skip to content

Commit

Permalink
Merge pull request #383 from sparcs-kaist/hotfix/portal-crawl-no-src-img
Browse files (browse the repository at this point in the history)
Fix exception raised when an img tag has no src attribute
  • Loading branch information
injoonH authored Apr 6, 2023
2 parents bdaeb0b + 191bfd0 commit bc03af6
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions apps/core/management/scripts/portal_crawler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,6 +101,9 @@ def _get_new_url_and_save_to_s3(url, session):
hash = enc.hexdigest()[:20]
filename = f'files/portal_image_{hash}.{url.split("_")[-1]}'

if url.startswith("/board"):
url = str(BASE_URL) + url

r = session.get(url, stream=True, cookies=COOKIES)
if r.status_code == 200:
s3 = boto3.client("s3")
Expand All @@ -112,6 +115,8 @@ def _save_portal_image(html, session):
soup = bs(html, "lxml")
for child in soup.find_all("img", {}):
old_url = child.attrs.get("src")
if old_url is None:
continue
new_url = _get_new_url_and_save_to_s3(old_url, session)
child["src"] = new_url

Expand Down

0 comments on commit bc03af6

Please sign in to comment.