Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add strict typeCheckingMode #413

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5368f6b
refactor(type): add python typeCheckingMode strict
ddungiii Sep 14, 2023
bb39233
chore(package): Add install mypy in pipfile
ddungiii Sep 14, 2023
c49a4bd
chore(package): Add django, DRF stubs in pipfile
DoyunShin Sep 21, 2023
83521f0
feat(mypy): Add mypy.ini
DoyunShin Sep 21, 2023
f35ba53
feat(mypy): mypy follow_imports removed
DoyunShin Sep 21, 2023
cfb8bb8
feat(vscode): Set mypy as a default linter.
DoyunShin Sep 21, 2023
78a1605
fix(typo): NewAra
DoyunShin Sep 21, 2023
92a7112
feat(mypy): Add typing-extensions
Qndndn Sep 21, 2023
4804e89
fix(mypy): Removed typing-extensions from package
Qndndn Sep 21, 2023
1459ca1
Merge pull request #420 from sparcs-kaist/refactor/type-hint-common-s…
Qndndn Sep 21, 2023
feba307
fix(mypy): Add all stubs, and ignore no stubs.
DoyunShin Sep 21, 2023
91f6589
Merge pull request #415 from sparcs-kaist/refactor/type-hint-django-s…
DoyunShin Sep 22, 2023
2adca8f
refactor(type): hinting apps.core.models.article
DoyunShin Oct 5, 2023
0112c90
refactor(type): hinting comment, ara
Qndndn Oct 5, 2023
6cf5250
Merge branch 'refactor/type-hint-hyooyh' into refactor/type-hint
Qndndn Oct 5, 2023
c15faed
refactor(type): fix articles in apps
DoyunShin Oct 5, 2023
8f7747b
Merge branch 'refactor/type-hint-roul' into refactor/type-hint
DoyunShin Oct 6, 2023
1bda448
Merge branch 'develop' into refactor/type-hint
injoonH Oct 26, 2023
1fd40ae
fix(hidden): remove unintended init
injoonH Oct 26, 2023
fd79160
fix(comment): remove attribute of enum
injoonH Oct 26, 2023
99990be
feat(type): follow PEP for typing
injoonH Nov 23, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,6 @@ config.cnf
# Created by https://www.gitignore.io/api/visualstudiocode
# Edit at https://www.gitignore.io/?templates=visualstudiocode

### VisualStudioCode ###
.vscode/*

### VisualStudioCode Patch ###
# Ignore all local history of files
Expand Down
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"mypy.enabled": true,
"mypy.configFile": "mypy.ini",
"mypy.dmypyExecutable": "/root/.local/share/virtualenvs/www-ZVUe9YDU/bin/dmypy",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

python 경로는 사람마다 달라서 다른 방식으로 넣는 게 나을 것 같습니다.

}
7 changes: 7 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ isort = "*"
pre-commit = "*"
ipython = "*"
pytest-cov = "*"
mypy = "*"
django-stubs = "~=4.2"
djangorestframework-stubs = "~=3.14"
typing-extensions = "*"
types-bleach = "*"
types-python-dateutil = "*"
types-tqdm = "*"

[requires]
python_version = "3.11"
1,368 changes: 822 additions & 546 deletions Pipfile.lock

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion apps/core/filters/article.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from django_filters.rest_framework import CharFilter, FilterSet

from apps.core.documents import ArticleDocument
from apps.core.models import Article

if TYPE_CHECKING:
from ara.db.models import MetaDataQuerySet


class ArticleFilter(FilterSet):
board = CharFilter(field_name="parent_board__slug", lookup_expr="exact")
Expand Down Expand Up @@ -60,5 +67,7 @@ class Meta:
)

@staticmethod
def get_main_search__contains(queryset, name, value):
def get_main_search__contains(
queryset: MetaDataQuerySet, name: str, value: str
) -> MetaDataQuerySet:
return queryset.filter(id__in=ArticleDocument.get_main_search_id_set(value))
6 changes: 3 additions & 3 deletions apps/core/management/commands/crawl_portal_manual.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import timedelta
from datetime import datetime, timedelta

from dateutil.parser import parse as date_parse
from django.core.management import BaseCommand
Expand All @@ -21,8 +21,8 @@ def parse_date(self, options, arg_name):
return None

def handle(self, *args, **options):
start = self.parse_date(options, "start") or timezone.datetime.today().date()
end = self.parse_date(options, "end") or timezone.datetime.today().date()
start = self.parse_date(options, "start") or datetime.today().date()
end = self.parse_date(options, "end") or datetime.today().date()

print("start:", start, "end:", end)
dates = []
Expand Down
95 changes: 59 additions & 36 deletions apps/core/management/scripts/portal_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import re
import uuid
from datetime import datetime
from pytz import timezone as pytz_timezone

import boto3
import requests
Expand All @@ -11,6 +10,7 @@
from django.db import transaction
from django.utils import timezone
from django.utils.translation import gettext
from pytz import timezone as pytz_timezone
from tqdm import tqdm

from apps.core.models import Article
Expand Down Expand Up @@ -59,9 +59,11 @@ def _already_hyperlinked(html):
for child in soup.descendants:
name = getattr(child, "name", None)
if name:
linked = child.attrs.get("src") or child.attrs.get("href")
if linked:
tagged_links.append(linked)
attrs = getattr(child, "attrs", None)
if attrs:
injoonH marked this conversation as resolved.
Show resolved Hide resolved
linked = attrs.get("src") or attrs.get("href")
if linked:
tagged_links.append(linked)

return tagged_links

Expand Down Expand Up @@ -115,21 +117,30 @@ def _save_portal_image(html, session):

article_req = session.get(url, cookies=COOKIES)
soup = bs(article_req.text, "lxml")
writer_element = soup.find("th", text="작성자(소속)")
if writer_element:
writer = (
soup.find("th", text="작성자(소속)") # ??
.findNext("td")
.select("label")[0]
.contents[0]
.strip()
)
else:
writer = None

created_at_element = soup.find("th", text="작성일(조회수)")
if created_at_element:
created_at_str = (
soup.find("th", text="작성일(조회수)") # ??
.findNext("td")
.contents[0]
.strip()
.split("(")[0]
)
else:
created_at_str = None

writer = (
soup.find("th", text="작성자(소속)")
.findNext("td")
.select("label")[0]
.contents[0]
.strip()
)
created_at_str = (
soup.find("th", text="작성일(조회수)")
.findNext("td")
.contents[0]
.strip()
.split("(")[0]
)
created_at = (
datetime.strptime(created_at_str, "%Y.%m.%d %H:%M:%S")
.astimezone(KST)
Expand All @@ -140,16 +151,22 @@ def _save_portal_image(html, session):
trs = soup.select("table > tbody > tr")
html = None

from bs4.element import Tag
injoonH marked this conversation as resolved.
Show resolved Hide resolved

for tr in trs:
if len(list(tr.children)) == 3:
html = tr.find("td").prettify()
break
td = tr.find("td")
if isinstance(td, Tag): # td가 Tag 타입인지 확인
html = td.prettify()
break

if html is None:
for tr in trs:
if len(list(tr.children)) == 2:
html = tr.find("td").prettify()
break
td = tr.find("td")
if isinstance(td, Tag): # td가 Tag 타입인지 확인
html = td.prettify()
break

html = _save_portal_image(html, session)
html = _enable_hyperlink(html)
Expand All @@ -170,8 +187,10 @@ def _save_portal_image(html, session):

def crawl_hour(day=None):
# parameter에서 default로 바로 today()하면, 캐싱되어서 업데이트가 안됨
from django.utils import timezone
injoonH marked this conversation as resolved.
Show resolved Hide resolved

if day is None:
day = timezone.datetime.today().date()
day = timezone.now().date()

session = _login_kaist_portal()

Expand Down Expand Up @@ -239,7 +258,7 @@ def _get_board_today(page_num):

created_at_utc = info["created_at"].astimezone(timezone.utc)

if (
if last_portal_article_in_db is not None and (
created_at_utc < last_portal_article_in_db.created_at
or info["title"] == prev_title
):
Expand All @@ -250,7 +269,7 @@ def _get_board_today(page_num):
)

if user_exist:
user = user_exist.first().user
user = user_exist.first()
injoonH marked this conversation as resolved.
Show resolved Hide resolved
else:
user = get_user_model().objects.create(
username=str(uuid.uuid1()), is_active=False
Expand Down Expand Up @@ -280,17 +299,18 @@ def _get_board_today(page_num):
if not new_articles:
return
earliest_new_article = new_articles[-1]
is_same_day = (
last_portal_article_in_db.created_at.date()
== earliest_new_article.created_at.date()
)
is_same_title = last_portal_article_in_db.title == earliest_new_article.title
if last_portal_article_in_db:
is_same_day = (
last_portal_article_in_db.created_at.date()
== earliest_new_article.created_at.date()
)
is_same_title = last_portal_article_in_db.title == earliest_new_article.title

if is_same_day and is_same_title:
last_portal_article_in_db.created_at = earliest_new_article.created_at
last_portal_article_in_db.content = earliest_new_article.content
last_portal_article_in_db.save()
new_articles.pop()
if is_same_day and is_same_title:
last_portal_article_in_db.created_at = earliest_new_article.created_at
last_portal_article_in_db.content = earliest_new_article.content
last_portal_article_in_db.save()
new_articles.pop()

created_articles = Article.objects.bulk_create(new_articles)

Expand Down Expand Up @@ -343,8 +363,11 @@ def _get_board(page_num):
user_exist = UserProfile.objects.filter(
nickname=info["writer"], is_newara=False
)

from typing import Optional

if user_exist:
user = user_exist.first().user
user: Optional[UserProfile] = user_exist.first().user
injoonH marked this conversation as resolved.
Show resolved Hide resolved
else:
user = get_user_model().objects.create(
username=str(uuid.uuid1()), is_active=False
Expand Down
6 changes: 4 additions & 2 deletions apps/core/management/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,17 @@ def _get_best(days, period):

vote_objs = redis.get_objs_by_values(_get_redis_key(type_), f"({from_ts}", to_ts)

article_votes = defaultdict(int)
from typing import Any
injoonH marked this conversation as resolved.
Show resolved Hide resolved

article_votes: defaultdict[Any, int] = defaultdict(int)
injoonH marked this conversation as resolved.
Show resolved Hide resolved
for obj in vote_objs:
article_id, vote, _, _ = obj.split(":")
article_votes[article_id] += int(vote)

type_ = "hit"
hit_objs = redis.get_objs_by_values(_get_redis_key(type_), f"({from_ts}", to_ts)

article_hits = defaultdict(int)
article_hits: defaultdict[Any, int] = defaultdict(int)
injoonH marked this conversation as resolved.
Show resolved Hide resolved
for obj in hit_objs:
article_id, hit, _, _ = obj.split(":")
article_hits[article_id] += int(hit)
Expand Down
Loading
Loading