From 1d98f8c2368ed8f76a02e7367826205abc4d2d49 Mon Sep 17 00:00:00 2001 From: Illia Kaialainien Date: Sat, 16 Dec 2023 13:13:57 +0200 Subject: [PATCH] added docstrings --- server/app.py | 1 + server/database.py | 26 ++++++++++++++++++++++++++ server/routes/search_result.py | 13 +++++++++++++ server/scraper.py | 11 +++++++++++ 4 files changed, 51 insertions(+) diff --git a/server/app.py b/server/app.py index 6589979..b85effc 100644 --- a/server/app.py +++ b/server/app.py @@ -1,4 +1,5 @@ from fastapi import FastAPI + from server.routes.search_result import router as SearchResultRouter diff --git a/server/database.py b/server/database.py index 2637b7c..786fbd7 100644 --- a/server/database.py +++ b/server/database.py @@ -11,6 +11,7 @@ def search_results_helper(search_result): + """Take each article and convert it to a JSON-serializable format""" return { "id": str(search_result["_id"]), "link": search_result["link"], @@ -25,6 +26,14 @@ def search_results_helper(search_result): async def add_search_results(results: list[dict]): + """Add articles to the database + + Check whether each article already exists in the database. If it exists, + add the search words to the article's 'tags' field; otherwise, the article will + be added to the database. + :param results: List of new articles + :return: List of articles added to the database + """ new_results = [] for result in results: if await search_results_collection.find_one({"link": result['link']}): @@ -39,6 +48,7 @@ async def add_search_results(results: list[dict]): async def retrieve_search_result_by_id(id_: str): + """Find a specific article in the database by its ID""" try: result = await search_results_collection.find_one({"_id": ObjectId(id_)}) if result: @@ -48,6 +58,15 @@ async def retrieve_search_result_by_id(id_: str): async def retrieve_search_results_by_tags(tags: list[str]): + """Find articles by tags + + Take the search words and check whether the database contains articles + in which more than half of the words in the 'tags' field match + the words in the search query. 
If the database has them, return + these articles. + :param tags: List of search words + :return: List of articles + """ matched_result = [] results = search_results_collection.find() search_tags = set(tags) @@ -59,6 +78,7 @@ async def retrieve_newest_search_results(): + """Get the 20 newest articles from the database""" results = [] async for result in search_results_collection.find().sort('date', -1).limit(20): results.append(search_results_helper(result)) @@ -66,6 +86,12 @@ async def retrieve_newest_search_results(): async def update_content_of_article(id_: str, content: list[list]): + """Add content to an article + + :param id_: ID of an existing article + :param content: List of content + :return: The article with its content + """ await search_results_collection.update_one({'_id': ObjectId(id_)}, {"$set": {"content": content}}) article = await search_results_collection.find_one({'_id': ObjectId(id_)}) return search_results_helper(article) diff --git a/server/routes/search_result.py b/server/routes/search_result.py index c5a65ab..1e57ce7 100644 --- a/server/routes/search_result.py +++ b/server/routes/search_result.py @@ -16,6 +16,13 @@ @router.get("/", status_code=status.HTTP_200_OK, response_model=list[ArticleModel]) async def get_search_results(find: str | None = None) -> list[ArticleModel]: + """Find articles by search query + + Get the list of articles matching the search query from the database. + If fewer than 20 articles are found, scrape new articles and add + them to the database. If the 'find' param is empty, return the 20 newest + articles. 
+ """ if find: results = await retrieve_search_results_by_tags(find.split()) if len(results) < 20: @@ -28,6 +35,12 @@ async def get_search_results(find: str | None = None) -> list[ArticleModel]: @router.get("/{id}", status_code=status.HTTP_200_OK, response_model=ExtendArticleModel) async def get_article(id: str) -> ExtendArticleModel: + """Get a specific article with its content + + Find the article by ID in the database and, if it exists, check whether it + has content in the 'content' field. If it does, return it; otherwise scrape + that content. If the article does not exist in the db, return 404. + """ result = await retrieve_search_result_by_id(id) if result: if not result['content']: diff --git a/server/scraper.py b/server/scraper.py index b98d87e..f856388 100644 --- a/server/scraper.py +++ b/server/scraper.py @@ -13,10 +13,16 @@ def scrap_from_search(search: str) -> list[dict]: + """Take a search query and return a list of articles + + :param search: Query string with what the user wants to find + :return: Result of the search - a list of articles + """ link_src = f'https://www.laptopmag.com/search?searchTerm={search}&articleType=best-pick' page_src = requests.get(link_src, headers) soup_src = BeautifulSoup(page_src.content, 'html.parser') + # parse search result laptops = soup_src.find_all('div', class_='listingResult') laptops_data = [] for laptop in laptops: @@ -34,6 +40,11 @@ def scrap_from_search(search: str) -> list[dict]: def scrap_content(link: str) -> list[list]: + """Parse a specific article's content + + :param link: URL of the article + :return: List of content where each inner list contains the type of content and the content itself + """ page_src = requests.get(link, headers) soup_src = BeautifulSoup(page_src.content, 'html.parser')