
Commit
added docstrings
LLkaia committed Dec 16, 2023
1 parent 48c893a commit 1d98f8c
Showing 4 changed files with 51 additions and 0 deletions.
1 change: 1 addition & 0 deletions server/app.py
@@ -1,4 +1,5 @@
from fastapi import FastAPI

from server.routes.search_result import router as SearchResultRouter


26 changes: 26 additions & 0 deletions server/database.py
@@ -11,6 +11,7 @@


def search_results_helper(search_result):
    """Convert an article document into a JSON-serializable dict."""
    return {
        "id": str(search_result["_id"]),
        "link": search_result["link"],
@@ -25,6 +26,14 @@ def search_results_helper(search_result):


async def add_search_results(results: list[dict]):
    """Add articles to the database.

    For each article, check whether it already exists in the database.
    If it does, merge the search words into the existing article's
    'tags' field; otherwise, insert the article as a new document.

    :param results: list of new articles
    :return: list of articles added to the database
    """
    new_results = []
    for result in results:
        if await search_results_collection.find_one({"link": result['link']}):
@@ -39,6 +48,7 @@ async def add_search_results(results: list[dict]):


async def retrieve_search_result_by_id(id_: str):
    """Find a specific article in the database by its ID."""
    try:
        result = await search_results_collection.find_one({"_id": ObjectId(id_)})
        if result:
@@ -48,6 +58,15 @@ async def retrieve_search_result_by_id(id_: str):


async def retrieve_search_results_by_tags(tags: list[str]):
    """Find articles by tags.

    Take the search words and return the articles whose 'tags' field
    matches more than half of the words in the search query.

    :param tags: list of search words
    :return: list of matching articles
    """
    matched_result = []
    results = search_results_collection.find()
    search_tags = set(tags)
@@ -59,13 +78,20 @@ async def retrieve_search_results_by_tags(tags: list[str]):


async def retrieve_newest_search_results():
    """Get the 20 newest articles from the database."""
    results = []
    async for result in search_results_collection.find().sort('date', -1).limit(20):
        results.append(search_results_helper(result))
    return results


async def update_content_of_article(id_: str, content: list[list]):
    """Add content to an article.

    :param id_: ID of an existing article
    :param content: list of content blocks
    :return: the article with its content
    """
    await search_results_collection.update_one({'_id': ObjectId(id_)}, {"$set": {"content": content}})
    article = await search_results_collection.find_one({'_id': ObjectId(id_)})
    return search_results_helper(article)
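
The majority-match rule described in retrieve_search_results_by_tags can be sketched as follows (the actual loop body is collapsed in this diff, so the set arithmetic is an assumption based on the docstring; matches_query is a hypothetical helper, not part of the commit):

def matches_query(article_tags: list[str], search_tags: set[str]) -> bool:
    # An article matches when more than half of the search words
    # appear among its tags (assumption based on the docstring).
    overlap = search_tags & set(article_tags)
    return len(overlap) > len(search_tags) / 2

# A three-word query needs at least two matching tags:
assert matches_query(["gaming", "laptop", "budget"], {"best", "gaming", "laptop"})
assert not matches_query(["tablet"], {"best", "gaming", "laptop"})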
13 changes: 13 additions & 0 deletions server/routes/search_result.py
@@ -16,6 +16,13 @@

@router.get("/", status_code=status.HTTP_200_OK, response_model=list[ArticleModel])
async def get_search_results(find: str | None = None) -> list[ArticleModel]:
"""Find articles by search query
Get list of articles which match with search query from database.
If count of articles is less than 20, scrap new articles and add
them to a database. If 'find' param is empty, return 20 newest
articles.
"""
if find:
results = await retrieve_search_results_by_tags(find.split())
if len(results) < 20:
@@ -28,6 +35,12 @@ async def get_search_results(find: str | None = None) -> list[ArticleModel]:

@router.get("/{id}", status_code=status.HTTP_200_OK, response_model=ExtendArticleModel)
async def get_article(id: str) -> ExtendArticleModel:
"""Get concrete article with content
Find article by ID in database and if it exists, check if it
has content in 'content' field. If it is, return it, else scrap
this content. If article is not exist in db, return 404.
"""
result = await retrieve_search_result_by_id(id)
if result:
if not result['content']:
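
Taken together, the two routes above can be exercised like this; the base URL and root mount path are assumptions, since the router prefix is not shown in this diff:

import requests

BASE = "http://localhost:8000"  # assumed local dev server

# Search; the route scrapes extra articles if fewer than 20 match:
articles = requests.get(f"{BASE}/", params={"find": "best gaming laptop"}).json()

# Fetch one article with its content (scraped on first access):
if articles:
    article = requests.get(f"{BASE}/{articles[0]['id']}").json()
    print(article["link"])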
11 changes: 11 additions & 0 deletions server/scraper.py
@@ -13,10 +13,16 @@


def scrap_from_search(search: str) -> list[dict]:
    """Take a search query and return a list of articles.

    :param search: query string describing what the user wants to find
    :return: search results as a list of articles
    """
    link_src = f'https://www.laptopmag.com/search?searchTerm={search}&articleType=best-pick'
    page_src = requests.get(link_src, headers=headers)
    soup_src = BeautifulSoup(page_src.content, 'html.parser')

    # parse the search results
    laptops = soup_src.find_all('div', class_='listingResult')
    laptops_data = []
    for laptop in laptops:
@@ -34,6 +40,11 @@ def scrap_from_search(search: str) -> list[dict]:


def scrap_content(link: str) -> list[list]:
    """Parse a specific article's content.

    :param link: URL of the article
    :return: list of content blocks, where each block holds the
        content type and the content itself
    """
    page_src = requests.get(link, headers=headers)
    soup_src = BeautifulSoup(page_src.content, 'html.parser')

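
A quick sketch of how the two scraper helpers compose; the two-element shape of each content block follows the docstring above, and the query string is only an example:

from server.scraper import scrap_from_search, scrap_content

results = scrap_from_search("gaming laptop")
if results:
    # Each content block is [type, content] per the docstring above.
    for block_type, block in scrap_content(results[0]["link"]):
        print(block_type, str(block)[:60])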
