diff --git a/server/database.py b/server/database.py index 296b57d..de43b56 100644 --- a/server/database.py +++ b/server/database.py @@ -1,8 +1,11 @@ +from datetime import datetime, timedelta + import motor.motor_asyncio from bson import ObjectId from bson.errors import InvalidId from server.scraper import scrap_from_search +from server.models.search_result import Period MONGO_DETAILS = 'mongodb://localhost:27017' @@ -56,37 +59,25 @@ async def retrieve_search_result_by_id(id_: str): return -async def retrieve_search_results_by_tags(tags: list[str], page: int, limit: int): +async def retrieve_search_results_by_tags(tags: list[str], page: int, limit: int, period: Period): """Find articles by tags Take search words and check if database contain articles, - which have more than :percentage: of words in 'tags' fields matches + which have more than 'percentage' of words in 'tags' fields matches with words in search query. If database have them, return paginated articles and total amount of them. :param limit: Page size :param page: Number of page :param tags: List of search words + :param period: Filtering period :return: Count and List of articles """ - percentage = 0.75 tags = list(set(tags)) filter_expression = { - '$expr': { - '$function': { - 'body': """ - function(search, document, percentage) { - const searchTags = search; - const documentTags = document; - const intersection = documentTags.filter(tag => searchTags.includes(tag)); - return intersection.length >= (searchTags.length * percentage); - } - """, - 'args': [tags, '$tags', percentage], - 'lang': 'js' - } - } + **resolve_period_expression(period), + **resolve_tags_expression(tags) } - results = search_results_collection.find(filter_expression).skip((page - 1) * limit).limit(limit) + results = search_results_collection.find(filter_expression).sort('date', -1).skip((page - 1) * limit).limit(limit) count = await search_results_collection.count_documents(filter_expression) return count, [search_results_helper(result) async for result in results] @@ -113,3 +104,41 @@ async def update_content_of_article(id_: str, content: list[list]): await search_results_collection.update_one({'_id': ObjectId(id_)}, {"$set": {"content": content}}) article = await search_results_collection.find_one({'_id': ObjectId(id_)}) return search_results_helper(article) + + +def resolve_period_expression(period: Period) -> dict: + """Create expression based on Period from query""" + if period is Period.last_week: + end_date = datetime.now() + start_date = end_date - timedelta(days=7) + end_date = end_date.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' + start_date = start_date.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' + return {'date': {'$gte': start_date, '$lt': end_date}} + if period is Period.last_month: + end_date = datetime.now() + start_date = end_date - timedelta(days=30) + end_date = end_date.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' + start_date = start_date.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' + return {'date': {'$gte': start_date, '$lt': end_date}} + return {} + + +def resolve_tags_expression(tags: list[str]) -> dict: + """Create expression based on search tags""" + percentage = 0.75 + return { + '$expr': { + '$function': { + 'body': """ + function(search, document, percentage) { + const searchTags = search; + const documentTags = document; + const intersection = documentTags.filter(tag => searchTags.includes(tag)); + return intersection.length >= (searchTags.length * percentage); + } + """, + 'args': [tags, '$tags', percentage], + 'lang': 'js' + } + } + } diff --git a/server/models/search_result.py b/server/models/search_result.py index d4dd51e..771f4d7 100644 --- a/server/models/search_result.py +++ b/server/models/search_result.py @@ -1,4 +1,5 @@ from datetime import datetime +from enum import Enum from pydantic import BaseModel, HttpUrl @@ -21,3 +22,9 @@ class SearchResponseModel(BaseModel): class ExtendArticleModel(ArticleModel): content: list[list] = [] + + +class Period(str, Enum): + last_week = "last-week" + last_month = "last-month" + all = "all" diff --git a/server/routes/search_result.py b/server/routes/search_result.py index 2f7d64e..14def5e 100644 --- a/server/routes/search_result.py +++ b/server/routes/search_result.py @@ -1,7 +1,9 @@ -from fastapi import APIRouter, status, HTTPException +from typing import Annotated + +from fastapi import APIRouter, status, HTTPException, Query from server.scraper import scrap_content -from server.models.search_result import SearchResponseModel, ExtendArticleModel +from server.models.search_result import SearchResponseModel, ExtendArticleModel, Period from server.database import ( update_search_results, retrieve_search_result_by_id, @@ -15,7 +17,10 @@ @router.get("/search", status_code=status.HTTP_200_OK, response_model=SearchResponseModel) -async def get_search_results(find: str | None = None, page: int = 1, limit: int = 5): +async def get_search_results(find: Annotated[str | None, Query(description='Write search query here')] = None, + page: Annotated[int, Query(ge=1)] = 1, + limit: Annotated[int, Query(ge=1, le=10)] = 5, + period: Period = Period.all): """Find articles by search query Get list of articles which match with search query from database. @@ -24,10 +29,10 @@ async def get_search_results(find: str | None = None, page: int = 1, limit: int articles. """ if find: - count, results = await retrieve_search_results_by_tags(find.split(), page, limit) + count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period) if count < 5: await update_search_results(find) - count, results = await retrieve_search_results_by_tags(find.split(), page, limit) + count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period) return {'count': count, 'results': results} count, results = await retrieve_newest_search_results(page, limit) return {'count': count, 'results': results} diff --git a/server/scraper.py b/server/scraper.py index f9cf345..f856388 100644 --- a/server/scraper.py +++ b/server/scraper.py @@ -63,5 +63,3 @@ def scrap_content(link: str) -> list[list]: image = block.find('img').get('data-pin-media') content.append(('image', image)) return content - -