Skip to content

Commit

Permalink
Merge pull request #5 from LLkaia/features/expr
Browse files Browse the repository at this point in the history
Pagination, search and filtering
  • Loading branch information
LLkaia authored Jan 3, 2024
2 parents f0c320c + 96269f9 commit 55f94d9
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 77 deletions.
9 changes: 9 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Base image: CPython 3.11 (Debian-based full image).
FROM python:3.11

# All subsequent paths are relative to this directory inside the image.
WORKDIR /usr/local/etc/lappy/
# Copy the dependency manifest first so the pip layer is cached
# unless requirements.txt itself changes.
COPY ./requirements.txt .
RUN pip install -r requirements.txt
# Copy the application code: the server package and the entry point.
COPY ./server ./server
COPY ./main.py .

# Start the service; main.py is expected to launch the FastAPI app.
CMD python3 main.py
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ This is a microservice designed to fetch and serve news articles using FastAPI,
```
http://localhost:8000/news/search?find=acer+aspire+7+review
```
- `/news/search/{id}`: Show concrete article.
- `/news/{id}`: Show concrete article.

- Example:

```
http://localhost:8000/news/search/657c1690f253079b6f3ed074
http://localhost:8000/news/657c1690f253079b6f3ed074
```
4 changes: 1 addition & 3 deletions server/app.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from fastapi import FastAPI
from fastapi_pagination import add_pagination

from server.routes.search_result import router as SearchResultRouter


app = FastAPI()
add_pagination(app)
app.include_router(SearchResultRouter, tags=["Search"], prefix="/news/search")
app.include_router(SearchResultRouter, tags=["News"], prefix="/news")


@app.get('/', tags=['Root'])
Expand Down
102 changes: 74 additions & 28 deletions server/database.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from datetime import datetime, timedelta

import motor.motor_asyncio
from bson import ObjectId
from bson.errors import InvalidId

from server.scraper import scrap_from_search
from server.models.search_result import Period


MONGO_DETAILS = 'mongodb://localhost:27017'
MONGO_DETAILS = 'mongodb://mongodb:27017'
client = motor.motor_asyncio.AsyncIOMotorClient(MONGO_DETAILS)
db = client.news
db = client.lappy

search_results_collection = db.get_collection('search_results')

Expand All @@ -25,26 +30,23 @@ def search_results_helper(search_result):
}


async def add_search_results(results: list[dict]):
async def update_search_results(search: str):
"""Add articles to database
    For each scraped article, check whether it already exists in the
    database. If it does, merge the search words into the article's
    'tags' field; otherwise, insert the article as a new document.
:param results: List of new articles
:param search: Search query
:return: List of articles added to a database
"""
new_results = []
results = scrap_from_search(search)
for result in results:
if await search_results_collection.find_one({"link": result['link']}):
new_result = await search_results_collection.find_one({"link": result['link']})
new_result["tags"] = list(set(new_result["tags"] + result['tags']))
await search_results_collection.update_one({"_id": ObjectId(new_result["_id"])}, {"$set": new_result})
else:
result = await search_results_collection.insert_one(result)
new_result = await search_results_collection.find_one({"_id": result.inserted_id})
new_results.append(search_results_helper(new_result))
return new_results
await search_results_collection.insert_one(result)


async def retrieve_search_result_by_id(id_: str):
Expand All @@ -57,33 +59,39 @@ async def retrieve_search_result_by_id(id_: str):
return


async def retrieve_search_results_by_tags(tags: list[str]):
async def retrieve_search_results_by_tags(tags: list[str], page: int, limit: int, period: Period):
"""Find articles by tags
Take search words and check if database contain articles,
which have more than :percentage: of words in 'tags' fields matches
which have more than 'percentage' of words in 'tags' fields matches
with words in search query. If database have them, return
this articles.
paginated articles and total amount of them.
:param limit: Page size
:param page: Number of page
:param tags: List of search words
:return: List of articles
:param period: Filtering period
:return: Count and List of articles
"""
percentage = 0.75
matched_result = []
results = search_results_collection.find()
search_tags = set(tags)
async for result in results:
common = search_tags.intersection(result["tags"])
if len(common) > len(search_tags) * percentage:
matched_result.append(search_results_helper(result))
return matched_result
tags = list(set(tags))
filter_expression = {
**resolve_period_expression(period),
**resolve_tags_expression(tags)
}
results = search_results_collection.find(filter_expression).sort('date', -1).skip((page - 1) * limit).limit(limit)
count = await search_results_collection.count_documents(filter_expression)
return count, [search_results_helper(result) async for result in results]


async def retrieve_newest_search_results(page: int, limit: int):
"""Get the newest articles from database
async def retrieve_newest_search_results():
"""Get 20 newest articles from database"""
results = []
async for result in search_results_collection.find().sort('date', -1).limit(20):
results.append(search_results_helper(result))
return results
:param limit: Page size
:param page: Number of page
:return: Count and List of articles
"""
results = search_results_collection.find().sort('date', -1).skip((page - 1) * limit).limit(limit)
count = await search_results_collection.count_documents({})
return count, [search_results_helper(result) async for result in results]


async def update_content_of_article(id_: str, content: list[list]):
Expand All @@ -96,3 +104,41 @@ async def update_content_of_article(id_: str, content: list[list]):
await search_results_collection.update_one({'_id': ObjectId(id_)}, {"$set": {"content": content}})
article = await search_results_collection.find_one({'_id': ObjectId(id_)})
return search_results_helper(article)


def resolve_period_expression(period: Period) -> dict:
    """Create a MongoDB date-filter expression for the given period.

    For ``last_week``/``last_month`` the returned filter restricts the
    string 'date' field to the trailing 7/30 days; any other period
    yields an empty filter (no date restriction).

    :param period: Filtering period from the query
    :return: Dict suitable for merging into a Mongo filter document
    """
    # The two bounded periods differ only in window length, so resolve the
    # number of days once instead of duplicating the whole branch.
    window_days = {Period.last_week: 7, Period.last_month: 30}.get(period)
    if window_days is None:
        return {}
    end_date = datetime.now()
    start_date = end_date - timedelta(days=window_days)

    def _to_stored_format(moment: datetime) -> str:
        # Render as ISO-8601 with millisecond precision and a trailing 'Z'
        # (microseconds truncated to 3 digits) — assumes stored 'date' strings
        # use the same format so lexicographic comparison works; TODO confirm.
        return moment.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    return {'date': {'$gte': _to_stored_format(start_date),
                     '$lt': _to_stored_format(end_date)}}


def resolve_tags_expression(tags: list[str], percentage: float = 0.75) -> dict:
    """Create a Mongo ``$expr`` filter matching documents by tag overlap.

    A document matches when at least ``percentage`` of the search tags
    appear in its 'tags' field; the comparison runs server-side through
    a JavaScript ``$function`` aggregation operator.

    :param tags: List of search words
    :param percentage: Minimum fraction of ``tags`` that must be present
        in a document's 'tags' field (default 0.75, the original behavior)
    :return: Filter document with a '$expr'/'$function' clause
    """
    return {
        '$expr': {
            '$function': {
                'body': """
                function(search, document, percentage) {
                    const searchTags = search;
                    const documentTags = document;
                    const intersection = documentTags.filter(tag => searchTags.includes(tag));
                    return intersection.length >= (searchTags.length * percentage);
                }
                """,
                'args': [tags, '$tags', percentage],
                'lang': 'js'
            }
        }
    }
28 changes: 11 additions & 17 deletions server/models/search_result.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
from enum import Enum

from pydantic import BaseModel, HttpUrl

Expand All @@ -13,24 +14,17 @@ class ArticleModel(BaseModel):
description: str = ""
tags: set[str] = set()

model_config = {
"json_schema_extra": {
"examples": [
{
"id": "657b4a8d9e6d5419e28aa3e1",
"link": "https://www.laptopmag.com/best-picks/tips-to-improve-macbook-sound",
"tags": ["acer", "aspire", "nvidia"],
"image": "https://cdn.mos.cms.futurecdn.net/vzWy7ZzZy4rfZUESfUw4Lg.jpg",
"title": "7 ways to improve sound on your MacBook",
"author": "Alex Bracetti",
"date": "2023-05-20T07:00:53Z",
"description": "Unhappy with the MacBook’s sound quality? Here are some tips and tricks to enhance "
"the audio performance on your Apple laptop."
},
]
}
}

class SearchResponseModel(BaseModel):
    """Response envelope for paginated article listings.

    ``count`` is the total number of matching articles in the database
    (not just the current page); ``results`` holds one page of articles.
    """

    count: int
    results: list[ArticleModel]


class ExtendArticleModel(ArticleModel):
    """Article model extended with the scraped page body.

    ``content`` holds the article body as nested lists produced by the
    scraper — TODO confirm the exact inner-list structure against
    ``scrap_content``.
    """

    content: list[list] = []


class Period(str, Enum):
    """Allowed values of the 'period' query parameter for date filtering.

    Mixes in ``str`` so the raw query-string value can be parsed and
    compared directly.
    """

    last_week = "last-week"    # trailing 7-day window
    last_month = "last-month"  # trailing 30-day window
    all = "all"                # no date restriction
45 changes: 18 additions & 27 deletions server/routes/search_result.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Annotated

from fastapi import APIRouter, status, HTTPException, Query
from fastapi_pagination import Page, paginate

from server.scraper import scrap_from_search, scrap_content
from server.models.search_result import ArticleModel, ExtendArticleModel
from server.scraper import scrap_content
from server.models.search_result import SearchResponseModel, ExtendArticleModel, Period
from server.database import (
add_search_results,
update_search_results,
retrieve_search_result_by_id,
retrieve_search_results_by_tags,
retrieve_newest_search_results,
Expand All @@ -13,13 +14,13 @@


router = APIRouter()
Page = Page.with_custom_options(
size=Query(5, ge=1, le=10),
)


@router.get("/", status_code=status.HTTP_200_OK, response_model=Page[ArticleModel])
async def get_search_results(find: str | None = None) -> Page[ArticleModel]:
@router.get("/search", status_code=status.HTTP_200_OK, response_model=SearchResponseModel)
async def get_search_results(find: Annotated[str | None, Query(description='Write search query here')] = None,
page: Annotated[int, Query(ge=1)] = 1,
limit: Annotated[int, Query(ge=1, le=10)] = 5,
period: Period = Period.all):
"""Find articles by search query
Get list of articles which match with search query from database.
Expand All @@ -28,27 +29,17 @@ async def get_search_results(find: str | None = None) -> Page[ArticleModel]:
articles.
"""
if find:
results = await retrieve_search_results_by_tags(find.split())
if len(results) < 10:
new_results = scrap_from_search(find)
new_results = await add_search_results(new_results)

# check for adding only unic
for new_one in new_results:
repeats = False
for old_one in results:
if new_one['id'] == old_one['id']:
repeats = True
break
if not repeats:
results.append(new_one)

return paginate(results)
return paginate(await retrieve_newest_search_results())
count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period)
if count < 5:
await update_search_results(find)
count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period)
return {'count': count, 'results': results}
count, results = await retrieve_newest_search_results(page, limit)
return {'count': count, 'results': results}


@router.get("/{id}", status_code=status.HTTP_200_OK, response_model=ExtendArticleModel)
async def get_article(id: str) -> ExtendArticleModel:
async def get_article(id: str):
"""Get concrete article with content
Find article by ID in database and if it exists, check if it
Expand Down

0 comments on commit 55f94d9

Please sign in to comment.