Skip to content

Commit

Permalink
Merge pull request #5 from LLkaia/features/expr
Browse files Browse the repository at this point in the history
Pagination, search and filtering
  • Loading branch information
LLkaia authored Jan 3, 2024
2 parents f0c320c + 96269f9 commit 55f94d9
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 77 deletions.
9 changes: 9 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Base image: CPython 3.11 (Debian-based full image).
FROM python:3.11

# All subsequent paths are relative to this directory inside the image.
WORKDIR /usr/local/etc/lappy/
# Copy the dependency manifest first so the pip layer is cached
# unless requirements.txt itself changes.
COPY ./requirements.txt .
RUN pip install -r requirements.txt
# Copy the application code: the server package and the entry point.
COPY ./server ./server
COPY ./main.py .

# Start the service; main.py is expected to launch the FastAPI app.
CMD python3 main.py
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ This is a microservice designed to fetch and serve news articles using FastAPI,
```
http://localhost:8000/news/search?find=acer+aspire+7+review
```
- `/news/search/{id}`: Show concrete article.
- `/news/{id}`: Show concrete article.

- Example:

```
http://localhost:8000/news/search/657c1690f253079b6f3ed074
http://localhost:8000/news/657c1690f253079b6f3ed074
```
4 changes: 1 addition & 3 deletions server/app.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from fastapi import FastAPI
from fastapi_pagination import add_pagination

from server.routes.search_result import router as SearchResultRouter


app = FastAPI()
add_pagination(app)
app.include_router(SearchResultRouter, tags=["Search"], prefix="/news/search")
app.include_router(SearchResultRouter, tags=["News"], prefix="/news")


@app.get('/', tags=['Root'])
Expand Down
102 changes: 74 additions & 28 deletions server/database.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from datetime import datetime, timedelta

import motor.motor_asyncio
from bson import ObjectId
from bson.errors import InvalidId

from server.scraper import scrap_from_search
from server.models.search_result import Period


MONGO_DETAILS = 'mongodb://localhost:27017'
MONGO_DETAILS = 'mongodb://mongodb:27017'
client = motor.motor_asyncio.AsyncIOMotorClient(MONGO_DETAILS)
db = client.news
db = client.lappy

search_results_collection = db.get_collection('search_results')

Expand All @@ -25,26 +30,23 @@ def search_results_helper(search_result):
}


async def add_search_results(results: list[dict]):
async def update_search_results(search: str):
"""Add articles to database
    For each scraped article, check whether it already exists in the
    database. If it does, merge the search words into the article's
    'tags' field; otherwise, insert the article as a new document.
:param results: List of new articles
:param search: Search query
:return: List of articles added to a database
"""
new_results = []
results = scrap_from_search(search)
for result in results:
if await search_results_collection.find_one({"link": result['link']}):
new_result = await search_results_collection.find_one({"link": result['link']})
new_result["tags"] = list(set(new_result["tags"] + result['tags']))
await search_results_collection.update_one({"_id": ObjectId(new_result["_id"])}, {"$set": new_result})
else:
result = await search_results_collection.insert_one(result)
new_result = await search_results_collection.find_one({"_id": result.inserted_id})
new_results.append(search_results_helper(new_result))
return new_results
await search_results_collection.insert_one(result)


async def retrieve_search_result_by_id(id_: str):
Expand All @@ -57,33 +59,39 @@ async def retrieve_search_result_by_id(id_: str):
return


async def retrieve_search_results_by_tags(tags: list[str]):
async def retrieve_search_results_by_tags(tags: list[str], page: int, limit: int, period: Period):
"""Find articles by tags
Take search words and check if database contain articles,
which have more than :percentage: of words in 'tags' fields matches
which have more than 'percentage' of words in 'tags' fields matches
with words in search query. If database have them, return
this articles.
paginated articles and total amount of them.
:param limit: Page size
:param page: Number of page
:param tags: List of search words
:return: List of articles
:param period: Filtering period
:return: Count and List of articles
"""
percentage = 0.75
matched_result = []
results = search_results_collection.find()
search_tags = set(tags)
async for result in results:
common = search_tags.intersection(result["tags"])
if len(common) > len(search_tags) * percentage:
matched_result.append(search_results_helper(result))
return matched_result
tags = list(set(tags))
filter_expression = {
**resolve_period_expression(period),
**resolve_tags_expression(tags)
}
results = search_results_collection.find(filter_expression).sort('date', -1).skip((page - 1) * limit).limit(limit)
count = await search_results_collection.count_documents(filter_expression)
return count, [search_results_helper(result) async for result in results]


async def retrieve_newest_search_results(page: int, limit: int):
"""Get the newest articles from database
async def retrieve_newest_search_results():
"""Get 20 newest articles from database"""
results = []
async for result in search_results_collection.find().sort('date', -1).limit(20):
results.append(search_results_helper(result))
return results
:param limit: Page size
:param page: Number of page
:return: Count and List of articles
"""
results = search_results_collection.find().sort('date', -1).skip((page - 1) * limit).limit(limit)
count = await search_results_collection.count_documents({})
return count, [search_results_helper(result) async for result in results]


async def update_content_of_article(id_: str, content: list[list]):
Expand All @@ -96,3 +104,41 @@ async def update_content_of_article(id_: str, content: list[list]):
await search_results_collection.update_one({'_id': ObjectId(id_)}, {"$set": {"content": content}})
article = await search_results_collection.find_one({'_id': ObjectId(id_)})
return search_results_helper(article)


def resolve_period_expression(period: Period) -> dict:
    """Create a MongoDB date-filter expression for the given period.

    For ``last_week``/``last_month`` the returned filter restricts the
    string 'date' field to the trailing 7/30 days; any other period
    yields an empty filter (no date restriction).

    :param period: Filtering period from the query
    :return: Dict suitable for merging into a Mongo filter document
    """
    # The two bounded periods differ only in window length, so resolve the
    # number of days once instead of duplicating the whole branch.
    window_days = {Period.last_week: 7, Period.last_month: 30}.get(period)
    if window_days is None:
        return {}
    end_date = datetime.now()
    start_date = end_date - timedelta(days=window_days)

    def _to_stored_format(moment: datetime) -> str:
        # Render as ISO-8601 with millisecond precision and a trailing 'Z'
        # (microseconds truncated to 3 digits) — assumes stored 'date' strings
        # use the same format so lexicographic comparison works; TODO confirm.
        return moment.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    return {'date': {'$gte': _to_stored_format(start_date),
                     '$lt': _to_stored_format(end_date)}}


def resolve_tags_expression(tags: list[str], percentage: float = 0.75) -> dict:
    """Create a Mongo ``$expr`` filter matching documents by tag overlap.

    A document matches when at least ``percentage`` of the search tags
    appear in its 'tags' field; the comparison runs server-side through
    a JavaScript ``$function`` aggregation operator.

    :param tags: List of search words
    :param percentage: Minimum fraction of ``tags`` that must be present
        in a document's 'tags' field (default 0.75, the original behavior)
    :return: Filter document with a '$expr'/'$function' clause
    """
    return {
        '$expr': {
            '$function': {
                'body': """
                function(search, document, percentage) {
                    const searchTags = search;
                    const documentTags = document;
                    const intersection = documentTags.filter(tag => searchTags.includes(tag));
                    return intersection.length >= (searchTags.length * percentage);
                }
                """,
                'args': [tags, '$tags', percentage],
                'lang': 'js'
            }
        }
    }
28 changes: 11 additions & 17 deletions server/models/search_result.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
from enum import Enum

from pydantic import BaseModel, HttpUrl

Expand All @@ -13,24 +14,17 @@ class ArticleModel(BaseModel):
description: str = ""
tags: set[str] = set()

model_config = {
"json_schema_extra": {
"examples": [
{
"id": "657b4a8d9e6d5419e28aa3e1",
"link": "https://www.laptopmag.com/best-picks/tips-to-improve-macbook-sound",
"tags": ["acer", "aspire", "nvidia"],
"image": "https://cdn.mos.cms.futurecdn.net/vzWy7ZzZy4rfZUESfUw4Lg.jpg",
"title": "7 ways to improve sound on your MacBook",
"author": "Alex Bracetti",
"date": "2023-05-20T07:00:53Z",
"description": "Unhappy with the MacBook’s sound quality? Here are some tips and tricks to enhance "
"the audio performance on your Apple laptop."
},
]
}
}

class SearchResponseModel(BaseModel):
    """Response envelope for paginated article listings.

    ``count`` is the total number of matching articles in the database
    (not just the current page); ``results`` holds one page of articles.
    """

    count: int
    results: list[ArticleModel]


class ExtendArticleModel(ArticleModel):
    """Article model extended with the scraped page body.

    ``content`` holds the article body as nested lists produced by the
    scraper — TODO confirm the exact inner-list structure against
    ``scrap_content``.
    """

    content: list[list] = []


class Period(str, Enum):
    """Allowed values of the 'period' query parameter for date filtering.

    Mixes in ``str`` so the raw query-string value can be parsed and
    compared directly.
    """

    last_week = "last-week"    # trailing 7-day window
    last_month = "last-month"  # trailing 30-day window
    all = "all"                # no date restriction
45 changes: 18 additions & 27 deletions server/routes/search_result.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Annotated

from fastapi import APIRouter, status, HTTPException, Query
from fastapi_pagination import Page, paginate

from server.scraper import scrap_from_search, scrap_content
from server.models.search_result import ArticleModel, ExtendArticleModel
from server.scraper import scrap_content
from server.models.search_result import SearchResponseModel, ExtendArticleModel, Period
from server.database import (
add_search_results,
update_search_results,
retrieve_search_result_by_id,
retrieve_search_results_by_tags,
retrieve_newest_search_results,
Expand All @@ -13,13 +14,13 @@


router = APIRouter()
Page = Page.with_custom_options(
size=Query(5, ge=1, le=10),
)


@router.get("/", status_code=status.HTTP_200_OK, response_model=Page[ArticleModel])
async def get_search_results(find: str | None = None) -> Page[ArticleModel]:
@router.get("/search", status_code=status.HTTP_200_OK, response_model=SearchResponseModel)
async def get_search_results(find: Annotated[str | None, Query(description='Write search query here')] = None,
page: Annotated[int, Query(ge=1)] = 1,
limit: Annotated[int, Query(ge=1, le=10)] = 5,
period: Period = Period.all):
"""Find articles by search query
Get list of articles which match with search query from database.
Expand All @@ -28,27 +29,17 @@ async def get_search_results(find: str | None = None) -> Page[ArticleModel]:
articles.
"""
if find:
results = await retrieve_search_results_by_tags(find.split())
if len(results) < 10:
new_results = scrap_from_search(find)
new_results = await add_search_results(new_results)

# check for adding only unic
for new_one in new_results:
repeats = False
for old_one in results:
if new_one['id'] == old_one['id']:
repeats = True
break
if not repeats:
results.append(new_one)

return paginate(results)
return paginate(await retrieve_newest_search_results())
count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period)
if count < 5:
await update_search_results(find)
count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period)
return {'count': count, 'results': results}
count, results = await retrieve_newest_search_results(page, limit)
return {'count': count, 'results': results}


@router.get("/{id}", status_code=status.HTTP_200_OK, response_model=ExtendArticleModel)
async def get_article(id: str) -> ExtendArticleModel:
async def get_article(id: str):
"""Get concrete article with content
Find article by ID in database and if it exists, check if it
Expand Down

0 comments on commit 55f94d9

Please sign in to comment.