Skip to content

Commit

Permalink
added period for filtering search results
Browse files Browse the repository at this point in the history
  • Loading branch information
LLkaia committed Dec 23, 2023
1 parent a7855fb commit 1bd4a03
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 25 deletions.
65 changes: 47 additions & 18 deletions server/database.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from datetime import datetime, timedelta

import motor.motor_asyncio
from bson import ObjectId
from bson.errors import InvalidId

from server.scraper import scrap_from_search
from server.models.search_result import Period


MONGO_DETAILS = 'mongodb://localhost:27017'
Expand Down Expand Up @@ -56,37 +59,25 @@ async def retrieve_search_result_by_id(id_: str):
return


async def retrieve_search_results_by_tags(tags: list[str], page: int, limit: int):
async def retrieve_search_results_by_tags(tags: list[str], page: int, limit: int, period: Period):
"""Find articles by tags
Take search words and check if database contain articles,
which have more than :percentage: of words in 'tags' fields matches
which have more than 'percentage' of words in 'tags' fields matches
with words in search query. If database have them, return
paginated articles and total amount of them.
:param limit: Page size
:param page: Number of page
:param tags: List of search words
:param period: Filtering period
:return: Count and List of articles
"""
percentage = 0.75
tags = list(set(tags))
filter_expression = {
'$expr': {
'$function': {
'body': """
function(search, document, percentage) {
const searchTags = search;
const documentTags = document;
const intersection = documentTags.filter(tag => searchTags.includes(tag));
return intersection.length >= (searchTags.length * percentage);
}
""",
'args': [tags, '$tags', percentage],
'lang': 'js'
}
}
**resolve_period_expression(period),
**resolve_tags_expression(tags)
}
results = search_results_collection.find(filter_expression).skip((page - 1) * limit).limit(limit)
results = search_results_collection.find(filter_expression).sort('date', -1).skip((page - 1) * limit).limit(limit)
count = await search_results_collection.count_documents(filter_expression)
return count, [search_results_helper(result) async for result in results]

Expand All @@ -113,3 +104,41 @@ async def update_content_of_article(id_: str, content: list[list]):
await search_results_collection.update_one({'_id': ObjectId(id_)}, {"$set": {"content": content}})
article = await search_results_collection.find_one({'_id': ObjectId(id_)})
return search_results_helper(article)


def resolve_period_expression(period: Period) -> dict:
    """Create the 'date' filter expression for the requested period.

    Both bounds are rendered as ISO-8601 strings with millisecond
    precision and a trailing 'Z', e.g. '2023-12-23T10:15:30.123Z'.
    'Z' designates UTC, so the window is computed from the current UTC
    time rather than from the server's local clock.

    NOTE(review): this assumes documents store 'date' as a string in the
    same format, so that string comparison orders chronologically --
    confirm against the code that writes the collection.

    :param period: Filtering period
    :return: ``{'date': {'$gte': start, '$lt': end}}`` covering the last
        7 days for ``Period.last_week`` or the last 30 days for
        ``Period.last_month``; an empty dict (no date restriction) for
        any other value.
    """
    # Local import: the module header only imports datetime and timedelta.
    from datetime import timezone

    if period is Period.last_week:
        window = timedelta(days=7)
    elif period is Period.last_month:
        window = timedelta(days=30)
    else:
        return {}

    def as_timestamp(moment: datetime) -> str:
        # '%f' emits six microsecond digits; dropping the last three
        # leaves milliseconds before the 'Z' suffix.
        return moment.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    end_date = datetime.now(timezone.utc)
    start_date = end_date - window
    return {'date': {'$gte': as_timestamp(start_date), '$lt': as_timestamp(end_date)}}


def resolve_tags_expression(tags: list[str], percentage: float = 0.75) -> dict:
    """Create the filter expression matching documents by search tags.

    The expression runs a JavaScript function through '$function' that
    receives the search tags, the document's 'tags' field and the
    threshold. It keeps the document when the number of its tags that
    also occur in the search tags is at least
    ``len(tags) * percentage``.

    NOTE(review): with an empty ``tags`` list the threshold is 0, so every
    document that has a 'tags' array satisfies the expression.

    :param tags: List of search words
    :param percentage: Share of the search words that must be matched;
        defaults to 0.75, the value that was previously hard-coded
    :return: Filter expression to merge into a query
    """
    return {
        '$expr': {
            '$function': {
                'body': """
function(search, document, percentage) {
const searchTags = search;
const documentTags = document;
const intersection = documentTags.filter(tag => searchTags.includes(tag));
return intersection.length >= (searchTags.length * percentage);
}
""",
                # Positional arguments for the JavaScript function above;
                # '$tags' is resolved to the document's field by MongoDB.
                'args': [tags, '$tags', percentage],
                'lang': 'js'
            }
        }
    }
7 changes: 7 additions & 0 deletions server/models/search_result.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
from enum import Enum

from pydantic import BaseModel, HttpUrl

Expand All @@ -21,3 +22,9 @@ class SearchResponseModel(BaseModel):

class ExtendArticleModel(ArticleModel):
content: list[list] = []


class Period(str, Enum):
    """Periods that search results can be filtered by.

    The ``str`` mixin makes every member a string equal to its value.
    """
    # Values are hyphenated ("last-week"), while member names use
    # underscores.
    last_week = "last-week"
    last_month = "last-month"
    # NOTE(review): this member name shadows the builtin ``all`` inside
    # the class body.
    all = "all"
15 changes: 10 additions & 5 deletions server/routes/search_result.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from fastapi import APIRouter, status, HTTPException
from typing import Annotated

from fastapi import APIRouter, status, HTTPException, Query

from server.scraper import scrap_content
from server.models.search_result import SearchResponseModel, ExtendArticleModel
from server.models.search_result import SearchResponseModel, ExtendArticleModel, Period
from server.database import (
update_search_results,
retrieve_search_result_by_id,
Expand All @@ -15,7 +17,10 @@


@router.get("/search", status_code=status.HTTP_200_OK, response_model=SearchResponseModel)
async def get_search_results(find: str | None = None, page: int = 1, limit: int = 5):
async def get_search_results(find: Annotated[str | None, Query(description='Write search query here')] = None,
page: Annotated[int, Query(ge=1)] = 1,
limit: Annotated[int, Query(ge=1, le=10)] = 5,
period: Period = Period.all):
"""Find articles by search query
Get list of articles which match with search query from database.
Expand All @@ -24,10 +29,10 @@ async def get_search_results(find: str | None = None, page: int = 1, limit: int
articles.
"""
if find:
count, results = await retrieve_search_results_by_tags(find.split(), page, limit)
count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period)
if count < 5:
await update_search_results(find)
count, results = await retrieve_search_results_by_tags(find.split(), page, limit)
count, results = await retrieve_search_results_by_tags(find.split(), page, limit, period)
return {'count': count, 'results': results}
count, results = await retrieve_newest_search_results(page, limit)
return {'count': count, 'results': results}
Expand Down
2 changes: 0 additions & 2 deletions server/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,3 @@ def scrap_content(link: str) -> list[list]:
image = block.find('img').get('data-pin-media')
content.append(('image', image))
return content


0 comments on commit 1bd4a03

Please sign in to comment.