Skip to content

Commit

Permalink
add phase 4 edited files
Browse files Browse the repository at this point in the history
  • Loading branch information
taranehkhosrojerdi authored Jul 1, 2024
1 parent 63fa434 commit 9c8fc8c
Show file tree
Hide file tree
Showing 2 changed files with 350 additions and 0 deletions.
219 changes: 219 additions & 0 deletions bonus_phase/main.py
Original file line number Diff line number Diff line change
@@ -1 +1,220 @@
import streamlit as st
import sys
sys.path.append(r"D:\University\MIR_project_Spring2024-Phase-1.2")
import os
from Logic import utils
import time
from enum import Enum
import random
from Logic.core.snippet import Snippet
from Logic.core.preprocess import Preprocessor

# Module-wide snippet extractor, configured with five words on each side;
# get_summary_with_snippet uses it to locate query terms in a summary.
snippet_obj = Snippet(number_of_words_on_each_side=5)

class color(Enum):
    """Palette of blue/cyan hex colours used for highlights and genre labels.

    NOTE: the member names do not describe the hues; every value is a shade
    of blue or cyan. The names are kept because other code refers to them.
    """

    RED = "#00BFFF"      # deep sky blue
    GREEN = "#00CED1"    # dark turquoise
    BLUE = "#1E90FF"     # dodger blue
    YELLOW = "#00FFFF"   # cyan / aqua
    PURPLE = "#ADD8E6"   # light blue
    ORANGE = "#87CEEB"   # sky blue
    CYAN = "#F0FFFF"     # azure
    MAGENTA = "#E0FFFF"  # light cyan

def get_summary_with_snippet(movie_info, query):
    """Return the movie's first-page summary with query matches highlighted.

    The snippet extractor marks matched words as ``***word***``. Every
    occurrence of such a word in the summary is wrapped in a bold ``<font>``
    tag with a randomly chosen palette colour, so the result is meant to be
    rendered as HTML.

    Parameters
    ----------
    movie_info : dict
        Movie record; only ``"first_page_summary"`` is read.
    query : str
        The search query, forwarded to the snippet extractor.

    Returns
    -------
    str
        The summary in its original casing with highlight markup around the
        matched words, or an empty string when the movie has no summary.
    """
    import re  # local import keeps this block independent of the file header

    summary = movie_info.get("first_page_summary")
    if not summary:
        return ""

    snippet, _not_exist_words = snippet_obj.find_snippet(summary, query)

    # Collect each highlighted word once, together with its colour. The
    # length check rejects tokens such as "******" whose inner word is empty:
    # replacing the empty string would inject markup between every character.
    highlight_colors = {}
    for token in snippet.split():
        if len(token) > 6 and token.startswith("***") and token.endswith("***"):
            highlight_colors.setdefault(
                token[3:-3].lower(), random.choice(list(color)).value
            )

    if not highlight_colors:
        return summary

    # One case-insensitive pass over the untouched summary: text inserted as
    # markup is never scanned again, so a word like "font" or "color" cannot
    # corrupt tags added for an earlier word. Longest alternatives come first
    # so a word is preferred over any shorter word it contains.
    pattern = re.compile(
        "|".join(
            re.escape(word)
            for word in sorted(highlight_colors, key=len, reverse=True)
        ),
        re.IGNORECASE,
    )

    def _wrap(match):
        matched_text = match.group(0)
        shade = highlight_colors.get(matched_text.lower())
        if shade is None:
            # Unusual case-folding (non-ASCII) can miss the lookup key.
            shade = random.choice(list(color)).value
        return f"<b><font size='4' color={shade}>{matched_text}</font></b>"

    return pattern.sub(_wrap, summary)

def search_time(start, end):
    """Show the elapsed search time, converted to milliseconds, as a success banner.

    ``start`` and ``end`` are timestamps in seconds (the caller passes
    ``time.time()`` values).
    """
    elapsed_ms = (end - start) * 1e3
    st.success(f"Search took: {elapsed_ms:.6f} milli-seconds")

def toggle_star_state(movie_id):
    """Placeholder for toggling a movie's "starred" flag; currently a no-op.

    The disabled draft of this function kept a ``star_states`` mapping in
    ``st.session_state`` and flipped the boolean stored under ``movie_id``,
    setting it to ``True`` the first time an id was seen.
    """
    return None

def search_handling(
    search_button,
    search_term,
    search_max_num,
    search_weights,
    search_method,
):
    """Run a search and render one result card per hit.

    Nothing happens unless ``search_button`` is truthy. Otherwise the query is
    passed through ``utils.correct_text``, searched with ``utils.search``, and
    each hit is rendered with its title, summary, details and poster.

    Parameters
    ----------
    search_button : bool
        Whether the search button was pressed on this run.
    search_term : str
        The raw query typed by the user.
    search_max_num : int
        Maximum number of results to request.
    search_weights : list
        Field weights forwarded to ``utils.search``.
    search_method : str
        Scoring method forwarded to ``utils.search``.

    Returns
    -------
    None
    """
    if not search_button:
        return

    # Corpus for spell correction; movies without summaries contribute nothing.
    spell_correction_dataset = [
        summary
        for movie in utils.movies_dataset
        for summary in (movie.get("summaries") or [])
    ]
    spell_correction_dataset = Preprocessor(spell_correction_dataset).preprocess()
    corrected_query = utils.correct_text(search_term, spell_correction_dataset)

    if corrected_query != search_term:
        st.warning(f"Your search terms were corrected to: {corrected_query}")
        search_term = corrected_query

    with st.spinner("Searching..."):
        time.sleep(0.5)  # short pause before the timed search, while the spinner is shown
        start_time = time.time()
        result = utils.search(
            search_term,
            search_max_num,
            search_method,
            search_weights,
        )
        end_time = time.time()

    # Printed after the clock stops so console I/O does not inflate the timing.
    print(f"Result: {result}")

    if len(result) == 0:
        st.warning("No results found!")
        return

    search_time(start_time, end_time)

    # Genre colours wrap around, so a movie with more genres than palette
    # entries no longer raises StopIteration.
    palette = [member.value for member in color]

    for hit in result:
        movie_id, relevance_score = hit[0], hit[1]
        card = st.columns([3, 1])
        info = utils.get_movie_by_id(movie_id, utils.movies_dataset)

        with card[0].container():
            # "score" is only present when the external rating lookup worked.
            title_string = (
                f"[{info['title']}]({info['URL']}) - {info.get('score', 'N/A')}"
            )
            st.title(title_string)

            if relevance_score > 0:
                st.markdown(
                    "<span style='font-size: 20px; color: yellow;'>⭐</span>",
                    unsafe_allow_html=True,
                )

            st.markdown(
                f"<b><font size = '4'>Summary:</font></b> {get_summary_with_snippet(info, search_term)}",
                unsafe_allow_html=True,
            )

        with st.container():
            with st.expander("Details"):
                st.write(f"Relevance Score: {relevance_score}")

                # Crawled fields may be None; treat that as "no entries".
                st.markdown("**Directors:**")
                for director in info.get("directors") or []:
                    st.text(director)

                st.markdown("**Stars:**")
                st.text(", ".join(info.get("stars") or []))

                st.markdown("**Genres:**")
                for position, genre in enumerate(info.get("genres") or []):
                    genre_color = palette[position % len(palette)]
                    st.markdown(
                        f"<span style='color:{genre_color}'>{genre}</span>",
                        unsafe_allow_html=True,
                    )

        with card[1].container():
            # Skip the poster when none is available ("N/A" is OMDb's marker).
            image_url = info.get("Image_URL")
            if image_url and image_url != "N/A":
                st.image(image_url, use_column_width=True)

        st.divider()

def main():
    """Build the Streamlit page: header, search box, sidebar options, results, button CSS."""
    st.title("IMDB Movie Search Engine")
    st.write(
        "Search through IMDB dataset and find the most relevant movies to your search terms."
    )
    st.markdown(
        '<span style="color:yellow">Developed By: MIR Team at Sharif University</span>',
        unsafe_allow_html=True,
    )

    search_term = st.text_input("Search Term", help="Enter the term you want to search for.")

    def weight_slider(label, help_text):
        # All three field-weight sliders share the same range, default and step.
        return st.slider(
            label,
            min_value=0.0,
            max_value=1.0,
            value=1.0,
            step=0.1,
            help=help_text,
        )

    with st.sidebar:
        st.header("Advanced Search")
        search_max_num = st.number_input(
            "Maximum number of results",
            min_value=5,
            max_value=100,
            value=10,
            step=5,
            help="Set the maximum number of search results.",
        )

        # Order matters: the list is passed on positionally as stars, genres, summary.
        search_weights = [
            weight_slider(
                "Weight of stars in search",
                "Adjust the weight given to stars in the search results.",
            ),
            weight_slider(
                "Weight of genres in search",
                "Adjust the weight given to genres in the search results.",
            ),
            weight_slider(
                "Weight of summary in search",
                "Adjust the weight given to the summary in the search results.",
            ),
        ]
        search_method = st.selectbox(
            "Search method",
            ("ltn.lnn", "ltc.lnc", "OkapiBM25"),
            help="Choose the search method.",
        )

    search_button = st.button("Search", key="search_button")

    search_handling(
        search_button,
        search_term,
        search_max_num,
        search_weights,
        search_method,
    )

    # Styling for the search button, injected as raw HTML/CSS.
    custom_css = """
<style>
.stButton button {
background-color: #4CAF50;
color: white;
padding: 10px 20px;
text-align: center;
text-decoration: none;
display: inline-block;
font-size: 16px;
margin: 4px 2px;
cursor: pointer;
border-radius: 8px;
border: none;
transition-duration: 0.4s;
}
.stButton button:hover {
background-color: #3e8e41;
color: white;
}
</style>
"""
    st.markdown(custom_css, unsafe_allow_html=True)


if __name__ == "__main__":
    main()
131 changes: 131 additions & 0 deletions bonus_phase/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from typing import Dict, List
from .core.search import SearchEngine
from .core.spell_correction import SpellCorrection
from .core.preprocess import Preprocessor
from .core.snippet import Snippet
from .core.indexer.indexes_enum import Indexes, Index_types
import json

# Load the crawled dataset once at import time. The context manager closes
# the file handle, which the bare ``json.load(open(...))`` form left open.
# NOTE: the path is relative to the current working directory.
with open("../Logic/tests/IMDB_crawled.json", "r") as _dataset_file:  # TODO
    movies_dataset = json.load(_dataset_file)
search_engine = SearchEngine()


def correct_text(text: str, all_documents: List[str]) -> str:
    """
    Normalise the query text with the project's preprocessor.

    Spell correction is currently disabled (see the TODO in the body), so
    ``all_documents`` is accepted but not used.

    Parameters
    ----------
    text : str
        The query text.
    all_documents : list of str
        The input documents, intended as the corpus for spell correction.

    Returns
    -------
    str
        The preprocessed form of the given text.
    """
    preprocessed_queries = Preprocessor([text]).preprocess()
    # TODO: re-enable spell correction: build SpellCorrection(all_documents),
    # run spell_check on every whitespace-separated word of the preprocessed
    # text, and join the corrected words back together with spaces.
    return preprocessed_queries[0]


def search(
    query: str,
    max_result_count: int,
    method: str = "ltn-lnn",
    weights: list = None,
    should_print=False,
    preferred_genre: str = None,
):
    """
    Find the documents most relevant to a query.

    Parameters
    ---------------------------------------------------------------------------------------------------
    query: The query text.
    max_result_count: Return top 'max_result_count' docs which have the highest scores.
                      The value is forwarded unchanged as ``max_results``; -1 is meant to
                      request all docs.
    method: 'ltn.lnn' or 'ltc.lnc' or 'OkapiBM25'
            NOTE(review): the default is spelled with a hyphen ('ltn-lnn') while the
            documented values use a dot; confirm which spelling the search engine expects.
    weights: Three weights, in order, for the 'stars', 'genres' and 'summaries' fields.
             Defaults to [0.3, 0.3, 0.4] when omitted.
    should_print: Currently unused.
    preferred_genre: Currently unused.

    Returns
    ----------------------------------------------------------------------------------------------------
    list
        Whatever ``search_engine.search`` returns for the query.
    """
    # A None sentinel replaces the former list default, which was one shared
    # object across all calls.
    if weights is None:
        weights = [0.3, 0.3, 0.4]

    dict_weights = {
        'stars': weights[0],
        'genres': weights[1],
        'summaries': weights[2],
    }  # TODO
    return search_engine.search(
        query, method, dict_weights, max_results=max_result_count, safe_ranking=True
    )

import os

def _fetch_omdb_metadata(imdb_id: str) -> Dict[str, str]:
    """Return poster URL and first rating for *imdb_id* from OMDb; empty dict on any failure."""
    import requests  # imported lazily, as before, so lookups that fail early never need it

    try:
        response = requests.get(
            "http://www.omdbapi.com/",
            # NOTE(review): hard-coded API key; consider moving it to configuration.
            params={"i": imdb_id, "apikey": "6ee1c49f"},
            timeout=10,  # requests waits indefinitely without an explicit timeout
        )
    except requests.RequestException:
        return {}

    if response.status_code != 200:
        return {}

    try:
        movie_data = response.json()
    except ValueError:
        return {}
    if not isinstance(movie_data, dict):
        return {}

    # A 200 response can still lack these fields or carry an empty ratings
    # list, so every field is read defensively.
    metadata = {}
    poster = movie_data.get("Poster")
    if poster:
        metadata["Image_URL"] = poster
    ratings = movie_data.get("Ratings") or []
    if ratings and isinstance(ratings[0], dict) and ratings[0].get("Value"):
        metadata["score"] = ratings[0]["Value"]
    return metadata


def get_movie_by_id(id: str, movies_dataset: List[Dict[str, str]]) -> Dict[str, str]:
    """
    Get movie by its id, enriched with poster, rating and IMDb URL

    The matching dataset entry is updated in place and returned.

    Parameters
    ---------------------------------------------------------------------------------------------------
    id: str
        The id of the movie
    movies_dataset: List[Dict[str, str]]
        The dataset of movies

    Returns
    ----------------------------------------------------------------------------------------------------
    dict
        The movie with the given id. "Image_URL" and "score" are always
        present; they are "N/A" when OMDb could not supply them and no earlier
        value exists. "URL" is the movie's IMDb page.

    Raises
    ----------------------------------------------------------------------------------------------------
    KeyError
        If no movie in the dataset has the given id.
    """
    result = next((movie for movie in movies_dataset if movie["id"] == id), None)
    if result is None:
        # Previously this fell through to an UnboundLocalError further down.
        raise KeyError(f"No movie with id {id!r} in the dataset")

    result.update(_fetch_omdb_metadata(result["id"]))
    # Guarantee the keys the UI reads, keeping values from an earlier
    # successful lookup if this one failed.
    result.setdefault("Image_URL", "N/A")
    result.setdefault("score", "N/A")

    result["URL"] = (
        f"https://www.imdb.com/title/{result['id']}"  # The url pattern of IMDb movies
    )
    return result

0 comments on commit 9c8fc8c

Please sign in to comment.