diff --git a/relevance-scoring/app.py b/relevance-scoring/app.py
new file mode 100644
index 00000000..db2922e8
--- /dev/null
+++ b/relevance-scoring/app.py
@@ -0,0 +1,78 @@
+from functools import lru_cache
+
+from metadata.image_processor import generate_image_description
+from metadata.link_processor import extract_text_from_url
+from embeddings.embedding_generator import generate_embedding, calculate_similarity
+from scoring.relevance_scorer import calculate_relevance_score
+from transformers import pipeline
+import numpy as np
+
+
+@lru_cache(maxsize=1)
+def _get_caption_generator():
+    """Load the image-captioning pipeline once and reuse it across calls."""
+    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+
+
+def _best_matching_embedding(text_embedding, candidate_texts):
+    """Return the embedding of the candidate text most similar to the comment.
+
+    Returns None when ``candidate_texts`` is empty.
+    """
+    best_score = None
+    best_embedding = None
+    for candidate in candidate_texts:
+        embedding = generate_embedding(candidate)
+        score = calculate_similarity(text_embedding, embedding)
+        if best_score is None or score > best_score:
+            best_score, best_embedding = score, embedding
+    return best_embedding
+
+
+def process_comment(comment, image_urls=None, links=None):
+    """Score how well a comment's images and links match its text.
+
+    Args:
+        comment: The comment text.
+        image_urls: Optional iterable of image URLs to caption and compare.
+        links: Optional iterable of URLs whose extracted text is compared.
+
+    Returns:
+        The result of ``calculate_relevance_score`` for the comment embedding
+        and the best-matching image and link embeddings.
+    """
+    text_embedding = generate_embedding(comment)
+
+    # Caption each image and keep the descriptions that came back non-empty.
+    image_texts = []
+    if image_urls:
+        caption_generator = _get_caption_generator()
+        for url in image_urls:
+            metadata = generate_image_description(url, caption_generator)
+            if metadata and metadata['description']:
+                image_texts.append(metadata['description'])
+
+    # Extract text for each link and keep the non-empty results.
+    link_texts = []
+    if links:
+        for link in links:
+            metadata = extract_text_from_url(link)
+            if metadata and metadata['content']:
+                link_texts.append(metadata['content'])
+
+    # calculate_relevance_score expects embeddings, not similarity scores, so
+    # pass the embedding of the best match (or None to skip that source).
+    return calculate_relevance_score(
+        text_embedding,
+        image_embedding=_best_matching_embedding(text_embedding, image_texts),
+        link_embedding=_best_matching_embedding(text_embedding, link_texts),
+    )
+
+
+if __name__ == "__main__":
+    comment = "This comment addresses layout issues in the UI."
+    image_urls = ["https://www.bing.com/th?id=OADD2.7490516793165_1K4Y6UMUPT5JEHB4D8&pid=21.2&c=16&roil=0&roit=0.033&roir=1&roib=0.8186&w=300&h=157&dynsize=1&qlt=90"]
+    links = ["https://example.com/sample-page"]
+
+    score = process_comment(comment, image_urls, links)
+    print(f"Final Relevance Score: {score}")
diff --git a/relevance-scoring/metadata/image_processor.py b/relevance-scoring/metadata/image_processor.py
index f48e97a5..aec74d60 100644
--- a/relevance-scoring/metadata/image_processor.py
+++ b/relevance-scoring/metadata/image_processor.py
@@ -48,6 +48,7 @@ def generate_image_description(image_url, caption_generator):
 # Sample image URL
 image_url = "https://th.bing.com/th?id=ORMS.99706f16f78dd7e84c31c95eef897656&pid=Wdp&w=268&h=140&qlt=90&c=1&rs=1&dpr=1.5&p=0"
 
+
 # Generate and print metadata
 metadata = generate_image_description(image_url, caption_generator)
 print(metadata)
\ No newline at end of file
diff --git a/relevance-scoring/scoring/relevance_scorer.py b/relevance-scoring/scoring/relevance_scorer.py
new file mode 100644
index 00000000..98430bee
--- /dev/null
+++ b/relevance-scoring/scoring/relevance_scorer.py
@@ -0,0 +1,67 @@
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+
+def calculate_similarity(embedding1, embedding2):
+    """Return the cosine similarity between two embedding vectors.
+
+    Both inputs are converted to arrays and flattened to a single row first.
+
+    Raises:
+        ValueError: If the embeddings do not have the same number of elements.
+    """
+    embedding1 = np.array(embedding1).reshape(1, -1)
+    embedding2 = np.array(embedding2).reshape(1, -1)
+    logging.info("Embedding1 shape: %s", embedding1.shape)
+    logging.info("Embedding2 shape: %s", embedding2.shape)
+    if embedding1.shape != embedding2.shape:
+        # Fail with a clear message, e.g. when a scalar score is passed where
+        # an embedding is expected.
+        raise ValueError(
+            f"Embedding size mismatch: {embedding1.shape[1]} vs {embedding2.shape[1]}"
+        )
+    return cosine_similarity(embedding1, embedding2)[0][0]
+
+
+def calculate_relevance_score(
+    text_embedding,
+    image_embedding=None,
+    link_embedding=None,
+    image_weight=0.5,
+    link_weight=0.5,
+):
+    """Combine image and link similarity to the text into one weighted score.
+
+    Args:
+        text_embedding: Embedding of the comment text.
+        image_embedding: Optional embedding compared against the text.
+        link_embedding: Optional embedding compared against the text.
+        image_weight: Weight applied to the image similarity.
+        link_weight: Weight applied to the link similarity.
+
+    Returns:
+        The weighted sum of the similarities that could be computed, or 0.0
+        when neither optional embedding is given.
+    """
+    score = 0.0
+    if image_embedding is not None:
+        score += calculate_similarity(text_embedding, image_embedding) * image_weight
+    if link_embedding is not None:
+        score += calculate_similarity(text_embedding, link_embedding) * link_weight
+    return score
+
+
+if __name__ == "__main__":
+    # Example usage
+    def generate_embedding(text):
+        # Placeholder function for generating embeddings
+        # Replace this with your actual embedding generation logic
+        return np.random.rand(768)
+
+    text_emb = generate_embedding("a UI with layout issues")
+    img_emb = generate_embedding("a UI with red lines showing a spacing issue")
+    relevance_score = calculate_relevance_score(text_emb, image_embedding=img_emb)
+    print(f"Relevance Score: {relevance_score:.2f}")