Skip to content

Commit

Permalink
update changes
Browse files Browse the repository at this point in the history
  • Loading branch information
surafeldev committed Dec 14, 2024
1 parent 9d2a04e commit 693d333
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
49 changes: 49 additions & 0 deletions relevance-scoring/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from metadata.image_processor import generate_image_description
from metadata.link_processor import extract_text_from_url
from embeddings.embedding_generator import generate_embedding, calculate_similarity
from scoring.relevance_scorer import calculate_relevance_score
from transformers import pipeline
import numpy as np

def _most_similar_embedding(text_embedding, candidate_texts):
    """Return the embedding of the candidate text closest to ``text_embedding``.

    Each candidate is embedded with ``generate_embedding`` and compared to
    ``text_embedding`` with ``calculate_similarity``. The embedding with the
    highest similarity is returned (the first one wins on ties), or ``None``
    when ``candidate_texts`` is empty.
    """
    best_score = None
    best_embedding = None
    for text in candidate_texts:
        embedding = generate_embedding(text)
        score = calculate_similarity(text_embedding, embedding)
        if best_score is None or score > best_score:
            best_score = score
            best_embedding = embedding
    return best_embedding


def process_comment(comment, image_urls=None, links=None):
    """Score how relevant a comment's attached images and links are to its text.

    The comment is embedded once. Every image is captioned and every link is
    reduced to its extracted text; the caption and the link text that are most
    similar to the comment are then handed to ``calculate_relevance_score``.

    Args:
        comment: The comment text to score against.
        image_urls: Optional iterable of image URLs attached to the comment.
        links: Optional iterable of link URLs attached to the comment.

    Returns:
        The value returned by ``calculate_relevance_score`` for the comment
        embedding and the best-matching image/link embeddings.
    """
    text_embedding = generate_embedding(comment)

    # Collect a caption for every image that yields a non-empty description.
    image_texts = []
    if image_urls:
        # Loading the captioning model is expensive, so only build the
        # pipeline when there is at least one image to caption.
        caption_generator = pipeline(
            "image-to-text", model="Salesforce/blip-image-captioning-base"
        )
        for url in image_urls:
            metadata = generate_image_description(url, caption_generator)
            if metadata and metadata['description']:
                image_texts.append(metadata['description'])

    # Collect the extracted text for every link that yields non-empty content.
    link_texts = []
    for link in links or ():
        metadata = extract_text_from_url(link)
        if metadata and metadata['content']:
            link_texts.append(metadata['content'])

    # calculate_relevance_score expects *embeddings* (it computes the cosine
    # similarity itself), so keep the best-matching embedding rather than the
    # best similarity score: a scalar score cannot be compared with the text
    # vector and makes the similarity computation fail.
    image_embedding = _most_similar_embedding(text_embedding, image_texts)
    link_embedding = _most_similar_embedding(text_embedding, link_texts)

    # A zero vector stands in for a missing modality, as in the original code.
    no_match = np.zeros_like(text_embedding)
    return calculate_relevance_score(
        text_embedding,
        image_embedding=image_embedding if image_embedding is not None else no_match,
        link_embedding=link_embedding if link_embedding is not None else no_match,
    )

if __name__ == "__main__":
    # Manual smoke test: score one sample comment against one image and one link.
    score = process_comment(
        "This comment addresses layout issues in the UI.",
        image_urls=[
            "https://www.bing.com/th?id=OADD2.7490516793165_1K4Y6UMUPT5JEHB4D8&pid=21.2&c=16&roil=0&roit=0.033&roir=1&roib=0.8186&w=300&h=157&dynsize=1&qlt=90",
        ],
        links=["https://example.com/sample-page"],
    )
    print(f"Final Relevance Score: {score}")
1 change: 1 addition & 0 deletions relevance-scoring/metadata/image_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def generate_image_description(image_url, caption_generator):

# Sample image URL used to exercise generate_image_description by hand.
image_url = "https://th.bing.com/th?id=ORMS.99706f16f78dd7e84c31c95eef897656&pid=Wdp&w=268&h=140&qlt=90&c=1&rs=1&dpr=1.5&p=0"

# Generate the metadata for the sample image and print whatever comes back.
# NOTE(review): `caption_generator` is defined outside this excerpt —
# presumably the image-captioning pipeline; confirm against the full file.
metadata = generate_image_description(image_url, caption_generator)
print(metadata)
32 changes: 32 additions & 0 deletions relevance-scoring/scoring/relevance_scorer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import logging

# Configure the root logger at import time so the INFO-level shape messages
# logged by calculate_similarity are emitted. Note that importing this module
# therefore has a process-wide logging side effect.
logging.basicConfig(level=logging.INFO)

def calculate_similarity(embedding1, embedding2):
    """Return the cosine similarity between two embedding vectors.

    Both inputs are converted to NumPy arrays and reshaped to a single row
    before being passed to scikit-learn's ``cosine_similarity``.

    Args:
        embedding1: First embedding (any array-like).
        embedding2: Second embedding (any array-like) of the same length.

    Returns:
        The single similarity value taken from the 1x1 result matrix.

    Raises:
        ValueError: If the two embeddings do not have the same length.
    """
    embedding1 = np.asarray(embedding1).reshape(1, -1)
    embedding2 = np.asarray(embedding2).reshape(1, -1)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("Embedding1 shape: %s", embedding1.shape)
    logging.info("Embedding2 shape: %s", embedding2.shape)
    # Fail early with a message that names both sizes; scikit-learn would
    # raise ValueError here as well, but with a less specific explanation.
    if embedding1.shape != embedding2.shape:
        raise ValueError(
            "Embeddings must have the same length, got "
            f"{embedding1.shape[1]} and {embedding2.shape[1]}"
        )
    return cosine_similarity(embedding1, embedding2)[0][0]

def calculate_relevance_score(
    text_embedding,
    image_embedding=None,
    link_embedding=None,
    image_weight=0.5,
    link_weight=0.5,
):
    """Combine image and link similarity to the text into one weighted score.

    Args:
        text_embedding: Embedding of the comment text.
        image_embedding: Optional embedding of the image description; skipped
            when ``None``.
        link_embedding: Optional embedding of the linked content; skipped
            when ``None``.
        image_weight: Multiplier applied to the text/image similarity
            (defaults to 0.5, the value previously hard-coded).
        link_weight: Multiplier applied to the text/link similarity
            (defaults to 0.5, the value previously hard-coded).

    Returns:
        The weighted sum of the available similarities as a float; ``0.0``
        when neither an image nor a link embedding is supplied.
    """
    # Start from a float so the return type does not depend on which
    # embeddings were provided.
    score = 0.0
    if image_embedding is not None:
        score += calculate_similarity(text_embedding, image_embedding) * image_weight
    if link_embedding is not None:
        score += calculate_similarity(text_embedding, link_embedding) * link_weight
    return score

if __name__ == "__main__":
    # Demo run with stand-in embeddings; the printed score is random.
    def generate_embedding(text):
        """Placeholder embedder: ignores ``text`` and returns 768 random values.

        Swap in the real embedding generation logic when wiring this up.
        """
        return np.random.rand(768)

    comment_vector = generate_embedding("a UI with layout issues")
    image_vector = generate_embedding("a UI with red lines showing a spacing issue")
    relevance_score = calculate_relevance_score(
        comment_vector,
        image_embedding=image_vector,
    )
    print(f"Relevance Score: {relevance_score:.2f}")

0 comments on commit 693d333

Please sign in to comment.