generated from ubiquity/ts-template
-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9d2a04e
commit 693d333
Showing
3 changed files
with
82 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from metadata.image_processor import generate_image_description | ||
from metadata.link_processor import extract_text_from_url | ||
from embeddings.embedding_generator import generate_embedding, calculate_similarity | ||
from scoring.relevance_scorer import calculate_relevance_score | ||
from transformers import pipeline | ||
import numpy as np | ||
|
||
def _best_matching_embedding(text_embedding, candidate_texts):
    """Return the embedding of the candidate text most similar to the comment.

    Each text in ``candidate_texts`` is embedded and compared with
    ``text_embedding``; the embedding with the highest similarity wins (the
    first one on ties). Returns ``None`` when there are no candidates.
    """
    best_score = None
    best_embedding = None
    for candidate_text in candidate_texts:
        candidate_embedding = generate_embedding(candidate_text)
        score = calculate_similarity(text_embedding, candidate_embedding)
        if best_score is None or score > best_score:
            best_score = score
            best_embedding = candidate_embedding
    return best_embedding


def process_comment(comment, image_urls=None, links=None):
    """Score how relevant a comment's attached images and links are to its text.

    The comment is embedded once. Every image is captioned and every link is
    reduced to its text content; each of those texts is embedded and compared
    with the comment embedding. The embedding of the best-matching image and
    the embedding of the best-matching link are handed to
    ``calculate_relevance_score``.

    Args:
        comment: Comment text to embed.
        image_urls: Optional iterable of image URLs attached to the comment.
        links: Optional iterable of link URLs attached to the comment.

    Returns:
        The value returned by ``calculate_relevance_score``.
    """
    text_embedding = generate_embedding(comment)

    # Process images
    image_descriptions = []
    if image_urls:
        # Build the captioning pipeline only when there is something to
        # caption, so comments without images do not load the model.
        caption_generator = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        for url in image_urls:
            metadata = generate_image_description(url, caption_generator)
            if metadata and metadata['description']:
                image_descriptions.append(metadata['description'])

    # Process links
    link_contents = []
    if links:
        for link in links:
            metadata = extract_text_from_url(link)
            if metadata and metadata['content']:
                link_contents.append(metadata['content'])

    # Combine scores.
    # calculate_relevance_score takes *embeddings* (image_embedding= /
    # link_embedding=), so pass the best-matching embedding itself rather than
    # its scalar similarity score.
    image_embedding = _best_matching_embedding(text_embedding, image_descriptions)
    link_embedding = _best_matching_embedding(text_embedding, link_contents)

    # When no image/link produced usable text, substitute a zero vector shaped
    # like the comment embedding, as the original fallback did.
    final_score = calculate_relevance_score(
        text_embedding,
        image_embedding=image_embedding if image_embedding is not None else np.zeros_like(text_embedding),
        link_embedding=link_embedding if link_embedding is not None else np.zeros_like(text_embedding),
    )
    return final_score
|
||
if __name__ == "__main__":
    # Manual smoke run: score one sample comment against one image and one link.
    score = process_comment(
        "This comment addresses layout issues in the UI.",
        ["https://www.bing.com/th?id=OADD2.7490516793165_1K4Y6UMUPT5JEHB4D8&pid=21.2&c=16&roil=0&roit=0.033&roir=1&roib=0.8186&w=300&h=157&dynsize=1&qlt=90"],
        ["https://example.com/sample-page"],
    )
    print(f"Final Relevance Score: {score}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from sklearn.metrics.pairwise import cosine_similarity | ||
import numpy as np | ||
import logging | ||
|
||
logging.basicConfig(level=logging.INFO) | ||
|
||
def calculate_similarity(embedding1, embedding2):
    """Return the cosine similarity between two embeddings.

    Each input is converted to a NumPy array and reshaped into a single-row
    matrix. Both shapes are logged at INFO level, and the only entry of the
    resulting similarity matrix is returned.
    """
    row_a, row_b = (np.array(vector).reshape(1, -1) for vector in (embedding1, embedding2))
    logging.info(f"Embedding1 shape: {row_a.shape}")
    logging.info(f"Embedding2 shape: {row_b.shape}")
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]
|
||
def calculate_relevance_score(text_embedding, image_embedding=None, link_embedding=None):
    """Combine image and link similarity into one weighted relevance score.

    Each embedding that is provided contributes its similarity to
    ``text_embedding`` multiplied by 0.5; embeddings left as ``None`` are
    skipped. With neither provided the result is ``0``.
    """
    weighted_candidates = (
        (image_embedding, 0.5),  # Weight: 50%
        (link_embedding, 0.5),  # Weight: 50%
    )
    return sum(
        calculate_similarity(text_embedding, candidate) * weight
        for candidate, weight in weighted_candidates
        if candidate is not None
    )
|
||
if __name__ == "__main__":
    # Demo run using random stand-in embeddings.
    def generate_embedding(text):
        # Placeholder: ignores `text` and returns a random 768-element vector.
        # Replace this with your actual embedding generation logic.
        return np.random.rand(768)

    comment_vec = generate_embedding("a UI with layout issues")
    screenshot_vec = generate_embedding("a UI with red lines showing a spacing issue")
    relevance_score = calculate_relevance_score(comment_vec, image_embedding=screenshot_vec)
    print(f"Relevance Score: {relevance_score:.2f}")