-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
100 lines (80 loc) · 3.42 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import streamlit as st
import easyocr
import numpy as np
from PIL import Image
from transformers import pipeline
from groq import Groq
from fuzzywuzzy import process
# Perform OCR on an image (the EasyOCR reader is created inside the function)
def perform_ocr(image):
    """Run EasyOCR (English) on an image and return the recognized strings.

    Args:
        image: Any object ``np.array`` can turn into an image array; the app
            passes a PIL image.

    Returns:
        list: The text of each detection, in the order EasyOCR reports them.
    """
    # The reader is built on every call, exactly as before.
    ocr_reader = easyocr.Reader(["en"])
    # EasyOCR's entry point is `readtext`; Reader has no `ocr` method.
    # With the default detail level each entry is (bounding_box, text, confidence),
    # so the text is the second element itself, not its first character.
    detections = ocr_reader.readtext(np.array(image))
    return [text for _bbox, text, _confidence in detections]
def correct_text(ocr_texts):
    """Replace each OCR string with its closest known term when the match is good enough.

    Every input string is compared against a fixed vocabulary with
    ``process.extractOne``. When the best score is strictly above 50 the
    matched term is used; otherwise the original string is kept.

    Args:
        ocr_texts: Iterable of strings produced by OCR.

    Returns:
        list: One entry per input string, in the same order.
    """
    known_terms = [
        'Tiger', 'Pepsi', 'Heineken', 'Larue', 'Bivina', 'Edelweiss',
        'Bia Viet', 'Strongbow', 'Beer carton', 'Beer crate', 'Beer bottle',
        'Beer can', 'Drinker', 'Promotion Girl', 'Seller', 'Buyer',
        'Customer', 'Ice bucket', 'Ice box', 'Fridge', 'Signage',
        'billboard', 'poster', 'standee', 'Tent card', 'display stand',
        'tabletop', 'Parasol',
    ]

    def _snap_to_vocabulary(raw):
        # Best-scoring vocabulary entry and its similarity score.
        candidate, similarity = process.extractOne(raw, known_terms)
        return candidate if similarity > 50 else raw

    return [_snap_to_vocabulary(raw) for raw in ocr_texts]
# Get image description using image captioning model
def get_image_caption(image):
    """Return a generated caption for ``image``.

    Builds an "image-to-text" pipeline backed by the
    ``Salesforce/blip-image-captioning-base`` model, runs it on the image and
    returns the ``generated_text`` field of the first result.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    predictions = captioner(image)
    first_prediction = predictions[0]
    return first_prediction['generated_text']
# Analyze image information using Groq API
def analyze_image_information(image_description, ocr_results):
    """Ask the Groq chat API for insights about an image.

    The caption and OCR text are embedded in a fixed prompt and sent as a
    single user message to the ``llama3-8b-8192`` model.

    Args:
        image_description: Caption text describing the image.
        ocr_results: OCR text found in the image.

    Returns:
        The message content of the first choice in the chat completion.

    Raises:
        RuntimeError: If the ``GROQ_API_KEY`` environment variable is unset
            or empty.
    """
    import os

    # The API key must never be committed to source control; read it from the
    # environment and fail early with a clear message when it is missing.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "export it before running the app."
        )

    prompt = f"""
Analyze the following image information and provide insights based on the criteria given below:
Image Description:
{image_description}
OCR Results:
{ocr_results}
Criteria:
1. Brand Logos: Identify any brand logos mentioned in the description or OCR results.
2. Products: Mention any products such as beer kegs and bottles in the description or OCR results.
3. Customers: Describe the number of customers, their activities, and emotions.
4. Promotional Materials: Identify any posters, banners, and billboards.
5. Setup Context: Determine the scene context (e.g., bar, restaurant, grocery store, or supermarket).
Insights:
Summarize all of criteria and give context
"""
    client = Groq(api_key=api_key)
    data = {
        "model": "llama3-8b-8192",
        "messages": [{"role": "user", "content": prompt}],
    }
    chat_completion = client.chat.completions.create(**data)
    return chat_completion.choices[0].message.content
# Streamlit app
st.set_page_config(layout="wide")
st.title("Image Analysis App")

# Three side-by-side columns with relative widths 1 : 2 : 2
upload_col, ocr_col, analysis_col = st.columns([1, 2, 2])

# Column 1: file picker
with upload_col:
    st.header("Upload Image")
    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=["jpg", "jpeg", "png"],
    )

# Both remaining columns only show results once a file has been chosen
has_upload = uploaded_file is not None

# Column 2: the image, its caption and the corrected OCR strings
with ocr_col:
    st.header("OCR and Description")
    if has_upload:
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption='Uploaded Image', use_column_width=True)

        st.subheader("Image Description")
        image_description = get_image_caption(image)
        st.write(image_description)

        # OCR runs before the "OCR Texts" subheader is rendered
        ocr_texts = correct_text(perform_ocr(image))
        st.subheader("OCR Texts")
        for recognized in ocr_texts:
            st.write(recognized)

# Column 3: LLM analysis of the caption plus the space-joined OCR strings
with analysis_col:
    st.header("Analysis")
    if has_upload:
        ocr_results = ' '.join(ocr_texts)
        analysis = analyze_image_information(image_description, ocr_results)
        st.write(analysis)