-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
221 lines (184 loc) · 7.05 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
########
# Document-Image Matching System Implementation
#######
# Developed by:
# Partha Pratim Ray
# http://github.com/ParthaPRay
# Commercial License [Do NOT use the code without written permission for any type of work such as research or duplication or anything similar]
# Contact parthapratimray1986@gmail.com
# 2024
##########
# Version 1
#######################################################
### Uses the OpenAI and Open Library APIs
# Create a .env file in the same directory as this code and save your own OpenAI API key
"""
OPENAI_API_KEY=your_secret_api_key_here
"""
import os
import base64
import requests
import pandas as pd
import gradio as gr
import asyncio
from dotenv import load_dotenv
from openai import AsyncOpenAI # Updated library
# Load environment variables from .env file
# (must run before the os.getenv lookup below so the key from .env is visible)
load_dotenv()
# Get the OpenAI API key from the environment
# (os.getenv returns None when OPENAI_API_KEY is not set)
api_key = os.getenv("OPENAI_API_KEY")
# Create a global AsyncOpenAI client
# (used by analyze_image_and_extract_keywords for both of its API calls)
client = AsyncOpenAI(api_key=api_key)
####### For low res mode, openai expects a 512px x 512px image.
####### For high res mode, the short side of the image should be less than 768px and the long side should be less than 2,000px.
############################
# 1) Async Vision + Summaries
############################
# A given file signature always encodes to the same leading base64 characters,
# so the image container can be recognised without decoding the payload.
_BASE64_IMAGE_SIGNATURES = (
    ("/9j/", "image/jpeg"),        # FF D8 FF
    ("iVBORw0KGgo", "image/png"),  # 89 'PNG' 0D 0A 1A 0A
    ("R0lGOD", "image/gif"),       # 'GIF8'
    ("UklGR", "image/webp"),       # 'RIFF' (WebP container)
)


def _guess_image_mime(image_base64: str) -> str:
    """Return the MIME type implied by the leading characters of a base64 image.

    Falls back to ``image/jpeg`` (the value that used to be hard-coded) when
    the signature is not recognised.
    """
    for prefix, mime in _BASE64_IMAGE_SIGNATURES:
        if image_base64.startswith(prefix):
            return mime
    return "image/jpeg"


async def analyze_image_and_extract_keywords(image_base64: str) -> str:
    """
    1) Sends base64-encoded image to an OpenAI vision model to get a textual description.
    2) Summarizes that textual description into 1 keyword.

    Args:
        image_base64: The image file contents, base64-encoded as text.

    Returns:
        The extracted keyword text on success. This function never raises:
        on failure it returns a string starting with
        ``"Error analyzing image:"`` (step 1 failed) or
        ``"Error extracting keywords:"`` (step 2 failed).
    """
    # Label the data URL with the image's real type instead of always claiming
    # JPEG; uploads can just as well be PNG, GIF or WebP.
    data_url = f"data:{_guess_image_mime(image_base64)};base64,{image_base64}"

    # STEP A: Send image to an OpenAI vision-capable model for description
    try:
        vision_response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": (
                                "Describe the main objects, concepts, or actions in this image."
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": data_url},
                        },
                    ],
                }
            ],
            max_tokens=150,
            temperature=0.7,
        )
        # The message content may be None; normalise so .strip() cannot fail.
        description_text = (vision_response.choices[0].message.content or "").strip()
    except Exception as e:
        # Deliberately broad: any failure is reported to the caller as text.
        return f"Error analyzing image: {e}"

    if not description_text:
        # Nothing to summarise, so skip the second API call.
        return "Error analyzing image: the model returned no description"

    # STEP B: Summarize that textual description into 1 keyword
    try:
        summarization_response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": "You are an assistant who extracts concise keywords.",
                },
                {
                    "role": "user",
                    "content": (
                        f"Original description:\n\n{description_text}\n\n"
                        "Extract the top 1 keyword or named entity from this text."
                    ),
                },
            ],
            max_tokens=50,
            temperature=0.0,
        )
        summary_keywords = (
            summarization_response.choices[0].message.content or ""
        ).strip()
    except Exception as e:
        return f"Error extracting keywords: {e}"

    if not summary_keywords:
        return "Error extracting keywords: the model returned no keyword"
    return summary_keywords
############################
# 2) Open Library Fetch
############################
def fetch_books(query, limit=5, *, timeout=10):
    """
    Fetches books from the Open Library Search API using a general ('q') query.

    Args:
        query: Free-text search terms. ``None`` or a blank string yields an
            empty DataFrame without contacting the API.
        limit: Maximum number of results requested from the API.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        A DataFrame with one row per book and the columns ``Title``,
        ``Author``, ``First Publish Year``, ``Edition Count`` and
        ``Open Library ID``. It is empty when there are no matches. If the
        request fails or the response is not valid JSON, a one-row DataFrame
        with a single ``Error`` column is returned instead.
    """
    query = "" if query is None else str(query).strip()
    if not query:
        # Nothing to search for; skip the network round-trip.
        return pd.DataFrame()

    base_url = "https://openlibrary.org/search.json"
    params = {"q": query, "limit": limit}
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        return pd.DataFrame([{"Error": f"Error fetching data: {e}"}])

    books = [
        {
            "Title": doc.get("title", "N/A"),
            # `or` also covers an empty author list, which would otherwise
            # join to an empty string.
            "Author": ", ".join(doc.get("author_name") or ["Unknown"]),
            "First Publish Year": doc.get("first_publish_year", "N/A"),
            "Edition Count": doc.get("edition_count", "N/A"),
            "Open Library ID": doc.get("key", "N/A"),
        }
        for doc in data.get("docs", [])
    ]
    return pd.DataFrame(books)
############################
# 3) Async Pipeline
############################
async def async_process_image(image):
    """
    Async pipeline that:
    1) Converts the uploaded image to base64
    2) Sends it to the OpenAI Vision + Summaries to get keywords
    3) Uses those keywords to query Open Library
    4) Returns (text_summary, dataframe)

    Args:
        image: Path of the uploaded image file, or ``None`` if nothing was
            uploaded.

    Returns:
        A ``(text, dataframe)`` tuple. ``dataframe`` is ``None`` whenever there
        is nothing tabular to show: no image, unreadable file, a failed
        analysis step, a failed book lookup, or no matching books.
    """
    if image is None:
        return "No image provided", None

    # Convert image to base64
    try:
        with open(image, "rb") as f:
            base64_image = base64.b64encode(f.read()).decode("utf-8")
    except OSError as e:
        # Report an unreadable upload as text instead of raising into the UI.
        return f"Error reading image: {e}", None

    # 1) Analyze + Summarize
    keywords = await analyze_image_and_extract_keywords(base64_image)

    # The analysis step reports failures as text with these prefixes. Stop
    # here so an error message is never used as a search query or labelled
    # as an extracted keyword.
    error_prefixes = ("Error analyzing image:", "Error extracting keywords:")
    if keywords.startswith(error_prefixes):
        return keywords, None

    # 2) Query books
    books_df = fetch_books(keywords, limit=5)
    if books_df.empty:
        return f"**Extracted Keywords**: {keywords}\n\nNo books found.", None

    # A lone "Error" column is how fetch_books signals a failed lookup; show
    # the message rather than presenting it as a table of books.
    if list(books_df.columns) == ["Error"]:
        fetch_error = books_df["Error"].iloc[0]
        return f"**Extracted Keywords**: {keywords}\n\n{fetch_error}", None

    # 3) Format results
    books_str = books_df.to_string(index=False)
    output_text = (
        f"**Extracted Keywords**: {keywords}\n\n"
        f"**Top Books**:\n{books_str}"
    )
    return output_text, books_df
def process_image(image):
    """
    Blocking entry point that Gradio invokes for a button click.

    Creates the async pipeline coroutine for ``image`` and drives it to
    completion with asyncio.run(), handing back the pipeline's result
    unchanged.
    """
    pipeline = async_process_image(image)
    return asyncio.run(pipeline)
############################
# 4) Gradio Interface
############################
# Build the UI: a header, then one row with two columns (inputs on the left,
# results on the right).
with gr.Blocks() as demo:
    # Static header / licence notice shown at the top of the page.
    gr.Markdown("""
## Document-Image Matching System Implementation
**Image → Keywords → Books (Async OpenAI Vision + Text API and Open Library API)**
**Developed by Partha Pratim Ray**
**Contact: parthapratimray1986@gmail.com, Copyright 2024**
**Commercial License**
This application uses an AI-powered pipeline to analyze images, extract keywords, and fetch relevant books. Interested persons/entities must take permission before using any part of this code into reserach, study, learning or commerical purposes. Use of this code without written permission is legally punishable.
""")
    with gr.Row():
        with gr.Column():
            # type="filepath" hands the handler a path on disk, which is what
            # async_process_image opens and reads.
            image_input = gr.Image(type="filepath", label="Upload an image")
            submit_button = gr.Button("Analyze & Find Books")
        with gr.Column():
            # Targets for the (text, dataframe) pair returned by process_image.
            output_text = gr.Markdown()
            output_table = gr.Dataframe()
    # Connect the button click to the synchronous wrapper function
    submit_button.click(
        fn=process_image,
        inputs=[image_input],
        outputs=[output_text, output_table],
    )
# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    # NOTE(review): "0.0.0.0" presumably makes the app reachable from other
    # machines on the network; confirm this exposure is intended.
    demo.launch(server_name="0.0.0.0")