-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathai_server.py
109 lines (79 loc) · 2.81 KB
/
ai_server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Server to handle AI API requests
import base64
import os
from io import BytesIO

import cohere
from docx import Document
from flask import Flask, request, jsonify
from pdfminer.high_level import extract_text
from youtube_transcript_api import YouTubeTranscriptApi
# Flask application instance; the @app.route decorators in this file
# register their handler functions on it.
app = Flask(__name__)
@app.route('/process_pdf', methods=['POST'])
def process_pdf():
    """Summarize a Base64-encoded PDF posted in the ``file`` form field.

    Returns:
        A JSON object ``{'data': <str>}``. On success ``data`` is the value
        returned by ``get_summary`` for the text extracted from the PDF.
        If the payload is not valid Base64 or the PDF text cannot be
        extracted, ``data`` is an error message and the HTTP status is 400.
    """
    encoded = request.form['file']

    try:
        pdf_data = base64.b64decode(encoded.encode('utf-8'))
    except ValueError:
        # binascii.Error (e.g. incorrect padding) is a ValueError subclass.
        return jsonify({'data': "Unable to read the PDF. Try sending another file."}), 400

    try:
        # pdfminer needs a file-like object, so wrap the raw bytes.
        text = extract_text(BytesIO(pdf_data))
    except Exception as e:
        # Request boundary: any parser failure on client-supplied bytes is
        # reported to the client instead of crashing the handler.
        print(e)
        return jsonify({'data': "Unable to read the PDF. Try sending another file."}), 400

    return jsonify({'data': get_summary(text)})
@app.route('/process_file', methods=['POST'])
def process_file():
    """Summarize a Base64-encoded text or .docx file posted in ``file``.

    The decoded bytes are first interpreted as UTF-8 text. If they are not
    valid UTF-8 they are parsed as a Word document and the text of its
    paragraphs is concatenated, one paragraph per line.

    Returns:
        A JSON object ``{'data': <str>}``. On success ``data`` is the value
        returned by ``get_summary``. If the payload is not valid Base64 or
        cannot be parsed as a document, ``data`` is an error message and the
        HTTP status is 400.
    """
    encoded = request.form['file']

    try:
        decoded_bytes = base64.b64decode(encoded)
    except ValueError:
        # Covers binascii.Error (bad padding) and non-ASCII input strings.
        return jsonify({'data': "Unable to read the file. Try sending another file."}), 400

    try:
        text = decoded_bytes.decode('utf-8')
    except UnicodeDecodeError:
        # Not plain text: fall back to treating the bytes as a .docx file.
        try:
            doc = Document(BytesIO(decoded_bytes))
        except Exception as e:
            # Request boundary: the bytes are neither UTF-8 text nor a
            # document python-docx can open.
            print(e)
            return jsonify({'data': "Unable to read the file. Try sending another file."}), 400
        # Each paragraph is followed by a newline, including the last one.
        text = "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    return jsonify({'data': get_summary(text)})
@app.route('/process_video', methods=['GET'])
def process_video():
    """Summarize the transcript of the YouTube video named by ``videoId``.

    The transcript chunks are joined into one space-separated string with
    newlines replaced by spaces. An empty transcript yields a fixed notice,
    a transcript shorter than 250 characters is returned unchanged, and
    anything longer is passed to ``get_summary``.

    Returns:
        A JSON object ``{'data': <str>}``.
    """
    video_id = request.args.get('videoId')
    print(video_id)

    # catch() returns "" when the transcript lookup raises, which makes the
    # loop below a no-op and leaves the transcript empty.
    chunks = catch(lambda: YouTubeTranscriptApi.get_transcript(video_id))
    pieces = []
    for chunk in chunks:
        pieces.append(chunk.get("text"))
    joined = " ".join(pieces)
    transcript = joined.replace("\n", " ")

    if not transcript:
        return jsonify({'data': "No transcript to summarize."})
    if len(transcript) < 250:
        return jsonify({'data': transcript})
    return jsonify({'data': get_summary(transcript)})
def get_summary(text):
    """Summarize *text* as bullet points via the Cohere summarize API.

    Args:
        text: Plain text to summarize.

    Returns:
        The summary string from the Cohere response, or the fixed message
        "Unable to get summary. Try sending another file." when *text* is
        blank or the Cohere client/request raises.
    """
    failure_message = "Unable to get summary. Try sending another file."

    # Nothing to summarize: return the failure message without a network call.
    if not text or not text.strip():
        return failure_message

    # NOTE(review): a credential committed to source control should be
    # rotated and removed. COHERE_API_KEY takes precedence; the literal is
    # kept only so existing deployments keep working until that is done.
    api_key = os.environ.get(
        'COHERE_API_KEY', 'urUHBanFf5qq0F39mHGIPdWV9tvDqe3198WQ0Zq3'
    )

    print(text)
    try:
        # Client construction is inside the try so that its failures also
        # produce the fallback message instead of an unhandled exception.
        co = cohere.Client(api_key)
        response = co.summarize(
            text=text,
            format='bullets',
            length='medium',
        )
    except Exception as e:
        print(e)
        return failure_message

    print(response.summary)
    return response.summary
def catch(func, handle=lambda e: e, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, returning ``""`` if it raises.

    Args:
        func: Callable to invoke.
        handle: Unused. Retained so the positional layout of the signature
            stays the same for existing callers.
        *args: Positional arguments forwarded to *func*.
        **kwargs: Keyword arguments forwarded to *func*.

    Returns:
        Whatever *func* returns, or an empty string if *func* raised an
        ``Exception``.
    """
    try:
        return func(*args, **kwargs)
    except Exception:
        # Only ordinary errors are swallowed; SystemExit and
        # KeyboardInterrupt propagate so the process can still be stopped.
        return ""
if __name__ == '__main__':
    # Run the Flask app on port 3001, listening on all interfaces.
    # Debug mode enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary Python. Because the
    # server binds to 0.0.0.0, debug is off unless explicitly requested with
    # FLASK_DEBUG=1 (or "true") for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=3001, debug=debug_enabled)