-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
182 lines (156 loc) · 5.6 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import os
from searching import search
from translation.translation import translate_text
from tts.basic_tts import save_mp3
from encryption.password_encryp import read_csv
from searching.searching import find_word_location
from standardisation.table_extract import PdfTableExtractor,ImageTableExtractor
from standardisation.pdftocsv import ingest_pdf
from standardisation import tomachinereadable
from file_processing import machine_readablity,readablity
# Ask the user for the path of the document to work on. The value is stored in
# the module-level name `file_path`, which the menu code further down reads.
# NOTE: this prompt is a top-level statement, so it runs as soon as the module
# is executed or imported.
file_path = input("Please enter the file path: ")
def automation(file_path):
    """Interactively collect the choices for an automated run.

    The user is walked through up to three menus and every selection is
    recorded, in order, in the returned list:

        [0]  1 : Standardisation      2 : Search (any answer other than 1)
        [1]  after Standardisation -> 1 : Decrypt, 2 : Search
             after Search          -> 1 : Text to Speech
        [2]  (Standardisation only)   1 : Text to Speech, 2 : Translation

    Picking "Exit" (or any unlisted number) at a menu stops the dialogue, so
    the list may hold one, two or three entries.

    Args:
        file_path: Accepted for symmetry with ``perform_auto``; not used here.

    Returns:
        list[int]: The recorded selections.

    Raises:
        ValueError: If an answer cannot be parsed as an integer.
    """
    choices = []

    print("1.Standardisation\n2.Search")
    if int(input("Enter choice no.")) != 1:
        # Search path: only one optional follow-up step is offered.
        choices.append(2)
        print("Choices:\n1.Text to Speech\n2.Exit")
        if int(input("Enter choice:")) == 1:
            choices.append(1)
        return choices

    choices.append(1)
    print("Choices:\n1.Decrypt\n2.Search\n3.Exit")
    second = int(input("Enter choice:"))
    if second not in (1, 2):
        return choices
    choices.append(second)

    # Decrypt and Search both lead to the same final menu.
    print("Choices:\n1.Text to Speech\n2.Translation\n3.Exit")
    third = int(input("Enter choice:"))
    # Test the answer to *this* menu; checking the first-level answer here
    # made "2. Translation" impossible to select.
    if third in (1, 2):
        choices.append(third)
    return choices
def perform_auto(file_path, l):
    """Run the operation described by a choice list on ``file_path``.

    ``l`` holds up to three integer selections (the shape ``automation``
    returns). A missing entry is treated as "the user chose Exit at that
    menu", so the function simply returns instead of raising ``IndexError``.

    Dispatch:
        l[0] == 1, l[1] == 1, l[2] == 1  -> read_csv(file_path)
        l[0] == 1, l[1] == 1, l[2] other -> translate the document text
        l[0] == 1, l[1] other            -> prompt for a word and search it
        l[0] other, l[1] == 1            -> save_mp3 of the document text
        l[0] other, l[1] other, l[2] == 1     -> only load the document text
        l[0] other, l[1] other, l[2] other    -> translate the document text

    Args:
        file_path: Path of the document to operate on.
        l: Sequence of integer selections; may be shorter than three items.

    Returns:
        None. Results are printed or handed to the called helpers.
    """

    def pick(position):
        # None marks a menu level the user never reached / exited from.
        return l[position] if position < len(l) else None

    def load_text():
        # The text must be read here: relying on a module-level `input_text`
        # cannot work because the name is local to this function.
        extension = os.path.splitext(file_path)[1].lower()
        if extension.startswith(".doc"):
            return readablity.read_doc(file_path)
        return readablity.read_pdf(file_path)

    def translate_and_print():
        # Ask for the language here so the call does not depend on a global
        # that only exists if another menu branch happened to run first.
        tgt_lang = input("Enter the target language code (e.g., 'ben_Beng' for Bengali): ")
        translated_output = translate_text(load_text(), tgt_lang)
        print(translated_output)

    first, second, third = pick(0), pick(1), pick(2)
    if first is None or second is None:
        return

    if first == 1:
        if second != 1:
            word = input("Enter the word to search: ")
            results = find_word_location(file_path, word)
            print(results)
        elif third == 1:
            read_csv(file_path)
        elif third is not None:
            translate_and_print()
        return

    if second == 1:
        save_mp3(load_text())
    elif third == 1:
        # NOTE(review): this branch has only ever loaded the text without
        # using it; kept as-is until the intended follow-up step is confirmed.
        load_text()
    elif third is not None:
        translate_and_print()
# Interactive entry point: report on the chosen file, show the operations menu
# and run the single operation the user selects.

# Report whether the path exists. Execution continues either way.
if os.path.exists(file_path):
    print(f'The file "{file_path}" exists.')
else:
    print(f'The file "{file_path}" does not exist.')

if machine_readablity.is_machine_readable(file_path):
    print ("File is machine readable.")
else:
    print("file is non machine readable.")

print("Operations:\n1.Searching\n 2.Standardisation\n 3.Translation\n 4.Text to Speech\n 5.Encryption & Decription\n 6.Automation\n 7. exit")
choice = int(input("Enter choice no:"))

if choice == 1:
    print("Options\n 1.Absolute searching\n2.Dynamic Searching")
    opt1 = int(input("Enter option no:"))
    if opt1 == 1:
        # Absolute search: locate a literal word in the file.
        word = input("Enter the word to search: ")
        results = find_word_location(file_path, word)
        print(results)
    else:
        # Dynamic search: hand a free-form query to the search module.
        query = input("Enter your query: ")
        search.process_pdf_and_query(file_path, query)
elif choice == 2:
    print("Options\n 1.Tabular\n2.Non Tabular\n3. Formatting")
    opt2 = int(input("Enter option no:"))
    if opt2 == 1:
        # Tabular: extract tables and write them to a new PDF.
        print("Options\n 1.from pdf\n2.from img\n3. Formatting")
        ch = int(input("Enter choice no:"))
        if ch == 1:
            processor1 = PdfTableExtractor(file_path, start_page=1, end_page=1)
            all_results = processor1.process_all_pages()
            for page, tables in all_results.items():
                print(f"Page {page}: {len(tables)} tables found")
                for i, df in enumerate(tables, 1):
                    print(f" Table {i} shape: {df.shape}")
            processor1.create_pdf_from_tables("output_tables.pdf")
        elif ch == 2:
            # NOTE(review): the image path is hard-coded and ignores
            # `file_path` -- confirm whether that is intentional.
            processor2 = ImageTableExtractor("1234.png", start_page=1, end_page=1)
            tables = processor2.extract_tables()
            processor2.create_pdf_with_tables("output_tables.pdf")
            print("Extracted tables:")
            for i, table in enumerate(tables):
                print(f"Table {i + 1}:")
                print(table.df)
                print("\n")
        else:
            read_csv(file_path)
    else:
        tomachinereadable.convert_to_machine_readable(file_path, "output.pdf")
elif choice in (3, 4):
    # Translation and text-to-speech both start from the document's text.
    # Decide by the real file extension (case-insensitive) so a ".doc"
    # appearing elsewhere in the path does not select the wrong reader.
    extension = os.path.splitext(file_path)[1].lower()
    if extension.startswith(".doc"):
        input_text = readablity.read_doc(file_path)
    else:
        input_text = readablity.read_pdf(file_path)

    if choice == 3:
        tgt_lang = input("Enter the target language code (e.g., 'ben_Beng' for Bengali): ")
        translated_output = translate_text(input_text, tgt_lang)
        print(translated_output)
    else:
        save_mp3(input_text)
elif choice == 5:
    # Pass the path, as every other read_csv call in this file does.
    read_csv(file_path)
elif choice == 6:
    # Automation: collect the pipeline choices, then execute them.
    perform_auto(file_path, automation(file_path))
elif choice == 7:
    print("Exiting")
else:
    print("Invalid Choice")