Skip to content

Commit bee4ea3

Browse files
committed
Different MS Word call (exe Packaging Problems), txts now via Office, better exception catching
1 parent 6b29171 commit bee4ea3

File tree

2 files changed

+56
-59
lines changed

2 files changed

+56
-59
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ lib/
1818
lib64/
1919
parts/
2020
sdist/
21+
output/
2122
var/
2223
wheels/
2324
pip-wheel-metadata/

reistee.py

Lines changed: 55 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@
44
import shutil # mainly for moving and deleting files
55
import subprocess # for running libreoffice in background
66
import functools # used for rotating images by metadata
7-
from docx2pdf import convert
87
from fpdf import FPDF # for creating pdf from images
98
from PIL import Image # for opening images
109
from PyPDF2 import PdfFileMerger # for merging pdfs
1110

1211

12+
13+
1314
def remove_unnecassary_symbols(dirpath, filename):
1415
'''
1516
Removes "." "-" and "_" from filename and returns new filename
@@ -110,20 +111,19 @@ def categorise_file(dirpath, filename, file_lists):
110111

111112
elif (file_to_conv_path.endswith(".doc") or
112113
file_to_conv_path.endswith(".docx") or
113-
file_to_conv_path.endswith(".odt")):
114+
file_to_conv_path.endswith(".odt") or
115+
file_to_conv_path.endswith(".txt")):
114116

115117
doc_files.append(file_to_conv_path)
116118

117-
elif file_to_conv_path.endswith(".txt"):
118-
119-
txt_files.append(file_to_conv_path)
120-
121119
else:
122120
return False
123121

124122
return True
125123

126124

125+
126+
127127
# Thanks to Roman Odaisky for this solution to why the images were
128128
# sometimes rotated, when all image viewers displayed it correctly.
129129
# Problem is that when an image is taken sideways, the camera stores
@@ -189,30 +189,39 @@ def pic_to_pdf(pdf_filename, picture_files):
189189
for picture_file in picture_files:
190190

191191
# Open picture and rotate if stated in pictures metadata
192-
cover = Image.open(str(picture_file))
193-
cover_transposed = image_transpose_exif(cover)
194-
cover_transposed.save(str(picture_file))
195-
width, height = cover_transposed.size
192+
try:
193+
cover = Image.open(str(picture_file))
194+
cover_transposed = image_transpose_exif(cover)
195+
cover_transposed.save(str(picture_file))
196+
width, height = cover_transposed.size
196197

197-
# create temporary pdf, with _imgpdf_ in filename
198-
pdf = FPDF(unit = "pt", format = [width, height])
199-
pdf.add_page()
200-
pdf.image(str(picture_file), 0, 0)
198+
# create temporary pdf, with _imgpdf_ in filename
199+
pdf = FPDF(unit = "pt", format = [width, height])
200+
pdf.add_page()
201+
pdf.image(str(picture_file), 0, 0)
201202

202-
img_pdf_counter += 1
203+
img_pdf_counter += 1
203204

204-
pdf.output(pdf_filename + "_imgpdf_" +
205-
str(img_pdf_counter) + ".pdf", "F")
205+
pdf.output(pdf_filename + "_imgpdf_" +
206+
str(img_pdf_counter) + ".pdf", "F")
207+
208+
pdf_list.append(pdf_filename + "_imgpdf_" +
209+
str(img_pdf_counter) + ".pdf")
210+
except Image.UnidentifiedImageError:
211+
print("Can't open image: " +
212+
os.path.basename(pdf_filename) +
213+
"\\" + os.path.basename(picture_file) + " : " +
214+
"May be damaged or not correctly converted.")
215+
move_file_to_folder(picture_file, pdf_filename)
206216

207-
pdf_list.append(pdf_filename + "_imgpdf_" +
208-
str(img_pdf_counter) + ".pdf")
209217

210218
# merge all _imgpdf_ files
211-
img_merger = PdfFileMerger()
212-
for pdf in pdf_list:
213-
img_merger.append(pdf)
214-
img_merger.write(pdf_filename + "_img_" + ".pdf")
215-
img_merger.close()
219+
if len(pdf_list) > 0:
220+
img_merger = PdfFileMerger(strict=False)
221+
for pdf in pdf_list:
222+
img_merger.append(pdf)
223+
img_merger.write(pdf_filename + "_img_" + ".pdf")
224+
img_merger.close()
216225

217226
# remove all _imgpdf_ Files
218227
for pdf in pdf_list:
@@ -252,6 +261,7 @@ def check_libreoffice_install():
252261
return libreoffice_path
253262

254263

264+
255265
def doc_to_pdf(pdf_filename, doc_files):
256266
'''
257267
Converts a list of odt, doc and docx Files into one pdf-File that
@@ -279,14 +289,13 @@ def doc_to_pdf(pdf_filename, doc_files):
279289

280290
os.chdir(os.getcwd() + "\\" + os.path.dirname(doc_file))
281291

282-
283-
284292
if not check_libreoffice_install() == "":
285293

294+
286295
libreoffice = subprocess.Popen(check_libreoffice_install() +
287296
" --convert-to "+ str(doc_counter) +
288-
"py.pdf "+ os.path.basename(doc_file))
289-
libreoffice.wait()
297+
"py.pdf "+ "\"" + os.path.basename(doc_file)+ "\"")
298+
test = libreoffice.wait()
290299

291300
converted_docs.append(os.path.dirname(doc_file) + "\\" +
292301
os.path.splitext(os.path.basename(doc_file))[0] + "." +
@@ -297,19 +306,26 @@ def doc_to_pdf(pdf_filename, doc_files):
297306

298307
# Should convert doc, docx and odt with MS Word. NOT TESTED!
299308
else:
309+
new_name = (os.path.dirname(doc_file) + "\\" +
310+
os.path.splitext(os.path.basename(doc_file))[0] +
311+
"." + str(doc_counter) + "py.pdf")
312+
300313
try:
301-
(convert(doc_file,
302-
os.path.splitext(os.path.basename(doc_file))[0] + "." +
303-
str(doc_counter) + "py.pdf"))
304-
314+
word = os.client.DispatchEx("Word.Application")
315+
worddoc = word.Documents.Open(doc_file)
316+
worddoc.SaveAs(new_name, FileFormat = 17)
305317
converted_docs.append(os.path.dirname(doc_file) + "\\" +
306318
os.path.splitext(os.path.basename(doc_file))[0] + "." +
307319
str(doc_counter) + "py.pdf")
308-
309320
doc_counter += 1
321+
worddoc.Close()
322+
word.Quit()
310323

311-
except Exception:
312-
print("No Office installed")
324+
except Exception as e:
325+
os.chdir(base_dir)
326+
print("No Office installed. Please install LibreOffice!")
327+
328+
move_file_to_folder(doc_file, pdf_filename)
313329

314330
os.chdir(base_dir)
315331

@@ -318,7 +334,7 @@ def doc_to_pdf(pdf_filename, doc_files):
318334

319335
if not len(converted_docs) == 0:
320336
# merge all created pdfs into one pdf
321-
doc_merger = PdfFileMerger()
337+
doc_merger = PdfFileMerger(strict=False)
322338
for converted_doc in converted_docs:
323339
doc_merger.append(converted_doc)
324340

@@ -392,7 +408,7 @@ def merge_files_per_category(curr_student, categorized_files):
392408

393409
pdf_files.sort()
394410

395-
pdf_merger = PdfFileMerger()
411+
pdf_merger = PdfFileMerger(strict=False)
396412
for pdf in pdf_files:
397413
pdf_merger.append(pdf)
398414

@@ -404,22 +420,7 @@ def merge_files_per_category(curr_student, categorized_files):
404420
doc_files.sort()
405421

406422
doc_to_pdf(curr_student, doc_files)
407-
408-
if len(txt_files) != 0:
409-
410-
txt_files.sort()
411-
412-
pdf = FPDF()
413-
414-
for txt_file in txt_files:
415-
pdf.add_page()
416-
pdf.set_font("Arial", size = 15)
417-
f = open(txt_file, "r")
418-
for x in f:
419-
pdf.cell(200, 10, txt = x, ln = 1, align = 'L')
420-
421-
422-
pdf.output(curr_student + "_txts_" + ".pdf")
423+
423424

424425

425426

@@ -443,7 +444,7 @@ def merge_categories(curr_student):
443444
(os.path.basename(curr_student)).split(" ")[0])
444445

445446

446-
fullmerger = PdfFileMerger()
447+
fullmerger = PdfFileMerger(strict=False)
447448

448449
at_least_one_file = False
449450
# add existing merged file format specific pdfs to merger
@@ -459,9 +460,6 @@ def merge_categories(curr_student):
459460
fullmerger.append(curr_student + "_docs_" + ".pdf")
460461
at_least_one_file = True
461462

462-
if os.path.isfile(curr_student + "_txts_" + ".pdf"):
463-
fullmerger.append(curr_student + "_txts_" + ".pdf")
464-
at_least_one_file = True
465463

466464
if at_least_one_file:
467465
# merge pdfs and create pdf with reversed name
@@ -480,8 +478,6 @@ def merge_categories(curr_student):
480478
if os.path.isfile(curr_student + "_docs_" + ".pdf"):
481479
os.remove(curr_student + "_docs_" + ".pdf")
482480

483-
if os.path.isfile(curr_student + "_txts_" + ".pdf"):
484-
os.remove(curr_student + "_txts_" + ".pdf")
485481

486482

487483
def create_merged_pdfs(dir_name):

0 commit comments

Comments
 (0)