4
4
import shutil # mainly for moving and deleting files
5
5
import subprocess # for running libreoffice in background
6
6
import functools # used for rotating images by metadata
7
- from docx2pdf import convert
8
7
from fpdf import FPDF # for creating pdf from images
9
8
from PIL import Image # for opening images
10
9
from PyPDF2 import PdfFileMerger # for merging pdfs
11
10
12
11
12
+
13
+
13
14
def remove_unnecassary_symbols (dirpath , filename ):
14
15
'''
15
16
Removes "." "-" and "_" from filename and returns new filename
@@ -110,20 +111,19 @@ def categorise_file(dirpath, filename, file_lists):
110
111
111
112
elif (file_to_conv_path .endswith (".doc" ) or
112
113
file_to_conv_path .endswith (".docx" ) or
113
- file_to_conv_path .endswith (".odt" )):
114
+ file_to_conv_path .endswith (".odt" ) or
115
+ file_to_conv_path .endswith (".txt" )):
114
116
115
117
doc_files .append (file_to_conv_path )
116
118
117
- elif file_to_conv_path .endswith (".txt" ):
118
-
119
- txt_files .append (file_to_conv_path )
120
-
121
119
else :
122
120
return False
123
121
124
122
return True
125
123
126
124
125
+
126
+
127
127
# Thanks to Roman Odaisky for this solution to why the images were
128
128
# sometimes rotated, even though all image viewers displayed them correctly.
129
129
# The problem is that when an image is taken sideways, the camera stores
@@ -189,30 +189,39 @@ def pic_to_pdf(pdf_filename, picture_files):
189
189
for picture_file in picture_files :
190
190
191
191
# Open picture and rotate it if stated in the picture's metadata
192
- cover = Image .open (str (picture_file ))
193
- cover_transposed = image_transpose_exif (cover )
194
- cover_transposed .save (str (picture_file ))
195
- width , height = cover_transposed .size
192
+ try :
193
+ cover = Image .open (str (picture_file ))
194
+ cover_transposed = image_transpose_exif (cover )
195
+ cover_transposed .save (str (picture_file ))
196
+ width , height = cover_transposed .size
196
197
197
- # create temporary pdf, with _imgpdf_ in filename
198
- pdf = FPDF (unit = "pt" , format = [width , height ])
199
- pdf .add_page ()
200
- pdf .image (str (picture_file ), 0 , 0 )
198
+ # create temporary pdf, with _imgpdf_ in filename
199
+ pdf = FPDF (unit = "pt" , format = [width , height ])
200
+ pdf .add_page ()
201
+ pdf .image (str (picture_file ), 0 , 0 )
201
202
202
- img_pdf_counter += 1
203
+ img_pdf_counter += 1
203
204
204
- pdf .output (pdf_filename + "_imgpdf_" +
205
- str (img_pdf_counter ) + ".pdf" , "F" )
205
+ pdf .output (pdf_filename + "_imgpdf_" +
206
+ str (img_pdf_counter ) + ".pdf" , "F" )
207
+
208
+ pdf_list .append (pdf_filename + "_imgpdf_" +
209
+ str (img_pdf_counter ) + ".pdf" )
210
+ except Image .UnidentifiedImageError :
211
+ print ("Can't open image: " +
212
+ os .path .basename (pdf_filename ) +
213
+ "\\ " + os .path .basename (picture_file ) + " : " +
214
+ "May be damaged or not correctly converted." )
215
+ move_file_to_folder (picture_file , pdf_filename )
206
216
207
- pdf_list .append (pdf_filename + "_imgpdf_" +
208
- str (img_pdf_counter ) + ".pdf" )
209
217
210
218
# merge all _imgpdf_ files
211
- img_merger = PdfFileMerger ()
212
- for pdf in pdf_list :
213
- img_merger .append (pdf )
214
- img_merger .write (pdf_filename + "_img_" + ".pdf" )
215
- img_merger .close ()
219
+ if len (pdf_list ) > 0 :
220
+ img_merger = PdfFileMerger (strict = False )
221
+ for pdf in pdf_list :
222
+ img_merger .append (pdf )
223
+ img_merger .write (pdf_filename + "_img_" + ".pdf" )
224
+ img_merger .close ()
216
225
217
226
# remove all _imgpdf_ Files
218
227
for pdf in pdf_list :
@@ -252,6 +261,7 @@ def check_libreoffice_install():
252
261
return libreoffice_path
253
262
254
263
264
+
255
265
def doc_to_pdf (pdf_filename , doc_files ):
256
266
'''
257
267
Converts a list of odt, doc and docx Files into one pdf-File that
@@ -279,14 +289,13 @@ def doc_to_pdf(pdf_filename, doc_files):
279
289
280
290
os .chdir (os .getcwd () + "\\ " + os .path .dirname (doc_file ))
281
291
282
-
283
-
284
292
if not check_libreoffice_install () == "" :
285
293
294
+
286
295
libreoffice = subprocess .Popen (check_libreoffice_install () +
287
296
" --convert-to " + str (doc_counter ) +
288
- "py.pdf " + os .path .basename (doc_file ))
289
- libreoffice .wait ()
297
+ "py.pdf " + " \" " + os .path .basename (doc_file )+ " \" " )
298
+ test = libreoffice .wait ()
290
299
291
300
converted_docs .append (os .path .dirname (doc_file ) + "\\ " +
292
301
os .path .splitext (os .path .basename (doc_file ))[0 ] + "." +
@@ -297,19 +306,26 @@ def doc_to_pdf(pdf_filename, doc_files):
297
306
298
307
# Should convert doc, docx and odt with MS Word. NOT TESTED!
299
308
else :
309
+ new_name = (os .path .dirname (doc_file ) + "\\ " +
310
+ os .path .splitext (os .path .basename (doc_file ))[0 ] +
311
+ "." + str (doc_counter ) + "py.pdf" )
312
+
300
313
try :
301
- (convert (doc_file ,
302
- os .path .splitext (os .path .basename (doc_file ))[0 ] + "." +
303
- str (doc_counter ) + "py.pdf" ))
304
-
314
+ word = os .client .DispatchEx ("Word.Application" )
315
+ worddoc = word .Documents .Open (doc_file )
316
+ worddoc .SaveAs (new_name , FileFormat = 17 )
305
317
converted_docs .append (os .path .dirname (doc_file ) + "\\ " +
306
318
os .path .splitext (os .path .basename (doc_file ))[0 ] + "." +
307
319
str (doc_counter ) + "py.pdf" )
308
-
309
320
doc_counter += 1
321
+ worddoc .Close ()
322
+ word .Quit ()
310
323
311
- except Exception :
312
- print ("No Office installed" )
324
+ except Exception as e :
325
+ os .chdir (base_dir )
326
+ print ("No Office installed. Please install LibreOffice!" )
327
+
328
+ move_file_to_folder (doc_file , pdf_filename )
313
329
314
330
os .chdir (base_dir )
315
331
@@ -318,7 +334,7 @@ def doc_to_pdf(pdf_filename, doc_files):
318
334
319
335
if not len (converted_docs ) == 0 :
320
336
# merge all created pdfs into one pdf
321
- doc_merger = PdfFileMerger ()
337
+ doc_merger = PdfFileMerger (strict = False )
322
338
for converted_doc in converted_docs :
323
339
doc_merger .append (converted_doc )
324
340
@@ -392,7 +408,7 @@ def merge_files_per_category(curr_student, categorized_files):
392
408
393
409
pdf_files .sort ()
394
410
395
- pdf_merger = PdfFileMerger ()
411
+ pdf_merger = PdfFileMerger (strict = False )
396
412
for pdf in pdf_files :
397
413
pdf_merger .append (pdf )
398
414
@@ -404,22 +420,7 @@ def merge_files_per_category(curr_student, categorized_files):
404
420
doc_files .sort ()
405
421
406
422
doc_to_pdf (curr_student , doc_files )
407
-
408
- if len (txt_files ) != 0 :
409
-
410
- txt_files .sort ()
411
-
412
- pdf = FPDF ()
413
-
414
- for txt_file in txt_files :
415
- pdf .add_page ()
416
- pdf .set_font ("Arial" , size = 15 )
417
- f = open (txt_file , "r" )
418
- for x in f :
419
- pdf .cell (200 , 10 , txt = x , ln = 1 , align = 'L' )
420
-
421
-
422
- pdf .output (curr_student + "_txts_" + ".pdf" )
423
+
423
424
424
425
425
426
@@ -443,7 +444,7 @@ def merge_categories(curr_student):
443
444
(os .path .basename (curr_student )).split (" " )[0 ])
444
445
445
446
446
- fullmerger = PdfFileMerger ()
447
+ fullmerger = PdfFileMerger (strict = False )
447
448
448
449
at_least_one_file = False
449
450
# add existing merged file format specific pdfs to merger
@@ -459,9 +460,6 @@ def merge_categories(curr_student):
459
460
fullmerger .append (curr_student + "_docs_" + ".pdf" )
460
461
at_least_one_file = True
461
462
462
- if os .path .isfile (curr_student + "_txts_" + ".pdf" ):
463
- fullmerger .append (curr_student + "_txts_" + ".pdf" )
464
- at_least_one_file = True
465
463
466
464
if at_least_one_file :
467
465
# merge pdfs and create pdf with reversed name
@@ -480,8 +478,6 @@ def merge_categories(curr_student):
480
478
if os .path .isfile (curr_student + "_docs_" + ".pdf" ):
481
479
os .remove (curr_student + "_docs_" + ".pdf" )
482
480
483
- if os .path .isfile (curr_student + "_txts_" + ".pdf" ):
484
- os .remove (curr_student + "_txts_" + ".pdf" )
485
481
486
482
487
483
def create_merged_pdfs (dir_name ):
0 commit comments