diff --git a/paddleocr.py b/paddleocr.py index 95e316fb1eb..d03a6932e27 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -559,8 +559,9 @@ def check_img(img, alpha_color=(255, 255, 255)): file format: jpg, png and other image formats that opencv can decode, as well as gif and pdf formats storage type: binary image, net image file, local image file alpha_color: Background color in images in RGBA format - return: numpy.array (h, w, 3) + return: numpy.array (h, w, 3) or list (p, h, w, 3) (p: page of pdf), boolean, boolean """ + flag_gif, flag_pdf = False, False if isinstance(img, bytes): img = img_decode(img) if isinstance(img, str): @@ -589,17 +590,17 @@ def check_img(img, alpha_color=(255, 255, 255)): img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) except: logger.error("error in loading image:{}".format(image_file)) - return None + return None, flag_gif, flag_pdf if img is None: logger.error("error in loading image:{}".format(image_file)) - return None + return None, flag_gif, flag_pdf # single channel image array.shape:h,w if isinstance(img, np.ndarray) and len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) # four channel image array.shape:h,w,c if isinstance(img, np.ndarray) and len(img.shape) == 3 and img.shape[2] == 4: img = alpha_to_color(img, alpha_color) - return img + return img, flag_gif, flag_pdf class PaddleOCR(predict_system.TextSystem): @@ -700,9 +701,9 @@ def ocr( "Since the angle classifier is not initialized, it will not be used during the forward process" ) - img = check_img(img, alpha_color) + img, flag_gif, flag_pdf = check_img(img, alpha_color) # for infer pdf file - if isinstance(img, list): + if isinstance(img, list) and flag_pdf: if self.page_num > len(img) or self.page_num == 0: imgs = img else: @@ -837,7 +838,16 @@ def __call__( img_idx=0, alpha_color=(255, 255, 255), ): - img = check_img(img, alpha_color) + img, flag_gif, flag_pdf = check_img(img, alpha_color) + if isinstance(img, list) and flag_pdf: + res_list = [] + for index, pdf_img in enumerate(img): + logger.info("processing {}/{} page:".format(index + 1, len(img))) + res, _ = super().__call__( + pdf_img, return_ocr_result_in_table, img_idx=index + ) + res_list.append(res) + return res_list res, _ = super().__call__(img, return_ocr_result_in_table, img_idx=img_idx) return res diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md index 287b0d13c76..a24f0b08405 100644 --- a/ppstructure/docs/quickstart.md +++ b/ppstructure/docs/quickstart.md @@ -209,6 +209,62 @@ for line in result: print(line) ``` +```python +import os +import cv2 +from paddleocr import PPStructure,save_structure_res + +ocr_engine = PPStructure(table=False, ocr=True, show_log=True) + +save_folder = './output' +img_path = 'ppstructure/recovery/UnrealText.pdf' +result = ocr_engine(img_path) +for index, res in enumerate(result): + save_structure_res(res, save_folder, os.path.basename(img_path).split('.')[0], index) + +for res in result: + for line in res: + line.pop('img') + print(line) +``` + +```python +import os +import cv2 +import numpy as np +from paddleocr import PPStructure,save_structure_res +from paddle.utils import try_import +from PIL import Image + +ocr_engine = PPStructure(table=False, ocr=True, show_log=True) + +save_folder = './output' +img_path = 'ppstructure/recovery/UnrealText.pdf' + +fitz = try_import("fitz") +imgs = [] +with fitz.open(img_path) as pdf: + for pg in range(0, pdf.page_count): + page = pdf[pg] + mat = fitz.Matrix(2, 2) + pm = page.get_pixmap(matrix=mat, alpha=False) + + # if width or height > 2000 pixels, don't enlarge the image + if pm.width > 2000 or pm.height > 2000: + pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False) + + img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) + img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) + imgs.append(img) + +for index, img in enumerate(imgs): + result = ocr_engine(img) + save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0], index) + for line in result: + line.pop('img') + print(line) +``` + #### 2.2.4 表格识别 diff --git a/ppstructure/docs/quickstart_en.md b/ppstructure/docs/quickstart_en.md index 5d0cd24b3f1..672d34470e8 100644 --- a/ppstructure/docs/quickstart_en.md +++ b/ppstructure/docs/quickstart_en.md @@ -192,6 +192,62 @@ for line in result: print(line) ``` +```python +import os +import cv2 +from paddleocr import PPStructure,save_structure_res + +ocr_engine = PPStructure(table=False, ocr=True, show_log=True) + +save_folder = './output' +img_path = 'ppstructure/recovery/UnrealText.pdf' +result = ocr_engine(img_path) +for index, res in enumerate(result): + save_structure_res(res, save_folder, os.path.basename(img_path).split('.')[0], index) + +for res in result: + for line in res: + line.pop('img') + print(line) +``` + +```python +import os +import cv2 +import numpy as np +from paddleocr import PPStructure,save_structure_res +from paddle.utils import try_import +from PIL import Image + +ocr_engine = PPStructure(table=False, ocr=True, show_log=True) + +save_folder = './output' +img_path = 'ppstructure/recovery/UnrealText.pdf' + +fitz = try_import("fitz") +imgs = [] +with fitz.open(img_path) as pdf: + for pg in range(0, pdf.page_count): + page = pdf[pg] + mat = fitz.Matrix(2, 2) + pm = page.get_pixmap(matrix=mat, alpha=False) + + # if width or height > 2000 pixels, don't enlarge the image + if pm.width > 2000 or pm.height > 2000: + pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False) + + img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) + img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) + imgs.append(img) + +for index, img in enumerate(imgs): + result = ocr_engine(img) + save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0], index) + for line in result: + line.pop('img') + print(line) +``` + #### 2.2.4 table recognition