diff --git a/paddleocr.py b/paddleocr.py
index 95e316fb1eb..d03a6932e27 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -559,8 +559,9 @@ def check_img(img, alpha_color=(255, 255, 255)):
file format: jpg, png and other image formats that opencv can decode, as well as gif and pdf formats
storage type: binary image, net image file, local image file
alpha_color: Background color in images in RGBA format
- return: numpy.array (h, w, 3)
+ return: tuple (img, flag_gif, flag_pdf): img is a numpy.array (h, w, 3), a list (p, h, w, 3) (p: page of pdf), or None if loading failed; flag_gif and flag_pdf are booleans
"""
+ flag_gif, flag_pdf = False, False
if isinstance(img, bytes):
img = img_decode(img)
if isinstance(img, str):
@@ -589,17 +590,17 @@ def check_img(img, alpha_color=(255, 255, 255)):
img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
except:
logger.error("error in loading image:{}".format(image_file))
- return None
+ return None, flag_gif, flag_pdf
if img is None:
logger.error("error in loading image:{}".format(image_file))
- return None
+ return None, flag_gif, flag_pdf
# single channel image array.shape:h,w
if isinstance(img, np.ndarray) and len(img.shape) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# four channel image array.shape:h,w,c
if isinstance(img, np.ndarray) and len(img.shape) == 3 and img.shape[2] == 4:
img = alpha_to_color(img, alpha_color)
- return img
+ return img, flag_gif, flag_pdf
class PaddleOCR(predict_system.TextSystem):
@@ -700,9 +701,9 @@ def ocr(
"Since the angle classifier is not initialized, it will not be used during the forward process"
)
- img = check_img(img, alpha_color)
+ img, flag_gif, flag_pdf = check_img(img, alpha_color)
# for infer pdf file
- if isinstance(img, list):
+ if isinstance(img, list) and flag_pdf:
if self.page_num > len(img) or self.page_num == 0:
imgs = img
else:
@@ -837,7 +838,16 @@ def __call__(
img_idx=0,
alpha_color=(255, 255, 255),
):
- img = check_img(img, alpha_color)
+ img, flag_gif, flag_pdf = check_img(img, alpha_color)
+ if isinstance(img, list) and flag_pdf:
+ res_list = []
+ for index, pdf_img in enumerate(img):
+ logger.info("processing {}/{} page:".format(index + 1, len(img)))
+ res, _ = super().__call__(
+ pdf_img, return_ocr_result_in_table, img_idx=index
+ )
+ res_list.append(res)
+ return res_list
res, _ = super().__call__(img, return_ocr_result_in_table, img_idx=img_idx)
return res
diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md
index 287b0d13c76..a24f0b08405 100644
--- a/ppstructure/docs/quickstart.md
+++ b/ppstructure/docs/quickstart.md
@@ -209,6 +209,62 @@ for line in result:
print(line)
```
+```python
+import os
+import cv2
+from paddleocr import PPStructure,save_structure_res
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+result = ocr_engine(img_path)  # PDF path input: the result is iterated below as one entry per page
+for index, res in enumerate(result):
+ save_structure_res(res, save_folder, os.path.basename(img_path).split('.')[0], index)  # passes the file stem and the page index
+
+for res in result:
+ for line in res:
+ line.pop('img')  # remove the 'img' entry before printing
+ print(line)
+```
+
+```python
+import os
+import cv2
+import numpy as np
+from paddleocr import PPStructure,save_structure_res
+from paddle.utils import try_import
+from PIL import Image
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+
+fitz = try_import("fitz")  # fitz is the import name of PyMuPDF
+imgs = []  # one BGR array per PDF page
+with fitz.open(img_path) as pdf:
+ for pg in range(0, pdf.page_count):
+ page = pdf[pg]
+ mat = fitz.Matrix(2, 2)  # first render attempt uses a 2x scale on both axes
+ pm = page.get_pixmap(matrix=mat, alpha=False)
+
+ # if width or height > 2000 pixels, don't enlarge the image
+ if pm.width > 2000 or pm.height > 2000:
+ pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)  # re-render at 1x scale
+
+ img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)  # build an RGB PIL image from the pixmap bytes
+ img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)  # convert to a numpy array in BGR channel order
+ imgs.append(img)
+
+for index, img in enumerate(imgs):
+ result = ocr_engine(img)  # one engine call per page image
+ save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0], index)  # passes the file stem and the page index
+ for line in result:
+ line.pop('img')  # remove the 'img' entry before printing
+ print(line)
+```
+
#### 2.2.4 表格识别
diff --git a/ppstructure/docs/quickstart_en.md b/ppstructure/docs/quickstart_en.md
index 5d0cd24b3f1..672d34470e8 100644
--- a/ppstructure/docs/quickstart_en.md
+++ b/ppstructure/docs/quickstart_en.md
@@ -192,6 +192,62 @@ for line in result:
print(line)
```
+```python
+import os
+import cv2
+from paddleocr import PPStructure,save_structure_res
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+result = ocr_engine(img_path)  # PDF path input: the result is iterated below as one entry per page
+for index, res in enumerate(result):
+ save_structure_res(res, save_folder, os.path.basename(img_path).split('.')[0], index)  # passes the file stem and the page index
+
+for res in result:
+ for line in res:
+ line.pop('img')  # remove the 'img' entry before printing
+ print(line)
+```
+
+```python
+import os
+import cv2
+import numpy as np
+from paddleocr import PPStructure,save_structure_res
+from paddle.utils import try_import
+from PIL import Image
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+
+fitz = try_import("fitz")  # fitz is the import name of PyMuPDF
+imgs = []  # one BGR array per PDF page
+with fitz.open(img_path) as pdf:
+ for pg in range(0, pdf.page_count):
+ page = pdf[pg]
+ mat = fitz.Matrix(2, 2)  # first render attempt uses a 2x scale on both axes
+ pm = page.get_pixmap(matrix=mat, alpha=False)
+
+ # if width or height > 2000 pixels, don't enlarge the image
+ if pm.width > 2000 or pm.height > 2000:
+ pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)  # re-render at 1x scale
+
+ img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)  # build an RGB PIL image from the pixmap bytes
+ img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)  # convert to a numpy array in BGR channel order
+ imgs.append(img)
+
+for index, img in enumerate(imgs):
+ result = ocr_engine(img)  # one engine call per page image
+ save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0], index)  # passes the file stem and the page index
+ for line in result:
+ line.pop('img')  # remove the 'img' entry before printing
+ print(line)
+```
+
#### 2.2.4 table recognition