You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I am using layoutparser to detect tables and images.
When I run the code on an individual PNG image file, the model detects tables and figures correctly.
However, when I use the code below to convert a PDF into page images and detect tables on each page image, I either do not get the full image/table or sometimes get duplicate tables as well.
Can you please advise how to refine the code below and what I can try to resolve this issue? Thank you!
def save_detections(table_blocks, image, image_name, save_dir='/content/'):
    """Crop each detected table out of a page image, display it, and save it.

    Args:
        table_blocks: Iterable of layout blocks. Each element must expose
            ``block.x_1``, ``block.y_1``, ``block.x_2`` and ``block.y_2``.
        image: Page image as an array indexed ``[row, column, ...]``.
        image_name: Base name (without extension) used for the output files.
        save_dir: Prefix prepended to ``<image_name>_<index>.jpg``; it is
            concatenated as-is, so it should end with a path separator.
    """
    height, width = image.shape[:2]
    for j, table in enumerate(table_blocks):
        # Read the coordinates of the j-th block. Indexing element 0 on every
        # iteration would save the first table once per detection.
        box = table.block
        # Clamp to the image bounds: a negative start index would wrap around
        # in array slicing and produce a wrong or empty crop.
        x_1 = max(int(box.x_1), 0)
        y_1 = max(int(box.y_1), 0)
        x_2 = min(int(box.x_2), width)
        y_2 = min(int(box.y_2), height)
        file_name = image_name + '_' + str(j) + '.jpg'
        if x_2 <= x_1 or y_2 <= y_1:
            # Nothing left to crop after clamping; skip instead of handing an
            # empty array to the display/write calls.
            print("Skipped empty crop ", file_name)
            continue
        cropped = image[y_1:y_2, x_1:x_2]
        cv2_imshow(cropped)
        status = cv2.imwrite(save_dir + file_name, cropped)
        if status:
            print("Saved ", file_name)
def inference(images_dir):
    """Detect tables on every ``.jpg`` image in ``images_dir``.

    For each image the layout model is run, blocks typed "Table" that are not
    contained in a "Figure" block are kept, ordered left column first and then
    right column (top to bottom within each), given sequential ids, and
    cropped/saved through ``save_detections``.

    Args:
        images_dir: Directory containing the page images.

    Returns:
        list: One ``lp.Layout`` of table blocks per processed image, in
        natural file-name order (``page_2`` before ``page_10``).
    """
    import re

    def _natural_key(name):
        # Split into alternating text / digit runs so numbered pages sort
        # numerically rather than lexicographically.
        return [int(part) if part.isdigit() else part
                for part in re.split(r'(\d+)', name)]

    table_blocks_list = []
    # os.listdir gives no ordering guarantee; sort so results follow page order.
    for file in sorted(os.listdir(images_dir), key=_natural_key):
        if not file.endswith(".jpg"):
            continue
        # Extract the image name (excluding the extension)
        image_name = os.path.splitext(file)[0]

        # cv2.imread signals an unreadable file by returning None, not raising.
        image_bgr = cv2.imread(os.path.join(images_dir, file))
        if image_bgr is None:
            print("Could not read ", file)
            continue
        # OpenCV reads images in BGR format; the model is given RGB.
        image = image_bgr[..., ::-1]

        # Running inference
        layout = model.detect(image)

        # Keep tables that are not nested inside a detected figure.
        table_blocks = lp.Layout([b for b in layout if b.type == "Table"])
        figure_blocks = lp.Layout([b for b in layout if b.type == 'Figure'])
        table_blocks = lp.Layout(
            [b for b in table_blocks
             if not any(b.is_in(b_fig) for b_fig in figure_blocks)]
        )

        # Blocks whose centre lies in the left ~half of the page form the left
        # column; everything else is treated as the right column.
        w = image.shape[1]
        left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image)
        left_layout = table_blocks.filter_by(left_interval, center=True)
        right_blocks = [b for b in table_blocks if b not in left_layout]

        # Use sorted() rather than Layout.sort(): in recent layoutparser
        # releases Layout.sort returns a new object unless inplace=True, so
        # calling it and ignoring the result leaves the blocks unsorted.
        left_blocks = sorted(left_layout, key=lambda b: b.coordinates[1])
        right_blocks = sorted(right_blocks, key=lambda b: b.coordinates[1])

        # Combine the two columns and assign ids in reading order.
        table_blocks = lp.Layout(
            [b.set(id=idx) for idx, b in enumerate(left_blocks + right_blocks)]
        )

        # Save crops from the BGR image: cv2.imwrite expects BGR, so passing
        # the RGB view would swap the red and blue channels in the output.
        save_detections(table_blocks, image_bgr, image_name)
        table_blocks_list.append(table_blocks)
    return table_blocks_list
def pdf_inference(pdfName, dpi=100, grayscale=True):
    """Render each page of a PDF to a JPEG and run table detection on them.

    The page images are written to a ``pdf_images`` directory under the
    current working directory; any existing directory of that name is deleted
    first.

    Args:
        pdfName: PDF path, resolved against the current working directory
            (an absolute path is used unchanged).
        dpi: Rendering resolution passed to ``convert_from_path``.
            NOTE(review): 100 is fairly low; a higher value may help the
            detector find complete tables - confirm on real documents.
        grayscale: Whether pages are rendered in grayscale.

    Returns:
        The list returned by ``inference`` for the rendered pages.
    """
    # Get the current working directory
    path = os.getcwd()
    # Construct the full path to the PDF file
    PDF_file = os.path.join(path, pdfName)

    # Recreate the directory that stores the converted images so pages from a
    # previous run cannot leak into this one.
    images_dir = os.path.join(path, 'pdf_images')
    if os.path.exists(images_dir):
        shutil.rmtree(images_dir)
    os.mkdir(images_dir)

    # Convert each page of the PDF to an image
    pages = convert_from_path(PDF_file, dpi=dpi, grayscale=grayscale)
    for image_counter, page in enumerate(pages, start=1):
        filename = "page_" + str(image_counter) + ".jpg"
        # Save the page as a JPEG image in the 'pdf_images' directory
        page.save(os.path.join(images_dir, filename), 'JPEG')

    # Run inference on the images and hand the detections back to the caller
    # (previously the result was computed and then discarded).
    return inference(images_dir)
# Run the pipeline on a sample PDF; the name is resolved against the current working directory.
test = pdf_inference('abc-Datasheet.pdf')
Thanks
Reema Jain
The text was updated successfully, but these errors were encountered:
Hi Team,
I am using layoutparser to detect tables and images.
When I run the code on an individual PNG image file, the model detects tables and figures correctly.
However, when I use the code below to convert a PDF into page images and detect tables on each page image, I either do not get the full image/table or sometimes get duplicate tables as well.
Can you please advise how to refine the code below and what I can try to resolve this issue? Thank you!
!pip install layoutparser
!pip install opencv-python numpy matplotlib
# install detectron2:
!pip install 'git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2'
!pip3 install pdf2image
!sudo apt install build-essential libpoppler-cpp-dev pkg-config python3-dev
!apt-get install poppler-utils
import os
from pdf2image import convert_from_path
import shutil
import cv2
import layoutparser as lp
# PubLayNet
# Layout detector loaded from the PubLayNet Faster R-CNN config. Detections
# scoring below 0.81 are dropped, and class ids are mapped to readable names.
model = lp.models.Detectron2LayoutModel(
    'lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config',
    extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.81],
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
)
def save_detections(table_blocks, image, image_name, save_dir='/content/'):
    """Crop each detected table out of a page image, display it, and save it.

    Args:
        table_blocks: Iterable of layout blocks. Each element must expose
            ``block.x_1``, ``block.y_1``, ``block.x_2`` and ``block.y_2``.
        image: Page image as an array indexed ``[row, column, ...]``.
        image_name: Base name (without extension) used for the output files.
        save_dir: Prefix prepended to ``<image_name>_<index>.jpg``; it is
            concatenated as-is, so it should end with a path separator.
    """
    height, width = image.shape[:2]
    for j, table in enumerate(table_blocks):
        # Read the coordinates of the j-th block. Indexing element 0 on every
        # iteration would save the first table once per detection.
        box = table.block
        # Clamp to the image bounds: a negative start index would wrap around
        # in array slicing and produce a wrong or empty crop.
        x_1 = max(int(box.x_1), 0)
        y_1 = max(int(box.y_1), 0)
        x_2 = min(int(box.x_2), width)
        y_2 = min(int(box.y_2), height)
        file_name = image_name + '_' + str(j) + '.jpg'
        if x_2 <= x_1 or y_2 <= y_1:
            # Nothing left to crop after clamping; skip instead of handing an
            # empty array to the display/write calls.
            print("Skipped empty crop ", file_name)
            continue
        cropped = image[y_1:y_2, x_1:x_2]
        cv2_imshow(cropped)
        status = cv2.imwrite(save_dir + file_name, cropped)
        if status:
            print("Saved ", file_name)
def inference(images_dir):
    """Run the layout model on each ``.jpg`` image found in ``images_dir``.

    NOTE(review): this copy of the function stops after detection; the
    detections are neither stored nor returned, so the call yields ``None``.

    Args:
        images_dir: Directory containing the page images.
    """
    table_blocks_list = []
    # Lazily pick out the JPEG entries of the directory.
    jpg_files = (entry for entry in os.listdir(images_dir)
                 if entry.endswith(".jpg"))
    for file in jpg_files:
        # File name without its four-character ".jpg" suffix.
        image_name = file[:-4]
        # Load with OpenCV (BGR channel order) ...
        bgr_pixels = cv2.imread(images_dir + '/' + file)
        # ... and reverse the channel axis to obtain RGB.
        image = bgr_pixels[..., ::-1]
        # Run layout detection on the page.
        layout = model.detect(image)
def pdf_inference(pdfName):
    """Prepare an empty ``pdf_images`` directory under the working directory.

    NOTE(review): this copy of the function ends after the directory is
    recreated; no pages are rendered and nothing is returned.

    Args:
        pdfName: PDF file name, joined onto the current working directory.
    """
    # Base everything on the current working directory.
    path = os.getcwd()
    # Full path of the PDF to process.
    PDF_file = path + "/" + pdfName
    # Start from a clean output directory: remove a previous one, then create it.
    images_dir = path + '/pdf_images'
    if os.path.exists(images_dir):
        shutil.rmtree(images_dir)
    os.mkdir(images_dir)
# Run the pipeline on a sample PDF; the name is resolved against the current working directory.
test = pdf_inference('abc-Datasheet.pdf')
Thanks
Reema Jain
The text was updated successfully, but these errors were encountered: