Skip to content

Commit 30f3727

Browse files
committed
feat(rapidocr): optim to_markdown
1 parent a4c1afc commit 30f3727

File tree

4 files changed

+109
-184
lines changed

4 files changed

+109
-184
lines changed

python/demo.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55

66
engine = RapidOCR()
77

8-
img_url = "https://img1.baidu.com/it/u=3619974146,1266987475&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=516"
9-
result = engine(img_url, return_word_box=True, return_single_char_box=True)
8+
img_url = "tests/test_files/ch_en_num.jpg"
9+
result = engine(img_url)
1010
print(result)
1111

1212
result.vis("vis_result.jpg")
13+
print(result.to_markdown())

python/rapidocr/utils/output.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import numpy as np
88

99
from .logger import Logger
10+
from .to_markdown import ToMarkdown
1011
from .utils import save_img
1112
from .vis_res import VisRes
1213

@@ -37,6 +38,9 @@ def __len__(self):
3738
def to_json(self):
3839
pass
3940

41+
def to_markdown(self) -> str:
42+
return ToMarkdown.to(self.boxes, self.txts)
43+
4044
def vis(self, save_path: Optional[str] = None, font_path: Optional[str] = None):
4145
if self.img is None or self.boxes is None:
4246
logger.warning("No image or boxes to visualize.")

python/rapidocr/utils/recon2md.py

Lines changed: 0 additions & 182 deletions
This file was deleted.

python/rapidocr/utils/to_markdown.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# -*- encoding: utf-8 -*-
2+
import numpy as np
3+
4+
5+
class ToMarkdown:
    """Rebuild an approximate reading-order text layout from OCR boxes.

    Boxes are grouped into visual lines using their vertical geometry, each
    line is emitted left-to-right, and a blank line is inserted where the
    vertical gap between consecutive lines suggests a new paragraph.
    """

    # Two boxes share a line when their vertical centers differ by less than
    # this fraction of the smaller box height.
    SAME_LINE_CENTER_RATIO = 0.5
    # A blank line (new paragraph) is inserted when the gap to the previous
    # box exceeds this fraction of the previous box height.
    PARAGRAPH_GAP_RATIO = 0.7
    # Separator placed between the texts of boxes that share a line.
    WORD_SEPARATOR = " "

    @classmethod
    def to(cls, boxes, txts) -> str:
        """Convert OCR boxes and their texts into layout-preserving text.

        Args:
            boxes: Iterable of quadrilaterals, each convertible to an array of
                four ``[x, y]`` points. May be ``None``.
            txts: Iterable of recognized strings, paired positionally with
                ``boxes``. May be ``None``.

        Returns:
            str: The texts joined into lines (``"\\n"``-separated), with an
            empty line between paragraphs. If either argument is ``None`` a
            fixed "no text detected" message is returned; if there are no
            box/text pairs the result is an empty string.
        """
        if boxes is None or txts is None:
            return "没有检测到任何文本。"

        # Compute the geometry once per box instead of on every comparison.
        items = [
            (cls.get_box_properties(box), text) for box, text in zip(boxes, txts)
        ]
        if not items:
            return ""

        # Primary key: top edge (y); secondary key: left edge (x).
        items.sort(key=lambda item: (item[0]["top"], item[0]["left"]))

        output_lines = []
        prev_props, first_text = items[0]
        # Each entry is (left, text) so the line can be ordered by x later.
        current_line = [(prev_props["left"], first_text)]

        for current_props, text in items[1:]:
            # Condition 1: the vertical centers are close enough.
            min_height = min(current_props["height"], prev_props["height"])
            center_distance = abs(current_props["center_y"] - prev_props["center_y"])
            centers_are_close = center_distance < min_height * cls.SAME_LINE_CENTER_RATIO

            # Condition 2: the two boxes overlap vertically.
            overlap_top = max(prev_props["top"], current_props["top"])
            overlap_bottom = min(prev_props["bottom"], current_props["bottom"])
            has_vertical_overlap = overlap_bottom > overlap_top

            # Either condition is enough to keep the box on the current line.
            if centers_are_close or has_vertical_overlap:
                current_line.append((current_props["left"], text))
            else:
                # Flush the finished line before starting a new one.
                output_lines.append(cls._join_line(current_line))

                # A large vertical gap is treated as a paragraph break.
                vertical_gap = current_props["top"] - prev_props["bottom"]
                if vertical_gap > prev_props["height"] * cls.PARAGRAPH_GAP_RATIO:
                    output_lines.append("")

                current_line = [(current_props["left"], text)]

            prev_props = current_props

        # Flush the last line.
        output_lines.append(cls._join_line(current_line))

        return "\n".join(output_lines)

    @classmethod
    def _join_line(cls, parts) -> str:
        """Join the ``(left, text)`` pairs of one line in left-to-right order.

        Boxes reach this point ordered by their top edge, so two boxes on the
        same visual line can arrive right-to-left when their tops differ by a
        pixel or two. Sorting by the left edge restores reading order; the
        sort is stable, so equal left edges keep their top-first order.
        """
        ordered = sorted(parts, key=lambda part: part[0])
        return cls.WORD_SEPARATOR.join(text for _, text in ordered)

    @staticmethod
    def get_box_properties(box: np.ndarray) -> dict:
        """Compute the geometric properties of a box from its corner points.

        Args:
            box: Array-like of ``[x, y]`` points, e.g. shape ``(4, 2)``:
                ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``.

        Returns:
            dict: ``top``, ``bottom`` and ``left`` (extreme coordinates),
            ``height`` (``bottom - top``) and ``center_y`` (vertical middle).
        """
        # Accept nested lists as well as arrays, and work in float so that
        # differences between unsigned-integer coordinates cannot wrap around.
        points = np.asarray(box, dtype=np.float64)
        ys = points[:, 1]
        xs = points[:, 0]

        top = np.min(ys)
        bottom = np.max(ys)
        left = np.min(xs)

        return {
            "top": top,
            "bottom": bottom,
            "left": left,
            "height": bottom - top,
            "center_y": top + (bottom - top) / 2,
        }

0 commit comments

Comments
 (0)