Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

Commit 2ea8c9b

Browse files
authored
Merge pull request #52 from tomprogrammer/refactor-set-edges
Refactor method `Table.set_edges` for readability
2 parents 713bfc0 + e222b8d commit 2ea8c9b

File tree

1 file changed

+39
-109
lines changed

1 file changed

+39
-109
lines changed

camelot/core.py

Lines changed: 39 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import numpy as np
99
import pandas as pd
1010

11-
1211
# minimum number of vertical textline intersections for a textedge
1312
# to be considered valid
1413
TEXTEDGE_REQUIRED_ELEMENTS = 4
@@ -398,125 +397,56 @@ def set_edges(self, vertical, horizontal, joint_tol=2):
398397
List of detected horizontal lines.
399398
400399
"""
400+
401+
def find_close_point(over, coord, joint_tol):
402+
for i, t in enumerate(over):
403+
if math.isclose(coord, t[0], abs_tol=joint_tol):
404+
return i
405+
return None
406+
401407
for v in vertical:
402408
# find closest x coord
403409
# iterate over y coords and find closest start and end points
404-
i = [
405-
i
406-
for i, t in enumerate(self.cols)
407-
if np.isclose(v[0], t[0], atol=joint_tol)
408-
]
409-
j = [
410-
j
411-
for j, t in enumerate(self.rows)
412-
if np.isclose(v[3], t[0], atol=joint_tol)
413-
]
414-
k = [
415-
k
416-
for k, t in enumerate(self.rows)
417-
if np.isclose(v[1], t[0], atol=joint_tol)
418-
]
419-
if not j:
410+
start = find_close_point(self.rows, v[3], joint_tol)
411+
if start is None:
420412
continue
421-
J = j[0]
422-
if i == [0]: # only left edge
423-
L = i[0]
424-
if k:
425-
K = k[0]
426-
while J < K:
427-
self.cells[J][L].left = True
428-
J += 1
429-
else:
430-
K = len(self.rows)
431-
while J < K:
432-
self.cells[J][L].left = True
433-
J += 1
434-
elif i == []: # only right edge
435-
L = len(self.cols) - 1
436-
if k:
437-
K = k[0]
438-
while J < K:
439-
self.cells[J][L].right = True
440-
J += 1
441-
else:
442-
K = len(self.rows)
443-
while J < K:
444-
self.cells[J][L].right = True
445-
J += 1
413+
end = find_close_point(self.rows, v[1], joint_tol)
414+
if end is None:
415+
end = len(self.rows)
416+
i = find_close_point(self.cols, v[0], joint_tol)
417+
if i is None: # only right edge
418+
i = len(self.cols) - 1
419+
for j in range(start, end):
420+
self.cells[j][i].right = True
421+
elif i == 0: # only left edge
422+
for j in range(start, end):
423+
self.cells[j][0].left = True
446424
else: # both left and right edges
447-
L = i[0]
448-
if k:
449-
K = k[0]
450-
while J < K:
451-
self.cells[J][L].left = True
452-
self.cells[J][L - 1].right = True
453-
J += 1
454-
else:
455-
K = len(self.rows)
456-
while J < K:
457-
self.cells[J][L].left = True
458-
self.cells[J][L - 1].right = True
459-
J += 1
425+
for j in range(start, end):
426+
self.cells[j][i].left = True
427+
self.cells[j][i - 1].right = True
460428

461429
for h in horizontal:
462430
# find closest y coord
463431
# iterate over x coords and find closest start and end points
464-
i = [
465-
i
466-
for i, t in enumerate(self.rows)
467-
if np.isclose(h[1], t[0], atol=joint_tol)
468-
]
469-
j = [
470-
j
471-
for j, t in enumerate(self.cols)
472-
if np.isclose(h[0], t[0], atol=joint_tol)
473-
]
474-
k = [
475-
k
476-
for k, t in enumerate(self.cols)
477-
if np.isclose(h[2], t[0], atol=joint_tol)
478-
]
479-
if not j:
432+
start = find_close_point(self.cols, h[0], joint_tol)
433+
if start is None:
480434
continue
481-
J = j[0]
482-
if i == [0]: # only top edge
483-
L = i[0]
484-
if k:
485-
K = k[0]
486-
while J < K:
487-
self.cells[L][J].top = True
488-
J += 1
489-
else:
490-
K = len(self.cols)
491-
while J < K:
492-
self.cells[L][J].top = True
493-
J += 1
494-
elif i == []: # only bottom edge
495-
L = len(self.rows) - 1
496-
if k:
497-
K = k[0]
498-
while J < K:
499-
self.cells[L][J].bottom = True
500-
J += 1
501-
else:
502-
K = len(self.cols)
503-
while J < K:
504-
self.cells[L][J].bottom = True
505-
J += 1
435+
end = find_close_point(self.cols, h[2], joint_tol)
436+
if end is None:
437+
end = len(self.cols)
438+
i = find_close_point(self.rows, h[1], joint_tol)
439+
if i is None: # only bottom edge
440+
i = len(self.rows) - 1
441+
for j in range(start, end):
442+
self.cells[i][j].bottom = True
443+
elif i == 0: # only top edge
444+
for j in range(start, end):
445+
self.cells[0][j].top = True
506446
else: # both top and bottom edges
507-
L = i[0]
508-
if k:
509-
K = k[0]
510-
while J < K:
511-
self.cells[L][J].top = True
512-
self.cells[L - 1][J].bottom = True
513-
J += 1
514-
else:
515-
K = len(self.cols)
516-
while J < K:
517-
self.cells[L][J].top = True
518-
self.cells[L - 1][J].bottom = True
519-
J += 1
447+
for j in range(start, end):
448+
self.cells[i][j].top = True
449+
self.cells[i - 1][j].bottom = True
520450

521451
return self
522452

0 commit comments

Comments (0)