|
8 | 8 | import numpy as np |
9 | 9 | import pandas as pd |
10 | 10 |
|
11 | | - |
12 | 11 | # minimum number of vertical textline intersections for a textedge |
13 | 12 | # to be considered valid |
14 | 13 | TEXTEDGE_REQUIRED_ELEMENTS = 4 |
@@ -398,125 +397,56 @@ def set_edges(self, vertical, horizontal, joint_tol=2): |
398 | 397 | List of detected horizontal lines. |
399 | 398 |
|
400 | 399 | """ |
| 400 | + |
| 401 | + def find_close_point(over, coord, joint_tol): |
| 402 | + for i, t in enumerate(over): |
| 403 | + if math.isclose(coord, t[0], abs_tol=joint_tol): |
| 404 | + return i |
| 405 | + return None |
| 406 | + |
401 | 407 | for v in vertical: |
402 | 408 | # find closest x coord |
403 | 409 | # iterate over y coords and find closest start and end points |
404 | | - i = [ |
405 | | - i |
406 | | - for i, t in enumerate(self.cols) |
407 | | - if np.isclose(v[0], t[0], atol=joint_tol) |
408 | | - ] |
409 | | - j = [ |
410 | | - j |
411 | | - for j, t in enumerate(self.rows) |
412 | | - if np.isclose(v[3], t[0], atol=joint_tol) |
413 | | - ] |
414 | | - k = [ |
415 | | - k |
416 | | - for k, t in enumerate(self.rows) |
417 | | - if np.isclose(v[1], t[0], atol=joint_tol) |
418 | | - ] |
419 | | - if not j: |
| 410 | + start = find_close_point(self.rows, v[3], joint_tol) |
| 411 | + if start is None: |
420 | 412 | continue |
421 | | - J = j[0] |
422 | | - if i == [0]: # only left edge |
423 | | - L = i[0] |
424 | | - if k: |
425 | | - K = k[0] |
426 | | - while J < K: |
427 | | - self.cells[J][L].left = True |
428 | | - J += 1 |
429 | | - else: |
430 | | - K = len(self.rows) |
431 | | - while J < K: |
432 | | - self.cells[J][L].left = True |
433 | | - J += 1 |
434 | | - elif i == []: # only right edge |
435 | | - L = len(self.cols) - 1 |
436 | | - if k: |
437 | | - K = k[0] |
438 | | - while J < K: |
439 | | - self.cells[J][L].right = True |
440 | | - J += 1 |
441 | | - else: |
442 | | - K = len(self.rows) |
443 | | - while J < K: |
444 | | - self.cells[J][L].right = True |
445 | | - J += 1 |
| 413 | + end = find_close_point(self.rows, v[1], joint_tol) |
| 414 | + if end is None: |
| 415 | + end = len(self.rows) |
| 416 | + i = find_close_point(self.cols, v[0], joint_tol) |
| 417 | + if i is None: # only right edge |
| 418 | + i = len(self.cols) - 1 |
| 419 | + for j in range(start, end): |
| 420 | + self.cells[j][i].right = True |
| 421 | + elif i == 0: # only left edge |
| 422 | + for j in range(start, end): |
| 423 | + self.cells[j][0].left = True |
446 | 424 | else: # both left and right edges |
447 | | - L = i[0] |
448 | | - if k: |
449 | | - K = k[0] |
450 | | - while J < K: |
451 | | - self.cells[J][L].left = True |
452 | | - self.cells[J][L - 1].right = True |
453 | | - J += 1 |
454 | | - else: |
455 | | - K = len(self.rows) |
456 | | - while J < K: |
457 | | - self.cells[J][L].left = True |
458 | | - self.cells[J][L - 1].right = True |
459 | | - J += 1 |
| 425 | + for j in range(start, end): |
| 426 | + self.cells[j][i].left = True |
| 427 | + self.cells[j][i - 1].right = True |
460 | 428 |
|
461 | 429 | for h in horizontal: |
462 | 430 | # find closest y coord |
463 | 431 | # iterate over x coords and find closest start and end points |
464 | | - i = [ |
465 | | - i |
466 | | - for i, t in enumerate(self.rows) |
467 | | - if np.isclose(h[1], t[0], atol=joint_tol) |
468 | | - ] |
469 | | - j = [ |
470 | | - j |
471 | | - for j, t in enumerate(self.cols) |
472 | | - if np.isclose(h[0], t[0], atol=joint_tol) |
473 | | - ] |
474 | | - k = [ |
475 | | - k |
476 | | - for k, t in enumerate(self.cols) |
477 | | - if np.isclose(h[2], t[0], atol=joint_tol) |
478 | | - ] |
479 | | - if not j: |
| 432 | + start = find_close_point(self.cols, h[0], joint_tol) |
| 433 | + if start is None: |
480 | 434 | continue |
481 | | - J = j[0] |
482 | | - if i == [0]: # only top edge |
483 | | - L = i[0] |
484 | | - if k: |
485 | | - K = k[0] |
486 | | - while J < K: |
487 | | - self.cells[L][J].top = True |
488 | | - J += 1 |
489 | | - else: |
490 | | - K = len(self.cols) |
491 | | - while J < K: |
492 | | - self.cells[L][J].top = True |
493 | | - J += 1 |
494 | | - elif i == []: # only bottom edge |
495 | | - L = len(self.rows) - 1 |
496 | | - if k: |
497 | | - K = k[0] |
498 | | - while J < K: |
499 | | - self.cells[L][J].bottom = True |
500 | | - J += 1 |
501 | | - else: |
502 | | - K = len(self.cols) |
503 | | - while J < K: |
504 | | - self.cells[L][J].bottom = True |
505 | | - J += 1 |
| 435 | + end = find_close_point(self.cols, h[2], joint_tol) |
| 436 | + if end is None: |
| 437 | + end = len(self.cols) |
| 438 | + i = find_close_point(self.rows, h[1], joint_tol) |
| 439 | + if i is None: # only bottom edge |
| 440 | + i = len(self.rows) - 1 |
| 441 | + for j in range(start, end): |
| 442 | + self.cells[i][j].bottom = True |
| 443 | + elif i == 0: # only top edge |
| 444 | + for j in range(start, end): |
| 445 | + self.cells[0][j].top = True |
506 | 446 | else: # both top and bottom edges |
507 | | - L = i[0] |
508 | | - if k: |
509 | | - K = k[0] |
510 | | - while J < K: |
511 | | - self.cells[L][J].top = True |
512 | | - self.cells[L - 1][J].bottom = True |
513 | | - J += 1 |
514 | | - else: |
515 | | - K = len(self.cols) |
516 | | - while J < K: |
517 | | - self.cells[L][J].top = True |
518 | | - self.cells[L - 1][J].bottom = True |
519 | | - J += 1 |
| 447 | + for j in range(start, end): |
| 448 | + self.cells[i][j].top = True |
| 449 | + self.cells[i - 1][j].bottom = True |
520 | 450 |
|
521 | 451 | return self |
522 | 452 |
|
|
0 commit comments