Skip to content

Commit 691995a

Browse files
committed
use codecov token
1 parent 19fcf9a commit 691995a

File tree

3 files changed: 18 additions and 18 deletions.

.github/workflows/run-pytest.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8']
+        python-version: ['3.10']

     steps:
       - uses: actions/checkout@v3
@@ -44,3 +44,4 @@ jobs:
         uses: codecov/codecov-action@v5
         with:
           files: ./coverage.xml
+          token: ${{ secrets.CODECOV_TOKEN }}

casparser/parsers/mupdf.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ def extract_blocks(page_dict):
     for block in grouped_blocks:
         lines = []
         items = []
-        if len(block.get("lines", [])) == 0:
-            continue
-        bbox = block["lines"][0]["bbox"]
+        bbox = [0, 0, 0, 0]
+        if len(block.get("lines", [])) > 0:
+            bbox = block["lines"][0]["bbox"]
         y0, y1 = bbox[1], bbox[3]
         for line in sorted(block["lines"], key=lambda x: x["bbox"][1]):
             if len(items) > 0 and not (
@@ -113,12 +113,10 @@ def parse_investor_info(page_dict, page_rect: fitz.Rect) -> InvestorInfo:
     name = None
     for block in blocks:
         for line in block["lines"]:
-            for span in line["spans"]:
-                if span["bbox"][0] > width / 3:
-                    continue
+            for span in filter(
+                lambda x: x["bbox"][0] <= width / 3 and x["text"].strip() != "", line["spans"]
+            ):
                 txt = span["text"].strip()
-                if txt == "":
-                    continue
                 if not email_found:
                     if m := re.search(r"^\s*email\s+id\s*:\s*(.+?)(?:\s|$)", txt, re.I):
                         email = m.group(1).strip()
@@ -156,9 +154,9 @@ def group_similar_rows(elements_list: List[Iterator[Any]]):
     lines = []
     for elements in elements_list:
         sorted_elements = list(sorted(elements, key=itemgetter(1, 0)))
-        if len(sorted_elements) == 0:
-            continue
-        y0, y1 = sorted_elements[0][1], sorted_elements[0][3]
+        y0, y1 = 0, 0
+        if len(sorted_elements) > 0:
+            y0, y1 = sorted_elements[0][1], sorted_elements[0][3]
         items = []
         for el in sorted_elements:
             x2, y2, x3, y3 = el[:4]

casparser/parsers/pdfminer.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ def parse_investor_info(layout, width, height) -> InvestorInfo:
         [
             x
             for x in layout
-            if isinstance(x, LTTextBoxHorizontal) and x.x1 < width / 1.5 and x.y1 > height / 2
+            if isinstance(x, LTTextBoxHorizontal)
+            and x.x1 < width / 1.5
+            and x.y1 > height / 2
+            and x.get_text().strip() != ""
         ],
         key=lambda x: -x.y1,
     )
@@ -33,8 +36,6 @@ def parse_investor_info(layout, width, height) -> InvestorInfo:
     name = None
     for el in text_elements:
         txt = el.get_text().strip()
-        if txt == "":
-            continue
         if not email_found:
             if m := re.search(r"^\s*email\s+id\s*:\s*(.+?)(?:\s|$)", txt, re.I):
                 email = m.group(1).strip()
@@ -88,9 +89,9 @@ def group_similar_rows(elements_list: List[Iterator[LTTextBoxHorizontal]]):
     lines = []
     for elements in elements_list:
         sorted_elements = list(sorted(elements, key=lambda x: (-x.y1, x.x0)))
-        if len(sorted_elements) == 0:
-            continue
-        y0, y1 = sorted_elements[0].y0, sorted_elements[0].y1
+        y0, y1 = 0, 0
+        if len(sorted_elements) > 0:
+            y0, y1 = sorted_elements[0].y0, sorted_elements[0].y1
         items = []
         for el in sorted_elements:
             if len(items) > 0 and not (

0 commit comments

Comments
 (0)