-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdf_page_counter.py
98 lines (86 loc) · 2.63 KB
/
pdf_page_counter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pathlib
import sys
from PyPDF2 import PdfReader
from PyPDF2.errors import PdfReadError
import logging
import click
from rich.console import Console
from rich.table import Table
from rich.text import Text
# Root logger setup: messages of every level from DEBUG upwards are written
# to "pdf_page_counter.log" (a relative path), and filemode "w" truncates
# the file so each run starts with an empty log.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(levelname)s:%(message)s",
    filename="pdf_page_counter.log",
    filemode="w",
)
def count_pages(path: pathlib.Path) -> int:
    """Return the number of pages in the PDF file at *path*.

    The file is opened in binary mode and parsed with ``PdfReader``; the
    outcome (page count or failure) is written to the log.

    Args:
        path: Location of the PDF file to inspect.

    Returns:
        The page count, or ``0`` when the file raises ``PdfReadError``
        (the error is logged and the caller can continue with other files).

    Raises:
        SystemExit: With exit status 1 on any other exception, after a short
            notice is printed to stderr and the traceback is logged.
    """
    try:
        with open(path.resolve(), mode="rb") as f:
            # The page list must be measured while the file is still open.
            num_pages = len(PdfReader(f).pages)
    except PdfReadError as e:
        logging.error(f'"{path.name}":{e}:could not be read as PDF')
        return 0
    except Exception as e:
        print(
            "ERROR: Unhandled exception, check log file for details. Exiting...",
            file=sys.stderr,
        )
        logging.critical(f'"{path.name}":{e}', exc_info=True)
        # A bare sys.exit() would report success (status 0) to the shell;
        # exit non-zero so scripts and CI can detect the failure.
        sys.exit(1)
    else:
        logging.info(f'"{path.name}":pages={num_pages}')
        return num_pages
@click.command()
@click.argument(
    "dir_path",
    type=click.Path(exists=True, file_okay=False, path_type=pathlib.Path),
    default=pathlib.Path(),
)
@click.option(
    "-r",
    "--recursive",
    is_flag=True,
    default=False,
    help="recursive search: count all pdfs in DIR_PATH and all its subdirectories",
)
@click.option(
    "-t",
    "--table",
    is_flag=True,
    default=False,
    help="print a table to terminal using rich library",
)
def count(dir_path, recursive, table):
    """
    Count the pages of all the PDFs in the directory DIR_PATH.

    "1, 2, 3 PDFs do I see, HA HA HA"
    -- Count von Count, Sesame Street
    """
    # NOTE: click renders the docstring above as the --help text, so the
    # implementation notes are kept in comments instead.
    #
    # dir_path:  existing directory to search (defaults to the current one).
    # recursive: when true, subdirectories are searched as well.
    # table:     when true, print a per-file rich table instead of only the
    #            grand total.
    find = dir_path.rglob if recursive else dir_path.glob
    # A directory may itself be named "something.pdf"; opening it would take
    # the fatal error path in count_pages and abort the whole run, so only
    # regular files are counted.
    pdf_paths = (p for p in find("*.pdf") if p.is_file())

    if not table:
        total_pages = sum(count_pages(p) for p in pdf_paths)
        print(total_pages)
    else:
        print()
        rows = [(str(pdf_path), count_pages(pdf_path)) for pdf_path in pdf_paths]
        total_pages = sum(pages for _, pages in rows)

        # Named pages_table so the boolean --table flag is not shadowed.
        pages_table = Table(
            title="Count of PDF Pages",
            show_edge=True,
            show_footer=True,
            footer_style="red",
        )
        pages_table.add_column(
            Text.from_markup("PDF Path", justify="center"),
            Text.from_markup("[u]Total Pages", justify="right", style="red"),
            style="cyan",
            justify="left",
        )
        # The footer of the second column carries the underlined grand total.
        pages_table.add_column(
            "Pages", f"[u]{total_pages}", justify="right", style="magenta"
        )
        for path_text, pages in rows:
            pages_table.add_row(path_text, str(pages))

        Console().print(pages_table)
        print()

    # Recorded for both output modes.
    logging.info(f"TOTAL_PAGE_COUNT={total_pages}")
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    count()