Skip to content

Commit

Permalink
feat/pypdf: bump pypdf. (#16)
Browse files Browse the repository at this point in the history
* dep: upgrade PyPDF2 to pypdf==3.7.1

* refactor: update pypdf api usage.

* doc: update.

* chore: update sample output with pypdf.
  • Loading branch information
FFengIll authored Apr 10, 2023
1 parent e8829eb commit 6cdc320
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 27 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ Known limitation:

## Dependency

- PyPDF2: edit the pdf box
- pypdf: a pure-Python PDF library (the successor to PyPDF2)
- ~~PyPDF2: edit the pdf box~~
- ~~pdfminer & pdfminer3: scan elements~~
- pdfminer.six: scan pdf elements
- ~~PySide2: optional for GUI only~~
Expand Down
Binary file modified cases/output/input.pdf
Binary file not shown.
Binary file modified cases/output/output.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion pdf_page_split/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
- 输出所有的对应latex语句(template.tex)中可以修改

# 使用
- python3 -m pip install PyPDF2
- python3 -m pip install -r requirements.txt
- python3 splitor.py -t figure.tex -c figure.csv -o split/figure testcase.pdf
- `-t`选择模板,只有figure和table两类(也可以执行修改)
- `-c`选择数据文件
Expand Down
15 changes: 6 additions & 9 deletions pdf_page_split/splitor.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
# coding=utf-8
import argparse
import PyPDF2 as pdflib
from PyPDF2 import PdfFileWriter, PdfFileReader

import configparser
import csv
import logging
import os
import sys
import logging


import configparser
import csv
from pypdf import PdfReader, PdfWriter

# from config import config

Expand Down Expand Up @@ -113,7 +110,7 @@ def split(path, template, config, output="split", no_pdf=False):
with open(path, "rb") as pdf, open(
"log.txt".format(output), "w", encoding="utf-8"
) as log:
reader = PdfFileReader(pdf)
reader = PdfReader(pdf)

for i, item in zip(range(reader.getNumPages()), config):
if item is None:
Expand All @@ -130,7 +127,7 @@ def split(path, template, config, output="split", no_pdf=False):
page = reader.getPage(i)

if not no_pdf:
writer = PdfFileWriter()
writer = PdfWriter()
writer.add_page(page)
with open("{}".format(path), "wb") as figure:
writer.write(figure)
Expand Down
4 changes: 2 additions & 2 deletions pdf_white_cut/cutter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os

from PyPDF2 import PdfWriter, PdfReader
from path import Path
from pypdf import PdfReader, PdfWriter

from pdf_white_cut import analyzer
from pdf_white_cut.logger import logger
Expand Down Expand Up @@ -55,7 +55,7 @@ def edit_pdf(source: Path, target: Path, ignore=0):
try:
# MENTION: never move and change the sequence, since file IO.
# analyses the visible box of each page, aka the box scale. res=[(x1,y1,x2,y2)]
# analyses whole pdf at one time since it use `pdfminer` (not `PyPDF2`)
# analyses the whole pdf at one time since it uses `pdfminer` (not `pypdf`)
page_box_list = analyzer.extract_pdf_boxs(source, ignore=ignore)

# edit pdf by visible box and output it
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# required
PyPDF2==2.11.1
pypdf==3.7.1
pdfminer.six
loguru==0.6.0
path
Expand Down
14 changes: 7 additions & 7 deletions tests/test_PyPDF2.py → tests/test_pypdf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from PyPDF2 import PdfFileWriter, PdfFileReader
from pypdf import PdfReader, PdfWriter

output = PdfFileWriter()
input1 = PdfFileReader(open("cases/input/input.pdf", "rb"))
output = PdfWriter()
input1 = PdfReader(open("cases/input/input.pdf", "rb"))

# print how many pages input1 has:
print("document1.pdf has %d pages." % input1.getNumPages())
Expand All @@ -18,15 +18,15 @@

# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(3)
watermark = PdfFileReader(open("input.pdf", "rb"))
watermark = PdfReader(open("input.pdf", "rb"))
page4.mergePage(watermark.getPage(0))
output.add_page(page4)

# add page 5 from input1, but crop it to half size:
page5 = input1.getPage(4)
page5.mediaBox.upperRight = (
page5.mediaBox.getUpperRight_x() / 2,
page5.mediaBox.getUpperRight_y() / 2,
page5.mediabox.upperRight = (
page5.mediabox.getUpperRight_x() / 2,
page5.mediabox.getUpperRight_y() / 2,
)
output.add_page(page5)

Expand Down
11 changes: 5 additions & 6 deletions tests/test_white_cut.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sys

import loguru
from PyPDF2 import PdfFileReader, PdfFileWriter
from pypdf import PdfReader, PdfWriter

sys.path.append(".")

Expand All @@ -28,13 +28,12 @@ def test_analyzer():


def test_rw_pdf():

pdf = PdfFileReader(open("cases/input/input.pdf", "rb"))
out = PdfFileWriter()
pdf = PdfReader(open("cases/input/input.pdf", "rb"))
out = PdfWriter()

for page in pdf.pages:
page.mediaBox.upper_right = (580, 800)
page.mediaBox.lower_left = (128, 232)
page.mediabox.upper_right = (580, 800)
page.mediabox.lower_left = (128, 232)
out.add_page(page)

ous = open("cases/output/output.pdf", "wb")
Expand Down

0 comments on commit 6cdc320

Please sign in to comment.