From 225f1bc233bc37f26ef10cfd3e88b0a5c5450d58 Mon Sep 17 00:00:00 2001 From: Mathieu Keller Date: Sat, 22 Apr 2023 08:32:24 +0200 Subject: [PATCH] feat: read epub file (#2) --- .github/workflows/build.yml | 3 +- .github/workflows/codeql-analysis.yml | 4 -- README.md | 21 ++++++++++ book.go | 58 ++++++++++++++++++++++++++ container.go | 10 +++++ opfReader.go | 17 -------- model.go => package.go | 0 parser_test.go | 14 +++---- test_epub_v3_0.epub | Bin 0 -> 1244 bytes test_opf.xml | 35 ---------------- 10 files changed, 97 insertions(+), 65 deletions(-) create mode 100644 README.md create mode 100644 book.go create mode 100644 container.go delete mode 100644 opfReader.go rename model.go => package.go (100%) create mode 100644 test_epub_v3_0.epub delete mode 100644 test_opf.xml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9516469..55d7483 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,8 +2,6 @@ name: Build on: push: - branches: - - 'main' jobs: test: @@ -24,6 +22,7 @@ jobs: run: go test -v ./... release: needs: [test] + if: github.ref == 'refs/heads/main' name: create release runs-on: ubuntu-latest steps: diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d6012e6..6ccbe42 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -2,10 +2,6 @@ name: "CodeQL" on: push: - pull_request: - branches: - - 'dev' - - 'main' schedule: - cron: '42 15 * * 1' jobs: diff --git a/README.md b/README.md new file mode 100644 index 0000000..db40400 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# Epub parser + +This parser only supports epub version 3.0 so far. +This parser also only reads the MetaData. + +### How to use? 
+```go
+zipReader, err := zip.OpenReader("./test_epub_v3_0.epub")
+if err != nil {
+	t.Log(err.Error())
+	t.Fail()
+}
+defer zipReader.Close()
+
+book, err := OpenBook(zipReader)
+if err != nil {
+	t.Log(err.Error())
+	t.Fail()
+}
+```
+After that, all metadata can be found in the book Object
\ No newline at end of file
diff --git a/book.go b/book.go
new file mode 100644
index 0000000..e99f504
--- /dev/null
+++ b/book.go
@@ -0,0 +1,90 @@
+package epub
+
+import (
+	"archive/zip"
+	"encoding/xml"
+	"errors"
+	"fmt"
+	"io"
+	"path"
+)
+
+// containerPath is the archive-internal name of the container document,
+// which in turn names the package (OPF) document to load.
+const containerPath = "META-INF/container.xml"
+
+// Book is an opened EPUB archive together with its decoded documents.
+//
+// Container holds the decoded META-INF/container.xml, Opf the package
+// document that the container's rootfile points to, and ZipReader the
+// archive both were read from.
+type Book struct {
+	Opf       Package
+	Container Container
+	ZipReader *zip.ReadCloser
+}
+
+// OpenBook decodes the container document and the package (OPF) document it
+// references from reader and returns them as a Book.
+//
+// An error is returned when reader is nil, when either document is missing
+// or cannot be decoded, or when the package version is not "3.0". OpenBook
+// keeps reader in the returned Book and never closes it; closing stays with
+// the caller.
+func OpenBook(reader *zip.ReadCloser) (*Book, error) {
+	if reader == nil {
+		return nil, errors.New("zip reader is nil")
+	}
+	book := &Book{ZipReader: reader}
+	if err := book.readXML(containerPath, &book.Container); err != nil {
+		return nil, err
+	}
+	rootfile := book.Container.Rootfile.Path
+	if rootfile == "" {
+		return nil, fmt.Errorf("%s declares no rootfile path", containerPath)
+	}
+	if err := book.readXML(rootfile, &book.Opf); err != nil {
+		return nil, err
+	}
+	if book.Opf.Version != "3.0" {
+		return nil, fmt.Errorf("epub version %q is not supported yet", book.Opf.Version)
+	}
+	return book, nil
+}
+
+// Open returns a reader for fileName, which is resolved relative to the
+// directory of the package document. The caller must close the reader.
+func (book *Book) Open(fileName string) (io.ReadCloser, error) {
+	return book.open(book.getFileFromRootPath(fileName))
+}
+
+// getFileFromRootPath turns a name relative to the package document's
+// directory into the slash-separated name used inside the archive.
+func (book *Book) getFileFromRootPath(fileName string) string {
+	return path.Join(path.Dir(book.Container.Rootfile.Path), fileName)
+}
+
+// readXML opens the archive entry fileName and decodes its XML content into
+// targetStruct, which must be a pointer.
+func (book *Book) readXML(fileName string, targetStruct interface{}) error {
+	reader, err := book.open(fileName)
+	if err != nil {
+		return err
+	}
+	defer reader.Close()
+	if err := xml.NewDecoder(reader).Decode(targetStruct); err != nil {
+		return fmt.Errorf("decoding %s: %w", fileName, err)
+	}
+	return nil
+}
+
+// open returns a reader for the archive entry whose name equals fileName
+// exactly, or an error when the archive has no such entry.
+func (book *Book) open(fileName string) (io.ReadCloser, error) {
+	for _, file := range book.ZipReader.File {
+		if file.Name == fileName {
+			return file.Open()
+		}
+	}
+	return nil, fmt.Errorf("file %s does not exist", fileName)
+}
diff --git a/container.go b/container.go
new file mode 100644
index 0000000..367b4d3
--- /dev/null
+++ b/container.go
@@ -0,0 +1,10 @@
+package epub
+
+type Container struct {
+	Rootfile Rootfile
`xml:"rootfiles>rootfile"` +} + +type Rootfile struct { + Path string `xml:"full-path,attr"` + Type string `xml:"media-type,attr"` +} diff --git a/opfReader.go b/opfReader.go deleted file mode 100644 index 4c64732..0000000 --- a/opfReader.go +++ /dev/null @@ -1,17 +0,0 @@ -package epub - -import ( - "encoding/xml" - "io" -) - -func ReadOpf(in io.ReadCloser) (*Package, error) { - defer in.Close() - dec := xml.NewDecoder(in) - var opf Package - err := dec.Decode(&opf) - if err != nil { - return nil, err - } - return &opf, nil -} diff --git a/model.go b/package.go similarity index 100% rename from model.go rename to package.go diff --git a/parser_test.go b/parser_test.go index 3cb15a0..d2de538 100644 --- a/parser_test.go +++ b/parser_test.go @@ -1,26 +1,26 @@ package epub import ( - "io/ioutil" - "os" + "archive/zip" "strconv" "testing" ) func Test_parse_epub_3_0_opf(t *testing.T) { - b, err := os.Open("./test_opf.xml") + zipReader, err := zip.OpenReader("./test_epub_v3_0.epub") if err != nil { t.Log(err.Error()) t.Fail() } - defer b.Close() - reader := ioutil.NopCloser(b) - defer reader.Close() - opf, err := ReadOpf(reader) + defer zipReader.Close() + + book, err := OpenBook(zipReader) if err != nil { t.Log(err.Error()) t.Fail() } + opf := book.Opf + assertEquals(t, opf.Lang, "us") assertEquals(t, opf.UniqueIdentifier, "p1234") assertEquals(t, opf.Version, "3.0") diff --git a/test_epub_v3_0.epub b/test_epub_v3_0.epub new file mode 100644 index 0000000000000000000000000000000000000000..c4bf49bbf3f8d0c5fd91b4086b640ada1f9d6f86 GIT binary patch literal 1244 zcmWIWW@Zs#5My9q5OtgumY=vl!v;LRnOC`5=j3=!ep2)2x1;>FtBQ`3nw6va`=2Q5PSsz& zPnrD!+tL+ct5Y{BC}f=s+;TN(gYUP}kDF>WL|1=b!&>ypI#Fxa=CIsPe=2PBCazA? 
zESj6S=i;b%!;Y}(+blfPoy4xtto5z;@t8>uVt)WI7UofKW+6Kw>2SW zd+)@uIC;h8KU-77x_Rc#q!-t0PyX3<#{S0{8?jt=$Cj&K5B;;)_CDyH=f+PV_ueX< z{$=G|cKGYdK9)18e9sMc{IixycM)uBklUWlZoFr&bFS^Yt71zz0D003Ef3buaja^ zjs}O>RLgIlyye-Nr1W!Kmv$DFy*M1HT4B1UG^4CgMg4V~#J;Wj-8L+r8b`}Ula zzs$7n8~%Os@Ryt;WO(3;)YR8m7gDypPxO1>xZ|c@QCpucquXP-Z-UZB{cc8zpBSoS zzF*wYUVfzQXNTJFC3Y=VbDX4MV3x+1yOKa!0EnCz(?U0_o_3iThcNORE^@?b&Veiu^J{%Lc zETnyFxXpLH`F^$TpCcMW&K?Uqv-S5H+w2J^u2{cVxai^k`jgMR?i5NJ+lOC~&#+_O z^;WXWNM8BurJp;REQ^+IO#d}U)-*~a_nJ4~6?2A`wJ#2y4BV`D+3>TG{Q9K@?uL!L zGd^B$$9gF=TCcwFB9)`oy7c3jOoD}9w&|X4VaxG4XdRuUeX_!Dr_|lE zaUTLUD%3yJ4th5uDzDXUmUw_SBa;X-?!o}*D==8n2%=y`156jLf&ik2fniCb6_AOg fSb!OTD - - - test identifier - Test Book 01 - - - Test Books - us - Action - Fantasy - Last Name, First Name - 2020-03-04 - test type - Test Publisher - Test Rights - Test description - Test relation - Test coverage - Test contributor - - main - - - - - - - - - \ No newline at end of file