-
Notifications
You must be signed in to change notification settings - Fork 0
/
ebook.go
239 lines (190 loc) · 6.8 KB
/
ebook.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
package pgrdf
import (
"encoding/xml"
"io"
"regexp"
)
// Ebook is a simplified view of a Project Gutenberg RDF document. An RDF read
// from an input stream is mapped onto this type, which hides the complexities
// of the source RDF for easier interaction. This type can also be
// un/marshaled to JSON.
type Ebook struct {
// PG eText ID.
// `<pgterms:ebook rdf:about="...">`.
ID int `json:"id"`
// Full title for this work (main, sub, etc.).
// `<dcterms:title>`
Titles []string `json:"titles"`
// Alternate titles for this work.
// `<dcterms:alternative>`
AlternateTitles []string `json:"alternate_titles,omitempty"`
// The table of contents for a book, with chapters separated by ` -- `.
// `<dcterms:tableOfContents>`
TableOfContents string `json:"toc"`
// Publisher of this work; always "Project Gutenberg".
// `<dcterms:publisher>`
Publisher string `json:"publisher"`
// Year this work was published in.
// `<pgterms:marc906>`
PublishedYear int `json:"published_year"`
// PG release/issued date in ISO 8601 format. Example: 2006-01-02.
// `<dcterms:issued>`
ReleaseDate string `json:"released"`
// A short summary of the work.
// `<pgterms:marc520>`
Summary string `json:"summary,omitempty"`
// The series this work originally belonged to.
// `<pgterms:marc440>`
Series []string `json:"series,omitempty"`
// Languages used in this book.
// `<dcterms:language>`
Languages []string `json:"languages,omitempty"`
// Language dialect (ISO 3166-2), _probably_ for the primary language only.
// `<pgterms:marc907>`
LanguageDialect string `json:"language_dialect,omitempty"`
// Notes about the language of the work, e.g. "Uses 19th century spelling."
// `<pgterms:marc546>`
LanguageNotes []string `json:"language_notes,omitempty"`
// Publication note of the source material: publisher, city, year, etc.
// `<pgterms:marc260>`
PublicationNote string `json:"publication_note,omitempty"`
// Edition note for this work, e.g. "2nd Edition", "A new edition with eleven new poems.", etc.
// `<pgterms:marc250>`
EditionNote string `json:"edition_note,omitempty"`
// ProductionNotes for this ebook. This can also include "updated" dates, either as a
// separate entry or as part of the credit, e.g. "J. Smith\nUpdated: 2022-07-14".
// `<pgterms:marc508>`
ProductionNotes []string `json:"production_notes,omitempty"`
// Rights for this work. Most are "Public domain in the USA."
// `<dcterms:rights>`
Copyright string `json:"copyright"`
// Distributed Proofreaders clearance code, e.g. "20050213050736stahl".
// `<pgterms:marc905>`
CopyrightClearanceCode string `json:"pg_dp_clearance"`
// Type of this work, one of:
// Collection, Dataset, Image, MovingImage, Sound, StillImage, Text
// `<dcterms:type>`
BookType BookType `json:"type"`
// Additional notes about this eText.
// `<dcterms:description>`
Notes []string `json:"note"`
// A description of the physical attributes of the source of this work, e.g. "5 pages : illustrations, map, portraits".
// `<pgterms:marc300>`
PhysicalDescriptionNote string `json:"physical_description_note"`
// URLs to information about the source of this work, e.g. image scans on Internet Archive website.
// `<pgterms:marc904>`
SourceLinks []string `json:"source_link"`
// Library of Congress Control Number.
// `<pgterms:marc010>`
LCCN string `json:"lccn"`
// ISBN of this work - possibly of the work used for the OCR.
// `<pgterms:marc020>`
ISBN string `json:"isbn"`
// Book covers, or images acting as a book cover, i.e. this could be a title page.
// A URL or file path in the HTML ebook directory.
// `<pgterms:marc901>`
BookCovers []string `json:"book_covers"`
// URL of an image file representing the title page of a book.
// `<pgterms:marc902>`
TitlePageImage string `json:"title_page_image"`
// URL of an image file representing the back cover of a book.
// `<pgterms:marc903>`
BackCover string `json:"back_cover"`
// List of creators involved in the creation of this work, such as the
// authors, editors, and illustrators.
// `<dcterms:creator>`, `<marcrel:*>`, `<pgterms:agent>`
Creators []Creator `json:"creators"`
// List of subjects for this work.
// `<dcterms:subject>`
Subjects []Subject `json:"subjects"`
// List of files associated with this work (txt, zip, images, etc.).
// `<dcterms:hasFormat>`, `<pgterms:file>`
Files []File `json:"files"`
// List of PG bookshelves this work is available in.
// `<pgterms:bookshelf>`
Bookshelves []Bookshelf `json:"bookshelves"`
// Download count - only from the previous 30 days at the time the RDF was generated.
// `<pgterms:downloads>`
Downloads int `json:"downloads"`
// List of author links (typically to Wikipedia).
// `<rdf:Description>`
AuthorLinks []AuthorLink `json:"author_links"`
// A Creative Commons comment, usually just info on where to find the RDF files.
// `<cc:Work><rdfs:comment>`
CCComment string `json:"cc_comment"`
// A Creative Commons license URL.
// `<cc:Work><cc:license>`
CCLicense string `json:"cc_license"`
}
// ReadRDF consumes an RDF document from r and unmarshals it into an Ebook.
// When unmarshaling fails the returned Ebook is nil and the error is passed
// through unchanged.
func ReadRDF(r io.Reader) (*Ebook, error) {
	ebook, err := rdfUnmarshal(r)
	if err == nil {
		return ebook, nil
	}
	// Always hand back an explicit nil on failure, whatever rdfUnmarshal returned.
	return nil, err
}
// selfClosingTagRe matches the `>` that terminates an opening tag when it is
// immediately followed by a closing tag, i.e. an element with no content.
// It is compiled once at package scope rather than on every WriteRDF call.
var selfClosingTagRe = regexp.MustCompile(`></[^>]+?>`)

// WriteRDF marshals the Ebook to an RDF document and writes it to the provided `io.Writer`.
//
// The output is indented XML, prefixed with the standard XML declaration, in
// which empty elements are rewritten as self-closing tags. The whole document
// is written with a single call to w.Write. Any error from marshaling or from
// the writer is returned unchanged.
func (e *Ebook) WriteRDF(w io.Writer) error {
	data, err := xml.MarshalIndent(rdfMarshal(e), "", " ")
	if err != nil {
		return err
	}

	// The xml package does not currently emit self-closing tags, e.g. `<tag />`,
	// so collapse each empty `<tag></tag>` pair into `<tag/>`.
	data = selfClosingTagRe.ReplaceAll(data, []byte("/>"))

	// Prepend the XML declaration.
	data = append([]byte(xml.Header), data...)

	_, err = w.Write(data)
	return err
}
// AddSubject appends a Subject built from the given heading and schema to the
// ebook's list of subjects.
func (e *Ebook) AddSubject(heading, schema string) {
	e.Subjects = append(e.Subjects, Subject{Heading: heading, Schema: schema})
}
// AddBookshelf appends a Bookshelf with the given name and resource to the
// ebook's list of bookshelves.
func (e *Ebook) AddBookshelf(name, resource string) {
	e.Bookshelves = append(e.Bookshelves, Bookshelf{Name: name, Resource: resource})
}
// AddAuthorLink appends an AuthorLink with the given description and URL to
// the ebook's list of author links.
func (e *Ebook) AddAuthorLink(description, url string) {
	e.AuthorLinks = append(e.AuthorLinks, AuthorLink{URL: url, Description: description})
}
// AddCreator appends the given Creator to the ebook's list of creators.
func (e *Ebook) AddCreator(creator Creator) {
	updated := append(e.Creators, creator)
	e.Creators = updated
}
// AddBookFile appends the given File to the ebook's list of files.
func (e *Ebook) AddBookFile(file File) {
	updated := append(e.Files, file)
	e.Files = updated
}
// SetBookType sets the ebook's BookType from its string name.
//
// Recognized values are "Collection", "Dataset", "Image", "MovingImage",
// "Sound", "StillImage" and "Text". Matching is case-sensitive, except that
// the lowercase spelling "collection" is also accepted. Any other value sets
// the type to BookTypeUnknown.
func (e *Ebook) SetBookType(value string) {
	switch value {
	// Previously only the lowercase "collection" was matched, so the canonical
	// "Collection" spelling fell through to BookTypeUnknown. The lowercase form
	// is kept for backward compatibility with existing callers.
	case "Collection", "collection":
		e.BookType = BookTypeCollection
	case "Dataset":
		e.BookType = BookTypeDataset
	case "Image":
		e.BookType = BookTypeImage
	case "MovingImage":
		e.BookType = BookTypeMovingImage
	case "Sound":
		e.BookType = BookTypeSound
	case "StillImage":
		e.BookType = BookTypeStillImage
	case "Text":
		e.BookType = BookTypeText
	default:
		e.BookType = BookTypeUnknown
	}
}