Skip to content

Commit be85097

Browse files
authored
Merge pull request #5271 from cdrini/feature/gutenberg-reads
Add basic Gutenberg & LibriVox & Standard Ebooks support
2 parents 9bf0e12 + 5fb0736 commit be85097

24 files changed

+535
-119
lines changed

openlibrary/book_providers.py

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
from typing import List, Optional, Union, Literal, cast
2+
3+
import web
4+
from web import uniq
5+
6+
from openlibrary.app import render_template
7+
from openlibrary.plugins.upstream.models import Edition
8+
from openlibrary.plugins.upstream.utils import get_coverstore_public_url
9+
10+
11+
class AbstractBookProvider:
    """
    Base class describing one source ("provider") of copies of a book.

    Subclasses set ``short_name`` and ``identifier_key`` and may override the
    lookup / rendering hooks defined here.
    """

    # Short machine name of the provider; used to build identifier slugs
    # (``<short_name>:<identifier>``) and template paths.
    short_name: str

    # The key in the identifiers field on editions;
    # see https://openlibrary.org/config/edition
    identifier_key: str

    def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> List[str]:
        """
        Return this provider's identifiers found on the given record.

        Looks first under ``identifiers[<identifier_key>]`` (edition layout) and
        falls back to ``id_<identifier_key>`` (solr work record layout). Returns
        an empty list when neither is present.
        """
        return (
            # If it's an edition
            ed_or_solr.get('identifiers', {}).get(self.identifier_key, [])
            # if it's a solr work record
            or ed_or_solr.get(f'id_{self.identifier_key}', [])
        )

    def choose_best_identifier(self, identifiers: List[str]) -> str:
        """Pick one identifier out of a non-empty list; by default the first."""
        return identifiers[0]

    def get_best_identifier(self, ed_or_solr: Union[Edition, dict]) -> str:
        """
        Return the preferred identifier of this provider for the record.

        The record is expected to have at least one identifier for this
        provider; an ``AssertionError`` is raised otherwise.
        """
        identifiers = self.get_identifiers(ed_or_solr)
        assert identifiers
        return self.choose_best_identifier(identifiers)

    def get_best_identifier_slug(self, ed_or_solr: Union[Edition, dict]) -> str:
        """Used in eg /work/OL1W?edition=ia:foobar URLs, for example"""
        return f'{self.short_name}:{self.get_best_identifier(ed_or_solr)}'

    def get_template_path(
        self, typ: Literal['read_button', 'download_options']
    ) -> str:
        """Return the path of this provider's template of the given type."""
        return f"book_providers/{self.short_name}_{typ}.html"

    def render_read_button(self, ed_or_solr: Union[Edition, dict]):
        """Render the provider's ``read_button`` template with the best identifier."""
        return render_template(
            self.get_template_path('read_button'),
            self.get_best_identifier(ed_or_solr),
        )

    def render_download_options(
        self, edition: Edition, extra_args: Optional[List] = None
    ):
        """
        Render the provider's ``download_options`` template.

        The template receives the edition's best identifier followed by any
        ``extra_args`` as additional positional arguments.
        """
        return render_template(
            self.get_template_path('download_options'),
            self.get_best_identifier(edition),
            *(extra_args or []),
        )

    def get_cover_url(self, ed_or_solr: Union[Edition, dict]) -> Optional[str]:
        """
        Get the cover url most appropriate for this copy when made available by this
        provider

        Returns None when no cover can be determined.
        """
        size = 'M'

        # Editions
        if isinstance(ed_or_solr, Edition):
            cover = ed_or_solr.get_cover()
            # NOTE(review): assumes get_cover() returns a falsy value when the
            # edition has no cover - confirm against Edition. Without this
            # guard such an edition would raise AttributeError here.
            return cover.url(size) if cover else None

        # Solr document augmented with availability
        availability = ed_or_solr.get('availability', {})

        if availability.get('openlibrary_edition'):
            olid = availability.get('openlibrary_edition')
            return f"{get_coverstore_public_url()}/b/olid/{olid}-{size}.jpg"
        if availability.get('identifier'):
            ocaid = availability['identifier']
            return f"//archive.org/services/img/{ocaid}"

        # Plain solr - we don't know which edition is which here, so this is most
        # preferable
        if ed_or_solr.get('cover_i'):
            cover_i = ed_or_solr["cover_i"]
            return f'{get_coverstore_public_url()}/b/id/{cover_i}-{size}.jpg'
        if ed_or_solr.get('cover_edition_key'):
            olid = ed_or_solr['cover_edition_key']
            return f"{get_coverstore_public_url()}/b/olid/{olid}-{size}.jpg"
        if ed_or_solr.get('ocaid'):
            return f"//archive.org/services/img/{ed_or_solr.get('ocaid')}"

        # No luck
        return None

    def is_own_ocaid(self, ocaid: str) -> bool:
        """Whether the ocaid is an archive of content from this provider"""
        return False
94+
95+
96+
class InternetArchiveProvider(AbstractBookProvider):
    """Provider for copies identified by an Internet Archive ocaid."""

    short_name = 'ia'
    identifier_key = 'ocaid'

    def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> List[str]:
        """
        Return the ocaid(s) for the record.

        Checked in order: the ``availability.identifier`` of an augmented solr
        work record, the ``ocaid`` of an edition, then the ``ia`` list of a
        plain solr work record (empty list if absent).
        """
        # Solr work record augmented with availability
        availability_id = ed_or_solr.get('availability', {}).get('identifier')
        if availability_id:
            return [availability_id]

        # Edition
        edition_ocaid = ed_or_solr.get('ocaid')
        if edition_ocaid:
            return [edition_ocaid]

        # Solr work record
        return ed_or_solr.get('ia', [])

    def is_own_ocaid(self, ocaid: str) -> bool:
        """Every ocaid is considered to belong to this provider."""
        return True

    def render_download_options(self, edition: Edition, extra_args: List = None):
        """
        Render the download options for an edition.

        Returns an empty string when the edition is access restricted, has no
        ``ia_metadata``, or none of the known formats has a download link.
        """
        if edition.is_access_restricted() or not edition.ia_metadata:
            return ''

        # Template key -> file suffix passed to get_ia_download_link
        suffix_by_format = {
            'pdf': '.pdf',
            'epub': '.epub',
            'mobi': '.mobi',
            'txt': '_djvu.txt',
        }
        formats = {
            fmt: edition.get_ia_download_link(suffix)
            for fmt, suffix in suffix_by_format.items()
        }

        if not any(formats.values()):
            return ''

        return render_template(
            self.get_template_path('download_options'),
            formats,
            edition.url('/daisy'),
        )
133+
134+
135+
class LibriVoxProvider(AbstractBookProvider):
    """Provider for copies identified by a ``librivox`` identifier."""

    short_name = 'librivox'
    identifier_key = 'librivox'

    def render_download_options(self, edition: Edition, extra_args: List = None):
        """Render the download options, passing the edition's ocaid as an extra arg."""
        # The template also needs the ocaid, since some of the files are hosted on IA
        ocaid = edition.get('ocaid')
        return super().render_download_options(edition, [ocaid])

    def is_own_ocaid(self, ocaid: str) -> bool:
        """An ocaid is treated as LibriVox content when it contains 'librivox'."""
        return 'librivox' in ocaid
145+
146+
147+
class ProjectGutenbergProvider(AbstractBookProvider):
    """Provider for copies identified by a ``project_gutenberg`` identifier."""

    short_name = 'gutenberg'
    identifier_key = 'project_gutenberg'

    def is_own_ocaid(self, ocaid: str) -> bool:
        """An ocaid is treated as Gutenberg content when it ends with 'gut'."""
        return ocaid.endswith('gut')
153+
154+
155+
class StandardEbooksProvider(AbstractBookProvider):
    """Provider for copies identified by a ``standard_ebooks`` identifier."""

    short_name = 'standard_ebooks'
    identifier_key = 'standard_ebooks'

    def is_own_ocaid(self, ocaid: str) -> bool:
        """Never claims an ocaid."""
        # Standard ebooks isn't archived on IA
        return False
162+
163+
164+
# Default priority in which providers are consulted; earlier entries win.
PROVIDER_ORDER: List[AbstractBookProvider] = [
    # These providers act essentially as their own publishers, so link to the
    # first when we're on an edition page
    LibriVoxProvider(),
    ProjectGutenbergProvider(),
    StandardEbooksProvider(),
    # Then link to IA
    InternetArchiveProvider(),
]
173+
174+
175+
def is_non_ia_ocaid(ocaid: str) -> bool:
    """
    Check if the ocaid "looks like" it's from another provider

    Every provider in PROVIDER_ORDER other than 'ia' is asked whether it
    recognises the ocaid as its own; the answer is True as soon as one does.
    """
    return any(
        candidate.short_name != 'ia' and candidate.is_own_ocaid(ocaid)
        for candidate in PROVIDER_ORDER
    )
186+
187+
188+
def get_book_provider_by_name(short_name: str) -> Optional[AbstractBookProvider]:
    """Return the first provider in PROVIDER_ORDER with this short_name, or None."""
    for candidate in PROVIDER_ORDER:
        if candidate.short_name == short_name:
            return candidate
    return None
193+
194+
195+
def get_book_provider(
    ed_or_solr: Union[Edition, dict]
) -> Optional[AbstractBookProvider]:
    """
    Choose the provider to use for the given edition or solr record.

    Providers are tried in priority order and the first one that has an
    identifier on the record is returned; None if no provider matches. The
    priority order can be overridden with the comma-separated ``providerPref``
    request parameter, where ``*`` stands for the default order.
    """
    # On search results, we want to display IA copies first.
    # Issue is that an edition can be provided by multiple providers; we can easily
    # choose the correct copy when on an edition, but on a solr record, with all
    # copies of all editions aggregated, it's more difficult.
    # So we do some ugly ocaid sniffing to try to guess :/ Idea being that we ignore
    # OCAIDs that look like they're from other providers.
    default_order = PROVIDER_ORDER
    if not isinstance(ed_or_solr, Edition):
        ia_provider = cast(InternetArchiveProvider, get_book_provider_by_name('ia'))
        # Subjects/publisher pages have ia set to a specific value :/
        candidate_ocaids = uniq(ia_provider.get_identifiers(ed_or_solr) or [])
        has_real_ia_copy = any(
            not is_non_ia_ocaid(ocaid) for ocaid in candidate_ocaids
        )
        if has_real_ia_copy:
            # Move IA to the front, keeping the others in their usual order
            default_order = uniq([ia_provider, *PROVIDER_ORDER])

    provider_overrides = web.input(providerPref=None).providerPref

    if not provider_overrides:
        provider_order = default_order
    else:
        requested: List[AbstractBookProvider] = []
        for name in provider_overrides.split(','):
            if name == '*':
                requested.extend(default_order)
                continue
            named_provider = get_book_provider_by_name(name)
            # TODO: Show the user a warning somehow when the name is unknown
            if named_provider:
                requested.append(named_provider)
        # Providers not mentioned in the override keep their default position
        # after the requested ones
        provider_order = uniq(requested + default_order) or default_order

    # First provider with an identifier on the record wins; None if no luck
    return next(
        (
            candidate
            for candidate in provider_order
            if candidate.get_identifiers(ed_or_solr)
        ),
        None,
    )
245+
246+
247+
# Expose the IA provider as an attribute of the function so that callers
# holding only `get_book_provider` (e.g. templates using
# `get_book_provider.ia`) can reach it.
setattr(get_book_provider, 'ia', get_book_provider_by_name('ia'))

openlibrary/core/lending.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from openlibrary.core import cache
1717
from openlibrary.accounts.model import OpenLibraryAccount
1818
from openlibrary.plugins.upstream.utils import urlencode
19-
from openlibrary.utils import dateutil
19+
from openlibrary.utils import dateutil, uniq
2020

2121
from . import ia
2222
from . import helpers as h
@@ -371,6 +371,9 @@ def add_availability(items, mode="identifier"):
371371
:rtype: list of dict
372372
"""
373373
def get_ocaid(item):
374+
# Circular import otherwise
375+
from ..book_providers import is_non_ia_ocaid
376+
374377
possible_fields = [
375378
'ocaid', # In editions
376379
'identifier', # In ?? not editions/works/solr
@@ -393,9 +396,17 @@ def get_ocaid(item):
393396
possible_fields.remove('ia')
394397
possible_fields.append('ia')
395398

399+
ocaids = []
396400
for field in possible_fields:
397401
if item.get(field):
398-
return item[field][0] if isinstance(item[field], list) else item[field]
402+
ocaids += (
403+
item[field] if isinstance(item[field], list) else [item[field]]
404+
)
405+
ocaids = uniq(ocaids)
406+
return next(
407+
(ocaid for ocaid in ocaids if not is_non_ia_ocaid(ocaid)),
408+
None
409+
)
399410

400411
if mode == "identifier":
401412
ocaids = [ocaid for ocaid in map(get_ocaid, items) if ocaid]

openlibrary/macros/DownloadOptions.html

Lines changed: 0 additions & 25 deletions
This file was deleted.

openlibrary/macros/LoanStatus.html

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
$ waiting_loan = check_loan_status and ocaid and ctx.user and ctx.user.get_waiting_loan_for(ocaid)
2020
$ my_turn_to_borrow = waiting_loan and waiting_loan['status'] == 'available' and waiting_loan['position'] == 1
2121

22+
23+
$if not waiting_loan:
24+
$ book_provider = get_book_provider(doc)
25+
$else:
26+
$ book_provider = get_book_provider.ia
27+
2228
$# Replace existing, possibly inaccurate, availability with results ground truth availability API call:
2329
$if allow_expensive_availability_check and ocaid:
2430
$ availability.update(get_cached_groundtruth_availability(ocaid))
@@ -45,6 +51,9 @@
4551
});
4652
</script>
4753

54+
$elif book_provider and book_provider.short_name != 'ia':
55+
$:book_provider.render_read_button(doc)
56+
4857
$elif availability.get('is_readable'):
4958
$:macros.ReadButton(ocaid, listen=listen)
5059
$if secondary_action:
@@ -112,13 +121,13 @@
112121
data-ol-link-track="CTAClick|NotInLibrary">$_('Not in Library')</a>
113122
</div>
114123

115-
$if ocaid and secondary_action and availability.get('is_printdisabled'):
124+
$if ocaid and secondary_action and availability.get('is_printdisabled') and book_provider.short_name == 'ia':
116125
$:macros.BookPreview(ocaid, linkback=not no_index)
117126
$:macros.BookSearchInside(ocaid)
118127

119128
$:post
120129

121-
$if ocaid and daisy:
130+
$if ocaid and daisy and book_provider.short_name == 'ia':
122131
$:macros.daisy('%s/daisy' % doc.url, protected=availability.get('is_printdisabled'))
123132

124133
$if query_param('debug'):

openlibrary/macros/ReadButton.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
$ title = _("Read ebook from Internet Archive")
1717

1818
<div class="cta-button-group">
19-
<a href="$(stream_url)" title="$title" class="cta-btn cta-btn--available cta-btn--$(action)"
19+
<a href="$(stream_url)" title="$title" class="cta-btn cta-btn--ia cta-btn--available cta-btn--$(action)"
2020
$if loan:
2121
data-userid="$(loan['userid'])"
2222
$elif printdisabled:
@@ -30,7 +30,7 @@
3030
<a href="$(stream_url)&_autoReadAloud=show"
3131
title="$title using Read Aloud"
3232
data-ol-link-track="CTAClick|$(action.capitalize())Listen"
33-
class="cta-btn cta-btn--available">
33+
class="cta-btn cta-btn--available cta-btn--w-icon">
3434
<span class="btn-icon read-aloud"></span>
3535
<span class="btn-label">$_('Listen')</span>
3636
</a>

0 commit comments

Comments
 (0)