-
Notifications
You must be signed in to change notification settings - Fork 0
/
doi.py
60 lines (45 loc) · 1.72 KB
/
doi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#! /usr/bin/python
# -*- coding: utf-8 -*-
"""Codes specifically related to DOI inputs."""
import re
import html
import urllib.parse
import logging
import requests
import commons
import bibtex
# Pattern for locating a DOI inside arbitrary text (a URL, a page, ...).
# Adapted from:
# http://stackoverflow.com/questions/27910/finding-a-doi-in-a-document-or-page
# Group 1 captures the DOI itself.  The adjacent raw literals below are
# concatenated by the compiler into one single pattern string.
doi_regex = re.compile(
    r'\b('
    r'10[.][0-9]{4,}'      # "10." followed by at least four digits
    r'(?:[.][0-9]+)*'      # optional further dot-separated digit groups
    r'/'                   # separator between prefix and suffix
    r'(?:(?!["&\'])\S)+'   # suffix: non-space chars, stopping at " & or '
    r')\b'
)
class Response(commons.BaseResponse):

    """Create a DOI's response object.

    Attributes set by ``__init__``:
        date_format: the ``date_format`` argument, stored unchanged.
        doi: the DOI, either given directly or extracted from a URL.
        url: ``'http://dx.doi.org/'`` followed by ``doi``.
        bibtex: the text returned by ``get_bibtex(url)``.

    When the returned text is exactly ``'Resource not found.'`` the
    attributes ``error`` (100), ``sfnt`` and ``ctnt`` are set instead of
    ``dictionary`` and ``self.generate()`` is not called.
    """

    def __init__(self, doi_or_url, pure=False, date_format='%Y-%m-%d'):
        """Make the dictionary and run self.generate().

        Args:
            doi_or_url: a bare DOI when ``pure`` is true; otherwise any
                text (typically a URL) that contains a DOI.  It may be
                HTML-escaped and/or percent-encoded.
            pure: if true, use ``doi_or_url`` as the DOI without parsing.
            date_format: stored on the instance as ``self.date_format``.

        Raises:
            ValueError: if ``pure`` is false and no DOI can be found in
                ``doi_or_url``.
        """
        self.date_format = date_format
        if pure:
            self.doi = doi_or_url
        else:
            # Unescape HTML entities first, then decode percent encodings,
            # so the regex sees the DOI in its plain form.
            decoded_url = urllib.parse.unquote(html.unescape(doi_or_url))
            m = doi_regex.search(decoded_url)
            if m is None:
                # Previously self.doi was left unset here and building
                # self.url failed with an unrelated AttributeError.
                raise ValueError('No DOI found in: %r' % (doi_or_url,))
            self.doi = m.group(1)
        self.url = 'http://dx.doi.org/' + self.doi
        self.bibtex = get_bibtex(self.url)
        if self.bibtex == 'Resource not found.':
            # The DOI is well-formed but unknown: record the failure on the
            # instance instead of trying to parse the reply as BibTeX.
            logger.info('DOI could not be resolved.\n' + self.url)
            self.error = 100
            self.sfnt = 'DOI could not be resolved.'
            self.ctnt = self.bibtex
            return
        self.dictionary = bibtex.parse(self.bibtex)
        if commons.config.lang == 'fa':
            # NOTE(review): detect_language() and generate() are not defined
            # in this file; presumably inherited from commons.BaseResponse.
            self.detect_language(self.dictionary['title'])
        self.generate()
def get_bibtex(doi_url, timeout=30):
    """Get BibTex file content from a DOI URL. Return as string.

    Args:
        doi_url: the URL to request.
        timeout: seconds to wait for the server before giving up; passed
            straight to ``requests.get``.  Without a timeout an
            unresponsive server would block the caller indefinitely.

    Returns:
        The response body as text.  The HTTP status is not checked, so
        for a failed lookup this is whatever text the server sent back.

    Raises:
        requests.exceptions.RequestException: on connection problems or
            when ``timeout`` is exceeded.
    """
    # The Accept header asks the server for BibTeX rather than its default
    # representation.
    response = requests.get(
        doi_url,
        headers={'Accept': 'application/x-bibtex'},
        timeout=timeout,
    )
    # Return the text directly; a local named ``bibtex`` would shadow the
    # imported ``bibtex`` module inside this function.
    return response.text
# Module-level logger, named after this module.  It is defined below the
# code that uses it; that works because Response.__init__ looks the name up
# only when it is called, after the whole module has been executed.
logger = logging.getLogger(__name__)