-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpga.py
132 lines (103 loc) · 4.17 KB
/
pga.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# -*- coding: utf-8 -*-
import configparser
import logging
import traceback
from datetime import date
from dateutil.relativedelta import relativedelta
from analyzer import ImageAnalyzer
from downloader import Downloader
from gif_downloader import GifDownloader
from oai_api import LibraryCrawler
from twitter_api import TwitterPoster
from utils import (
db_connection, cleanup, initialize_logging, APIException, ConverterException
)
# Search criteria handed to LibraryCrawler: the document types (Polish
# catalogue names) a record may have to be considered by the bot.
# NOTE(review): 'dokument ikonograficzny' is listed twice; kept exactly as in
# the original -- confirm whether the repetition is intentional.
QUERY = {
    'type': [
        'gazeta',
        'stary druk',
        'fotografia',
        'fotografie',
        'album',
        'dokument ikonograficzny',
        'dokument ikonograficzny',
        'inkunabuł',
        'kalendarz',
        'karta pocztowa',
        'mapa',
        'obraz',
        'pocztówka',
        'rękopis',
        'rysunek',
        'ulotka',
        'druk ulotny',
    ],
}
# Project helper imported from utils; presumably installs handlers/levels
# for the run -- its exact effect is not visible in this file.
initialize_logging()
# No name is passed, so this is the root logger; the class below logs
# through this module-level object.
logger = logging.getLogger()
class PANkreator(object):
    """
    Main body of the PANkreator bot.

    Each run picks one piece of content -- a gif obtained through
    ``GifDownloader`` or an image derived from a record returned by
    ``LibraryCrawler`` -- and hands it to ``TwitterPoster`` unless
    ``dry_run`` is set.
    """

    # When True, ``main`` only logs the chosen content and never posts it.
    dry_run = True

    def __init__(self):
        """Read ``config/config.conf`` and remember the database setting."""
        self.config = configparser.ConfigParser()
        self.config.read('config/config.conf')
        self.db = self.config['default']['database']
        logger.info('Starting...')

    def get_gif(self):
        """
        Ask ``GifDownloader`` for new posts.

        Returns whatever ``check_new_posts()`` returns; ``choose_content``
        unpacks it as ``(media_file_path, result)``.
        """
        gif_downloader = GifDownloader(self.config, self.db)
        return gif_downloader.check_new_posts()

    def get_djvu(self, just_thumbnail=False):
        """
        Fetch a library record and turn it into a media file.

        :param just_thumbnail: when true, use the downloader's thumbnail
            instead of running ``ImageAnalyzer`` on the unpacked download.
        :returns: ``(media_file_path, title)``, or ``(None, None)`` when the
            crawler yields no record.
        """
        record, content_id = LibraryCrawler(self.config, QUERY).run()
        if not record:
            return None, None

        downloader = Downloader(content_id, self.config)
        downloader.get_file()
        downloader.unzip()
        if just_thumbnail:
            media_file_path = downloader.get_thumbnail()
        else:
            media_file_path = ImageAnalyzer(self.config).run()

        # Only the first 110 characters of the title are kept; the configured
        # metadata URL followed by the content id is appended after a space.
        title = record.metadata['title'][0]
        title = '%s %s%s' % (
            title[:110], self.config['default']['metadata_url'], content_id
        )
        return media_file_path, title

    def choose_content(self):
        """
        This can be either
        - gif from pankreator.org site
        or
        - djvu image from the PAN library.

        :returns: ``(media_file_path, title)``; both may be ``None`` when the
            library fallback finds nothing.
        """
        with db_connection(self.db) as cursor:
            cursor.execute('select * from pankreator_gifs order by id desc limit 1;')
            last_record = cursor.fetchone()
            # If gif wasn't added yesterday, add one.
            yesterday = date.today() - relativedelta(days=+1)
            # NOTE(review): column 4 is presumably ``date_added`` and is
            # assumed to come back as a ``date`` -- confirm against the
            # schema and the connection's type detection.
            if (not last_record) or (last_record[4] < yesterday):
                media_file_path, result = self.get_gif()
                if media_file_path and result:
                    query = 'insert into pankreator_gifs (title, url, gif_url, date_added)'\
                            'values (?, ?, ?, ?)'
                    cursor.execute(
                        query,
                        (result['title'], result['url'], result['gif_url'], date.today())
                    )
                    return media_file_path, '%s %s' % (result['title'], result['url'])
        # No gif was due or available: fall back to the library.
        return self.get_djvu()

    def main(self, tries=0):
        """
        Choose content and post it, retrying on failure.

        :param tries: attempt budget. It is decremented after every failed
            attempt (no content found, or an exception) and another attempt
            is made only while the remaining value is positive.
        """
        try:
            media_file_path, title = self.choose_content()
            if not media_file_path:
                tries -= 1
                if tries > 0:
                    logger.warning("Trying again...")
                    # Return here: the retry is responsible for the outcome.
                    # Falling through would fetch a thumbnail and post a
                    # second time after the retry already finished.
                    return self.main(tries=tries)
                # Out of retries: try to get the thumbnail.
                media_file_path, title = self.get_djvu(just_thumbnail=True)

            if not media_file_path:
                # Even the thumbnail fallback produced nothing; do not hand
                # an empty path to the poster.
                logger.error("No content could be found, giving up.")
                cleanup(self.config)
                return

            logger.info("The winner is... %s, %s", media_file_path, title)
            if not self.dry_run:
                twitter_poster = TwitterPoster(self.config)
                twitter_poster.put_media_to_timeline(
                    media_file_path,
                    title
                )
            cleanup(self.config)
        except (Exception, APIException, ConverterException) as e:
            # Catch any exception and try n times until you get a result.
            tb = traceback.format_exc()
            logger.error("Caught exception: %s \n %s", e, tb)
            cleanup(self.config)
            tries -= 1
            if tries > 0:
                # Only announce a retry when one is actually going to happen.
                logger.warning("Trying again...")
                self.main(tries=tries)
if __name__ == '__main__':
    # Script entry point: build the bot and allow up to three attempts.
    PANkreator().main(tries=3)