-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
735 lines (614 loc) · 26.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
import imaplib
import config
import email, email.message, email.header, email.utils
import email.mime.multipart, email.mime.text
import re
import feedparser
import urllib.parse
import time, datetime
import sys
import html2text
import unicodedata
import logging
# Configure the root logger as soon as this module is imported: every record
# of level DEBUG and above goes to the file named by config.logfile, formatted
# as "<level>:<timestamp> <message>".
logging.basicConfig(
filename=config.logfile,
format='%(levelname)s:%(asctime)s %(message)s',
level=logging.DEBUG)
from xml.etree import ElementTree
import imap_utf7
from io import BytesIO
from email.generator import BytesGenerator
def imapify(string):
    """Return a version of ``string`` usable as an IMAP mailbox name.

    The string is first encoded with ``imap_utf7.encode`` and decoded back to
    ``str``; three characters are then substituted because of the way this
    file builds mailbox paths:

    * ``.`` becomes ``-``: paths are joined with ``.`` elsewhere in this
      file, so a dot inside a name would add a hierarchy level;
    * ``"`` becomes ``'``: paths are wrapped in double quotes before being
      sent to the server;
    * ``/`` becomes ``|``: presumably because some servers use ``/`` as
      their hierarchy delimiter -- TODO confirm.

    :param string: human-readable name, e.g. a feed title.
    :returns: the sanitized mailbox name as ``str``.
    """
    substitutions = str.maketrans({'.': '-', '"': "'", '/': '|'})
    # None of the replacement characters is itself a replaced character, so a
    # single translate() pass equals the former chain of str.replace() calls.
    return imap_utf7.encode(string).decode().translate(substitutions)
class YFeed(object):
    """A yarss2imap RSS feed mapped to an IMAP mailbox.

    When built with a URL the feed is fetched and parsed with ``feedparser``.
    Each feed entry can then be rendered as an email message and appended to
    the mailbox that represents the feed.
    """

    # Header that carries the entry URL in generated messages.
    # updateEntries() searches on it to detect already-imported entries.
    ENTRY_LINK_HEADER = 'X-Entry-Link'

    def __init__(self, url=None):
        """Parse the feed found at ``url``, if one is given.

        :param url: URL of the feed, or None for a mailbox without a feed.
        """
        # URL of the feed
        self.url = url
        # Parsed feed (result of feedparser.parse), None when there is no URL
        self.feed = None
        if url is not None:
            self.feed = feedparser.parse(url)
        # Title of the feed
        self._title = None
        # Safe title to be used as the name of a mailbox
        self._safeTitle = None
        # Path of the mailbox where this feed is represented
        self._mailbox = None

    @staticmethod
    def _entryTitle(entry):
        """Return the title of ``entry``, or 'No title' when it has none."""
        return getattr(entry, 'title', 'No title')

    def title(self, title=None):
        """Return the title of the feed, optionally setting it first.

        :param title: when not None, becomes the title of the feed.
        :returns: the explicit title if one was set, else the title found in
            the parsed feed, else ``'No title'``.
        """
        if title is not None:
            self._title = title
        if self._title is not None:
            return self._title
        self._title = 'No title'
        try:
            self._title = self.feed.feed.title
        except AttributeError:
            # Either there is no parsed feed or the feed has no title.
            pass
        return self._title

    def safeTitle(self):
        """Return the feed title made safe for use as an IMAP mailbox name.

        The value is computed once with ``imapify`` and then cached.
        """
        if self._safeTitle is None:
            self._safeTitle = imapify(self.title())
        return self._safeTitle

    def mailbox(self,
                agent=None,
                targetMailbox='INBOX.' + config.mailbox):
        """Return the mailbox associated with this feed, creating it once.

        :param agent: agent whose ``imap`` connection is used to create and
            subscribe to the mailbox; without it nothing can be created.
        :param targetMailbox: path of the mailbox to create on first call.
        :returns: the mailbox path, or None when no mailbox is known yet and
            no agent was given.
        """
        if self._mailbox is not None:
            return self._mailbox
        if agent is None:
            return None

        # Create one
        self._mailbox = targetMailbox
        logging.info("Creating mailbox: " + self._mailbox)
        status, message = agent.imap.create(self._mailbox)
        if status != 'OK':
            # it probably already exists
            logging.info("Could not create mailbox: " + self._mailbox)
            logging.info("  error message was: " + str(message))
        status, message = agent.imap.subscribe(self._mailbox)
        if status != 'OK':
            logging.error("Could not subscribe to mailbox: " + self._mailbox)
            logging.error("  error message was: " + str(message))
        return self._mailbox

    def createMessage(self, entry=None):
        """Create a message representing a given feed entry.

        The message is ``multipart/alternative`` with a plain-text part
        (produced by ``html2text``) and an HTML part, both ending or starting
        with a link back to the entry.

        :param entry: a feedparser entry; it must have a ``link``.
        :returns: the flattened message as ``bytes``.
        """
        title = self._entryTitle(entry)
        logging.info("Creating message about: " + title)
        msg = email.mime.multipart.MIMEMultipart('alternative')
        msg.set_charset(self.feed.encoding)

        author = self.title()
        try:
            author = entry.author + " @ " + author
        except AttributeError:
            pass
        msg['From'] = author
        msg['Subject'] = title
        msg['To'] = config.username

        date = getattr(entry, 'updated_parsed', None)
        if date is None:
            date = getattr(entry, 'published_parsed', None)
        if date is None:
            logging.warning('Entry without a date: ' + title)
            # Fall back to "now", expressed like feedparser's dates (UTC).
            date = time.gmtime()
        # feedparser documents its *_parsed dates as UTC 9-tuples, so they
        # are converted field by field rather than through time.mktime(),
        # which would interpret them as local time.
        year, month, day, hour, minute, second = date[:6]
        msg['Date'] = email.utils.format_datetime(
            datetime.datetime(year, month, day, hour, minute,
                              min(second, 59),  # tolerate leap seconds
                              tzinfo=datetime.timezone.utc))

        msg[self.ENTRY_LINK_HEADER] = email.header.Header(
            s=entry.link,
            charset=self.feed.encoding)

        try:
            content = entry.content[0]['value']
        except (AttributeError, IndexError, KeyError):
            content = getattr(entry, 'summary', None)
            if content is None:
                # An entry may carry nothing but a title and a link.
                content = getattr(entry, 'description', '')

        text = html2text.html2text(content)
        text = 'Retrieved from ' + entry.link + '\n' + text
        html = content + \
            '<p><a href="' + \
            entry.link + \
            '">Retrieved from ' + \
            entry.link + \
            '</a></p>'
        msg.attach(email.mime.text.MIMEText(text, 'plain'))
        msg.attach(email.mime.text.MIMEText(html, 'html'))

        bytesIO = BytesIO()
        bytesGenerator = BytesGenerator(bytesIO,
                                        mangle_from_=True,
                                        maxheaderlen=60)
        bytesGenerator.flatten(msg)
        return bytesIO.getvalue()

    def updateEntries(self, agent=None):
        """Guarantee one message in the feed mailbox per entry in the feed.

        For each entry that has a link, the mailbox is searched for an
        undeleted message whose link header equals that link; a new message
        is appended when none is found (or when the search itself failed).

        :param agent: agent used to talk to the IMAP server; nothing is done
            without it.
        """
        if agent is None:
            return

        mailbox = self.mailbox(agent=agent)

        # Create one message per feed item
        nbOfEntries = str(len(self.feed.entries))
        logging.info("Examining " + nbOfEntries + " feed entries.")
        agent.select(mailbox=mailbox)
        for entry in self.feed.entries:
            title = self._entryTitle(entry)
            link = getattr(entry, 'link', None)
            if not link:
                logging.error('Could not update entry titled: ' + title)
                continue

            # Is there already a message for this entry ?
            try:
                agent.imap.literal = link.encode(self.feed.encoding)
                # ^-- this is an undocumented imaplib feature
                status, data = agent.imap.uid(
                    'search',
                    'CHARSET',
                    self.feed.encoding,
                    'UNDELETED HEADER ' + self.ENTRY_LINK_HEADER)
            except Exception:
                logging.error('Could not search for entry titled: ' + title)
                status = 'KO'
                data = [None]

            if status == 'OK' and data[0] not in [None, b'']:
                # There is already one, move on !
                continue
            if status != 'OK':
                # Best effort: the entry is appended anyway.
                logging.error('Could not search for entry URL: ' + link)

            msg = self.createMessage(entry=entry)
            status, error = agent.imap.append(
                mailbox,
                '',
                imaplib.Time2Internaldate(time.time()),
                msg)
            if status != 'OK':
                # error is a list of bytes, hence the str()
                logging.error('Could not append message: ' + str(error))

    def createMailbox(self,
                      agent=None,
                      parentMailbox='INBOX.' + config.mailbox):
        """Create the mailbox of this feed under ``parentMailbox``.

        The mailbox is named after the safe title of the feed.  When the feed
        has a URL, a command message with subject ``feed <URL>`` is appended
        to the new mailbox; without a URL the mailbox is created empty.

        :param agent: agent used to talk to the IMAP server.
        :param parentMailbox: path under which the mailbox is created.
        :returns: the quoted path of the mailbox, or None when no agent was
            given.
        """
        logging.info("Creating a feed mailbox named '" + self.title() \
            + "' from this URL: " + str(self.url))
        if self.url is None and self.title() is None:
            logging.error('Could not create mailbox without a feed nor a name.')
            return None
        if agent is None:
            logging.error('Could not create mailbox without '
                          'an IMAP agent: ' + str(self.url))
            return None

        path = '"' + parentMailbox.strip('"') + '.' + self.safeTitle() + '"'
        # agent.select() creates the mailbox when it does not exist yet.
        agent.select(mailbox=path)
        if self.url is None:
            return path

        msg = email.mime.text.MIMEText("", "plain")
        msg['Subject'] = "feed " + str(self.url)
        msg['From'] = config.authorizedSender
        msg['To'] = config.authorizedSender
        status, error = agent.imap.append(
            path,
            '',
            imaplib.Time2Internaldate(time.time()),
            msg.as_bytes())
        if status != 'OK':
            logging.error('Could not append message: ' + str(error))
        else:
            logging.info('Created feed message in mailbox: ' + path)
        return path
class YCommandMessage(object):
    """A yarss2imap command represented as a message."""

    def __init__(self, message=None, mailbox=None, messageUID=None, agent=None):
        """Remember where the command message lives.

        :param message: the parsed command message.
        :param mailbox: path of the mailbox holding the message; it is
            wrapped in double quotes unless it already starts with one.
        :param messageUID: UID of the message within ``mailbox``.
        :param agent: agent used to talk to the IMAP server.
        """
        self.message = message
        self.mailbox = mailbox
        if mailbox is not None and not mailbox.startswith('"'):
            self.mailbox = '"' + mailbox + '"'
        self.messageUID = messageUID
        self.agent = agent

    def remove(self):
        """Flag the command message as deleted on the server.

        :returns: ``'OK'`` when there is nothing to remove (no mailbox or no
            UID), None when there is no agent to do the work, otherwise the
            status of the IMAP STORE command.
        """
        if self.mailbox is None or self.messageUID is None:
            return 'OK'
        if self.agent is None:
            logging.error("Could not delete message without any agent.")
            return None

        self.agent.select(self.mailbox)
        status, msg = self.agent.imap.uid(
            'store',
            self.messageUID,
            '+FLAGS',
            '\\Deleted')
        if status != 'OK':
            # Both values go through str(): the server response is a list of
            # bytes, and concatenating it directly would raise TypeError and
            # hide the actual failure.
            logging.error("Could not delete message with UID: " + \
                str(self.messageUID) + \
                " in mailbox: " + \
                self.mailbox)
            logging.error("Error message was: " + str(msg))
        return status
class YImportCommandMessage(YCommandMessage):
    """ An OPML document is to be imported as a hierarchy of
        mailboxes including feed commands. Is represented as a
        message with 'importOPML' as its subject and having an OPML
        document attached or as the main body.

        When executed, a hierarchy of mailboxes will
        be created according to the hierarchy of outlines described
        in the OPML files. Outlines with an xmlUrl attribute will
        be created as mailboxes with a "feed <xmlUrl>"-titled
        message within.
    """

    def __init__(self, message=None, mailbox=None, messageUID=None, agent=None):
        """Extract the OPML document from ``message``.

        ``self.opml`` receives the decoded payload of the message itself or
        of its last top-level part whose content type is ``text/xml`` or
        ``text/x-opml+xml``; it stays None when there is no such payload.
        """
        YCommandMessage.__init__(self,
                                 message=message,
                                 mailbox=mailbox,
                                 messageUID=messageUID,
                                 agent=agent)
        opmlMimeTypes = ['text/xml', 'text/x-opml+xml']
        self.opml = None
        if message is None:
            return
        if message.get_content_maintype() == 'multipart':
            for part in message.get_payload():
                if part.get_content_type() in opmlMimeTypes:
                    self.opml = part.get_payload(decode=True)
        elif message.get_content_type() in opmlMimeTypes:
            self.opml = message.get_payload(decode=True)

    # NOTE(review): this default has no '.' between 'INBOX' and
    # config.mailbox, unlike YAgent.select / YAgent.update, which use
    # 'INBOX.' + config.mailbox.  It is kept unchanged here to preserve the
    # interface; YAgent.update always passes underMailbox explicitly.
    def execute(self, underMailbox='INBOX' + config.mailbox):
        """Execute the importOPML command.

        - create the hierarchy of outlines as mailboxes with feeds,
        - remove the command message.

        :param underMailbox: mailbox under which the hierarchy is created.
        :returns: the status of the message removal, or None when the command
            could not be executed (no agent, no OPML or unparsable OPML).
        """
        if self.agent is None:
            logging.error("Could not execute without any agent.")
            return None
        if self.opml is None:
            logging.error("Could not import OPML when OPML is None.")
            return None

        # Create mailboxes for OPML content
        logging.info("Importing 1 OPML file.")
        try:
            root = ElementTree.fromstring(self.opml)
        except ElementTree.ParseError as err:
            logging.error("Could not parse OPML: " + str(err))
            return None
        self._createMailboxes(root, underMailbox)
        return self.remove()

    def _createMailboxes(self, root, rootMailbox):
        """Create a mailbox under ``rootMailbox`` for each outline child of
        ``root``, then recurse into every child.

        Children that are not outlines (e.g. ``<body>``) create nothing, but
        their own children are still visited under the same mailbox.
        """
        # Iterating the element directly replaces Element.getchildren(),
        # which no longer exists as of Python 3.9.
        for child in root:
            childMailbox = rootMailbox
            if child.tag == 'outline':
                url = child.get('xmlUrl')
                # 'text' is the fallback label when 'title' is absent.
                title = child.get('title') or child.get('text')
                feed = YFeed(url)
                if title is not None:
                    feed.title(title)
                childMailbox = feed.createMailbox(agent=self.agent,
                                                  parentMailbox=rootMailbox)
            self._createMailboxes(child, childMailbox)
class YFeedCommandMessage(YCommandMessage):
    """ A feed is to be updated. This command is represented
        as a message with "feed <feedURL>" as its subject line :

            feed http://...
    """

    def __init__(self, message=None, mailbox=None, messageUID=None, agent=None):
        """Read the feed URL from the subject line of ``message``.

        ``self.feedURL`` is None when there is no message, no subject, or
        when the subject does not match ``feed <http...>``.
        """
        YCommandMessage.__init__(self,
                                 message=message,
                                 mailbox=mailbox,
                                 messageUID=messageUID,
                                 agent=agent)
        self.feedURL = None
        subject = message['Subject'] if message is not None else None
        if subject is None:
            return
        # str() because the email package may hand back a Header object
        # instead of a plain string.
        matches = re.search(r'feed\s+(http.*)', str(subject))
        if matches is not None:
            self.feedURL = matches.group(1).strip()

    # NOTE(review): this default has no '.' between 'INBOX' and
    # config.mailbox, unlike YAgent.select / YAgent.update, which use
    # 'INBOX.' + config.mailbox.  It is kept unchanged here to preserve the
    # interface; YAgent.update always passes underMailbox explicitly.
    def execute(self, underMailbox='INBOX' + config.mailbox):
        """Execute the feed command.

        - move the feed message to a dedicated feed mailbox if needed,
        - update this mailbox according to feed entries.

        :param underMailbox: mailbox under which a dedicated feed mailbox is
            created when the command message was found in the INBOX.
        :returns: ``'OK'``, or None when the command could not be executed
            (no agent or no feed URL).
        """
        if self.agent is None:
            logging.error("Could not execute without any agent.")
            return None
        if self.feedURL is None:
            logging.error("Could not update a feed without any URL.")
            return None

        logging.info("Updating feed from URL: " + self.feedURL)

        # Fetch and parse the feed
        feed = YFeed(self.feedURL)
        logging.info("This feed has this title: " + feed.title())

        if self.mailbox in ['INBOX', '"INBOX"']:
            # This feed needs its own mailbox.
            targetMailbox = '"' + \
                underMailbox.strip('"') + \
                '.' + \
                feed.safeTitle() + \
                '"'
        else:
            # This feed will be in same mailbox as its
            # command message.
            targetMailbox = self.mailbox
        feedMailbox = feed.mailbox(agent=self.agent,
                                   targetMailbox=targetMailbox)

        if self.mailbox != feedMailbox:
            # The feed command message must go into
            # the feed mailbox.
            self.agent.moveUID(self.messageUID,
                               fromMailbox=self.mailbox,
                               toMailbox=feedMailbox)
            self.agent.select(mailbox=feedMailbox)
            self.mailbox = feedMailbox

        # Now update entries in that mailbox
        feed.updateEntries(agent=self.agent)
        return 'OK'
class YAgent(object): #pylint: disable-msg=R0904
    """An IMAP4 agent that can manage RSS feeds as mailboxes.

    The agent owns the IMAP connection (``self.imap``).  It looks for command
    messages in mailboxes, removes duplicates and executes what is left.
    """

    # Shape of the lines returned by imaplib's list():
    #   (<flags>) "<delimiter>" "<mailbox path>"
    # Only lines with a double-quoted path match.
    _LIST_LINE = re.compile(r'\(.*\) ".*" "(.*)"')

    def __init__(self):
        """Connect to the server named in the config file.

        An SSL connection is tried first; if it fails, a plain connection to
        the same host and port is attempted.
        """
        logging.info("-----------------------------------------------")
        logging.info("Initializing new agent.")
        self.imap = None
        try:
            self.imap = imaplib.IMAP4_SSL(config.servername, config.port)
        except Exception:
            # NOTE(review): with this fallback the credentials sent by
            # login() travel unencrypted -- confirm that this is acceptable.
            logging.warning("Could not connect using SSL, "
                            "falling back to an unencrypted connection.")
            self.imap = imaplib.IMAP4(config.servername, config.port)
        logging.info("New agent initialized.")

    def login(self):
        """Log in using the credentials given in the config file.

        :returns: the status of the IMAP LOGIN command.
        """
        logging.info("Logging in.")
        status, message = self.imap.login(config.username,
                                          config.password)
        return status

    def select(self, mailbox='INBOX.' + config.mailbox):
        """Select the given mailbox, creating it when the server says 'NO'.

        A freshly created mailbox is also subscribed to.  When it is the
        default yarss2imap mailbox, a welcome message built from README.md is
        appended to it.

        :param mailbox: mailbox path, quoted or not.
        :returns: the status of the last SELECT that decided the outcome.
        """
        logging.info("Selecting mailbox: " + mailbox)
        mbox = self._quoted(mailbox)
        status, message = self.imap.select(mbox)
        if status != 'NO':
            return status

        # There's no such mailbox, let's create one.  The default mailbox
        # (INBOX) is selected first, as the original code did.
        self.imap.select()
        created, message = self.imap.create(mbox)
        if created != "OK":
            logging.error("Could not create mailbox: " + str(mbox))
        self.imap.subscribe(mbox)
        status, message = self.imap.select(mbox)
        if status != "OK":
            logging.error("Could not select mailbox: " + str(mbox))
        if mbox in ['"INBOX.testyarss2imap"', '"INBOX.' + config.mailbox + '"']:
            # The default yarss2imap mailbox was just created
            # Let's populate it with a README message.
            self._appendReadme(mbox)
            self.imap.select(mbox)
        return status

    @staticmethod
    def _quoted(mailbox):
        """Return ``mailbox`` wrapped in double quotes unless it already
        starts with one."""
        if mailbox.startswith('"'):
            return mailbox
        return '"' + mailbox + '"'

    def _appendReadme(self, mbox):
        """Append a welcome message holding the text of README.md to
        ``mbox``; failures are logged, never raised."""
        logging.info("Creating README message")
        try:
            with open('README.md', 'r') as readme:
                content = readme.read()
        except OSError as err:
            logging.error('Could not read README.md: ' + str(err))
            return

        msg = email.mime.multipart.MIMEMultipart('alternative')
        msg.set_charset("utf-8")
        msg['From'] = "sig@akasig.org"
        msg['Subject'] = "Welcome to yarss2imap. README please."
        msg['To'] = config.username
        msg['Date'] = email.utils.format_datetime(
            datetime.datetime.fromtimestamp(
                time.time()))
        msg.attach(email.mime.text.MIMEText(content, 'plain'))

        bytesIO = BytesIO()
        bytesGenerator = BytesGenerator(bytesIO,
                                        mangle_from_=True,
                                        maxheaderlen=60)
        bytesGenerator.flatten(msg)
        status, error = self.imap.append(
            mbox,
            '',
            imaplib.Time2Internaldate(time.time()),
            bytesIO.getvalue())
        if status != 'OK':
            logging.error('Could not append README message: ' + str(error))

    def close(self):
        """Close the currently selected mailbox; return the IMAP status."""
        logging.info("Closing connexion.")
        status, message = self.imap.close()
        return status

    def logout(self):
        """Shut the connection to the server; return the IMAP status."""
        logging.info("Logging out.")
        status, message = self.imap.logout()
        return status

    def purge(self, mailbox=None):
        """Delete the given mailbox and the mailboxes listed under it.

        :param mailbox: path of the mailbox to erase.
        :returns: None when no mailbox is given, otherwise the status of the
            final IMAP DELETE of ``mailbox`` itself.
        """
        if mailbox is None:
            return None

        logging.info("Erasing mailbox: " + mailbox)
        lines = self.imap.list(mailbox)[1]
        self.select(mailbox='INBOX')
        for line in lines:
            if not isinstance(line, bytes):
                continue  # nothing was listed
            match = self._LIST_LINE.search(line.decode())
            if match is None:
                logging.error("Could not parse mailbox listing: " + str(line))
                continue
            path = self._quoted(match.group(1))
            if path in [mailbox, '"' + mailbox + '"']:
                continue # we'll remove it at the end
            status, message = self.imap.unsubscribe(path)
            if status != 'OK':
                logging.error("Could not unsubscribe from: " + path)
            status, message = self.imap.delete(path)
            if status != 'OK':
                logging.error("Could not delete path: " + path)
        self.imap.unsubscribe(mailbox)
        return self.imap.delete(mailbox)[0]

    def moveUID(self, uid, fromMailbox='INBOX', toMailbox='INBOX'):
        """Move the message given by UID from one mailbox to another.

        The message is copied to ``toMailbox`` and then flagged as deleted in
        ``fromMailbox``.  The move is abandoned as soon as a step fails, so
        that the original message is never flagged as deleted unless a copy
        of it exists.
        """
        fromMb = self._quoted(fromMailbox) # make it safe
        toMb = self._quoted(toMailbox) # make it safe
        if fromMb == toMb:
            return

        logging.info("Moving message from " + fromMb + " to " + toMb)
        status = self.select(fromMb)
        if status != 'OK':
            logging.error("Could not select mailbox: " + fromMb)
            return
        status, msg = self.imap.uid('copy', uid, toMb)
        if status != 'OK':
            logging.error("Could not copy a message to mailbox: " + toMb)
            logging.error("   error message was: " + str(msg))
            return
        status, msg = self.imap.uid('store', uid, '+FLAGS', '\\Deleted')
        if status != 'OK':
            logging.error("Could not delete message with UID: " + str(uid))
            logging.error("   error message was: " + str(msg))

    # NOTE(review): the defaults of listMailboxes and listCommands have no
    # '.' between 'INBOX' and config.mailbox, unlike select() and update().
    # They are kept unchanged to preserve the interface; update() always
    # passes the mailbox explicitly.
    def listMailboxes(self, mailbox='INBOX' + config.mailbox, pattern='*'):
        """List mailbox paths under the given mailbox whose names match the
        given pattern.

        :returns: list of unquoted mailbox paths; listing lines that cannot
            be parsed are skipped.
        """
        mailboxNames = []
        for mailboxFound in self.imap.list(mailbox, pattern=pattern)[1]:
            if not isinstance(mailboxFound, bytes):
                continue  # nothing was listed
            match = self._LIST_LINE.search(mailboxFound.decode())
            if match is not None:
                mailboxNames.append(match.group(1))
        return mailboxNames

    def listCommands(self, mailbox='INBOX' + config.mailbox):
        """Return the list of valid command messages found in the given
        mailbox.

        Undeleted messages whose subject contains ``feed `` or ``importOPML``
        are fetched and wrapped in the matching command class; those without
        a feed URL or without an OPML document are ignored.
        """
        logging.info("Looking for command messages in: " + mailbox)
        self.select(mailbox)
        self.imap.recent()
        commandMessages = []
        commandPattern = {
            'feed': 'HEADER Subject "feed "',
            'importOPML': 'HEADER Subject "importOPML"'
        }
        for command, pattern in commandPattern.items():
            status, data = self.imap.uid('search', None, 'UNDELETED ' + pattern)
            if status != 'OK' or not data or data[0] is None:
                logging.error("Could not search for command messages "
                              "of type '" + command + "' in mailbox: " +
                              mailbox)
                continue
            messageUIDs = data[0].decode().split()
            logging.info("Found " + \
                str(len(messageUIDs)) + \
                " command messages of type '" + \
                command + \
                "' in mailbox: " + \
                mailbox)
            for uid in messageUIDs:
                fetched = self.imap.uid('fetch', uid, '(RFC822)')[1]
                if not fetched or not isinstance(fetched[0], tuple):
                    # The message is gone or could not be fetched.
                    logging.error("Could not fetch message with UID: " + uid)
                    continue
                msg = email.message_from_bytes(fetched[0][1])
                if command == "feed":
                    commandMessage = YFeedCommandMessage(message=msg,
                                                         mailbox=mailbox,
                                                         messageUID=uid,
                                                         agent=self)
                    # invalid command when there is no URL
                    valid = commandMessage.feedURL is not None
                else:
                    commandMessage = YImportCommandMessage(message=msg,
                                                           mailbox=mailbox,
                                                           messageUID=uid,
                                                           agent=self)
                    # invalid command when there is no OPML
                    valid = commandMessage.opml is not None
                if valid:
                    commandMessages.append(commandMessage)
        return commandMessages

    def update(self, mailbox='INBOX.' + config.mailbox):
        """Look for command messages in the INBOX and under the given
        mailbox, then execute these commands.

        Commands are given in the subject line of messages. Arguments can be
        given in the subject line or can be attachments.  Duplicate commands
        (same OPML document, same feed URL) are removed first; for feeds, the
        command stored in the mailbox with the longest path is kept.

        :returns: the result of the last executed command (None when a
            command failed, which stops the execution), or ``'OK'`` when
            there was nothing to execute.
        """
        logging.info("Updating mailbox: " + mailbox)

        # Did we receive any new command message in the INBOX
        # or in the given mailbox ?  Or are there older command messages
        # already stored under their own folders ?
        # dict.fromkeys drops repeated paths while keeping the order: the
        # listing may contain `mailbox` itself, and scanning it twice would
        # make each of its command messages a "duplicate" of itself.
        listedMailboxes = list(dict.fromkeys(
            ['INBOX', mailbox] + self.listMailboxes(mailbox)))

        # Search for such messages and their command line in those mailboxes
        commands = []
        for listedMailbox in listedMailboxes:
            commands += self.listCommands(listedMailbox)
        logging.info("Found " + \
            str(len(commands)) + \
            " command messages under mailbox: " + \
            mailbox)

        # Remove duplicate command messages
        opmls = set()
        feedCommandsByURL = {}
        uniqueCommands = list(commands)
        for command in commands:
            if isinstance(command, YImportCommandMessage):
                if command.opml in opmls:
                    # We already know this OPML.
                    # Let's remove that command.
                    command.remove()
                    uniqueCommands.remove(command)
                else:
                    opmls.add(command.opml)
            elif isinstance(command, YFeedCommandMessage):
                otherCommand = feedCommandsByURL.get(command.feedURL)
                if otherCommand is None:
                    # This is the first feed command message for this URL
                    feedCommandsByURL[command.feedURL] = command
                elif len(command.mailbox) > len(otherCommand.mailbox):
                    # This command path is longer than the longest so far
                    # for that URL.
                    # It's the only one we want to keep.
                    feedCommandsByURL[command.feedURL] = command
                    otherCommand.remove()
                    uniqueCommands.remove(otherCommand)
                else:
                    # This command is redundant. Let's remove it.
                    command.remove()
                    uniqueCommands.remove(command)

        logging.info("Found " + \
            str(len(uniqueCommands)) + \
            " unique commands under mailbox: " + \
            mailbox)

        # Bound before the loop so that an update without any command
        # returns normally.
        result = 'OK'
        for command in uniqueCommands:
            result = command.execute(underMailbox=mailbox)
            if result is None:
                logging.error('Could not execute command: ' + str(command))
                break
        return result

    def loop(self, interval=60):
        """Main loop: update, sleep, repeat until an exception occurs.

        On any exception (including KeyboardInterrupt) the mailbox is closed,
        the agent logs out, and the exception is raised again.

        :param interval: number of seconds to sleep between two updates.
        """
        logging.info("Agent starting loop.")
        try:
            while True:
                self.update()
                logging.info("Sleeping for " + str(interval) + " seconds.")
                time.sleep(interval)
        except BaseException:
            logging.warning("Unexpected error:" + str(sys.exc_info()[0]))
            # A failing cleanup step must not hide the error that ended the
            # loop, so each one is attempted and only logged on failure.
            for cleanup in (self.close, self.logout):
                try:
                    cleanup()
                except Exception as err:
                    logging.error("Could not clean up: " + str(err))
            raise
        finally:
            logging.info("Agent stopping loop.")
if __name__ == "__main__":
    # Script entry point: connect, authenticate, open the default yarss2imap
    # mailbox and run the update loop.  The last two steps tidy up if the
    # loop ever returns.
    AGENT = YAgent()
    STEPS = (
        AGENT.login,    # authenticate with the configured credentials
        AGENT.select,   # select (creating it if needed) the default mailbox
        AGENT.loop,     # process command messages until interrupted
        AGENT.close,    # close the selected mailbox
        AGENT.logout,   # end the IMAP session
    )
    for STEP in STEPS:
        STEP()