-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathserver.py
76 lines (60 loc) · 2.43 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import re
from itertools import cycle
from urllib.parse import urljoin
import html2text
from aiohttp import ClientSession, web
# Upstream site whose pages are fetched and served back by the proxy handler.
LIFEHACKER_URL = 'https://lifehacker.ru'
# Address that replaces LIFEHACKER_URL in links found inside proxied HTML.
BASE_URL = 'http://0.0.0.0:8080'
# Emoji appended to matching words; they are handed out in rotation.
EMOJI = [
    '😃',
    '😂',
    '😎',
    '😡',
    '😍',
    '😐',
]
# Only words of exactly this many characters get an emoji.
WORD_LENGTH = 6
def change_links(string, old_link, new_link):
    """
    Replace every occurrence of one link in a string with another link.

    The old link is matched as literal text (regex metacharacters such as the
    dots of a host name are escaped) and must sit on word boundaries, so it is
    not replaced when it is only a prefix of a longer word. The new link is
    inserted verbatim.
    :param string: text to search
    :param old_link: link to look for, treated as literal text
    :param new_link: link to insert in place of each match
    :return: string with new links
    """
    pattern = rf'\b{re.escape(old_link)}\b'
    # A callable replacement stops backslashes or group references inside
    # new_link from being interpreted as a regex replacement template.
    return re.sub(pattern, lambda _match: new_link, string)
def clear_string_from_links(string):
    """
    Strip every http/https URL out of the given text.
    :param string: text that may contain URLs
    :return: the same text with each matched URL replaced by an empty string
    """
    url_pattern = re.compile(
        r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)'
    )
    return url_pattern.sub('', string)
def validate_words(words, word_length):
    """
    Pick out the words that have exactly the required length.

    Every raw item first has its URLs removed and is then broken into word
    tokens, so surrounding punctuation does not count towards the length.
    :param words: list of raw strings to scan
    :param word_length: required word length
    :return: list of matching words, in order of appearance, duplicates kept
    """
    return [
        token
        for raw_item in words
        # extract only the word characters of each item
        for token in re.findall(r'\b\w+\b', clear_string_from_links(raw_item))
        if len(token) == word_length
    ]
def _append_emoji_outside_tags(html, words, emoji_pool):
    """
    Append an emoji after each listed word, touching only text outside tags.

    Words and emoji are paired in order, cycling through the emoji; when a word
    occurs more than once in ``words`` its first pairing is the one used.
    :param html: HTML document as a string
    :param words: words that should receive an emoji
    :param emoji_pool: sequence of emoji to hand out in rotation
    :return: HTML with emoji appended to whole-word matches in text content
    """
    emoji_by_word = {}
    for word, emoji in zip(words, cycle(emoji_pool)):
        # the page may contain duplicate words: keep the first pairing
        emoji_by_word.setdefault(word, emoji)
    if not emoji_by_word:
        return html

    def decorate(match):
        word = match.group(0)
        return word + emoji_by_word.get(word, '')

    # The capturing group makes re.split keep the tags: even indexes hold text,
    # odd indexes hold the tags themselves. Only the text is rewritten, so tag
    # names, attribute values and URLs inside tags are left intact.
    pieces = re.split(r'(<[^>]*>)', html)
    pieces[::2] = [re.sub(r'\b\w+\b', decorate, text) for text in pieces[::2]]
    return ''.join(pieces)


async def proxy_with_emoji(request):
    """
    Proxy a page from LIFEHACKER_URL, decorating HTML pages with emoji.

    Non-HTML responses are passed through unmodified. For HTML, links to
    LIFEHACKER_URL are rewritten to BASE_URL and an emoji is appended to every
    word of WORD_LENGTH characters found in the page text. The upstream status
    code is forwarded in both cases.
    :param request: aiohttp request; the ``path`` match-info entry selects the
        upstream page
    :return: web.Response with the (possibly modified) upstream content
    """
    # Build the URL by concatenation rather than urljoin: urljoin lets a path
    # such as 'http://other.host/' or '//other.host/' replace the host, which
    # would let a client point this proxy at arbitrary sites.
    path = request.match_info['path'].lstrip('/')
    target_url = f'{LIFEHACKER_URL}/{path}'
    async with ClientSession() as session:
        async with session.get(target_url) as resp:
            status = resp.status
            content_type = resp.content_type
            # return other content without modifying
            if content_type != 'text/html':
                return web.Response(body=await resp.read(), status=status, content_type=content_type)
            html = await resp.text()

    # split() breaks on any whitespace, so words separated only by a line break
    # are not glued together into one token.
    words = validate_words(html2text.html2text(html).split(), WORD_LENGTH)
    html = change_links(html, LIFEHACKER_URL, BASE_URL)
    html = _append_emoji_outside_tags(html, words, EMOJI)
    # text= (rather than body=) makes aiohttp encode the page as UTF-8 and
    # declare that charset in Content-Type, which the emoji rely on.
    return web.Response(text=html, status=status, content_type='text/html')
# Create the application and send every GET request, whatever its path, to the
# proxy handler.
app = web.Application()
app.router.add_get('/{path:.*}', proxy_with_emoji)
# Start serving; no host or port is passed, so aiohttp's defaults apply.
web.run_app(app)