-
Notifications
You must be signed in to change notification settings - Fork 0
/
aggregate.py
147 lines (126 loc) · 5.01 KB
/
aggregate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import logging
import os
import re
import sqlite3
from urllib.parse import urljoin
from urllib.parse import urlsplit
from wsgiref.simple_server import make_server

import requests
from lxml import html
from pyramid.config import Configurator
from pyramid.events import ApplicationCreated
from pyramid.events import NewRequest
from pyramid.events import subscriber
from pyramid.httpexceptions import HTTPFound
from pyramid.session import UnencryptedCookieSessionFactoryConfig
from pyramid.view import view_config
# Configure the root logger with the default handler so records are printed.
logging.basicConfig()
# Module-wide logger; it is named after this file's path.
log = logging.getLogger(__file__)
# Absolute directory holding this file; schema.sql, templates/, static/ and
# tasks.db are all looked up relative to it.
here = os.path.dirname(os.path.abspath(__file__))
# anchor-tag grabber: collects the text and href of every link on a page
def grab_urls(site):
    """Download ``site`` and list every anchor element found in its HTML.

    Args:
        site: URL of the page to fetch.

    Returns:
        A list of dicts, one per anchor, each holding ``text`` (the anchor's
        leading text, or ``None``) and ``url`` (the raw ``href`` attribute,
        or ``None`` when the anchor has none).

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the server does not answer within the timeout.
    """
    # Bounded wait so an unresponsive site cannot block the request forever.
    page_text = requests.get(site, timeout=10).text
    # lxml refuses str input that starts with an XML encoding declaration.
    # Strip the declaration only when it is really there; the previous fixed
    # 38-character slice chopped the start off every page without one.
    page_text = re.sub(r'\A\s*<\?xml[^>]*\?>', '', page_text, count=1)
    parsed = html.fromstring(page_text)
    return [dict(text=a.text, url=a.get('href')) for a in parsed.cssselect('a')]
# views
@view_config(route_name='list', renderer='list.mako')
def list_view(request):
    """Return all tasks that are still open, for the ``list.mako`` template.

    Args:
        request: current request; ``request.db`` is the sqlite connection
            attached by the NewRequest subscriber.

    Returns:
        ``{'tasks': [...]}`` where each entry is a dict with ``id`` and
        ``name`` taken from the rows whose ``closed`` column is 0.
    """
    cursor = request.db.execute("select id, name from tasks where closed = 0")
    open_tasks = []
    for task_id, task_name in cursor.fetchall():
        open_tasks.append({'id': task_id, 'name': task_name})
    return {'tasks': open_tasks}
@view_config(route_name='new', renderer='new.mako')
def new_view(request):
    """Show the "new task" form and create a task when it is submitted.

    Args:
        request: current request; uses ``request.POST``, ``request.db`` and
            ``request.session``.

    Returns:
        An ``HTTPFound`` redirect to the ``list`` route after a task has been
        inserted, otherwise an empty dict so ``new.mako`` renders the form.
    """
    # Anything but a form submission just renders the empty form.
    if request.method != 'POST':
        return {}

    # Submitted without a name: tell the user and show the form again.
    if not request.POST.get('name'):
        request.session.flash('Please enter a name for the task!')
        return {}

    conn = request.db
    conn.execute('insert into tasks (name, closed) values (?, ?)',
                 [request.POST['name'], 0])
    conn.commit()
    request.session.flash('New task was successfully added!')
    return HTTPFound(location=request.route_url('list'))
@view_config(route_name='close')
def close_view(request):
    """Mark the task named in the URL as closed and go back to the list.

    Args:
        request: current request; ``request.matchdict['id']`` carries the
            task id from the ``/close/{id}`` route.

    Returns:
        An ``HTTPFound`` redirect to the ``list`` route.

    Raises:
        ValueError: if the ``id`` path segment is not an integer.
    """
    ident = int(request.matchdict['id'])
    conn = request.db
    conn.execute("update tasks set closed = ? where id = ?", (1, ident))
    conn.commit()
    request.session.flash('Task was successfully closed!')
    destination = request.route_url('list')
    return HTTPFound(location=destination)
@view_config(context='pyramid.exceptions.NotFound', renderer='notfound.mako')
def notfound_view(self):
    """Render ``notfound.mako`` for unknown URLs with a 404 status.

    Args:
        self: despite its name, this is expected to be the current request;
            Pyramid calls a single-argument function view with the request.
            NOTE(review): confirm against the Pyramid version in use.

    Returns:
        An empty dict; the template needs no values.
    """
    # A renderer-based view answers with the default "200 OK" unless the
    # status is set explicitly, which would report missing pages as found.
    self.response.status = '404 Not Found'
    return {}
@view_config(route_name='aggregate', renderer='aggregate.mako')
def aggregate_view(request):
    """Return every stored link for the ``aggregate.mako`` template.

    Args:
        request: current request; ``request.db`` is the sqlite connection.

    Returns:
        ``{'urls': [...]}`` where each entry is a dict with ``id``, ``text``
        and ``url`` read from the ``urls`` table.
    """
    cursor = request.db.execute("select id, text, url from urls")
    links = []
    for link_id, link_text, link_url in cursor.fetchall():
        links.append({'id': link_id, 'text': link_text, 'url': link_url})
    return {'urls': links}
@view_config(route_name='newsite', renderer='newsite.mako')
def new_site_view(request):
    """Scrape the links of a submitted page and store them in ``urls``.

    On a POST with a non-empty ``name`` field, the page at that address is
    fetched with ``grab_urls`` and every anchor that has an ``href`` is
    inserted into the ``urls`` table. Relative links are resolved against
    the page address; absolute links are stored unchanged.

    Args:
        request: current request; uses ``request.POST``, ``request.db`` and
            ``request.session``.

    Returns:
        An ``HTTPFound`` redirect to the ``aggregate`` route after scraping,
        otherwise an empty dict so ``newsite.mako`` renders the form.
    """
    # Anything but a form submission just renders the empty form.
    if request.method != 'POST':
        return {}

    if not request.POST.get('name'):
        request.session.flash('Please enter a URL to scrape!')
        return {}

    parts = urlsplit(request.POST['name'])
    # Kept from the original: the scheme is always http, and any query string
    # or fragment of the submitted address is dropped.
    full_url = "http://" + parts.netloc + parts.path

    for item in grab_urls(full_url):
        href = item['url']
        if href is None:
            # Anchor without an href: nothing to store.
            continue
        # urljoin resolves "/path", "#fragment" and other relative forms
        # against the page and leaves absolute URLs untouched. The previous
        # test (`href[0] == '/' or '#'`) was always true, so every link got
        # the site root prepended, and it crashed on an empty href.
        link = urljoin(full_url, href)
        # Fall back to the address itself when the anchor has no text.
        text = item['text'] if item['text'] is not None else link
        request.db.execute('insert into urls (url, text) values (?, ?)',
                           [link, text])

    # One commit for the whole batch of inserts.
    request.db.commit()
    request.session.flash('New URLs were successfully added!')
    return HTTPFound(location=request.route_url('aggregate'))
# subscribers
@subscriber(NewRequest)
def new_request_subscriber(event):
    """Open a sqlite connection for each incoming request.

    The connection is stored on the request as ``request.db`` and
    ``close_db_connection`` is registered as a finished callback on it.

    Args:
        event: the ``NewRequest`` event; ``event.request`` is the request
            being started.
    """
    req = event.request
    db_path = req.registry.settings['db']
    req.db = sqlite3.connect(db_path)
    req.add_finished_callback(close_db_connection)
def close_db_connection(request):
    """Close the database connection stored on ``request.db``.

    Args:
        request: the request whose ``db`` attribute should be closed.
    """
    connection = request.db
    connection.close()
@subscriber(ApplicationCreated)
def application_created_subscriber(event):
    """Run ``schema.sql`` against the configured database at startup.

    Args:
        event: the ``ApplicationCreated`` event; the database path is read
            from ``event.app.registry.settings['db']``.

    Raises:
        OSError: if ``schema.sql`` next to this file cannot be read.
        sqlite3.Error: if the schema script fails to execute.
    """
    # `warning` replaces the deprecated `warn` alias; the level is unchanged.
    log.warning('Initializing database...')
    # Context manager closes the file even if reading fails.
    with open(os.path.join(here, 'schema.sql'), 'r') as f:
        stmt = f.read()
    settings = event.app.registry.settings
    db = sqlite3.connect(settings['db'])
    try:
        db.executescript(stmt)
        db.commit()
    finally:
        # The connection was previously never closed.
        db.close()
if __name__ == '__main__':
    # configuration settings: reload/debug switches plus the template
    # directory and database file, both located next to this script
    settings = {
        'reload_all': True,
        'debug_all': True,
        'mako.directories': os.path.join(here, 'templates'),
        'db': os.path.join(here, 'tasks.db'),
    }
    # session factory
    # NOTE(review): the session secret is hard-coded in source.
    session_factory = UnencryptedCookieSessionFactoryConfig('itsaseekreet')
    # configuration setup
    config = Configurator(settings=settings, session_factory=session_factory)
    # routes setup, registered in this order
    for route_name, pattern in (
        ('list', '/'),
        ('new', '/new'),
        ('close', '/close/{id}'),
        ('aggregate', '/aggregate'),
        ('newsite', '/newsite'),
    ):
        config.add_route(route_name, pattern)
    # static view setup
    config.add_static_view('static', os.path.join(here, 'static'))
    # scan for @view_config and @subscriber decorators
    config.scan()
    # serve app on every interface, port 8080, until interrupted
    app = config.make_wsgi_app()
    server = make_server('0.0.0.0', 8080, app)
    server.serve_forever()