-
Notifications
You must be signed in to change notification settings - Fork 74
/
fabfile.py
239 lines (160 loc) · 6.77 KB
/
fabfile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# coding=utf-8
from boto import ec2
from fabric.api import cd, env, put, run, settings, sudo
import os
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Environments
#
def local_vm():
    """
    Target the machine(s) described by the local environment.

    The login user is read from the VM_USER environment variable and the
    host list from VM_HOST, which is split on commas. A KeyError is raised
    if either variable is not set.
    """
    env.user = os.environ['VM_USER']
    hosts_csv = os.environ['VM_HOST']
    env.hosts = hosts_csv.split(',')
def ec2_vm():
    """
    Target a single ec2 machine, or a cluster of machines, all with the group
    name "govtrack-vm", for deployment.

    The login user and SSH key file come from the VM_USER and VM_KEY_FILENAME
    environment variables (KeyError if unset). AWS credentials are read from
    AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and are passed as None when
    unset (presumably boto then falls back to its own credential lookup --
    TODO confirm).

    Every instance in the security group that has a public DNS name is
    appended to env.hosts.

    Borrowed with ♥ from http://joet3ch.com/blog/2012/01/18/fabric-ec2/
    """
    access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
    secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

    env.user = os.environ['VM_USER']
    env.key_filename = os.environ['VM_KEY_FILENAME']

    ec2conn = ec2.connection.EC2Connection(access_key_id, secret_access_key)
    web_group = ec2conn.get_all_security_groups(groupnames=['govtrack-vm'])
    for instance in web_group[0].instances():
        hostname = instance.public_dns_name
        # Test by value, not identity: `is not ''` depends on string
        # interning and would also let a None hostname through. Instances
        # without a public DNS name are skipped.
        if hostname:
            env.hosts.append(hostname)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set-up commands
#
def install_packages(update=True):
    """
    Install necessary system packages. This is one of the few tasks whose
    commands actually have to live in the fabfile, since the govtrack code is
    not known to be in the latest state (or on the machine at all) when the
    task gets run.

    Here we install things like git and python, without which we can't
    proceed. When `update` is truthy the apt package index is refreshed
    first. The Let's Encrypt packages are installed afterwards via
    install_ssl_packages(), which receives the same `update` value.
    """
    system_packages = [
        'git', 'python-virtualenv', 'python-lxml', 'python-openid',
        'python-oauth2client', 'python-iso8601', 'python-numpy',
        'python-scipy', 'python-prctl', 'python-pip', 'libssl-dev',
        # For Solr
        'openjdk-8-jre', 'jetty8',
        # For PostgreSQL & MySQLclient support
        'libpq-dev',
        'libmysqlclient-dev',
        # For the web server
        'nginx',
    ]

    if update:
        sudo('apt update')

    sudo('apt install -y ' + ' '.join(system_packages))
    install_ssl_packages(update=update)
def install_ssl_packages(update=True):
    """
    Install the necessary packages for Let's Encrypt SSL certificates.

    When `update` is truthy, the certbot PPA is added and the apt package
    index is refreshed before certbot itself is installed.
    """
    refresh_commands = (
        'add-apt-repository ppa:certbot/certbot --yes',
        'apt-get update',
    )
    if update:
        for refresh_command in refresh_commands:
            sudo(refresh_command)

    sudo('apt-get install certbot --yes')
def configure_ssl():
    """ Run the repository's build/configure_ssl.sh script with sudo. """
    ssl_script = 'govtrack.us-web/build/configure_ssl.sh'
    sudo(ssl_script)
def pull_repo(folder, branch='master'):
    """
    Bring an existing checkout in `folder` up to date with origin/`branch`.

    Fetches all remotes; if the fetch succeeded, checks out `branch` and
    hard-resets it to origin/`branch`. Returns the result of the fetch
    command only, so callers can inspect `.failed` on it.
    """
    sync_commands = (
        'git checkout {branch}',
        'git reset --hard origin/{branch}',
    )
    with cd(folder):
        fetch_result = run('git fetch --all')
        # Only touch the working tree when the fetch went through.
        if not fetch_result.failed:
            for command_template in sync_commands:
                run(command_template.format(branch=branch))
    return fetch_result
def clone_repo(repo_url, folder, branch='master'):
    """
    Recursively clone `repo_url` into `folder`, then check out `branch`.

    Returns the result of the clone command.
    """
    clone_command = 'git clone --recursive {url} {folder}'.format(
        url=repo_url, folder=folder)
    clone_result = run(clone_command)

    checkout_command = 'git checkout {branch}'.format(branch=branch)
    with cd(folder):
        run(checkout_command)

    return clone_result
def pull_or_clone_repo(repo_url, folder, branch='master'):
    """
    Update the checkout in `folder`, cloning it from `repo_url` if the
    update attempt fails.

    The pull is attempted with warn_only enabled so that a failure is
    reported on the result instead of stopping the task.
    """
    # Optimistically treat the folder as an existing checkout.
    with settings(warn_only=True):
        pull_result = pull_repo(folder, branch=branch)

    if not pull_result.failed:
        return

    # The fetch failed, so start from a fresh clone.
    clone_repo(repo_url, folder, branch=branch)
def install_deps():
    """
    Install the Python dependencies with pip, as root.

    First upgrades everything listed in the repository's requirements.txt,
    then installs the extra packages that are only needed on a server.
    """
    server_only_packages = (
        # We don't need psycopg2 in the normal requirements. If postgres isn't
        # installed, the library installation will fail. Leave it out of the
        # list to make development easier.
        'psycopg2',
        'mysqlclient',
        # installs 'cryptography' package which requires libssl-dev which we
        # skip in local development
        'paypalrestsdk',
        # Similarly, we only need gunicorn if we're serving from a VM.
        'gunicorn',
        # We use honcho to manage the environment.
        'honcho jinja2',
        'honcho-export-systemd',
        # For backing up the data directory...
        'aws',
    )

    with cd('govtrack.us-web'):
        sudo('pip install --upgrade -r ./requirements.txt')

        for package_spec in server_only_packages:
            sudo('pip install {}'.format(package_spec))

        # TODO: Create a requirements.server.txt with all of the requirements
        #       so we don't have to install them all piecemeal.
def configure_solr():
    """ Run the repository's build/configure_solr.sh script with sudo. """
    solr_script = 'govtrack.us-web/build/configure_solr.sh'
    sudo(solr_script)
def configure_postgres():
    """
    Install PostgreSQL through apt and create the "govtrack" database,
    running createdb as the "postgres" user.
    """
    install_command = 'apt install -y postgresql'
    create_command = 'createdb govtrack'

    sudo(install_command)
    sudo(create_command, user='postgres')
def setenv(envfile, restart=True):
    """
    Upload environment variables to the target server(s).

    The local file `envfile` is uploaded as `.env` inside the govtrack.us-web
    checkout. Unless `restart` is falsy, the web server is restarted
    afterwards.
    """
    with cd('govtrack.us-web'):
        put(envfile, '.env')

    if not restart:
        return
    restart_webserver()
def printenv():
    """
    Print the `.env` file of the govtrack.us-web checkout and return the
    result of the `cat` command.
    """
    with cd('govtrack.us-web'):
        env_contents = run('cat .env')
    return env_contents
def update_db():
    """
    Run Django's non-interactive `syncdb` through honcho, from inside the
    govtrack.us-web checkout.
    """
    # NOTE: Will have to use `migrate` after Django upgrade.
    syncdb_command = 'honcho run ./manage.py syncdb --noinput'
    with cd('govtrack.us-web'):
        run(syncdb_command)
def update_assets():
    """
    Run the repository's `minify` script through honcho, from inside the
    govtrack.us-web checkout.
    """
    minify_command = 'honcho run ./minify'
    with cd('govtrack.us-web'):
        run(minify_command)
def bootstrap_data(congress=None):
    """
    Load an initial data set into the site.

    Downloads the people, usc_sections and billterms Django fixtures from
    govtrack.us and loads them, parses people and committees, runs the
    repository's rsync script, parses bills and votes for `congress`, and
    finally updates the search index.

    `congress` is interpolated verbatim into the `--congress=` option of the
    bill and vote parsers, so leaving it as None passes the literal text
    "None".
    """
    fixture_names = ('people', 'usc_sections', 'billterms')
    congress_parsers = ('bill', 'vote')

    with cd('govtrack.us-web'):
        # This seems very chicken-or-egg. Is the purpose of this to bootstrap
        # the existing site with as much data as already exists? Won't the
        # scrapers have to be run anyway?
        # The {a,b,c} in the URL is left as-is for the remote shell to expand.
        run('wget http://www.govtrack.us/data/db/django-fixture-{people,usc_sections,billterms}.json')
        for fixture_name in fixture_names:
            run('honcho run ./manage.py loaddata django-fixture-{}.json'.format(fixture_name))

        run('honcho run ./parse.py person')
        # fails b/c meeting data not available
        run('honcho run ./parse.py committee', warn_only=True)

        run('honcho run build/rsync.sh')
        for parser_name in congress_parsers:
            run('honcho run ./parse.py {} --congress={}'.format(parser_name, congress))

        run('honcho run ./manage.py update_index')
def configure_webserver():
    """
    Run the repository's build/configure_webserver.sh script with sudo, then
    restart the web server.
    """
    webserver_script = 'govtrack.us-web/build/configure_webserver.sh'
    sudo(webserver_script)
    restart_webserver()
def configure_cron():
    """ Run the repository's build/configure_cron.sh script with sudo. """
    cron_script = 'govtrack.us-web/build/configure_cron.sh'
    sudo(cron_script)
def restart_webserver():
    """ Run the repository's build/restart_webserver.sh script with sudo. """
    restart_script = 'govtrack.us-web/build/restart_webserver.sh'
    sudo(restart_script)
def deploy(envfile=None, branch='master', congress=None):
    """
    Deploy the site to the target server(s) from start to finish.

    Steps, in order: pull (or clone) the govtrack.us-web and
    congress-legislators repositories, install Python dependencies,
    configure Solr, optionally upload the environment file, update the
    database and static assets, optionally bootstrap data, then configure
    cron and the web server.

    Arguments:
    envfile  -- local path of an environment file to upload as `.env`;
                skipped when falsy.
    branch   -- branch of govtrack.us-web to deploy. congress-legislators
                always uses its default branch argument.
    congress -- when truthy, passed to bootstrap_data() to load initial
                data for that congress.

    The repository URLs are read from the GOVTRACK_WEB_GIT_URL and
    LEGISLATORS_GIT_URL environment variables; a KeyError is raised if
    either is missing.
    """
    pull_or_clone_repo(os.environ['GOVTRACK_WEB_GIT_URL'], 'govtrack.us-web', branch=branch)
    pull_or_clone_repo(os.environ['LEGISLATORS_GIT_URL'], 'congress-legislators')
    install_deps()
    configure_solr()

    if envfile:
        # No restart yet: the web server is (re)started at the end.
        setenv(envfile, restart=False)

    update_db()
    update_assets()
    if congress:
        bootstrap_data(congress=congress)
    configure_cron()
    # This used to call configure_nginx(), which is not defined anywhere in
    # this fabfile and raised NameError. configure_webserver() is the task
    # that sets up the web server, and it restarts the server itself, so no
    # separate restart_webserver() call is needed afterwards.
    configure_webserver()
def backup_data():
    """ Run the repository's build/backup_data.sh script with sudo. """
    backup_script = 'govtrack.us-web/build/backup_data.sh'
    sudo(backup_script)
def clean():
    """
    Remove the govtrack.us-web and congress-legislators checkouts from the
    target server(s).
    """
    for checkout_dir in ('govtrack.us-web', 'congress-legislators'):
        run('rm -rf {}'.format(checkout_dir))