-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtasks.py
executable file
·322 lines (246 loc) · 9.24 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
#!/usr/bin/env python3
"""Common tasks for Korp.
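tasks.py <front|back> [lang] <build|push|run|bap|sync-settings> [--cwbfiles DIR] [--backend URL]

Positional arguments are recognised by their value, so they may be given in any order.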
"""
import argparse
import os
from dataclasses import dataclass
# Container registry host that the push_* functions tag images for and push to.
ACR = "gtlabcontainerregistry.azurecr.io"
# Instance names. Not referenced anywhere else in the visible part of this file.
INSTANCES = ["smi", "nsu", "other"]
# Supported language codes. The position of a code in this list is its port
# offset in port_of(), so reordering the list changes the port assignments.
LANGS = [
    "sma", "sme", "smj", "smn", "sms", "koi",
    "kpv", "mdf", "mhr", "mrj", "myv", "udm",
    "fao", "fit", "fkv", "olo", "vep", "vro",
]
# Command words that parse_args() recognises among the positional arguments.
CMDS = ["build", "push", "sync-settings", "run", "bap"]
def port_of(frontorback, lang):
    """Return the host port assigned to the front- or backend of *lang*.

    Ports start at 1390. Backends take the first len(LANGS) ports, in LANGS
    order; when *frontorback* is "front" the port is shifted up by len(LANGS),
    so frontends occupy the next block in the same order.

    Raises:
        ValueError: If *lang* is not in LANGS.
    """
    base_port = 1390
    frontend_shift = len(LANGS) if frontorback == "front" else 0
    return base_port + frontend_shift + LANGS.index(lang)
# Dockerfile for the frontend image; build_front() feeds it to podman on stdin.
#
# Stage 1 ("builder") clones korp-frontend, copies in the config file selected
# by the "instance" build argument plus the extra translations, and runs the
# yarn build. Stage 2 copies the built files into an nginx image.
#
# The generated /entry.sh rewrites korp_backend_url in the built .js file to
# the value of the BACKEND environment variable when the container starts.
# It exits with status 1 when BACKEND is unset; previously it only printed the
# "Fatal" message and went on to write an empty URL.
DOCKERFILE_FRONTEND = """
FROM docker.io/library/debian:bookworm AS builder
ARG instance
RUN <<EOF
set -eux
apt-get update
apt-get install -y --no-install-recommends git nginx npm
npm install --global yarn
git clone --branch master --depth 1 https://github.com/spraakbanken/korp-frontend.git /korp/korp-frontend
EOF
WORKDIR /korp
COPY ./gtweb2_config/front/config-${instance}.yaml /korp/korp-frontend/app/config.yml
RUN mkdir -p /korp/korp-frontend/app/modes
# yarn build failed on this file not being present, with "invalid syntax",
# because some file did a require() on this, which some earlier part of the
# build process replaced with some non-js text about the file not being
# found.
RUN touch /korp/korp-frontend/app/modes/default_mode.js
# Extra translation files for the frontend
COPY ./gtweb2_config/translations/* /korp/korp-frontend/app/translations
RUN <<EOF
set -eux
cd /korp/korp-frontend
yarn
yarn build
EOF
FROM docker.io/library/nginx
#COPY ./korp-nginx-frontend.conf /etc/nginx/conf.d/default.conf
COPY --from=builder /korp/korp-frontend/dist /usr/share/nginx/html/
RUN grep -l "korp_backend_url:[ ]\\?\\"[^\\"]\\+\\"" /usr/share/nginx/html/*.js > /js_file
RUN <<EOF
if [ $(wc -l </js_file) -ne 1 ]; then
echo "IMAGE BUILD ERROR: Cannot find .js file with defintion of korp_backend_url, cannot continue."
exit 1;
fi
EOF
RUN echo '#!/bin/bash' > /entry.sh
RUN echo 'if [ ! -v BACKEND ]; then' >>/entry.sh
RUN echo ' echo "Fatal: env var BACKEND is not set"' >>/entry.sh
RUN echo '  exit 1' >>/entry.sh
RUN echo 'fi' >>/entry.sh
RUN echo 'sed -i "s,korp_backend_url:[ ]\\?\\"[^\\"]\\+\\",korp_backend_url: \\"${BACKEND}\\","' "$(cat /js_file)" >>/entry.sh
RUN echo 'exec "$@"' >>/entry.sh
RUN chmod +x /entry.sh
ENTRYPOINT [ "/entry.sh" ]
CMD ["nginx", "-g", "daemon off;"]
"""
# Dockerfile for the backend image; build_back() feeds it to podman on stdin.
#
# Stage 1 ("builder") installs the build dependencies and the CWB .deb, clones
# korp-backend and installs its requirements plus gunicorn into /opt/venv.
# Stage 2 copies the virtual environment, the cloned repository and the needed
# CWB binaries/library into a fresh image and serves the app with gunicorn on
# port 1234 (the container port that run_back() publishes).
#
# NOTE: this is a regular (non-raw) string literal, so Python drops every
# backslash that is immediately followed by a newline. The multi-line RUN
# commands below therefore reach podman already joined into single lines.
DOCKERFILE_BACKEND = """
FROM docker.io/library/debian:bookworm AS builder
# anders: we have to install headers for pypi mysqlclient to be able to build
# https://github.com/PyMySQL/mysqlclient/tree/main#linux
RUN set -eux && \
apt-get update && \
apt-get install --no-install-recommends -y \
git curl build-essential \
python3 python3-venv python3-pip python3-dev \
default-libmysqlclient-dev libglib2.0-0 libpcre3
# CWB
RUN set -eux && \
curl --location --silent --show-error --create-dirs --output-dir /cwb --remote-name https://downloads.sourceforge.net/project/cwb/cwb/cwb-3.5/deb/cwb_3.5.0-1_amd64.deb && \
dpkg -i $(find /cwb -name "*.deb")
# Memcached server ? MariaDB Server?
WORKDIR /korp
RUN set -eux && \
git clone --depth 1 https://github.com/spraakbanken/korp-backend.git
#git clone --branch giellatekno --single-branch --depth 1 https://github.com/giellatekno/korp-backend.git /korp/korp-backend
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
RUN pip install --disable-pip-version-check --requirement /korp/korp-backend/requirements.txt
RUN pip install gunicorn
FROM docker.io/library/debian:bookworm
RUN set -eux; \
apt-get update; \
apt-get install --no-install-recommends -y python3 libmariadb3 libglib2.0-0 libpcre3
# the created virtual environment, with all requirements installed
COPY --from=builder /opt/venv /opt/venv
# the code repository, as cloned from git
COPY --from=builder /korp/korp-backend /korp/korp-backend
# the CWB binaries we need
COPY --from=builder /usr/bin/cqp /usr/bin/cqp
COPY --from=builder /usr/bin/cwb-scan-corpus /usr/bin/cwb-scan-corpus
COPY --from=builder /usr/lib/libcl.so /usr/lib/libcl.so
# This essentially activates the virutal environment
ENV PATH="/opt/venv/bin:$PATH"
WORKDIR /korp/korp-backend
CMD ["gunicorn", "--worker-class", "gevent", "--bind", "0.0.0.0:1234", "--workers", "4", "--max-requests", "250", "--limit-request-line", "0", "run:create_app()" ]
"""
def run_cmd(cmd, *args, **kwargs):
    """Echo a command to stdout, then execute it with subprocess.run().

    Args:
        cmd: The command, either as a list of arguments or as one string.
            A string is split into arguments with shlex.split().
        *args: Extra positional arguments, forwarded to subprocess.run().
            (They used to be accepted but silently dropped.)
        **kwargs: Keyword arguments forwarded to subprocess.run(), for
            example ``input`` and ``encoding``.

    Returns:
        The subprocess.CompletedProcess of the finished command, so callers
        can inspect ``returncode``; None if the command was interrupted
        with Ctrl-C.

    Raises:
        TypeError: If *cmd* is neither a list nor a string.
    """
    # Function-scope imports: the module itself does not import these names.
    from shlex import split as split_cmd
    from subprocess import run

    if isinstance(cmd, str):
        cmd = split_cmd(cmd)
    if not isinstance(cmd, list):
        raise TypeError("argument 'cmd' must be a list or a string")

    print(" ".join(cmd))
    try:
        return run(cmd, *args, **kwargs)
    except KeyboardInterrupt:
        # Swallowed on purpose so that interrupting the command ends quietly
        # instead of with a traceback.
        return None
def build_front(lang):
    """Build the ``korp-frontend-<lang>`` image from DOCKERFILE_FRONTEND.

    The Dockerfile text is passed to podman on stdin (``-f -``) and the
    current working directory is used as the build context. *lang* is also
    passed as the ``instance`` build argument.

    Args:
        lang: Name used for the image tag and the ``instance`` build arg.

    Raises:
        TypeError: If *lang* is not a string.
    """
    if not isinstance(lang, str):
        raise TypeError("argument 'lang' must be a string")

    # Build the argv list directly rather than a string that gets split
    # later: a working directory containing whitespace would otherwise be
    # torn into several arguments.
    cmd = [
        "podman",
        "build",
        "-t",
        f"korp-frontend-{lang}",
        f"--build-arg=instance={lang}",
        "-f",
        "-",
        os.getcwd(),
    ]
    run_cmd(cmd, input=DOCKERFILE_FRONTEND, encoding="utf-8")
def build_back():
    """Build the ``korp-backend`` image from DOCKERFILE_BACKEND.

    The Dockerfile text is passed to podman on stdin (``-f -``) and the
    current working directory is used as the build context.
    """
    # An argv list keeps a working directory that contains whitespace intact;
    # a command string would be split on it.
    cmd = ["podman", "build", "-t", "korp-backend", "-f", "-", os.getcwd()]
    run_cmd(cmd, input=DOCKERFILE_BACKEND, encoding="utf-8")
def run_front(lang, backend):
    """Run the frontend container for *lang* with podman.

    Args:
        lang: Language whose ``korp-frontend-<lang>`` image is started.
        backend: Value for the container's BACKEND environment variable.
            When None, the local backend port of the same language is used.
    """
    backend_url = backend
    if backend_url is None:
        backend_url = f"http://localhost:{port_of('back', lang)}"

    image = f"korp-frontend-{lang}"
    host_port = port_of("front", lang)
    # The container serves on port 80; publish it on the language's own port.
    run_cmd(
        f"podman run --rm --name {image} "
        f"-e BACKEND={backend_url} "
        f"-p {host_port}:80 {image}"
    )
def run_back(lang, cwbfiles):
    """Run the ``korp-backend`` container for *lang* with podman.

    Container port 1234 is published on the backend port of *lang*, and three
    host paths are mounted: the backend config file and the corpus config
    directory of *lang* (both under ``gtweb2_config`` in the current working
    directory), and *cwbfiles*.

    Args:
        lang: One of LANGS.
        cwbfiles: Host path mounted at ``/corpora/gt_cwb``.

    Raises:
        ValueError: If *lang* is not in LANGS.
        TypeError: If *cwbfiles* is not a string.
    """
    if lang not in LANGS:
        raise ValueError(f"unknown language: {lang!r}")
    if not isinstance(cwbfiles, str):
        raise TypeError("argument 'cwbfiles' must be a string")

    print(lang, cwbfiles)
    cwd = os.getcwd()
    config_dir = f"{cwd}/gtweb2_config"
    # Pass an argv list: both the working directory and the user-supplied
    # cwbfiles path may contain whitespace, which splitting a command string
    # would break apart.
    cmd = [
        "podman",
        "run",
        "--name",
        f"korp-backend-{lang}",
        "--rm",
        "--replace",
        "-p",
        f"{port_of('back', lang)}:1234",
        "-v",
        f"{config_dir}/config.py:/korp/korp-backend/instance/config.py",
        "-v",
        f"{config_dir}/corpus_configs/{lang}:/corpora/corpus_config",
        "-v",
        f"{cwbfiles}:/corpora/gt_cwb",
        "korp-backend",
    ]
    run_cmd(cmd)
def push_front(lang):
    """Tag the local frontend image of *lang* for the registry and push it."""
    local_image = f"korp-frontend-{lang}"
    remote_image = f"{ACR}/{local_image}"
    run_cmd(f"podman tag {local_image} {remote_image}")
    run_cmd(f"podman push {remote_image}")
def push_back():
    """Tag the local ``korp-backend`` image for the registry and push it."""
    remote_image = f"{ACR}/korp-backend"
    run_cmd(f"podman tag korp-backend {remote_image}")
    run_cmd(f"podman push {remote_image}")
def bap_front(lang):
    """Build the frontend image for *lang*, then push it ("bap")."""
    # NOTE(review): the push step runs regardless of how the build went;
    # neither step reports an exit status back to this function.
    for step in (build_front, push_front):
        step(lang)
def bap_back():
    """Build the backend image, then push it ("bap")."""
    # NOTE(review): the push step runs regardless of how the build went;
    # neither step reports an exit status back to this function.
    for step in (build_back, push_back):
        step()
def sync_settings():
    """Copy the local ``gtweb2_config/`` directory to the server with rsync."""
    # The trailing slash on the source means "everything in the directory".
    source = "gtweb2_config/"
    destination = "gtweb-02.uit.no:/home/services/korp/config/"
    # -r = recursively, -v = verbose
    run_cmd(f"rsync -rv --stats {source} {destination}")
@dataclass
class Args:
cmd: str # Literal["build", "run"]
frontorback: str # Literal["front", "back"]
lang: str
cwbfiles: str
backend: str
def parse_args(argv=None):
    """Parse the command line into an Args instance.

    Positional arguments are classified by value rather than position: a
    member of LANGS is the language, "front"/"back" selects the component and
    a member of CMDS is the command. They may therefore come in any order.

    Args:
        argv: Argument list to parse. Defaults to None, in which case
            argparse reads ``sys.argv[1:]``.

    Returns:
        An Args whose fields are None for everything that was not given.

    Exits via ``parser.error()`` when a language, component or command is
    given more than once. Positional arguments that match none of the three
    categories are ignored, with a warning on stderr.
    """
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("args", nargs="*")
    parser.add_argument("--cwbfiles")
    parser.add_argument("--backend")
    parsed = parser.parse_args(argv)

    cmd = None
    frontorback = None
    lang = None
    for arg in parsed.args:
        if arg in LANGS:
            if lang is not None:
                parser.error("multiple languages given, only give one")
            lang = arg
        elif arg in ("front", "back"):
            if frontorback is not None:
                parser.error("can't give both front and back, choose one")
            frontorback = arg
        elif arg in CMDS:
            if cmd is not None:
                parser.error("can only give one command")
            cmd = arg
        else:
            # Still ignored, as before, but no longer silently: a typo such
            # as "biuld" would otherwise only surface as a confusing
            # "no cmd found" error later on.
            print(
                f"warning: ignoring unrecognized argument: {arg}",
                file=sys.stderr,
            )

    return Args(
        cmd=cmd,
        frontorback=frontorback,
        lang=lang,
        cwbfiles=parsed.cwbfiles,
        backend=parsed.backend,
    )
if __name__ == "__main__":
match parse_args():
case Args("build", "front", lang=None) as args:
print("error: build front: missing argument: lang")
print(" give one of:", ", ".join(LANGS))
case Args("build", "front", lang):
build_front(lang)
case Args("build", "back") as args:
build_back()
case Args("run", "front", lang=None) as args:
print("error: run front: missing 3rd argument: lang")
print(f" give one of: {', '.join(LANGS)}")
case Args("run", "front", lang, _cwbfiles, backend) as args:
run_front(lang, backend)
case Args("run", "back", cwbfiles=None) as args:
print("Need to specify --cwbfiles <path to built cwb files>")
case Args("run", "back", lang, cwbfiles) as args:
run_back(lang, cwbfiles)
case Args("push", "back"):
push_back()
case Args("push", "front", lang=None):
print("error: push front: missing argument: lang")
print(f" give one of: {', '.join(LANGS)}")
case Args("push", "front", lang):
push_front(lang)
case Args("bap", "front", lang=None):
print("error: bap front: missing argument: lang")
print(f" give one of: {', '.join(LANGS)}")
case Args("run" | "build", frontorback) as args:
print("error: front or back?")
case Args("sync-settings"):
sync_settings()
case Args(cmd):
print("error: no cmd found in arguments")
print(f"give one of: {', '.join(CMDS)}")