Skip to content

Commit

Permalink
we are more production-ready with this commit than ever
Browse files Browse the repository at this point in the history
  • Loading branch information
dnaaun committed Jul 9, 2020
1 parent b28ed24 commit 0017cb8
Show file tree
Hide file tree
Showing 11 changed files with 248 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This is in .env because all the services need it.
# docker-compose.yml substitutes these values into the redis and worker
# commands and passes them down to the web and worker containers.
REDIS_HOST=redis
# NOTE(review): Redis's default port is 6379; 6739 looks like a transposed
# typo. It still works because every service reads the port from this file,
# but confirm whether the non-default port is intentional.
REDIS_PORT=6739
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Store tarballs (e.g. services/web/mallet-2.0.8.tar.gz) in Git LFS instead of
# the regular object store; `-text` disables line-ending conversion for them.
*.tar.gz filter=lfs diff=lfs merge=lfs -text
60 changes: 60 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
version: '3.7'

services:
  # Flask API, served by gunicorn on port 5000 (see services/web/Dockerfile).
  web:
    image: openframing_web_image
    build:
      context: ./services/web
    expose:
      - "5000"
    volumes:
      # Shared with nginx, which serves the frontend files directly.
      - frontend_volume:/home/app/web/frontend/
      # Shared with the workers below.
      - project_data_volume:/home/app/project_data_directory
      - transformers_cache_volume:/home/app/transformers_cache_directory
    depends_on:
      - redis
    environment:  # Pass down from .env file
      - REDIS_HOST
      - REDIS_PORT

  # Public entry point: proxies /api to `web` and serves the frontend volume.
  nginx:
    build: ./services/nginx
    volumes:
      - frontend_volume:/home/app/frontend
    ports:
      - "80:80"
    depends_on:
      - web

  # Job queue backend for the rq workers.
  redis:
    image: redis:6.0.5-alpine
    command: "redis-server --port ${REDIS_PORT}"
    expose:
      - "${REDIS_PORT}"

  # rq worker consuming the "classifiers" queue. Reuses the image built by `web`.
  classifiers_worker:
    image: openframing_web_image
    command: ["rq", "worker", "--url", "redis://${REDIS_HOST}:${REDIS_PORT}", "classifiers"]
    volumes:
      # The image declares these paths as VOLUMEs. Without named mounts each
      # worker would get its own anonymous volume and would not see the data
      # written by `web` (and vice versa).
      - project_data_volume:/home/app/project_data_directory
      - transformers_cache_volume:/home/app/transformers_cache_directory
    depends_on:
      - redis
    # We need to pass them here, in addition to `command` above, because the
    # worker will import settings.py, which needs them to be set.
    environment:  # Pass down from .env file
      - REDIS_HOST
      - REDIS_PORT

  # rq worker consuming the "topic_models" queue. Reuses the image built by `web`.
  topic_model_worker:
    image: openframing_web_image
    command: ["rq", "worker", "--url", "redis://${REDIS_HOST}:${REDIS_PORT}", "topic_models"]
    volumes:
      # Share the project data directory with `web` (see classifiers_worker).
      - project_data_volume:/home/app/project_data_directory
    depends_on:
      - redis
    # We need to pass them here, in addition to `command` above, because the
    # worker will import settings.py, which needs them to be set.
    environment:  # Pass down from .env file
      - REDIS_HOST
      - REDIS_PORT

volumes:
  frontend_volume:
  project_data_volume:
  transformers_cache_volume:
4 changes: 4 additions & 0 deletions services/nginx/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# nginx image that serves the frontend and proxies the API (see nginx.conf).
FROM nginx:1.19.0-alpine

# Drop the stock server block so that only our configuration is loaded.
RUN rm -f /etc/nginx/conf.d/default.conf
COPY nginx.conf /etc/nginx/conf.d/
20 changes: 20 additions & 0 deletions services/nginx/nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# `web` is the docker-compose service name; it exposes port 5000.
upstream backend {
server web:5000;
}

server {

listen 80;

# Forward API requests to the backend upstream defined above.
location /api {
proxy_pass http://backend;
# Pass the client address chain and the original Host header upstream.
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Host $host;
proxy_redirect off;
}

# Everything else is served as static files from the frontend volume
# (mounted at /home/app/frontend by docker-compose.yml).
location / {
alias /home/app/frontend/;
}

}
38 changes: 38 additions & 0 deletions services/web/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Paths excluded from the build context of services/web (and therefore from
# the `COPY . $APP_HOME` step in its Dockerfile).

# For the uploaded files and model weights
project_data/
transformers_cache/

# For the database
sqlite.db

# vim related
Session.vim

# related to virt env openFraming
openFraming
venv

# py cache and swap files
*.pyc
*.swp

# Python/mypy related
__pycache__/
.mypy_cache/
.dmypy.json

# presumably the Flask instance folder -- TODO confirm
instance/

# Test and coverage artifacts
.pytest_cache/
.coverage
htmlcov/

# IDE project files
.idea

# OSX related files
.DS_Store


# Python packaging artifacts
dist/
build/
*.egg-info/
77 changes: 77 additions & 0 deletions services/web/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Using Multistage builds to make the final image smaller.

#### Stage one #####
FROM python:3.8-slim-buster as builder

# set work directory
WORKDIR /usr/src/app

# set environment variables
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED 1

# Build wheels for the listed requirements only (--no-deps skips their
# dependencies); stage two installs from /usr/src/app/wheels.
COPY ./backend/requirements_no_gpu.txt .
RUN pip wheel \
--no-cache-dir \
--no-deps \
--wheel-dir \
/usr/src/app/wheels -r requirements_no_gpu.txt


### Stage two #####
FROM python:3.8-slim-buster

# create the app user
RUN groupadd -r app -g 999 && useradd -r -g app -u 999 app

## install dependencies
COPY --from=builder /usr/src/app/wheels /wheels
RUN pip install --no-cache /wheels/*

# Install Gosu
# NOTE(review): gosu is installed but never invoked in this Dockerfile (the
# image switches user with `USER app` below) -- confirm it is still needed.
RUN set -eux; \
apt-get update; \
apt-get install -y gosu; \
rm -rf /var/lib/apt/lists/*; \
# verify that the binary works
gosu nobody true

# create directory for the app user
ENV HOME=/home/app
RUN mkdir -p $HOME

# Prepare mallet installation
# ADD (unlike COPY) unpacks a local tar archive, yielding $HOME/mallet-2.0.8/.
ADD mallet-2.0.8.tar.gz $HOME/
ENV MALLET_BIN_DIRECTORY=$HOME/mallet-2.0.8/bin
# Prepare project data directory, this is actually a volume
# Handled by docker-compose.yml
ENV PROJECT_DATA_DIRECTORY=$HOME/project_data_directory
# This is similarly a volume.
ENV TRANSFORMERS_CACHE_DIRECTORY=$HOME/transformers_cache_directory
# Flask env to make sure flask doesn't serve
# static files
ENV FLASK_ENV=production

# Set up the app directory
ENV APP_HOME=/home/app/web
RUN mkdir $APP_HOME
# copy project
COPY . $APP_HOME

# Make sure the volumes are owned by the app user
# The chown happens before each VOLUME declaration on purpose: build steps
# that change a volume's contents after it is declared may be discarded.
RUN mkdir -p $PROJECT_DATA_DIRECTORY && chown app:app $PROJECT_DATA_DIRECTORY
VOLUME $PROJECT_DATA_DIRECTORY
RUN mkdir -p $TRANSFORMERS_CACHE_DIRECTORY && chown app:app $TRANSFORMERS_CACHE_DIRECTORY
VOLUME $TRANSFORMERS_CACHE_DIRECTORY

# chown all the files to the app user
RUN chown -R app:app $HOME
# change to the app user
USER app

# Needed because all the python imports look like
# from flask_app import ...
# and not from backend.flask_app import ...
WORKDIR $APP_HOME/backend/

# Serve the Flask app factory with 4 gunicorn workers on port 5000, the port
# that nginx proxies to (`server web:5000`).
CMD [ "gunicorn", "-w", "4", "-b", "0.0.0.0:5000", "flask_app.app:create_app()" ]
22 changes: 13 additions & 9 deletions services/web/backend/flask_app/app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""All the flask api endpoints."""
import csv
from flask_app.version import Version
import logging
import typing as T
from collections import Counter
Expand All @@ -13,6 +12,13 @@
from flask import Flask
from flask import Response
from flask import send_file
from flask_app import db
from flask_app import utils
from flask_app.modeling.classifier import ClassifierMetrics
from flask_app.modeling.enqueue_jobs import Scheduler
from flask_app.settings import needs_settings_init
from flask_app.settings import Settings
from flask_app.version import Version
from flask_restful import Api # type: ignore
from flask_restful import reqparse
from flask_restful import Resource
Expand All @@ -24,13 +30,6 @@
from werkzeug.exceptions import HTTPException
from werkzeug.exceptions import NotFound

from flask_app import db
from flask_app import utils
from flask_app.modeling.classifier import ClassifierMetrics
from flask_app.modeling.enqueue_jobs import Scheduler
from flask_app.settings import needs_settings_init
from flask_app.settings import Settings

API_URL_PREFIX = "/api"

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -877,7 +876,12 @@ def create_app(logging_level: int = logging.WARNING) -> Flask:
"""
logging.basicConfig()
logger.setLevel(logging_level)
app = Flask(__name__, static_url_path="/", static_folder="../../frontend")

# Usually, we'd read this from app.config, but we need it to create app.config ...
if Settings.FLASK_ENV == "development":
app = Flask(__name__, static_url_path="/", static_folder="../../frontend")
else:
app = Flask(__name__)

# Create project root if necessary
if not Settings.PROJECT_DATA_DIRECTORY.exists():
Expand Down
7 changes: 3 additions & 4 deletions services/web/backend/flask_app/modeling/enqueue_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
import typing as T

import typing_extensions as TT
from redis import Redis
from rq import Queue # type: ignore

from flask_app import db
from flask_app.modeling.classifier import ClassifierModel
from flask_app.modeling.lda import Corpus
from flask_app.modeling.lda import LDAModeler
from flask_app.settings import Settings
from redis import Redis
from rq import Queue # type: ignore

logging.basicConfig()
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -144,7 +143,7 @@ def do_topic_model_related_task(task_args: TopicModelTrainingTaskArgs) -> None:

class Scheduler(object):
def __init__(self) -> None:
connection = Redis()
connection = Redis(host=Settings.REDIS_HOST, port=Settings.REDIS_PORT)
is_async = True
self.classifiers_queue = Queue(
name="classifiers", connection=connection, is_async=is_async
Expand Down
30 changes: 26 additions & 4 deletions services/web/backend/flask_app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@


class SettingsFromOutside(TT.TypedDict):
"""Required to be set by the web admin (usually through environment variables)"""
"""These settings must be set using environment variables."""

PROJECT_DATA_DIRECTORY: str
TRANSFORMERS_CACHE_DIRECTORY: T.Optional[str]
MALLET_BIN_DIRECTORY: str
FLASK_ENV: TT.Literal["development", "production"]
REDIS_HOST: str
REDIS_PORT: int


class Settings:
Expand All @@ -42,6 +45,9 @@ class Settings:
TRANSFORMERS_CACHE_DIRECTORY: Path
DATABASE_FILE: Path
MALLET_BIN_DIRECTORY: Path
FLASK_ENV: TT.Literal["development", "production"]
REDIS_HOST: str
REDIS_PORT: int

SUPPORTED_NON_CSV_FORMATS: T.Set[str] = {".xls", ".xlsx"}

Expand All @@ -54,19 +60,29 @@ def is_initialized_already(cls) -> bool:
@classmethod
def initialize_from_env(cls) -> None:
try:
any_flask_env = os.environ["FLASK_ENV"]
assert any_flask_env in ["production", "development"]
flask_env: TT.Literal["production", "development"] = any_flask_env # type: ignore[assignment]

settings_dict = SettingsFromOutside(
{
"PROJECT_DATA_DIRECTORY": os.environ["PROJECT_DATA_DIRECTORY"],
"TRANSFORMERS_CACHE_DIRECTORY": os.environ.get( # Not required, we have a fallback, look at initialize_from_dict
"TRANSFORMERS_CACHE_DIRECTORY", ""
),
"TRANSFORMERS_CACHE_DIRECTORY": os.environ[
"TRANSFORMERS_CACHE_DIRECTORY"
],
"MALLET_BIN_DIRECTORY": os.environ["MALLET_BIN_DIRECTORY"],
"FLASK_ENV": flask_env,
"REDIS_HOST": os.environ["REDIS_HOST"],
"REDIS_PORT": int(os.environ["REDIS_PORT"]),
}
)
cls.initialize_from_dict(settings_dict)
except KeyError as e:
logger.critical("You did not define one or more environment variable(s).")
raise e
except BaseException as e:
logger.critical("You did not set one or more environment *correctly*.")
raise e

@classmethod
def initialize_from_dict(cls, settings_dict: SettingsFromOutside) -> None:
Expand All @@ -86,6 +102,9 @@ def initialize_from_dict(cls, settings_dict: SettingsFromOutside) -> None:
)
cls.DATABASE_FILE = Path(cls.PROJECT_DATA_DIRECTORY) / "sqlite.db"
cls.MALLET_BIN_DIRECTORY = Path(settings_dict["MALLET_BIN_DIRECTORY"])
cls.FLASK_ENV = settings_dict["FLASK_ENV"]
cls.REDIS_HOST = settings_dict["REDIS_HOST"]
cls.REDIS_PORT = settings_dict["REDIS_PORT"]
cls._initialized_already = True

@classmethod
Expand All @@ -97,6 +116,9 @@ def deinitialize(cls) -> None:
"TRANSFORMERS_CACHE_DIRECTORY",
"DATABASE_FILE",
"MALLET_BIN_DIRECTORY",
"FLASK_ENV",
"REDIS_HOST",
"REDIS_PORT",
]:
if hasattr(cls, attr):
delattr(cls, attr)
Expand Down
3 changes: 3 additions & 0 deletions services/web/mallet-2.0.8.tar.gz
Git LFS file not shown

0 comments on commit 0017cb8

Please sign in to comment.