Skip to content

Commit

Permalink
Add CSV file upload
Browse files Browse the repository at this point in the history
- replace single zip file upload with two file uploads: one h5 file, one csv file
- replace `/input_file` endpoint with `/input_h5_file` and `/input_csv_file`
- update model, endpoints and runner accordingly
- validate csv file on frontend
  - parse first line and extract column names
  - require that "barcode", "cdr3", "chain" are present
- resolves #3
- validate file sizes
  - h5 must be less than 50MB
  - csv must be less than 10MB
  - resolves #2
- increase nginx/flask body limit to 100MB
- make runner into a package, add initial test using requests-mock
  • Loading branch information
lkeegan committed Sep 30, 2024
1 parent fb6a7de commit d869e3b
Show file tree
Hide file tree
Showing 19 changed files with 285 additions and 140 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ jobs:
name: "Docker website"
steps:
- uses: actions/checkout@v4
- run: echo "VITE_REST_API_LOCATION=https://predictcr.lkeegan.dev/api" > frontend/.env
- run: docker compose build
- uses: docker/login-action@v3
with:
Expand Down Expand Up @@ -92,8 +91,8 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- run: pip install pytest
- run: python -m pytest -sv
- run: pip install -e .[tests]
- run: pytest -sv
runner-docker:
runs-on: ubuntu-latest
name: "Docker runner"
Expand Down
4 changes: 2 additions & 2 deletions README_DEPLOYMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Some information on how to deploy the website.

Production docker container images are automatically built by CI.
To deploy the latest version on a virtual machine with docker compose installed,
download [docker compose.yml](https://raw.githubusercontent.com/ssciwr/predicTCR/main/docker compose.yml), then do
download [docker-compose.yml](https://raw.githubusercontent.com/ssciwr/predicTCR/main/docker-compose.yml), then do

```
sudo docker compose pull
Expand All @@ -16,7 +16,7 @@ sudo docker compose up -d
The location of data directory, SSL keys and secret key should be set
either in env vars or in a file `.env` in the same location as the docker-compose.yml.

For example the current deployment on heicloud looks like this:
For example the current test deployment on heicloud looks like this:

```
PREDICTCR_DATA="/home/ubuntu/predicTCR/docker_volume"
Expand Down
33 changes: 26 additions & 7 deletions backend/src/predicTCR_server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def create_app(data_path: str = "/predictcr_data"):
app.config["JWT_ACCESS_TOKEN_EXPIRES"] = datetime.timedelta(minutes=60)
app.config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{data_path}/predicTCR.db"
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
# limit max file upload size to 20mb
app.config["MAX_CONTENT_LENGTH"] = 20 * 1024 * 1024
# limit max file upload size to 100mb
app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024
app.config["PREDICTCR_DATA_PATH"] = data_path

CORS(app)
Expand Down Expand Up @@ -147,9 +147,9 @@ def change_password():
def samples():
return get_samples(current_user.email)

@app.route("/api/input_file", methods=["POST"])
@app.route("/api/input_h5_file", methods=["POST"])
@jwt_required()
def input_file():
def input_h5_file():
sample_id = request.json.get("sample_id", None)
logger.info(
f"User {current_user.email} requesting results for sample {sample_id}"
Expand All @@ -163,7 +163,25 @@ def input_file():
if user_sample is None:
logger.info(f" -> sample {sample_id} not found")
return jsonify(message="Sample not found"), 400
return flask.send_file(user_sample.input_file_path(), as_attachment=True)
return flask.send_file(user_sample.input_h5_file_path(), as_attachment=True)

@app.route("/api/input_csv_file", methods=["POST"])
@jwt_required()
def input_csv_file():
    """Send the input csv file of the requested sample as an attachment.

    The sample is identified by ``sample_id`` in the JSON request body.
    Admin and runner users may request any sample; all other users are
    restricted to samples whose email matches their own.

    Returns a JSON message with status 400 if no matching sample is found.
    """
    sample_id = request.json.get("sample_id", None)
    # Log what is actually being requested: this endpoint serves the input
    # csv file, not the results.
    logger.info(
        f"User {current_user.email} requesting input csv file for sample {sample_id}"
    )
    filters = {"id": sample_id}
    # Regular users can only see their own samples.
    if not current_user.is_admin and not current_user.is_runner:
        filters["email"] = current_user.email
    user_sample = db.session.execute(
        db.select(Sample).filter_by(**filters)
    ).scalar_one_or_none()
    if user_sample is None:
        logger.info(f"  -> sample {sample_id} not found")
        return jsonify(message="Sample not found"), 400
    return flask.send_file(user_sample.input_csv_file_path(), as_attachment=True)

@app.route("/api/result", methods=["POST"])
@jwt_required()
Expand Down Expand Up @@ -199,10 +217,11 @@ def add_sample():
name = form_as_dict.get("name", "")
tumor_type = form_as_dict.get("tumor_type", "")
source = form_as_dict.get("source", "")
infile = request.files.get("file")
h5_file = request.files.get("h5_file")
csv_file = request.files.get("csv_file")
logger.info(f"Adding sample {name} from {email}")
new_sample, error_message = add_new_sample(
email, name, tumor_type, source, infile
email, name, tumor_type, source, h5_file, csv_file
)
if new_sample is not None:
logger.info(" - > success")
Expand Down
17 changes: 12 additions & 5 deletions backend/src/predicTCR_server/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ def _base_path(self) -> pathlib.Path:
data_path = flask.current_app.config["PREDICTCR_DATA_PATH"]
return pathlib.Path(f"{data_path}/{self.id}")

def input_file_path(self) -> pathlib.Path:
return self._base_path() / "input.zip"
def input_h5_file_path(self) -> pathlib.Path:
    """Return the path of the ``input.h5`` file inside this sample's base directory."""
    sample_dir = self._base_path()
    return sample_dir.joinpath("input.h5")

def input_csv_file_path(self) -> pathlib.Path:
    """Return the path of the ``input.csv`` file inside this sample's base directory."""
    sample_dir = self._base_path()
    return sample_dir.joinpath("input.csv")

def result_file_path(self) -> pathlib.Path:
    """Return the path of the ``result.zip`` file inside this sample's base directory."""
    sample_dir = self._base_path()
    return sample_dir.joinpath("result.zip")
Expand Down Expand Up @@ -110,6 +113,7 @@ def get_samples(email: str | None = None) -> list[Sample]:


def request_job() -> int | None:
# todo: go through running jobs and reset to queued if they have been running for more than e.g. 2 hrs
selected_samples = (
db.select(Sample)
.filter(Sample.status == Status.QUEUED)
Expand Down Expand Up @@ -288,6 +292,7 @@ def enable_user(email: str, enabled: bool) -> tuple[str, int]:
if user is None:
logger.info(f" -> Unknown email address '{email}'")
return f"Unknown email address {email}", 400
user.activated = True
user.enabled = enabled
db.session.commit()
return f"Account {email} activated", 200
Expand Down Expand Up @@ -345,7 +350,8 @@ def add_new_sample(
name: str,
tumor_type: str,
source: str,
input_file: FileStorage,
h5_file: FileStorage,
csv_file: FileStorage,
) -> tuple[Sample | None, str]:
user = db.session.execute(
db.select(User).filter(User.email == email)
Expand Down Expand Up @@ -378,6 +384,7 @@ def add_new_sample(
)
db.session.add(new_sample)
db.session.commit()
new_sample.input_file_path().parent.mkdir(parents=True, exist_ok=True)
input_file.save(new_sample.input_file_path())
new_sample.input_h5_file_path().parent.mkdir(parents=True, exist_ok=True)
h5_file.save(new_sample.input_h5_file_path())
csv_file.save(new_sample.input_csv_file_path())
return new_sample, ""
9 changes: 3 additions & 6 deletions backend/tests/helpers/flask_test_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import argon2
from predicTCR_server.model import User, Sample, db, Status
import pathlib
import shutil
import tempfile


def add_test_users(app):
Expand Down Expand Up @@ -42,10 +40,9 @@ def add_test_samples(app, data_path: pathlib.Path):
):
ref_dir = data_path / f"{sample_id}"
ref_dir.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory() as tmp_dir:
with open(f"{tmp_dir}/test.txt", "w") as f:
f.write(name)
shutil.make_archive(f"{ref_dir}/input", "zip", tmp_dir)
for input_file_type in ["h5", "csv"]:
with open(f"{ref_dir}/input.{input_file_type}", "w") as f:
f.write(input_file_type)
new_sample = Sample(
email="user@abc.xy",
name=name,
Expand Down
25 changes: 13 additions & 12 deletions backend/tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations
from typing import Dict
import io
import zipfile
import pytest
import pathlib
import predicTCR_server
import flask_test_utils as ftu
Expand Down Expand Up @@ -129,36 +129,36 @@ def test_samples_valid(client):
assert len(response.json) == 4


def test_input_file_invalid(client):
@pytest.mark.parametrize("input_file_type", ["h5", "csv"])
def test_input_file_invalid(client, input_file_type: str):
# no auth header
response = client.post(
"/api/input_file",
f"/api/input_{input_file_type}_file",
json={"sample_id": 2},
)
assert response.status_code == 401
# invalid sample id
headers = _get_auth_headers(client)
response = client.post(
"/api/input_file",
f"/api/input_{input_file_type}_file",
json={"sample_id": 66},
headers=headers,
)
assert response.status_code == 400
assert "not found" in response.json["message"]


def test_input_file_valid(client):
@pytest.mark.parametrize("input_file_type", ["h5", "csv"])
def test_input_file_valid(client, input_file_type: str):
headers = _get_auth_headers(client)
response = client.post(
"/api/input_file",
f"/api/input_{input_file_type}_file",
json={"sample_id": 2},
headers=headers,
)
assert response.status_code == 200
zip_file = zipfile.ZipFile(io.BytesIO(response.data))
filenames = [f.filename for f in zip_file.filelist]
assert len(filenames) == 1
assert "test.txt" in filenames
with io.BytesIO(response.data) as f:
assert input_file_type in f.read().decode("utf-8")


def test_result_invalid(client):
Expand Down Expand Up @@ -222,14 +222,15 @@ def test_admin_runner_token_invalid(client):
assert response.status_code == 400


def test_admin_runner_token_valid(client):
@pytest.mark.parametrize("input_file_type", ["h5", "csv"])
def test_admin_runner_token_valid(client, input_file_type: str):
headers = _get_auth_headers(client, "admin@abc.xy", "admin")
response = client.get("/api/admin/runner_token", headers=headers)
assert response.status_code == 200
new_token = response.json["access_token"]
assert (
client.post(
"/api/input_file",
f"/api/input_{input_file_type}_file",
json={"sample_id": 1},
headers={"Authorization": f"Bearer {new_token}"},
).status_code
Expand Down
2 changes: 2 additions & 0 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ RUN pnpm install

COPY . .

RUN echo "VITE_REST_API_LOCATION=/api" > .env

RUN pnpm run build-only

FROM nginx
Expand Down
2 changes: 1 addition & 1 deletion frontend/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ server {
ssl_certificate_key /predictcr_ssl_key.pem;

# Maximum file upload size
client_max_body_size 20M;
client_max_body_size 100M;

# Improve HTTPS performance with session resumption
ssl_session_cache shared:SSL:10m;
Expand Down
11 changes: 0 additions & 11 deletions frontend/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 24 additions & 4 deletions frontend/src/components/SamplesTable.vue
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
<script setup lang="ts">
// @ts-ignore
import { download_input_file, download_result } from "@/utils/api-client";
import {
download_input_csv_file,
download_input_h5_file,
download_result,
} from "@/utils/api-client";
import type { Sample } from "@/utils/types";
defineProps<{
Expand All @@ -19,7 +23,8 @@ defineProps<{
<th>Tumor type</th>
<th>Source</th>
<th>Status</th>
<th>Input file</th>
<th>Input H5 file</th>
<th>Input csv file</th>
<th>Results</th>
</tr>
<tr v-for="sample in samples" :key="sample.id">
Expand All @@ -31,11 +36,26 @@ defineProps<{
<td>{{ sample["source"] }}</td>
<td>{{ sample["status"] }}</td>
<td>
<a href="" @click.prevent="download_input_file(sample['id'])"> zip </a>
<a
href=""
@click.prevent="download_input_h5_file(sample.id, sample.name)"
>
input.h5
</a>
</td>
<td>
<a
href=""
@click.prevent="download_input_csv_file(sample.id, sample.name)"
>
input.csv
</a>
</td>
<td>
<template v-if="sample.has_results_zip">
<a href="" @click.prevent="download_result(sample.id)">zip</a>
<a href="" @click.prevent="download_result(sample.id, sample.name)"
>zip</a
>
</template>
<template v-else> - </template>
</td>
Expand Down
Loading

0 comments on commit d869e3b

Please sign in to comment.