diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 906a403..6e8822d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,7 +27,7 @@ jobs: # Login to Quay.io and build image docker login quay.io - docker build -t $REPO:$BRANCH . + docker build --build-arg GITHUB_SHA=$GITHUB_SHA -t $REPO:$BRANCH . # Add 'latest' tag to 'main' image if [[ $BRANCH == 'main' ]]; then diff --git a/Dockerfile b/Dockerfile index 23a3ff6..fab88a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,21 @@ FROM python:3.12 -WORKDIR /app +ARG GITHUB_SHA +ENV GITHUB_SHA=$GITHUB_SHA -ADD "https://api.github.com/repos/ACED-IDP/image_viewer/commits?per_page=1" latest_commit - -RUN git clone https://github.com/ACED-IDP/image_viewer WORKDIR /app/image_viewer -RUN git checkout development -RUN pip install --no-cache-dir . -RUN git log --oneline +# Copy the project files into the container +COPY pyproject.toml ./ + +RUN pip install . + +# write git commit hash to a file +RUN echo $GITHUB_SHA > git_commit_hash.txt + +# Copy the rest of the project files into the container +COPY . . 
+ +# Expose the port your app listens on +EXPOSE 8000 + CMD ["uvicorn", "image_viewer.app:app", "--reload"] diff --git a/image_viewer/__init__.py b/image_viewer/__init__.py index e69de29..be59a5b 100644 --- a/image_viewer/__init__.py +++ b/image_viewer/__init__.py @@ -0,0 +1,4 @@ +from cacheout import Cache + +cache = Cache(ttl=60) # entries expire after 60 seconds + diff --git a/image_viewer/app.py b/image_viewer/app.py index 4f05940..7e176e0 100644 --- a/image_viewer/app.py +++ b/image_viewer/app.py @@ -3,12 +3,13 @@ import threading import uvicorn -from fastapi import FastAPI, HTTPException, Header, Cookie +from fastapi import FastAPI, HTTPException, Header, Cookie, Request from fastapi.responses import RedirectResponse from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict -from image_viewer.indexd_searcher import aviator_url +from image_viewer import cache +from image_viewer.indexd_searcher import redirection_url #AVIVATOR_URL = "https://avivator.gehlenborglab.org/?image_url=" AVIVATOR_URL = "/aviator/?image_url=" @@ -44,7 +45,8 @@ async def health_check(): summary="View Object", description="Redirects to a URL for the object.", responses={307: {"description": "Temporary Redirect"}}) -async def view_object(object_id: str, authorization: str = Header(None), access_token: str = Cookie(None)): +async def view_object(object_id: str, request: Request, authorization: str = Header(None), access_token: str = Cookie(None)): + """Create a view for the object, render a redirect.""" token = None @@ -62,7 +64,7 @@ async def view_object(object_id: str, authorization: str = Header(None), access_ try: logger.error(f"in view object {object_id} {settings.base_url}") - redirect_url = aviator_url(object_id, token, settings.base_url) + redirect_url = redirection_url(object_id, token, settings.base_url, request) logger.error(f"in view object {redirect_url}") return RedirectResponse(url=redirect_url) @@ -71,6 +73,23 @@ async def view_object(object_id: str, authorization: 
str = Header(None), access_ raise HTTPException(status_code=e.status_code, detail=str(e)) +@app.get("/ucsc/{token_hash}/{object_id}", + summary="UCSC Genome Browser Track definition", + description="Redirects to a URL for the object.", + responses={200: {"description": "Track config https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html"}}) +async def ucsc_track(token_hash: str, object_id: str, authorization: str = Header(None), access_token: str = Cookie(None)): + urls: dict = cache.get(f"{object_id}_{token_hash}") + if not urls: + raise HTTPException(status_code=404, detail="No signed URL found") + source = urls.get("source") + tbi = urls.get("tbi") + if not source or not tbi: + raise HTTPException(status_code=404, detail="No signed URL found") + return f""" + track type=vcf name="vcf" description="vcf" visibility=full bigDataUrl="{source}" bigDataIndex="{tbi}" + """ + + # Make the application multi-threaded def run_server(): uvicorn.run(app, host="0.0.0.0", port=8000, workers=4) # workers=4 makes the app multi-threaded. diff --git a/image_viewer/indexd_searcher.py b/image_viewer/indexd_searcher.py index 1fcdd2b..02c0f78 100644 --- a/image_viewer/indexd_searcher.py +++ b/image_viewer/indexd_searcher.py @@ -1,18 +1,91 @@ +import hashlib import urllib -from fastapi import HTTPException +from fastapi import HTTPException, Request from gen3.auth import Gen3Auth from gen3.file import Gen3File from gen3.index import Gen3Index + +from image_viewer import cache from image_viewer.object_signer import get_signed_url import logging +import re +from dataclasses import dataclass logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) -def aviator_url(object_id: str, access_token: str, base_url: str) -> str: - """Return the URL for the Aviator image viewer. 
+@dataclass +class RegexEqual(str): + string: str + match: re.Match = None + + def __eq__(self, pattern): + self.match = re.search(pattern, self.string) + return self.match is not None + + +def aviator_url(source_record, base_url, file_service, index_service): + """Return the URL for the Aviator image viewer.""" + source_file_name = source_record["file_name"] + + offset_file_name = source_file_name.replace("ome.tiff", "offsets.json") + offset_file_name = offset_file_name.replace("ome.tif", "offsets.json") + offsets_records = index_service.query_urls(offset_file_name) + if not isinstance(offsets_records, list) or len(offsets_records) != 1: + raise HTTPException(status_code=404, + detail=f"Could not find object with file_name {offset_file_name} {offsets_records}") + offsets_record = offsets_records[0] + if "did" not in offsets_record: + raise HTTPException(status_code=404, detail=f"Could not find did within {offsets_record}") + offsets_object_id = offsets_record["did"] + + # get the signed url for the source object + object_id = source_record["did"] + source_signed_url = get_signed_url(object_id, file_service) + offsets_signed_url = get_signed_url(offsets_object_id, file_service) + + # Use the configurable base_url from settings + # we encode the signed url because it will contain special characters + redirect_url = f"{base_url}{urllib.parse.quote_plus(source_signed_url)}&offsets_url={urllib.parse.quote_plus(offsets_signed_url)}" + return redirect_url + + +def genome_browser_url(source_record, base_url, access_token, file_service, index_service): + """Return the URL for the genome browser.""" + vcf_file_name = source_record["file_name"] + + tbi_file_name = vcf_file_name + ".tbi" + tbi_records = index_service.query_urls(tbi_file_name) + if not isinstance(tbi_records, list) or len(tbi_records) != 1: + raise HTTPException(status_code=404, + detail=f"Could not find object with file_name {tbi_file_name} {tbi_records}") + tbi_record = tbi_records[0] + if "did" not in 
tbi_record: + raise HTTPException(status_code=404, detail=f"Could not find did within {tbi_record}") + tbi_object_id = tbi_record["did"] + + # get the signed url for the source object + object_id = source_record["did"] + source_signed_url = get_signed_url(object_id, file_service) + tbi_signed_url = get_signed_url(tbi_object_id, file_service) + + # cache the signed URLs under a key built from the object id and a hash of the access token, + # so the /ucsc/{token_hash}/{object_id} endpoint can look them up when the track definition is requested + access_token_hash = hashlib.md5(access_token.encode()).hexdigest() + cache.set(f"{object_id}_{access_token_hash}", { + "source": source_signed_url, + "tbi": tbi_signed_url + }) + # hub URL pointing back at this service's /ucsc endpoint, URL-encoded below as the hubUrl query parameter + hub_url = f"{base_url}/ucsc/{access_token_hash}/{object_id}" + redirect_url = f"http://genome.ucsc.edu/cgi-bin/hgTracks?hubUrl={urllib.parse.quote_plus(hub_url)}" + return redirect_url + + +def redirection_url(object_id: str, access_token: str, base_url: str, request: Request) -> str: + """Return the URL for the object. object_id: str The object ID of an ome.tif file to view access_token: str The access token to use for authentication base_url: str The base URL for the Aviator image viewer @@ -30,32 +103,25 @@ def aviator_url(object_id: str, access_token: str, base_url: str) -> str: file_service = Gen3File(auth) index_service = Gen3Index(auth) + logger.error(f"in redirection_url") + source_record = index_service.get(object_id) if not isinstance(source_record, dict): - raise HTTPException(status_code=500, detail=f"Could not find object with id {object_id} {source_record}") - logger.error(f"aviator_url source_record {source_record}") + raise HTTPException(status_code=404, detail=f"Could not find object with id {object_id} {source_record}") + + logger.error(f"redirection_url source_record {source_record}") + if "file_name" not in source_record: raise HTTPException(status_code=500, detail=f"Could not find file_name within {source_record}") - if "ome.tif" not in source_record["file_name"]: - raise 
HTTPException(status_code=500, detail=f"Expected file_name to contain 'ome.tif' {source_record}") - source_file_name = source_record["file_name"] - offset_file_name = source_file_name.replace("ome.tiff", "offsets.json") - offset_file_name = offset_file_name.replace("ome.tif", "offsets.json") - offsets_records = index_service.query_urls(offset_file_name) - if not isinstance(offsets_records, list) or len(offsets_records) != 1: - raise HTTPException(status_code=500, - detail=f"Could not find object with file_name {offset_file_name} {offsets_records}") - offsets_record = offsets_records[0] - if "did" not in offsets_record: - raise HTTPException(status_code=500, detail=f"Could not find did within {offsets_record}") - offsets_object_id = offsets_record["did"] + redirect_url = None + match RegexEqual(source_record['file_name']): + case "\\.ome.tif?": + redirect_url = aviator_url(source_record, base_url, file_service, index_service) + case "\\.vcf": + redirect_url = genome_browser_url(source_record, request.base_url, access_token, file_service, index_service) - # get the signed url for the source object - source_signed_url = get_signed_url(object_id, file_service) - offsets_signed_url = get_signed_url(offsets_object_id, file_service) + if not redirect_url: + raise HTTPException(status_code=500, detail=f"Could not match a viewer for {source_record['file_name']}") - # Use the configurable base_url from settings - # we encode the signed url because it will contain special characters - redirect_url = f"{base_url}{urllib.parse.quote_plus(source_signed_url)}&offsets_url={urllib.parse.quote_plus(offsets_signed_url)}" return redirect_url diff --git a/pyproject.toml b/pyproject.toml index 455d719..3e249c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "image_viewer" version = "0.1.0" description = "A FastAPI project for viewing images" authors = [ - { name = "Your Name", email = "your.email@example.com" } + { name = "Brian Walsh", email = "walsbr@ohsu.edu" 
} ] dependencies = [ "fastapi", @@ -15,7 +15,8 @@ dependencies = [ "requests", "pydantic-settings", "python-dotenv", - "gen3" + "gen3", + "cacheout" ] [project.optional-dependencies] diff --git a/tests/fixtures/vcf/vcfExample.vcf.gz b/tests/fixtures/vcf/vcfExample.vcf.gz new file mode 100644 index 0000000..625a0a7 Binary files /dev/null and b/tests/fixtures/vcf/vcfExample.vcf.gz differ diff --git a/tests/fixtures/vcf/vcfExample.vcf.gz.tbi b/tests/fixtures/vcf/vcfExample.vcf.gz.tbi new file mode 100644 index 0000000..e718568 Binary files /dev/null and b/tests/fixtures/vcf/vcfExample.vcf.gz.tbi differ diff --git a/tests/fixtures/vcf/vcfExampleTwo.vcf.gz b/tests/fixtures/vcf/vcfExampleTwo.vcf.gz new file mode 100644 index 0000000..7c8c114 Binary files /dev/null and b/tests/fixtures/vcf/vcfExampleTwo.vcf.gz differ diff --git a/tests/fixtures/vcf/vcfExampleTwo.vcf.gz.tbi b/tests/fixtures/vcf/vcfExampleTwo.vcf.gz.tbi new file mode 100644 index 0000000..cb49af4 Binary files /dev/null and b/tests/fixtures/vcf/vcfExampleTwo.vcf.gz.tbi differ diff --git a/tests/unit/app/conftest.py b/tests/unit/app/conftest.py index dd232c7..43dd012 100644 --- a/tests/unit/app/conftest.py +++ b/tests/unit/app/conftest.py @@ -1,6 +1,6 @@ import urllib.parse from importlib import reload - +from fastapi import Request from fastapi.testclient import TestClient import pytest import image_viewer.app @@ -41,17 +41,19 @@ def monkey_patch_aviator(monkeypatch): """Monkey patch the Aviator URL response.""" import image_viewer.indexd_searcher - def mock_aviator_url(object_id, access_token, base_url): + def mock_aviator_url(object_id, access_token, base_url, request: Request): """Mock the Aviator URL response""" + print("In mock_aviator_url") image_url = urllib.parse.quote_plus(f'https://image-{object_id}') offsets_url = urllib.parse.quote_plus(f'https://offsets-{object_id}') parms = f'image_url={image_url}&offsets_url={offsets_url}' _ = f"https://env-file-url.com/objects/?{parms}" - print(f"Mocked 
aviator_url: {object_id} {access_token} {base_url} -> {_}") + print(f"Mocked redirection_url: {object_id} {access_token} {base_url} -> {_}") return _ - monkeypatch.setattr(image_viewer.indexd_searcher, "aviator_url", mock_aviator_url) - print("Monkey patched aviator_url") + monkeypatch.setattr(image_viewer.indexd_searcher, "redirection_url", mock_aviator_url) + print("Monkey patched redirection_url") + # print(image_viewer.indexd_searcher.redirection_url("123", "456", "789", None)) def monkey_patch_signed_url(monkeypatch): diff --git a/tests/unit/app/test_auth_needed_env_variable.py b/tests/unit/app/test_auth_needed_env_variable.py index 51b300a..c17f352 100644 --- a/tests/unit/app/test_auth_needed_env_variable.py +++ b/tests/unit/app/test_auth_needed_env_variable.py @@ -16,11 +16,13 @@ def client_with_cookie_base_url(monkeypatch, base_url, valid_token): client_ = TestClient(image_viewer.app.app) client_.cookies.update({"access_token": valid_token}) + print(client_) yield client_ # Test setting base_url via environment variable def test_base_url_from_env_variable(monkeypatch, client_with_cookie_base_url, base_url): + print(client_with_cookie_base_url) object_id = "123" response = client_with_cookie_base_url.get(f"/view/{object_id}", follow_redirects=False) diff --git a/tests/unit/app/test_file_type.py b/tests/unit/app/test_file_type.py new file mode 100644 index 0000000..697fe7a --- /dev/null +++ b/tests/unit/app/test_file_type.py @@ -0,0 +1,32 @@ +from image_viewer.indexd_searcher import RegexEqual + + +def test_simple_match(): + match RegexEqual("Something to match"): + case "^...match": + print("Nope...") + case "^S.*ing$": + print("Closer...") + case "^S.*match$": + print("Yep!") + case _: + assert False, "Should not match anything else" + + +def test_extension_match(): + + match RegexEqual("/a/b/c/d.txt"): + case "\\.txt": + print("ok") + case _: + assert False, "Should not match anything else" + + for file_name in ["/a/b/c/d.ome.tif", "/a/b/c/d.ome.tiff", 
"/a/b/c/d.vcf.gz", "/a/b/c/d.vcf"]: + match RegexEqual(file_name): + case "\\.ome.tif?": + continue + case "\\.vcf": + continue + case _: + assert False, "Should not match anything else" + assert False, "Should not match anything else"