Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/pip/python_dependencies/processin…
Browse files Browse the repository at this point in the history
…g/rpy2-3.5.16
  • Loading branch information
Bento007 authored May 9, 2024
2 parents 0068abe + 42f40e9 commit 0da5b60
Show file tree
Hide file tree
Showing 116 changed files with 789 additions and 447 deletions.
12 changes: 11 additions & 1 deletion .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ updates:
interval: "weekly"
day: "sunday"
assignees:
- "danieljhegeman"
- "Bento007"
- package-ecosystem: pip
directory: "/python_dependencies/common/"
open-pull-requests-limit: 3
Expand All @@ -63,6 +63,16 @@ updates:
day: "sunday"
assignees:
- "nayib-jose-gloria"
groups:
formatters:
patterns:
- "black"
- "ruff"
test-runners:
patterns:
- "pytest"
- "coverage"
- "allure"
- package-ecosystem: npm
directory: "/frontend/"
open-pull-requests-limit: 3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-images-and-create-deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
uses: avakar/create-deployment@v1
# To stop deployment to a specific DEPLOYMENT_STAGE remove it from condition below.
# The DEPLOYMENT_STAGE that should be present are dev, stage, prod.
if: env.DEPLOYMENT_STAGE == 'prod' || env.DEPLOYMENT_STAGE == 'stage'
if: env.DEPLOYMENT_STAGE == 'prod' || env.DEPLOYMENT_STAGE == 'stage' || env.DEPLOYMENT_STAGE == 'dev'
with:
auto_merge: false
environment: ${{ env.DEPLOYMENT_STAGE }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ jobs:
main:
runs-on: ubuntu-latest
steps:
- uses: amannn/action-semantic-pull-request@v5.4.0
- uses: amannn/action-semantic-pull-request@v5.5.2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
14 changes: 7 additions & 7 deletions .github/workflows/push-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44.0.0
uses: tj-actions/changed-files@v44.3.0
with:
files: |
Dockerfile*
Expand Down Expand Up @@ -142,7 +142,7 @@ jobs:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44.0.0
uses: tj-actions/changed-files@v44.3.0
with:
files: |
Dockerfile*
Expand Down Expand Up @@ -201,7 +201,7 @@ jobs:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44.0.0
uses: tj-actions/changed-files@v44.3.0
with:
files: |
Dockerfile*
Expand Down Expand Up @@ -260,7 +260,7 @@ jobs:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44.0.0
uses: tj-actions/changed-files@v44.3.0
with:
files: |
Dockerfile*
Expand Down Expand Up @@ -316,7 +316,7 @@ jobs:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44.0.0
uses: tj-actions/changed-files@v44.3.0
with:
files: |
Dockerfile*
Expand Down Expand Up @@ -372,7 +372,7 @@ jobs:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44.0.0
uses: tj-actions/changed-files@v44.3.0
with:
files: |
Dockerfile*
Expand Down Expand Up @@ -431,7 +431,7 @@ jobs:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44.0.0
uses: tj-actions/changed-files@v44.3.0
with:
files: |
Dockerfile*
Expand Down
6 changes: 3 additions & 3 deletions .happy/terraform/modules/schema_migration/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ resource aws_sfn_state_machine sfn_schema_migration {
},
{
"Name": "EXECUTION_ID",
"Value.$": "$$.Execution.Name"
"Value.$": "$.execution_id"
}
]
}
Expand Down Expand Up @@ -351,7 +351,7 @@ resource aws_sfn_state_machine sfn_schema_migration {
},
{
"Name": "EXECUTION_ID",
"Value.$": "$$.Execution.Name"
"Value.$": "$.execution_id"
}
]
}
Expand Down Expand Up @@ -418,7 +418,7 @@ resource aws_sfn_state_machine sfn_schema_migration {
},
{
"Name": "EXECUTION_ID",
"Value.$": "$$.Execution.Name"
"Value.$": "$.execution_id"
}
]
}
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 22.3.0 # Must be kept in sync with black version in requirements.txt
rev: 24.3.0 # Must be kept in sync with black version in python_dependencies/common/requirements.txt
hooks:
- id: black
stages: [commit]
Expand All @@ -14,7 +14,7 @@ repos:
exclude_types: ["json"]

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.239
rev: v0.4.3 # Must be kept in sync with ruff version in python_dependencies/common/requirements.txt
hooks:
- id: ruff
args: ["--config=pyproject.toml", "--fix"]
Expand Down
7 changes: 3 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ export COVERAGE_RUN_ARGS:=--data-file=$(COVERAGE_DATA_FILE) --parallel-mode $(CO

.PHONY: fmt
fmt:
black --config=pyproject.toml backend scripts tests
ruff check --fix --config=pyproject.toml backend tests scripts
pre-commit run --all-files

.PHONY: lint
lint:
Expand All @@ -35,7 +34,7 @@ wmg-processing-unittest:
cellguide-pipeline-unittest:
# This target is intended to be run INSIDE the cellguide pipeline container
DEPLOYMENT_STAGE=test PYTHONWARNINGS=ignore:ResourceWarning coverage run $(COVERAGE_RUN_ARGS) -m pytest \
tests/unit/cellguide_pipeline/ --rootdir=. --alluredir=./allure-results --verbose;
tests/unit/backend/cellguide/pipeline/ --rootdir=. --alluredir=./allure-results --verbose;

.PHONY: functional-test
functional-test:
Expand Down Expand Up @@ -233,4 +232,4 @@ coverage/report-html: coverage/combine

.PHONY: promote-staging-to-prod
promote-staging-to-prod:
./scripts/promote_staging_to_prod.sh
./scripts/promote_staging_to_prod.sh
1 change: 1 addition & 0 deletions backend/api_server/request_id.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Inspired by: http://blog.mcpolemic.com/2016/01/18/adding-request-ids-to-flask.html
"""

import logging
import uuid

Expand Down
83 changes: 66 additions & 17 deletions backend/cellguide/api/cellguide-api.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
openapi: 3.0.0
info:
version: "1.0.0"
title: Chan Zuckerberg Initiative CELLxGENE Discover API
title: Chan Zuckerberg Initiative CELLxGENE CellGuide API
termsOfService: https://cellxgene.cziscience.com/tos
contact:
name: API Support
email: cellxgene@chanzuckerberg.com
description: |
## Uploads for CellGuide Descriptions
## Overview of Available Endpoints for the CELLxGENE CellGuide API
servers:
- description: Production environment
url: https://api.cellxgene.cziscience.com/
Expand Down Expand Up @@ -47,21 +47,80 @@ paths:
$ref: "#/components/responses/401"
"403":
$ref: "#/components/responses/403"
# This is commented out until product aligns on the CellGuide API
# Leaving this here for reference
# /v1/marker_genes:
# get:
# summary: |
# Get all CellGuide marker genes.
# operationId: backend.cellguide.api.v1.marker_genes.actions.get
# tags:
# - CellGuide
# responses:
# "200":
# description: "Successful retrieval of marker genes data."
# content:
# application/json:
# schema:
# $ref: "#/components/schemas/marker_gene_data"
# "404":
# description: "No data found for the specified parameters."
# content:
# application/json:
# schema:
# $ref: "#/components/schemas/problem"

components:
schemas:
cell_ontology_term_id_fs:
description: File system-compatible cell ontology term ID (with underscores)
example: CL_0000030
type: string
marker_gene_data:
description: Details of a marker gene, which can vary in structure based on the specificity of the query (organism, tissue, cell type). Can be a nested structure or a direct list.
type: object
additionalProperties: # This allows any property name for organism
type: object
additionalProperties: # This allows any property name for tissue
type: object
additionalProperties: # This allows any property name for cell type
type: array
items:
$ref: "#/components/schemas/marker_gene_entry"
marker_gene_entry:
type: object
properties:
gene:
type: string
description: The identifier for the gene
marker_score:
type: number
description: Score indicating the strength of the marker
me:
type: number
description: Mean expression of the marker gene
pc:
type: number
description: Percent of cells in which the marker gene is expressed
problem:
description: Error message container for HTTP APIs.
properties:
detail:
type: string
title:
type: string
type:
type: string
type: object
cellguide_description:
properties:
cell_ontology_term_id:
$ref: "#/components/schemas/cell_ontology_term_id"
$ref: "#/components/schemas/cell_ontology_term_id_fs"
description:
$ref: "#/components/schemas/cell_description"
references:
$ref: "#/components/schemas/references"
type: object
cell_ontology_term_id:
description: Cell Anthology ID
example: CL_0000030
type: string
cell_description:
description: A description of the cell type
type: string
Expand All @@ -70,16 +129,6 @@ components:
items:
type: string
type: array
problem:
description: Error message container for HTTP APIs.
properties:
detail:
type: string
title:
type: string
type:
type: string
type: object
responses:
"200":
description: OK
Expand Down
68 changes: 68 additions & 0 deletions backend/cellguide/api/common/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import gzip
import json
import os
from collections import defaultdict

from backend.cellguide.common.config import CellGuideConfig
from backend.cellguide.common.constants import COMPUTATIONAL_MARKER_GENES_FOLDERNAME, MARKER_GENE_PRESENCE_FILENAME
from backend.cellguide.common.providers.s3_provider import S3Provider
from backend.cellguide.common.utils import get_object_key


def _defaultdict_to_dict(d):
if isinstance(d, defaultdict):
# Convert the defaultdict to a dict and recursively apply this function
return {key: _defaultdict_to_dict(value) for key, value in d.items()}
else:
return d


def _initialize_cellguide_marker_gene_dict():
    """
    Download the marker gene presence file of the latest snapshot and regroup it.

    The downloaded JSON is keyed gene -> organism -> tissue -> list of markers.
    It is pivoted into organism -> tissue -> cell type id -> list of entries, where
    each entry carries ``marker_score``, ``me``, ``pc`` and ``gene``. Every list is
    ordered by descending ``marker_score``.

    Returns:
        dict: plain nested dictionaries (no ``defaultdict`` left in the result).
    """
    bucket = CellGuideConfig().bucket
    s3_provider = S3Provider()

    # The snapshot pointer object holds the identifier of the most recent snapshot.
    snapshot_pointer = s3_provider.download_file(
        bucket_name=bucket, object_key=get_object_key(object="latest_snapshot_identifier")
    )
    latest_snapshot_identifier = snapshot_pointer.decode("utf-8").strip()

    marker_gene_key = get_object_key(
        object=f"{latest_snapshot_identifier}/{COMPUTATIONAL_MARKER_GENES_FOLDERNAME}/{MARKER_GENE_PRESENCE_FILENAME}"
    )
    compressed_data = s3_provider.download_file(bucket_name=bucket, object_key=marker_gene_key)
    marker_gene_data = json.loads(gzip.decompress(compressed_data).decode("utf-8"))

    # Pivot from gene-first to organism-first; missing levels are created on demand.
    grouped = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    for gene, by_organism in marker_gene_data.items():
        for organism, by_tissue in by_organism.items():
            for tissue, markers in by_tissue.items():
                for marker in markers:
                    grouped[organism][tissue][marker["cell_type_id"]].append(
                        {"marker_score": marker["marker_score"], "me": marker["me"], "pc": marker["pc"], "gene": gene}
                    )

    data = _defaultdict_to_dict(grouped)

    # Strongest markers first within each cell type.
    for tissues in data.values():
        for cell_types in tissues.values():
            for entries in cell_types.values():
                entries.sort(key=lambda entry: -entry["marker_score"])

    return data


# Module-level memo for the marker gene mapping; stays None until the first lookup.
_marker_gene_data_cache = None


def get_marker_gene_data():
    """
    Return the marker gene mapping, building it on the first call and reusing it afterwards.

    When the ``DEPLOYMENT_STAGE`` environment variable equals ``"test"`` the cache is
    filled with an empty dict instead of loading the latest snapshot.
    """
    global _marker_gene_data_cache
    if _marker_gene_data_cache is not None:
        return _marker_gene_data_cache

    running_under_test = os.getenv("DEPLOYMENT_STAGE") == "test"
    # Only load the latest snapshot outside of test mode.
    _marker_gene_data_cache = {} if running_under_test else _initialize_cellguide_marker_gene_dict()
    return _marker_gene_data_cache
23 changes: 23 additions & 0 deletions backend/cellguide/api/v1/marker_genes/actions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from flask import jsonify, make_response

from backend.cellguide.api.common.data import get_marker_gene_data


def get():
    """
    Serve the complete CellGuide marker gene data set as JSON.

    Returns:
        Flask Response: status 200 with the marker gene data serialized as JSON.
        The body is a nested dictionary shaped as
            organisms --> tissues --> cell types --> list(marker genes).
        Organisms and tissues are labels and cell types are IDs. Each marker gene is a
        dictionary with these keys:
            - `marker_score`: The score indicating the strength of the marker gene for the cell type.
            - `me`: Mean expression of the gene across the cells of the specified type.
            - `pc`: Percentage of cells within the specified type that express the gene.
            - `gene`: The gene symbol associated with the marker gene data.
    """
    marker_genes = get_marker_gene_data()
    payload = jsonify(marker_genes)
    return make_response(payload, 200)
File renamed without changes.
Loading

0 comments on commit 0da5b60

Please sign in to comment.