Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch pending tower analyses #506

Merged
merged 8 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,10 +362,20 @@ def tower_tasks_response():

@pytest.fixture
def tower_workflow_response() -> TowerWorkflowResponse:
    """Return a Tower workflow response whose workflow status is RUNNING.

    The progress part is built with an empty workflow progress mapping and
    an empty list of process progress entries.
    """
    workflow = TowerWorkflow(status="RUNNING")
    progress = TowerProgress(workflowProgress={}, processesProgress=[])
    return TowerWorkflowResponse(
        workflow=workflow,
        progress=progress,
    )


@pytest.fixture
def tower_response_submitted() -> TowerWorkflowResponse:
    """Return a Tower workflow response whose workflow status is SUBMITTED.

    The progress part carries no workflow progress and no process progress.
    """
    return TowerWorkflowResponse(
        workflow=TowerWorkflow(status="SUBMITTED"),
        progress=TowerProgress(workflowProgress={}, processesProgress=[]),
    )

Expand Down
2 changes: 1 addition & 1 deletion tests/services/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from datetime import datetime, timedelta
from unittest.mock import MagicMock, Mock

import pytest
from sqlalchemy.orm import Session


from trailblazer.clients.tower.tower_client import TowerAPIClient
from trailblazer.constants import (
PRIORITY_OPTIONS,
Expand Down
40 changes: 39 additions & 1 deletion tests/services/test_job_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from trailblazer.constants import TrailblazerStatus
import pytest

from trailblazer.clients.tower.models import TowerWorkflowResponse
from trailblazer.constants import TrailblazerStatus, WorkflowManager
from trailblazer.exceptions import NoJobsError
from trailblazer.services.job_service import JobService
from trailblazer.store.models import Analysis

Expand Down Expand Up @@ -57,3 +61,37 @@ def test_analysis_status_when_running_jobs(

# THEN the status is running
assert status == TrailblazerStatus.RUNNING


def test_fetch_pending_status_for_tower_without_jobs(
    job_service: JobService,
    tower_response_submitted: TowerWorkflowResponse,
):
    """
    A Tower-managed analysis that has no jobs should get the PENDING status
    when the Tower client reports the workflow as submitted.
    """
    # GIVEN an analysis without jobs whose workflow manager is Tower
    tower_analysis: Analysis = job_service.store.get_query(Analysis).first()
    tower_analysis.workflow_manager = WorkflowManager.TOWER
    assert not tower_analysis.jobs

    # GIVEN that the Tower client answers with a submitted workflow
    job_service.tower_service.client.get_workflow.return_value = tower_response_submitted

    # WHEN asking the job service for the status of the analysis
    fetched_status: TrailblazerStatus = job_service.get_analysis_status(tower_analysis.id)

    # THEN the status is PENDING
    assert fetched_status == TrailblazerStatus.PENDING


def test_no_jobs_error_for_slurm_analysis(
    job_service: JobService,
    analysis_without_jobs: Analysis,
):
    """
    Fetching the status of an analysis that has no jobs should raise a NoJobsError.

    NOTE(review): the fixture is presumably SLURM-managed, as the test name
    says; nothing in this test sets the workflow manager.
    """
    # GIVEN an analysis that has no jobs attached to it

    # WHEN asking the job service for the status of that analysis
    # THEN the call fails with a NoJobsError
    with pytest.raises(NoJobsError):
        job_service.get_analysis_status(analysis_without_jobs.id)
37 changes: 26 additions & 11 deletions trailblazer/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,32 @@ class TrailblazerStatusColor(StrEnum):
RUNNING: str = "blue"


TOWER_WORKFLOW_STATUS: dict[str, str] = {
"ABORTED": TrailblazerStatus.FAILED,
"CACHED": TrailblazerStatus.COMPLETED,
"CANCELLED": TrailblazerStatus.CANCELLED,
"COMPLETED": TrailblazerStatus.COMPLETED,
"FAILED": TrailblazerStatus.FAILED,
"NEW": TrailblazerStatus.PENDING,
"RUNNING": TrailblazerStatus.RUNNING,
"SUBMITTED": TrailblazerStatus.PENDING,
"SUCCEEDED": TrailblazerStatus.COMPLETED,
"UNKNOWN": TrailblazerStatus.FAILED,
class TowerStatus(StrEnum):
"""Tower statuses."""

ABORTED: str = "ABORTED"
CACHED: str = "CACHED"
CANCELLED: str = "CANCELLED"
COMPLETED: str = "COMPLETED"
FAILED: str = "FAILED"
NEW: str = "NEW"
RUNNING: str = "RUNNING"
SUBMITTED: str = "SUBMITTED"
SUCCEEDED: str = "SUCCEEDED"
UNKNOWN: str = "UNKNOWN"


# Translation table from a Tower status to the Trailblazer status it corresponds to.
TOWER_WORKFLOW_STATUS: dict[str, TrailblazerStatus] = {
    # Aborted workflows count as failed; cached ones count as completed
    TowerStatus.ABORTED: TrailblazerStatus.FAILED,
    TowerStatus.CACHED: TrailblazerStatus.COMPLETED,
    TowerStatus.CANCELLED: TrailblazerStatus.CANCELLED,
    TowerStatus.COMPLETED: TrailblazerStatus.COMPLETED,
    TowerStatus.FAILED: TrailblazerStatus.FAILED,
    # Both new and submitted workflows are treated as pending
    TowerStatus.NEW: TrailblazerStatus.PENDING,
    TowerStatus.RUNNING: TrailblazerStatus.RUNNING,
    TowerStatus.SUBMITTED: TrailblazerStatus.PENDING,
    TowerStatus.SUCCEEDED: TrailblazerStatus.COMPLETED,
    # An unknown Tower status is reported as failed
    TowerStatus.UNKNOWN: TrailblazerStatus.FAILED,
}


Expand Down
10 changes: 5 additions & 5 deletions trailblazer/dto/create_analysis_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

class CreateAnalysisRequest(BaseModel):
    """Request payload for creating an analysis.

    Every field is declared exactly once, in alphabetical order after
    ``case_id``. Fields without a default (``case_id``, ``config_path``,
    ``out_dir``, ``priority`` and ``type``) are required; the rest default
    to ``None``.
    """

    case_id: str
    config_path: str
    email: str | None = None
    is_hidden: bool | None = None
    order_id: int | None = None
    out_dir: str
    priority: TrailblazerPriority
    ticket: str | None = None
    tower_workflow_id: str | None = None
    type: TrailblazerTypes
    workflow: str | None = None
    workflow_manager: WorkflowManager | None = None
6 changes: 3 additions & 3 deletions trailblazer/services/job_service/job_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ def get_analysis_status(self, analysis_id: int) -> TrailblazerStatus:
if analysis.status == TrailblazerStatus.CANCELLED:
return TrailblazerStatus.CANCELLED

if not analysis.jobs:
raise NoJobsError(f"No jobs found for analysis {analysis_id}")

if analysis.workflow_manager == WorkflowManager.TOWER:
return self.tower_service.get_status(analysis_id)

if not analysis.jobs:
raise NoJobsError(f"No jobs found for analysis {analysis_id}")

return get_status(analysis.jobs)

def get_analysis_progression(self, analysis_id: int) -> float:
Expand Down
4 changes: 3 additions & 1 deletion trailblazer/services/tower/tower_api_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def cancel_jobs(self, analysis_id: int) -> None:
def get_status(self, analysis_id: int) -> TrailblazerStatus:
    """Return the Trailblazer status for the Tower workflow of an analysis.

    The analysis is looked up in the store, its Tower workflow is fetched
    through the client, and the reported workflow status is translated with
    TOWER_WORKFLOW_STATUS.

    Args:
        analysis_id: Id of the analysis whose status is requested.

    Returns:
        The translated status. A workflow status that is missing from the
        mapping gives ERROR, and a translated COMPLETED is returned as QC.
    """
    analysis: Analysis = self.store.get_analysis_with_id(analysis_id)
    response = self.client.get_workflow(analysis.tower_workflow_id)
    # Single lookup: statuses absent from the mapping fall back to ERROR
    status: TrailblazerStatus = TOWER_WORKFLOW_STATUS.get(
        response.workflow.status, TrailblazerStatus.ERROR
    )
    # A completed workflow is reported as QC rather than COMPLETED
    if status == TrailblazerStatus.COMPLETED:
        return TrailblazerStatus.QC
    return status
Loading