Skip to content

Commit 5ce2d6d

Browse files
committed
added reprocess functions for bed and bedsets and cli options
1 parent 60563bd commit 5ce2d6d

File tree

7 files changed

+223
-18
lines changed

7 files changed

+223
-18
lines changed

bedboss/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.1"
1+
__version__ = "0.6.0"

bedboss/bbuploader/main.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from pephubclient import PEPHubClient
99
from pephubclient.helpers import MessageHandler
1010
from pephubclient.models import SearchReturnModel
11-
from setuptools.command.egg_info import overwrite_arg
1211
from sqlalchemy import and_, select
1312
from sqlalchemy.orm import Session
1413

@@ -28,13 +27,14 @@
2827
from bedboss.bedbuncher.bedbuncher import run_bedbuncher
2928
from bedboss.exceptions import BedBossException
3029
from bedboss.skipper import Skipper
31-
from bedboss.utils import download_file, standardize_genome_name
30+
from bedboss.utils import calculate_time, download_file, standardize_genome_name
3231
from bedboss.utils import standardize_pep as pep_standardizer
3332

3433
_LOGGER = logging.getLogger(PKG_NAME)
3534
_LOGGER.setLevel(logging.DEBUG)
3635

3736

37+
@calculate_time
3838
def upload_all(
3939
bedbase_config: str,
4040
outfolder: str = os.getcwd(),
@@ -271,6 +271,7 @@ def find_peps(
271271
)
272272

273273

274+
@calculate_time
274275
def upload_gse(
275276
gse: str,
276277
bedbase_config: Union[str, BedBaseAgent],

bedboss/bedboss.py

Lines changed: 122 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1+
import datetime
12
import logging
23
import os
34
import subprocess
45
from typing import Union
56

67
import bbconf
7-
import yaml
88
import pephubclient
99
import peppy
1010
import pypiper
11+
import yaml
1112
from bbconf.bbagent import BedBaseAgent
1213
from bbconf.const import DEFAULT_LICENSE
1314
from bbconf.models.base_models import FileModel
1415
from eido import validate_project
15-
import datetime
16+
from geniml.bbclient import BBClient
1617
from pephubclient.helpers import MessageHandler as m
1718
from pephubclient.helpers import is_registry_path
18-
from geniml.bbclient import BBClient
1919

2020
from bedboss._version import __version__
2121
from bedboss.bedbuncher import run_bedbuncher
@@ -32,7 +32,7 @@
3232
)
3333
from bedboss.refgenome_validator.main import ReferenceValidator
3434
from bedboss.skipper import Skipper
35-
from bedboss.utils import get_genome_digest, standardize_genome_name, calculate_time
35+
from bedboss.utils import calculate_time, get_genome_digest, standardize_genome_name
3636
from bedboss.utils import standardize_pep as pep_standardizer
3737

3838
_LOGGER = logging.getLogger(PKG_NAME)
@@ -50,6 +50,7 @@ def requirements_check() -> None:
5050
)
5151

5252

53+
@calculate_time
5354
def run_all(
5455
input_file: str,
5556
input_type: str,
@@ -264,6 +265,7 @@ def run_all(
264265
return bed_metadata.bed_digest
265266

266267

268+
@calculate_time
267269
def insert_pep(
268270
bedbase_config: str,
269271
output_folder: str,
@@ -278,6 +280,7 @@ def insert_pep(
278280
ensdb: str = None,
279281
just_db_commit: bool = False,
280282
force_overwrite: bool = False,
283+
update: bool = False,
281284
upload_s3: bool = False,
282285
upload_pephub: bool = False,
283286
upload_qdrant: bool = False,
@@ -306,6 +309,7 @@ def insert_pep(
306309
:param str ensdb: a full path to the ensdb gtf file required for genomes not in GDdata
307310
:param bool just_db_commit: whether save only to the database (Without saving locally )
308311
:param bool force_overwrite: whether to overwrite the existing record
312+
:param bool update: whether to update the record in the database. This option will overwrite the force_overwrite option. [Default: False]
309313
:param bool upload_s3: whether to upload to s3
310314
:param bool upload_pephub: whether to push bedfiles and metadata to pephub (default: False)
311315
:param bool upload_qdrant: whether to execute qdrant indexing
@@ -378,6 +382,7 @@ def insert_pep(
378382
ensdb=ensdb,
379383
just_db_commit=just_db_commit,
380384
force_overwrite=force_overwrite,
385+
update=update,
381386
upload_qdrant=upload_qdrant,
382387
upload_s3=upload_s3,
383388
upload_pephub=upload_pephub,
@@ -427,12 +432,12 @@ def insert_pep(
427432

428433

429434
@calculate_time
430-
def run_unprocessed_beds(
435+
def reprocess_all(
431436
bedbase_config: Union[str, BedBaseAgent],
432437
output_folder: str,
433438
limit: int = 10,
434439
nofail: bool = False,
435-
):
440+
) -> None:
436441
"""
437442
Run bedboss pipeline for all unprocessed beds in the bedbase
438443
@@ -504,7 +509,7 @@ def run_unprocessed_beds(
504509
) as file:
505510
yaml.dump(failed_samples, file)
506511

507-
from rich import print
512+
m.print_warning(f"Logs with failed samples are saved in {output_folder}")
508513

509514
m.print_success(f"Processing completed successfully")
510515

@@ -515,3 +520,113 @@ def run_unprocessed_beds(
515520
success_files=unprocessed_beds.limit - len(failed_samples),
516521
)
517522
print(print_values)
523+
524+
525+
@calculate_time
def reprocess_one(
    bedbase_config: Union[str, BedBaseAgent],
    output_folder: str,
    identifier: str,
) -> None:
    """
    Re-run the bedboss pipeline for a single bed record already in the bedbase

    The record is looked up with ``bbagent.bed.get``, its file is loaded through
    ``BBClient.load_bed`` and ``run_all`` is invoked with ``update=True`` and all
    uploads (qdrant, s3, pephub) switched on.

    :param bedbase_config: bedbase configuration file path, or an existing BedBaseAgent
    :param output_folder: output folder of the pipeline
    :param identifier: bed identifier

    :raises BedBossException: if bedbase_config is neither a str nor a BedBaseAgent
    :return: None
    """

    # Reject unsupported config types up front, then resolve the agent.
    if not isinstance(bedbase_config, (str, bbconf.BedBaseAgent)):
        raise BedBossException("Incorrect bedbase_config type. Exiting...")

    if isinstance(bedbase_config, str):
        agent = BedBaseAgent(config=bedbase_config)
    else:
        agent = bedbase_config

    client = BBClient()

    # Metadata comes from the database; the bed file itself comes via bbclient.
    record = agent.bed.get(identifier)
    local_bed = client.load_bed(record.id)

    run_all(
        # input description, taken from the stored record
        input_file=local_bed.path,
        input_type="bed",
        genome=record.genome_alias,
        name=record.name,
        license_id=record.license_id,
        # pipeline destination / configuration
        outfolder=output_folder,
        bedbase_config=agent,
        rfg_config=None,
        chrom_sizes=None,
        open_signal_matrix=None,
        ensdb=None,
        other_metadata=None,
        pm=None,
        # processing switches
        check_qc=False,
        validate_reference=True,
        light=False,
        universe=False,
        universe_method=None,
        universe_bedset=None,
        # persistence: update the existing record and push everywhere
        just_db_commit=False,
        update=True,
        upload_qdrant=True,
        upload_s3=True,
        upload_pephub=True,
    )

    _LOGGER.info(f"Successfully processed {identifier}")
581+
582+
583+
@calculate_time
def reprocess_bedset(
    bedbase_config: Union[str, BedBaseAgent],
    output_folder: str,
    identifier: str,
    no_fail: bool = True,
    heavy: bool = False,
):
    """
    Recalculate an existing bedset from the bedbase

    The bedset record is fetched with ``bbagent.bedset.get`` and passed to
    ``run_bedbuncher`` with ``force_overwrite=True``. S3 upload follows the
    ``heavy`` flag; PEPhub upload is always disabled.

    :param bedbase_config: bedbase configuration file path, or an existing BedBaseAgent
    :param output_folder: output folder of the pipeline
    :param identifier: bedset identifier
    :param no_fail: forwarded unchanged to run_bedbuncher as ``no_fail``
    :param heavy: whether to use heavy processing. Calculate plots for bedset

    :raises BedBossException: if bedbase_config is neither a str nor a BedBaseAgent
    :return: None
    """

    # Reject unsupported config types up front, then resolve the agent.
    if not isinstance(bedbase_config, (str, bbconf.BedBaseAgent)):
        raise BedBossException("Incorrect bedbase_config type. Exiting...")

    if isinstance(bedbase_config, str):
        agent = BedBaseAgent(config=bedbase_config)
    else:
        agent = bedbase_config

    bedset_record = agent.bedset.get(identifier)

    # Everything except the member list is carried over as annotation;
    # the member list is passed separately through `bed_set`.
    carried_annotation = {**bedset_record.model_dump(exclude={"bed_ids"})}

    run_bedbuncher(
        bedbase_config=agent,
        record_id=bedset_record.id,
        bed_set=bedset_record.bed_ids,
        name=bedset_record.name,
        description=bedset_record.description,
        annotation=carried_annotation,
        output_folder=output_folder,
        heavy=heavy,
        light=False,
        upload_s3=heavy,
        upload_pephub=False,
        no_fail=no_fail,
        force_overwrite=True,
    )

bedboss/cli.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ def run_all(
8989
force_overwrite: bool = typer.Option(
9090
False, help="Force overwrite the output files"
9191
),
92+
update: bool = typer.Option(
93+
False,
94+
help="Update the bedbase database with the new record if it exists. This overwrites 'force_overwrite' option",
95+
),
9296
light: bool = typer.Option(
9397
False, help="Run the pipeline in light mode. [Default: False]"
9498
),
@@ -135,6 +139,7 @@ def run_all(
135139
light=light,
136140
just_db_commit=just_db_commit,
137141
force_overwrite=force_overwrite,
142+
update=update,
138143
upload_qdrant=upload_qdrant,
139144
upload_s3=upload_s3,
140145
upload_pephub=upload_pephub,
@@ -168,6 +173,10 @@ def run_pep(
168173
force_overwrite: bool = typer.Option(
169174
False, help="Force overwrite the output files"
170175
),
176+
update: bool = typer.Option(
177+
False,
178+
help="Update the bedbase database with the new record if it exists. This overwrites 'force_overwrite' option",
179+
),
171180
upload_qdrant: bool = typer.Option(True, help="Upload to Qdrant"),
172181
upload_s3: bool = typer.Option(True, help="Upload to S3"),
173182
upload_pephub: bool = typer.Option(True, help="Upload to PEPHub"),
@@ -200,6 +209,7 @@ def run_pep(
200209
ensdb=ensdb,
201210
just_db_commit=just_db_commit,
202211
force_overwrite=force_overwrite,
212+
update=update,
203213
license_id=license_id,
204214
upload_s3=upload_s3,
205215
upload_pephub=upload_pephub,
@@ -218,6 +228,75 @@ def run_pep(
218228
)
219229

220230

231+
@app.command(help="Run unprocessed files, or reprocess them")
def reprocess_all(
    bedbase_config: str = typer.Option(
        ...,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    outfolder: str = typer.Option(..., help="Path to the output folder"),
    limit: int = typer.Option(100, help="Limit the number of files to reprocess"),
    no_fail: bool = typer.Option(True, help="Do not fail on error"),
):
    # CLI wrapper around bedboss.bedboss.reprocess_all.
    # The library function is imported under an alias because this command
    # has the same name; calling `reprocess_all` here would call the CLI
    # command itself instead of the pipeline.
    from bedboss.bedboss import reprocess_all as reprocess_all_function

    reprocess_all_function(
        bedbase_config=bedbase_config,
        output_folder=outfolder,
        limit=limit,
        # the library function spells this parameter `nofail`
        nofail=no_fail,
    )
252+
253+
254+
@app.command(help="Run unprocessed file, or reprocess it [Only 1 file]")
def reprocess_one(
    bedbase_config: str = typer.Option(
        ...,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    outfolder: str = typer.Option(..., help="Path to the output folder"),
    identifier: str = typer.Option(..., help="Identifier of the bed file"),
):
    # CLI wrapper around bedboss.bedboss.reprocess_one.
    # The library function is imported under an alias because this command
    # has the same name; calling `reprocess_one` here would call the CLI
    # command itself instead of the pipeline.
    from bedboss.bedboss import reprocess_one as reprocess_one_function

    reprocess_one_function(
        bedbase_config=bedbase_config,
        output_folder=outfolder,
        identifier=identifier,
    )
273+
274+
275+
@app.command(help="Reprocess a bedset")
def reprocess_bedset(
    bedbase_config: str = typer.Option(
        ...,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    outfolder: str = typer.Option(..., help="Path to the output folder"),
    identifier: str = typer.Option(..., help="Bedset ID"),
    no_fail: bool = typer.Option(True, help="Do not fail on error"),
    heavy: bool = typer.Option(False, help="Run the heavy version of the pipeline"),
):
    # CLI wrapper around bedboss.bedboss.reprocess_bedset.
    # Imported inside the command, under a private alias, because the library
    # function has the same name as this command.
    from bedboss.bedboss import reprocess_bedset as _run_reprocess_bedset

    _run_reprocess_bedset(
        identifier=identifier,
        bedbase_config=bedbase_config,
        output_folder=outfolder,
        heavy=heavy,
        no_fail=no_fail,
    )
298+
299+
221300
@app.command(help=f"Create a bed files form a [{', '.join(options_list)}] file")
222301
def make_bed(
223302
input_file: str = typer.Option(

bedboss/utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
import glob
22
import logging
33
import os
4-
import urllib.request
54
import time
5+
import urllib.request
6+
from functools import wraps
67

78
import peppy
89
import requests
910
from bedms import AttrStandardizer
1011
from pephubclient.files_manager import FilesManager
1112
from peppy.const import SAMPLE_RAW_DICT_KEY
1213
from pypiper import PipelineManager
13-
from functools import wraps
1414

1515
from bedboss.refgenome_validator.main import ReferenceValidator
1616

@@ -51,7 +51,6 @@ def standardize_genome_name(input_genome: str, bedfile: str = None) -> str:
5151
return input_genome
5252

5353

54-
# %%
5554
def download_file(url: str, path: str, no_fail: bool = False) -> None:
5655
"""
5756
Download file from the url to specific location

0 commit comments

Comments
 (0)