diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml new file mode 100755 index 00000000..0e47ef9b --- /dev/null +++ b/.github/workflows/pypi_publish.yml @@ -0,0 +1,49 @@ +name: Publish package python distribution to Pypi + +on: + release: + types: [published] + workflow_dispatch: + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.12.7 + - name: Install pypi/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: Publish dist to PyPI + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/buisciii-tools + permissions: + id-token: write + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index b76da9b9..2050d306 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,14 +8,63 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Credits +### Template fixes and updates + +### Modules + +#### Added enhancements + +#### Fixes + +#### Changed + +#### Removed + +### Requirements + +## [2.X.Xhot] - 2024-0X-0X : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.X.2 + +### Credits + +### Template fixes and updates + +### Modules + +#### Added enhancements + +#### Fixes + +#### Changed + +#### Removed + +### Requirements + +## [2.2.2] - 2024-10-28 : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.2.2 + +### Credits + Code contributions to the new version: +- [Pablo 
Mata](https://github.com/Shettland) +- [Victor Lopez](https://github.com/victor5lm) + ### Template fixes and updates +- Updated the fix-permissions module in __main__.py [#356](https://github.com/BU-ISCIII/buisciii-tools/pull/356). +- Fixed the singularity cache directory in taxprofiler.config [#356](https://github.com/BU-ISCIII/buisciii-tools/pull/356). +- Updated sftp_user.json [#356](https://github.com/BU-ISCIII/buisciii-tools/pull/356). +- Fixed viralrecon's lablog and the remove_columns_mapping_table.sh auxiliary script [#356](https://github.com/BU-ISCIII/buisciii-tools/pull/356). +- Updated the singularity image in the mtbseq templates [#356](https://github.com/BU-ISCIII/buisciii-tools/pull/356). +- Fixed a bug in bioinfo_doc.py [#356](https://github.com/BU-ISCIII/buisciii-tools/pull/356). +- Updated new_service.py to check FASTQ integrity via md5sum [#356](https://github.com/BU-ISCIII/buisciii-tools/pull/356). + ### Modules #### Added enhancements +- Included a new github action to automatically publish releases to pypi [#351](https://github.com/BU-ISCIII/buisciii-tools/pull/351) + #### Fixes #### Changed diff --git a/README.md b/README.md index da5522a9..dddebc62 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ BU-ISCIII provides a serie or services in its portfolio for supporting bioinform ```bash micromamba create -n buisciii -f environment.yml micromamba activate buisciii -pip install --force-reinstall --upgrade git+https://github.com/bu-isciii/buisciii-tools.git@main +pip install buisciii-tools ``` or @@ -40,7 +40,7 @@ or git checkout main conda create -n buisciii -f environment.yml conda activate -pip install . 
+pip install buisciii-tools ``` ### Dev version diff --git a/bu_isciii/__main__.py b/bu_isciii/__main__.py index 1aebd780..a7e958ae 100755 --- a/bu_isciii/__main__.py +++ b/bu_isciii/__main__.py @@ -57,7 +57,7 @@ def run_bu_isciii(): ) # stderr.print("[green] `._,._,'\n", highlight=False) - __version__ = "2.2.1" + __version__ = "2.2.2" stderr.print( "[grey39] BU-ISCIII-tools version {}".format(__version__), highlight=False ) @@ -655,6 +655,7 @@ def autoclean_sftp(ctx, sftp_folder, days): "-d", "--input_directory", type=click.Path(), + multiple=True, default=None, required=True, help="Input directory to fix permissions (absolute path)", @@ -664,15 +665,18 @@ def fix_permissions(ctx, input_directory): """ Fix permissions """ - if not os.path.isdir(input_directory): - exit("Invalid input directory") conf = bu_isciii.config_json.ConfigJson() permissions = conf.get_configuration("global").get("permissions") - bu_isciii.utils.remake_permissions(input_directory, permissions) stderr = rich.console.Console( stderr=True, force_terminal=bu_isciii.utils.rich_force_colors() ) - stderr.print(f"[green]Correct permissions were applied to {input_directory}") + + for directory in input_directory: + if not os.path.isdir(directory): + stderr.print(f"[red]Invalid input directory: {directory}") + continue + bu_isciii.utils.remake_permissions(directory, permissions) + stderr.print(f"[green]Correct permissions were applied to {directory}") if __name__ == "__main__": diff --git a/bu_isciii/bioinfo_doc.py b/bu_isciii/bioinfo_doc.py index 519cb382..4654f9ed 100644 --- a/bu_isciii/bioinfo_doc.py +++ b/bu_isciii/bioinfo_doc.py @@ -715,7 +715,7 @@ def send_email(self, html_text, results_pdf_file): server.ehlo() server.login(user=email_host_user, password=email_host_password) except Exception as e: - stderr.print("[red] Unable to send e-mail" + e) + stderr.print("[red] Unable to send e-mail: " + str(e)) default_cc = "bioinformatica@isciii.es" msg = MIMEMultipart("alternative") msg["To"] = 
self.resolution_info["service_user_id"]["email"] diff --git a/bu_isciii/new_service.py b/bu_isciii/new_service.py index 0bcf6556..e22bcde1 100755 --- a/bu_isciii/new_service.py +++ b/bu_isciii/new_service.py @@ -8,6 +8,7 @@ import json import shutil import rich +import subprocess # Local imports import bu_isciii @@ -88,7 +89,37 @@ def __init__( ) self.full_path = os.path.join(self.path, self.service_folder) + def check_md5(self): + """Check FASTQ integrity with md5sum -c; exit with status 1 on failure.""" + # Path to the .md5 file + project_name = self.service_samples[0]["project_name"] + md5_file_path = ( + f'{self.conf["fastq_repo"]}/{project_name}/md5sum_{project_name}.md5' + ) + if not os.path.exists(md5_file_path): + stderr.print(f"[red]ERROR: .md5 file not found at {md5_file_path}") + sys.exit(1) + + original_dir = os.getcwd() + md5_dir = os.path.dirname(md5_file_path) + os.chdir(md5_dir) + + # md5sum command + stderr.print(f"[blue]Checking MD5 integrity for {md5_file_path}") + try: + subprocess.run( + ["md5sum", "-c", os.path.basename(md5_file_path)], check=True + ) + stderr.print("[green]MD5 check passed!") + except subprocess.CalledProcessError as e: + # Output is not captured, so e.stderr is None; report the exit status + stderr.print(f"[red]ERROR: MD5 check failed: md5sum exited with status {e.returncode}") + sys.exit(1) + finally: + os.chdir(original_dir) + def create_folder(self): + self.check_md5() if not self.no_create_folder: stderr.print( "[blue]I will create the service folder for " + self.resolution_id + "!" 
diff --git a/bu_isciii/templates/mag/DOC/taxprofiler.config b/bu_isciii/templates/mag/DOC/taxprofiler.config index 60569c0a..1d05137f 100644 --- a/bu_isciii/templates/mag/DOC/taxprofiler.config +++ b/bu_isciii/templates/mag/DOC/taxprofiler.config @@ -5,7 +5,7 @@ singularity { enabled = true autoMounts = true - singularity.cacheDir = '/data/cnm/ratb/pipelines/singularity-images/' + singularity.cacheDir = '/data/bi/pipelines/singularity-images/' } process { diff --git a/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/all_samples/lablog b/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/all_samples/lablog index d00741df..99a74d18 100644 --- a/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/all_samples/lablog +++ b/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/all_samples/lablog @@ -14,9 +14,9 @@ cd Mpileup; ln -s ../../*/Mpileup/* . ; cd - cd Position_Tables; ln -s ../../*/Position_Tables/* . ; cd - ##### Create join scripts -echo "srun --job-name MTBSEQ_JOIN --output logs/MTBSEQ_JOIN.%j.log --partition middle_obx --mem 48G --chdir ${scratch_dir} --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../../ /data/bi/pipelines/singularity-images/mtbseq:1.0.4—hdfd78af_2 MTBseq --step TBjoin --threads 5 --samples ${scratch_dir}/samples.txt &" > _01_tb_join.sh -echo "srun --job-name MTBSEQ_AMEND --output logs/MTBSEQ_AMEND.%j.log --partition middle_obx --mem 48G --chdir ${scratch_dir} --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../../ /data/bi/pipelines/singularity-images/mtbseq:1.0.4—hdfd78af_2 MTBseq --step TBamend --threads 5 --samples ${scratch_dir}/samples.txt &" > _02_tb_amend.sh -echo "srun --job-name MTBSEQ_GROUPS --output logs/MTBSEQ_GROUPS.%j.log --partition middle_obx --mem 48G --chdir ${scratch_dir} --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../../ /data/bi/pipelines/singularity-images/mtbseq:1.0.4—hdfd78af_2 MTBseq --step TBgroups --threads 5 --samples 
${scratch_dir}/samples.txt &" > _03_tb_groups.sh +echo "srun --job-name MTBSEQ_JOIN --output logs/MTBSEQ_JOIN.%j.log --partition middle_obx --mem 48G --chdir ${scratch_dir} --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../../ /data/bi/pipelines/singularity-images/mtbseq:1.1.0--hdfd78af_0 MTBseq --step TBjoin --threads 5 --samples ${scratch_dir}/samples.txt &" > _01_tb_join.sh +echo "srun --job-name MTBSEQ_AMEND --output logs/MTBSEQ_AMEND.%j.log --partition middle_obx --mem 48G --chdir ${scratch_dir} --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../../ /data/bi/pipelines/singularity-images/mtbseq:1.1.0--hdfd78af_0 MTBseq --step TBamend --threads 5 --samples ${scratch_dir}/samples.txt &" > _02_tb_amend.sh +echo "srun --job-name MTBSEQ_GROUPS --output logs/MTBSEQ_GROUPS.%j.log --partition middle_obx --mem 48G --chdir ${scratch_dir} --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../../ /data/bi/pipelines/singularity-images/mtbseq:1.1.0--hdfd78af_0 MTBseq --step TBgroups --threads 5 --samples ${scratch_dir}/samples.txt &" > _03_tb_groups.sh #### Execute iqtree echo "srun --chdir ${scratch_dir} --output logs/IQTREEFULLALIGN.%j.log --job-name IQTREEFULLALIGN --cpus-per-task 20 --mem 15G --partition short_idx --time 08:00:00 singularity exec -B ${scratch_dir}/../../../../ /data/bi/pipelines/singularity-images/iqtree:2.1.4_beta--hdcc8f71_0 iqtree -s ${scratch_dir}/Amend/*amended_u95_phylo_w12.plainIDs.fasta -m K3Pu+F+I -T 20 -B 1000 -pre phylo.iqtree.bootstrap &" > _04_iqtreeall.sh diff --git a/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog b/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog index 0eff7900..55345c6d 100644 --- a/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog +++ b/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog @@ -9,7 +9,7 @@ cat ../samples_id.txt | xargs -I % echo "mkdir %; ln -s ../../01-preprocessing/% cat 
../samples_id.txt | xargs -I % echo "ln -s ../../01-preprocessing/%/%_R2_filtered.fastq.gz %/%_lib1_R2.fastq.gz" >> _00_prepareRaw.sh cat ../samples_id.txt | xargs -I % echo "cd %;ls *.fastq.gz | tr '_' '\t' | cut -f 1,2 | sort -u > samples.txt;cd -" > _01_preparesamples.sh -cat ../samples_id.txt | xargs -I @@ echo -e "srun --job-name MTBSEQ.@@ --output logs/MTBSEQ.@@.%j.log --partition middle_idx --mem 48G --chdir ${scratch_dir}/@@ --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/mtbseq:1.0.4--hdfd78af_2 MTBseq --step TBfull --threads 10 --samples samples.txt &" > _02_mtbseq.sh +cat ../samples_id.txt | xargs -I @@ echo -e "srun --job-name MTBSEQ.@@ --output logs/MTBSEQ.@@.%j.log --partition middle_idx --mem 48G --chdir ${scratch_dir}/@@ --cpus-per-task 10 singularity exec -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/mtbseq:1.1.0--hdfd78af_0 MTBseq --step TBfull --threads 10 --samples samples.txt &" > _02_mtbseq.sh # classification echo "mkdir classification_all" > _03_gather_results.sh diff --git a/bu_isciii/templates/sftp_user.json b/bu_isciii/templates/sftp_user.json index 344c8b49..ba627858 100755 --- a/bu_isciii/templates/sftp_user.json +++ b/bu_isciii/templates/sftp_user.json @@ -54,5 +54,6 @@ "bbaladron": ["SpainUDP"], "bioinfoadm": ["test"], "s.varona": ["misc"], - "nlabiod": ["Labarbovirus"] + "nlabiod": ["Labarbovirus"], + "jmgonzalez": ["Lablegionella"] } diff --git a/bu_isciii/templates/viralrecon/ANALYSIS/lablog_viralrecon b/bu_isciii/templates/viralrecon/ANALYSIS/lablog_viralrecon index ec944be3..f80d4524 100644 --- a/bu_isciii/templates/viralrecon/ANALYSIS/lablog_viralrecon +++ b/bu_isciii/templates/viralrecon/ANALYSIS/lablog_viralrecon @@ -479,6 +479,7 @@ do cp create_summary_report.sh ${FOLDER_NAME}/ cp deduplicate_long_table.sh ${FOLDER_NAME}/ cp percentajeNs.py ${FOLDER_NAME}/ + cp remove_columns_mapping_table.sh ${FOLDER_NAME}/ grep -i ${in} samples_ref.txt | cut -f1,2 > 
${FOLDER_NAME}/samples_ref.txt echo "ln -s ../00-reads ." > ${FOLDER_NAME}/lablog printf "ln -s ../samples_id.txt .\n\n" >> ${FOLDER_NAME}/lablog diff --git a/bu_isciii/templates/viralrecon/ANALYSIS/remove_columns_mapping_table.sh b/bu_isciii/templates/viralrecon/ANALYSIS/remove_columns_mapping_table.sh index 434c51c9..97e769d8 100644 --- a/bu_isciii/templates/viralrecon/ANALYSIS/remove_columns_mapping_table.sh +++ b/bu_isciii/templates/viralrecon/ANALYSIS/remove_columns_mapping_table.sh @@ -3,7 +3,7 @@ output_file=$(echo processed_mapping_illumina_$(date '+%Y%m%d').tab) # Removal of the first three columns of the mapping illumina tab file -cut --complement -f1-3 mapping_*.tab > output_file +cut --complement -f1-3 mapping_*.tab > $output_file mv $output_file mapping_illumina_$(date '+%Y%m%d').tab # Success message diff --git a/bu_isciii/templates/viralrecon/RESULTS/lablog_viralrecon_results b/bu_isciii/templates/viralrecon/RESULTS/lablog_viralrecon_results index d05b2ab7..31bbdfd1 100755 --- a/bu_isciii/templates/viralrecon/RESULTS/lablog_viralrecon_results +++ b/bu_isciii/templates/viralrecon/RESULTS/lablog_viralrecon_results @@ -25,7 +25,6 @@ ln -s ../../ANALYSIS/*/mapping_illumina*.tab ./mapping_illumina.csv ln -s ../../ANALYSIS/*/assembly_stats.csv ./assembly_stats.csv ln -s ../../ANALYSIS/*/01-PikaVirus-results/all_samples_virus_table_filtered.tsv ./pikavirus_table.tsv -#conda activate viralrecon_report echo "python ./excel_generator.py -r ./references.tmp --merge_lineage_files" > _01_generate_excel_files.sh #Cleaning temp files and broken symbolic links echo "find . -xtype l -delete" > _02_clean_folders.sh diff --git a/setup.py b/setup.py index 50fdff3d..67938626 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages -version = "2.2.1" +version = "2.2.2" with open("README.md") as f: readme = f.read()