Skip to content

Commit

Permalink
Merge pull request #697 from metagenome-atlas/qc-reads
Browse files Browse the repository at this point in the history
Qc reads from init not in qc reads
  • Loading branch information
SilasK authored Oct 10, 2023
2 parents d917a94 + 345eee2 commit 767d8dd
Show file tree
Hide file tree
Showing 18 changed files with 192 additions and 230 deletions.
69 changes: 20 additions & 49 deletions .github/workflows/python-package-conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@ on:
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:


defaults:
run:
shell: bash -el {0} # use default shell


concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
Expand All @@ -25,7 +23,6 @@ jobs:
strategy:
max-parallel: 1


steps:
- name: Checkout
uses: actions/checkout@v3.5.2
Expand All @@ -39,12 +36,11 @@ jobs:
environment-file: atlasenv.yml
environment-name: atlasenv
# Intended to persist the cached environment for one month; the date suffix on the key below is currently commented out.
cache-environment-key: atlasenv- #${{ steps.date.outputs.date }}

cache-environment-key: atlasenv- #${{ steps.date.outputs.date }}

- name: Install atlas
run: |
python -m pip install . --no-deps -vv
python -m pip install . --no-deps -vv
- name: Test atlas
run: |
Expand All @@ -58,8 +54,6 @@ jobs:
- name: Dryrun
run: test/dryrun.sh



run-qc-and-assembly:
runs-on: ubuntu-latest
needs: build-and-dryrun
Expand All @@ -71,14 +65,12 @@ jobs:
- name: Checkout
uses: actions/checkout@v3.5.2


- uses: mamba-org/setup-micromamba@v1
with:
environment-file: atlasenv.yml
environment-name: atlasenv
cache-environment-key: atlasenv-


- name: Install atlas
run: |
python -m pip install . --no-deps -vv
Expand All @@ -94,14 +86,14 @@ jobs:
- name: Download test data
if: steps.get-data.outputs.cache-hit != 'true'
run: wget --quiet https://zenodo.org/record/3992790/files/test_reads.tar.gz

- name: cache example data
if: steps.get-data.outputs.cache-hit != 'true'
uses: actions/cache/save@v3
with:
path: test_reads.tar.gz
key: ${{ steps.get-data.outputs.cache-primary-key }}

- name: extract data
run: |
tar -xzf test_reads.tar.gz
Expand All @@ -113,17 +105,15 @@ jobs:
with:
path: databases
key: conda-envs-assembly

# - name: unpack conda envs
# if: steps.get-envs.outputs.cache-hit != 'true'
# run: tar -xzf assembly_conda_envs.tar.gz


- name: Init
run: |
atlas init "test_reads" --threads "$N_THREADS" --working-dir wd
- name: Install dependencies for qc and assembly
if: steps.get-envs.outputs.cache-hit != 'true'
run: atlas run assembly --conda-create-envs-only -w wd
Expand All @@ -138,7 +128,6 @@ jobs:
path: databases
key: ${{ steps.get-envs.outputs.cache-primary-key }}


- name: Run QC
run: |
atlas run qc --max-mem $MEM --jobs=$N_THREADS --restart-times=2 --working-dir wd
Expand All @@ -148,58 +137,51 @@ jobs:
atlas run assembly --max-mem $MEM --jobs=$N_THREADS --restart-times=2 --working-dir wd
- name: Store Logs
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: logs
path: wd/logs

- name: Store Sample Logs
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: sample_logs
path: wd/sample1/logs

- name: Store reports
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: reports
path: wd/reports


- name: Cache working dir
uses: actions/cache/save@v3
with:
path: wd
key: assembly-working-dir



run-genecatalog:
runs-on: ubuntu-latest
needs: [ build-and-dryrun, run-qc-and-assembly ]
needs: [build-and-dryrun, run-qc-and-assembly]
env:
N_THREADS: 2
MEM: 3


steps:
- name: Checkout
uses: actions/checkout@v3.5.2


- uses: mamba-org/setup-micromamba@v1
with:
environment-file: atlasenv.yml
environment-name: atlasenv
cache-environment-key: atlasenv-


- name: install atlas
run: |
python -m pip install . --no-deps -vv
atlas --help
- name: get conda envs
id: get-envs
uses: actions/cache/restore@v3
Expand All @@ -222,19 +204,18 @@ jobs:
ls -l databases/conda_envs
atlas run assembly -w wd -n
- name: test Genecatalog
run: |
atlas run genecatalog --restart-times=2 --working-dir wd --omit-from combine_egg_nogg_annotations combine_dram_genecatalog_annotations
atlas run genecatalog --restart-times=2 --working-dir wd --omit-from combine_egg_nogg_annotations combine_dram_genecatalog_annotations
- name: Store Logs
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: logs
path: wd/logs

- name: Store Sample Logs
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: sample_logs
path: wd/sample1/logs
Expand All @@ -246,32 +227,27 @@ jobs:
path: databases
key: ${{ steps.get-envs.outputs.cache-primary-key }}


run-binning:
runs-on: ubuntu-latest
needs: [ build-and-dryrun, run-qc-and-assembly ]
needs: [build-and-dryrun, run-qc-and-assembly]
env:
N_THREADS: 2
MEM: 3
MEM: 0.9

steps:
- name: Checkout
uses: actions/checkout@v3.5.2


- uses: mamba-org/setup-micromamba@v1
with:
environment-file: atlasenv.yml
environment-name: atlasenv
cache-environment-key: atlasenv-


- name: install atlas
run: |
python -m pip install . --no-deps -vv
atlas --help
- name: get conda envs
id: get-envs
uses: actions/cache/restore@v3
Expand All @@ -295,25 +271,24 @@ jobs:
ls -l databases/conda_envs
atlas run assembly -w wd -n
- name: test binning
run: |
atlas run binning --restart-times=2 --working-dir wd --omit-from checkm2_download_db
atlas run binning --restart-times=2 --working-dir wd --omit-from checkm2_download_db --max-mem $MEM &> >(tee wd/logs/atlas_binning.log)
- name: Store Logs
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: logs
name: logs_binning
path: wd/logs

- name: Store Sample Logs
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: sample_logs
name: sample_logs_binning
path: wd/sample1/logs

- name: Store conda envs
# if: steps.get-envs.outputs.cache-hit != 'true'
# if: steps.get-envs.outputs.cache-hit != 'true'
uses: actions/cache/save@v3
with:
path: databases
Expand All @@ -327,25 +302,21 @@ jobs:
# - name: Checkout
# uses: actions/checkout@v3.5.2


# - uses: mamba-org/setup-micromamba@v1
# with:
# environment-file: atlasenv.yml
# environment-name: atlasenv
# cache-environment-key: atlasenv-


# - name: Install atlas
# run: |
# python -m pip install . --no-deps -vv
# atlas --help


# - name: get conda envs
# id: get-envs
# uses: actions/cache/restore@v3
# with:
# path: databases
# key: conda-envs-assembly
# restore-keys: conda-envs-*

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,4 @@ ENV/
# on mac
.DS_Store
example_data
atlas/regex_formating.py
1 change: 1 addition & 0 deletions atlas/atlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def run_workflow(
" --rerun-triggers mtime "
"{jobs} --rerun-incomplete "
"--configfile '{config_file}' --nolock "
" --show-failed-logs "
" {profile} --use-conda {conda_prefix} {dryrun} "
" {max_mem_string} "
" --scheduler greedy "
Expand Down
10 changes: 1 addition & 9 deletions atlas/default_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,7 @@
MEM = 80
JAVA_MEM_FRACTION = 0.85
PREALLOCATE_RAM = "t"
PREPROCESS_ADAPTER_MIN_K = 8
PREPROCESS_KMER_TRIM = "r"
PREPROCESS_MINIMUM_BASE_QUALITY = 10
PREPROCESS_ALLOWABLE_KMER_MISMATCHES = 1
PREPROCESS_REFERENCE_KMER_MATCH_LENGTH = 27
QTRIM = "rl"
PREPROCESS_MINIMUM_PASSING_READ_LENGTH = 51
PREPROCESS_MINIMUM_BASE_FREQUENCY = 0.05
PREPROCESS_MAX_NS = -1


MERGING_FLAGS = "ecct iterations=1"
MERGING_EXTEND2 = 50
Expand Down
14 changes: 8 additions & 6 deletions atlas/init/atlas_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
validate_bingroup_size_cobinning,
validate_bingroup_size_metabat,
BinGroupSizeError,
ADDITIONAL_SAMPLEFILE_HEADERS,
)

# default globals
Expand Down Expand Up @@ -55,13 +54,16 @@ def prepare_sample_table_for_atlas(

sample_table.rename(columns={f: f"{prefix}{f}" for f in fractions}, inplace=True)

# Add BinGroup and additional empty headers
Headers = ADDITIONAL_SAMPLEFILE_HEADERS
for h in Headers:
sample_table[h] = np.nan

sample_table["BinGroup"] = "All"

if not reads_are_QC:
for f in fractions:
sample_table[f"Reads_QC_{f}"] = (
f"QC/reads/" + sample_table.index + f"_{f}.fastq.gz"
)

sample_table["Assembly"] = "Assembly/fasta/" + sample_table.index + ".fasta"

validate_sample_table(sample_table)

sample_table.to_csv(outfile, sep="\t")
Expand Down
14 changes: 0 additions & 14 deletions atlas/make_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,7 @@ def make_default_config():
config["mem"] = MEM
config["large_mem"] = 250
config["large_threads"] = 16
config["preprocess_adapter_min_k"] = PREPROCESS_ADAPTER_MIN_K
config["preprocess_minimum_base_quality"] = PREPROCESS_MINIMUM_BASE_QUALITY
config[
"preprocess_allowable_kmer_mismatches"
] = PREPROCESS_ALLOWABLE_KMER_MISMATCHES
config[
"preprocess_reference_kmer_match_length"
] = PREPROCESS_REFERENCE_KMER_MATCH_LENGTH
config[
"preprocess_minimum_passing_read_length"
] = PREPROCESS_MINIMUM_PASSING_READ_LENGTH
config["preprocess_minimum_base_frequency"] = PREPROCESS_MINIMUM_BASE_FREQUENCY

config["deduplicate"] = True
config["error_correction_overlapping_pairs"] = True

config["contaminant_max_indel"] = CONTAMINANT_MAX_INDEL
config["contaminant_min_ratio"] = CONTAMINANT_MIN_RATIO
Expand Down
4 changes: 1 addition & 3 deletions atlas/sample_table.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import pandas as pd

ADDITIONAL_SAMPLEFILE_HEADERS = [] # ,'Contigs']

import logging

logger = logging.getLogger(__file__)


def validate_sample_table(sampleTable):
Expected_Headers = ["BinGroup"] + ADDITIONAL_SAMPLEFILE_HEADERS
Expected_Headers = ["BinGroup"]
for h in Expected_Headers:
if not (h in sampleTable.columns):
logger.error(f"expect '{h}' to be found in samples.tsv")
Expand Down
2 changes: 1 addition & 1 deletion atlasenv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ dependencies:
- click >=7
- ruamel.yaml >=0.17
- cookiecutter

- wget
Loading

0 comments on commit 767d8dd

Please sign in to comment.