Skip to content

Commit

Permalink
Merge pull request #109 from ncsa/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
joshfactorial authored May 16, 2024
2 parents 2c80a44 + c8bf73d commit 66ab859
Show file tree
Hide file tree
Showing 14 changed files with 2,035 additions and 49 deletions.
76 changes: 42 additions & 34 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
@@ -1,53 +1,61 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
# This workflow configures the environment and executes NEAT read-simulator tests using relative paths for a series of configuration files individually
# For more information on using Python with GitHub Actions, refer to:
# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: NEAT unit tests
name: NEAT Unit Tests

on:
push:
branches: [ "main", "develop" ]
branches: [develop, main]
pull_request:
branches: [ "main" ]
branches: [main]

jobs:
build:
detailed_test_execution:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: s-weigand/setup-conda@v1.1.1
with:
conda-channels: bioconda, conda-forge
conda-channels: [bioconda, conda-forge]
activate-conda: true
repository: NCSA/NEAT
- name: basic test
- name: Environment Setup
run: |
conda env create -f environment.yml -n test_neat
conda activate test_neat
poetry install
neat
cd config_template
- name: run coverage tests
run: |
conda activate test_neat
python tests/coverage_tests.py
# - name: lint with flake8
# run: |
# conda activate neat
# # stop the build if there are Python syntax errors or undefined names
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# - name: Execute test_gen_reads
# run: |
# conda activate neat
# cd ${{ github.workspace }}
# poetry install
# neat --log-level ERROR --no-log read-simulator -c data/test_config.yml -o test
# - run: echo "This job's status is ${{ job.status }}."
# - name: Execute seq_err_model_test
# run: |
# cd ${{ github.workspace }}
# neat --log-level ERROR --no-log model-seq-err -i data/baby.fastq
# - run: echo "This job's status is ${{ job.status }}."
# Run the read-simulator once per test configuration.
# Every `run` step starts a new shell in the default working directory, so a
# `cd` performed in an earlier step does not carry over. `working-directory`
# is set on each step so that the relative config path (-c) and the relative
# output prefix (-o ../outputs/...) resolve from config_template/.
- name: Run NEAT Simulation for config_test1
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test1.yml -o ../outputs/test1_read-simulator

- name: Run NEAT Simulation for config_test2
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test2.yml -o ../outputs/test2_read-simulator

- name: Run NEAT Simulation for config_test3
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test3.yml -o ../outputs/test3_read-simulator

- name: Run NEAT Simulation for config_test4
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test4.yml -o ../outputs/test4_read-simulator

- name: Run NEAT Simulation for config_test5
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test5.yml -o ../outputs/test5_read-simulator

- name: Run NEAT Simulation for config_test6
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test6.yml -o ../outputs/test6_read-simulator

- name: Run NEAT Simulation for config_test7
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test7.yml -o ../outputs/test7_read-simulator

- name: Run NEAT Simulation for config_test8
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test8.yml -o ../outputs/test8_read-simulator

- name: Run NEAT Simulation for config_test9
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test9.yml -o ../outputs/test9_read-simulator

- name: Run NEAT Simulation for config_test10
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test10.yml -o ../outputs/test10_read-simulator

- name: Run NEAT Simulation for config_test11
  working-directory: config_template
  run: python -m neat --log-level DEBUG read-simulator -c config_test11.yml -o ../outputs/test11_read-simulator
177 changes: 177 additions & 0 deletions config_template/config_test1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Test 1: Default parameters, H1N1 data

## Template for gen_reads parallel
## Any parameter that is not required but has a default value will use the
## default value even if the variable is not included in the config. For
## required items, they must be included in the config and they must be given a value.
## All other items can be present or not. If present and the value is set to a single
## period, the variable will be treated as though it had been omitted. Please do
## not modify this template, but instead make a copy in your working directory. Done this
## way, you can run without even needing to declare -c.

# Path to input reference fasta file (this test config uses a relative path)
# type = string | required: yes
reference: ../data/H1N1.fa

# How to partition the reference for analysis. By default, NEAT will
# attempt to process one contig per thread. However, if you have very
# large fasta files, you will see additional runtime benefit from choosing
# the subdivision method, which will split the contigs up into equal sizes
# for processing. If you need further speedups and have access to a distributed system
# you can use a shell script wrapper around NEAT to split the fasta into
# contigs, then join the results later. NEAT does not feature translocations, so
# this will not affect NEAT's output. Note that subdivision will only activate for
# number of threads > 1.
# type = string | required: no | default = chrom | possible values: chrom, subdivision
partition_mode: .

# Read length of the reads in the fastq output. Only required if @produce_fastq is set to true
# type = int | required: no | default = 101
read_len: .

# Number of threads to request for NEAT. The recommended amount is the number of chromosomes in
# your input fasta plus 1.
# type = int | required: no | default = 1
threads: .

# Average Coverage for the entire genome.
# type = float | required: no | default = 10.0
coverage: .

# Absolute path to file with sequencing error model
# type = string | required: no | default: <NEAT_DIR>/neat/models/defaults/default_error_model.pickle.gz
error_model: .

# Average sequencing error rate for the sequencing machine
# type = float | required = no | must be between 0.0 and 0.3
avg_seq_error: .

# This scales the quality scores to match the desired average sequencing error rate
# specified by avg_seq_error.
# type: boolean | required = no | default = false
rescale_qualities: .

# This is the factor to add to the quality scores to get the ascii text version of the
# score. The default follows the sanger quality offset
# type: int | required = no | default = 33
quality_offset: .

# Desired ploidy
# type = int | required = no | default = 2
ploidy: .

# Absolute path to vcf file containing variants that will always be included, regardless
# of genotype and filter. You can pre-filter your vcf for these fields before inputting it
# if this is not the desired behavior.
# type: string | required = no
input_variants: .

# Absolute path to bed file containing reference regions that the simulation
# should target.
# type = string | required = no
target_bed: .

# Scalar value for coverage in regions outside the targeted bed. Example 0.5
# would get you roughly half the coverage as the on target areas. Default is
# 2% of total coverage in off-target regions.
# type: float | required = no | default = 0.02
off_target_scalar: .

# Whether to discard areas outside the targeted bed region. By default, this is set
# to false and NEAT will use a different model for off-target regions but still
# include them in the final output.
# TODO this may not be necessary
# type: boolean | required = no | default = false
discard_offtarget: .

# Absolute path to bed file containing reference regions that the simulation
# should discard.
# type = string | required = no
discard_bed: .

# Absolute path to the mutation model pickle file. Omitting this value will cause
# NEAT to use the default model, with some standard parameters, and generally uniform biases.
# type: string | required = no
mutation_model: .

# Average mutation rate per base pair. The overall average is 0.001, or the model default.
# Use this value to override the mutation rate of either the default model or the input model.
# type: float | required = no | must be between 0.0 and 0.3
mutation_rate: .

# Absolute path to a bed file with mutation rates by region.
# Rates must be in the fourth column and be of the form "mut_rate=x.xx"
# Rates must be between 0.00 and 0.03
# type: string | required = no
mutation_bed: .

# Absolute path to GC content model generated by compute_gc.py
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_gc_bias_model.pickle.gz
gc_model: .

# Whether the output should be paired ended. For certain conditions (i.e., vcf only or
# fasta only), this will be ignored. If this is true, then you must either include a fragment
# length model output from runner.py or declare a mean and standard deviation
# by setting values for @fragment_mean and @fragment_st_dev.
# type: boolean | required = no | default = false
paired_ended: .

# Absolute path to a pickle file containing the fragment length model output
# from runner.py.
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_fraglen_model.pickle.gz
fragment_model: .

# Mean for the paired end fragment length. This only applies if paired-ended is set to true.
# This number will form the mean for the sample distribution of the fragment lengths in the simulation
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used.
# type: float | required: no (unless paired-ended)
fragment_mean: .

# Standard deviation for the paired end fragment length. This only applies if paired-ended is set to true.
# This number will form the standard deviation about the mean specified above for the sample distribution
# of the fragment lengths in the simulation.
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used.
# type: float | required: no (unless paired-ended)
fragment_st_dev: .

# Whether to produce the golden bam file. This file will contain the reads
# aligned with the exact region of the genome
# type: boolean | required = no | default = false
produce_bam: .

# Whether to produce a vcf file containing all the mutation errors added
# by NEAT.
# type: boolean | required = no | default = false
produce_vcf: .

# Whether to output the mutated fasta. This will output a fasta file with mutations
# inserted. It does not include sequencing errors or read information. Useful for
# multigenerational mutations.
# type: boolean | required = no | default = false
produce_fasta: .

# Whether to output the fastq(s) of the reads. This is the default output. NEAT
# will produce 1 fastq for single ended reads or 2 fastqs for paired ended.
# type: boolean | required = no | default = true
produce_fastq: .

# If set to true, this will ignore statistical models and force coverage to be
# constant across the genome. This is considered a debugging feature.
# type: boolean | required = no | default = false
no_coverage_bias: .

# Set an RNG seed value. Runs using identical RNG values should produce identical results
# so things like read locations, variant positions, error positions, etc. should be the same.
# Useful for debugging.
# type: int | required = no
rng_seed: .

# Set an absolute minimum number of mutations. The program always adds at least 1 mutation.
# Useful for very small datasets.
# type: int | required = no
min_mutations: .

# Overwrite the output files, if they are named the same as the current run.
# Default is to quit if files already exist to avoid data destruction.
# This is the only optional setting in this test config that is given an explicit
# value; it is written as the canonical lowercase YAML boolean.
# type: boolean | required = no | default = false
overwrite_output: true
Loading

0 comments on commit 66ab859

Please sign in to comment.