-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #109 from ncsa/develop
Develop
- Loading branch information
Showing
14 changed files
with
2,035 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,53 +1,61 @@ | ||
# This workflow will install Python dependencies, run tests and lint with a single version of Python | ||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions | ||
# This workflow sets up the environment and runs the NEAT read-simulator once for each test configuration file, referenced by relative path | ||
# For more information on using Python with GitHub Actions, refer to: | ||
# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions | ||
|
||
name: NEAT unit tests | ||
name: NEAT Unit Tests | ||
|
||
on: | ||
push: | ||
branches: [ "main", "develop" ] | ||
branches: [develop, main] | ||
pull_request: | ||
branches: [ "main" ] | ||
branches: [main] | ||
|
||
jobs: | ||
build: | ||
detailed_test_execution: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: s-weigand/setup-conda@v1.1.1 | ||
with: | ||
conda-channels: bioconda, conda-forge | ||
conda-channels: [bioconda, conda-forge] | ||
activate-conda: true | ||
repository: NCSA/NEAT | ||
- name: basic test | ||
- name: Environment Setup | ||
run: | | ||
conda env create -f environment.yml -n test_neat | ||
conda activate test_neat | ||
poetry install | ||
neat | ||
cd config_template | ||
- name: run coverage tests | ||
run: | | ||
conda activate test_neat | ||
python tests/coverage_tests.py | ||
# - name: lint with flake8 | ||
# run: | | ||
# conda activate neat | ||
# # stop the build if there are Python syntax errors or undefined names | ||
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||
# - name: Execute test_gen_reads | ||
# run: | | ||
# conda activate neat | ||
# cd ${{ github.workspace }} | ||
# poetry install | ||
# neat --log-level ERROR --no-log read-simulator -c data/test_config.yml -o test | ||
# - run: echo "This job's status is ${{ job.status }}." | ||
# - name: Execute seq_err_model_test | ||
# run: | | ||
# cd ${{ github.workspace }} | ||
# neat --log-level ERROR --no-log model-seq-err -i data/baby.fastq | ||
# - run: echo "This job's status is ${{ job.status }}." | ||
- name: Run NEAT Simulation for config_test1 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test1.yml -o ../outputs/test1_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test2 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test2.yml -o ../outputs/test2_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test3 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test3.yml -o ../outputs/test3_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test4 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test4.yml -o ../outputs/test4_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test5 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test5.yml -o ../outputs/test5_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test6 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test6.yml -o ../outputs/test6_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test7 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test7.yml -o ../outputs/test7_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test8 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test8.yml -o ../outputs/test8_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test9 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test9.yml -o ../outputs/test9_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test10 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test10.yml -o ../outputs/test10_read-simulator | ||
|
||
- name: Run NEAT Simulation for config_test11 | ||
run: python -m neat --log-level DEBUG read-simulator -c config_test11.yml -o ../outputs/test11_read-simulator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
# Test 1: Default parameters, H1N1 data | ||
|
||
## Template for gen_reads parallel | ||
## Any parameter that is not required but has a default value will use the | ||
## default value even if the variable is not included in the config. For | ||
## required items, they must be included in the config and they must be given a value. | ||
## All other items can be present or not. If present and the value is set to a single | ||
## period, the variable will be treated as though it had been omitted. Please do | ||
## not modify this template, but instead make a copy in your working directory. Done this | ||
## way, you can run without even needing to declare -c. | ||
|
||
# Path to input reference fasta file (absolute, or relative to the working directory as used in this test) | ||
# type = string | required: yes | ||
reference: ../data/H1N1.fa | ||
|
||
# How to partition the reference for analysis. By default, NEAT will | ||
# attempt to process one contig per thread. However, if you have very | ||
# large fasta files, you will see additional runtime benefit from choosing | ||
# the subdivision method, which will split the contigs up into equal sizes | ||
# for processing. If you need further speedups and have access to a distributed system | ||
# you can use a shell script wrapper around NEAT to split the fasta into | ||
# contigs, then join the results later. NEAT does not feature translocations, so | ||
# this will not affect NEAT's output. Note that subdivision will only activate for | ||
# number of threads > 1. | ||
# type = string | required: no | default = chrom | possible values: chrom, subdivision | ||
partition_mode: . | ||
|
||
# Read length of the reads in the fastq output. Only required if @produce_fastq is set to true | ||
# type = int | required: no | default = 101 | ||
read_len: . | ||
|
||
# Number of threads to request for NEAT. The recommended amount is the number of chromosomes in | ||
# your input fasta plus 1. | ||
# type = int | required: no | default = 1 | ||
threads: . | ||
|
||
# Average Coverage for the entire genome. | ||
# type = float | required: no | default = 10.0 | ||
coverage: . | ||
|
||
# Absolute path to file with sequencing error model | ||
# type = string | required: no | default: <NEAT_DIR>/neat/models/defaults/default_error_model.pickle.gz | ||
error_model: . | ||
|
||
# Average sequencing error rate for the sequencing machine | ||
# type = float | required = no | must be between 0.0 and 0.3 | ||
avg_seq_error: . | ||
|
||
# This scales the quality scores to match the desired average sequencing error rate | ||
# specified by avg_seq_error. | ||
# type: boolean | required = no | default = false | ||
rescale_qualities: . | ||
|
||
# This is the factor to add to the quality scores to get the ascii text version of the | ||
# score. The default follows the sanger quality offset | ||
# type: int | required = no | default = 33 | ||
quality_offset: . | ||
|
||
# Desired ploidy | ||
# type = int | required = no | default = 2 | ||
ploidy: . | ||
|
||
# Absolute path to vcf file containing variants that will always be included, regardless | ||
# of genotype and filter. You can pre-filter your vcf for these fields before inputting it | ||
# if this is not the desired behavior. | ||
# type: string | required = no | ||
input_variants: . | ||
|
||
# Absolute path to bed file containing reference regions that the simulation | ||
# should target. | ||
# type = string | required = no | ||
target_bed: . | ||
|
||
# Scalar value for coverage in regions outside the targeted bed. Example 0.5 | ||
# would get you roughly half the coverage as the on target areas. Default is | ||
# 2% of total coverage in off-target regions. | ||
# type: float | required = no | default = 0.02 | ||
off_target_scalar: . | ||
|
||
# Whether to discard areas outside the targeted bed region. By default, this is set | ||
# to false and NEAT will use a different model for off-target regions but still | ||
# include them in the final output. | ||
# TODO this may not be necessary | ||
# type: boolean | required = no | default = false | ||
discard_offtarget: . | ||
|
||
# Absolute path to bed file containing reference regions that the simulation | ||
# should discard. | ||
# type = string | required = no | ||
discard_bed: . | ||
|
||
# Absolute path to the mutation model pickle file. Omitting this value will cause | ||
# NEAT to use the default model, with some standard parameters, and generally uniform biases. | ||
# type: string | required = no | ||
mutation_model: . | ||
|
||
# Average mutation rate per base pair. Overall average is 0.001, or model default | ||
# Use this value to override the mutation rate of either the default model or the input model. | ||
# type: float | required = no | must be between 0.0 and 0.3 | ||
mutation_rate: . | ||
|
||
# Absolute path to a bed file with mutation rates by region. | ||
# Rates must be in the fourth column and be of the form "mut_rate=x.xx" | ||
# Rates must be between 0.00 and 0.03 | ||
# type: string | required = no | ||
mutation_bed: . | ||
|
||
# Absolute path to GC content model generated by compute_gc.py | ||
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_gc_bias_model.pickle.gz | ||
gc_model: . | ||
|
||
# Whether the output should be paired ended. For certain conditions (i.e., vcf only or | ||
# fasta only), this will be ignored. If this is true, then there must be an included fragment | ||
# length model output from runner.py or a mean and standard deviation | ||
# by declaring values for @fragment_mean and @fragment_st_dev. | ||
# type: boolean | required = no | default = false | ||
paired_ended: . | ||
|
||
# Absolute path to a pickle file containing the fragment length model output | ||
# from runner.py. | ||
# type: string | required = no | default: <NEAT_DIR>/neat/models/defaults/default_fraglen_model.pickle.gz | ||
fragment_model: . | ||
|
||
# Mean for the paired end fragment length. This only applies if paired_ended is set to true. | ||
# This number will form the mean for the sample distribution of the fragment lengths in the simulation | ||
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used. | ||
# type: float | required: no (unless paired-ended) | ||
fragment_mean: . | ||
|
||
# Standard deviation for the paired end fragment length. This only applies if paired_ended is set to true. | ||
# This number will form the standard deviation about the mean specified above for the sample distribution | ||
# of the fragment lengths in the simulation. | ||
# Note: This number is REQUIRED if paired_ended is set to true, unless a fragment length model is used. | ||
# type: float | required: no (unless paired-ended) | ||
fragment_st_dev: . | ||
|
||
# Whether to produce the golden bam file. This file will contain the reads | ||
# aligned with the exact region of the genome | ||
# type: boolean | required = no | default = false | ||
produce_bam: . | ||
|
||
# Whether to produce a vcf file containing all the mutation errors added | ||
# by NEAT. | ||
# type: boolean | required = no | default = false | ||
produce_vcf: . | ||
|
||
# Whether to output the mutated fasta. This will output a fasta file with mutations | ||
# inserted. It does not include sequencing errors or read information. Useful for | ||
# multigenerational mutations. | ||
# type: boolean | required = no | default = false | ||
produce_fasta: . | ||
|
||
# Whether to output the fastq(s) of the reads. This is the default output. NEAT | ||
# will produce 1 fastq for single ended reads or 2 fastqs for paired ended. | ||
# type: boolean | required = no | default = true | ||
produce_fastq: . | ||
|
||
# If set to true, this will ignore statistical models and force coverage to be | ||
# constant across the genome. This is considered a debugging feature. | ||
# type: boolean | required = no | default = false | ||
no_coverage_bias: . | ||
|
||
# Set an RNG seed value. Runs using identical RNG values should produce identical results | ||
# so things like read locations, variant positions, error positions, etc. should be the same. | ||
# Useful for debugging. | ||
# type: int | required = no | ||
rng_seed: . | ||
|
||
# Set an absolute minimum number of mutations. The program always adds at least 1 mutation. | ||
# Useful for very small datasets. | ||
# type: int | required = no | ||
min_mutations: . | ||
|
||
# Overwrite the output files, if they are named the same as the current run. | ||
# Default is to quit if files already exist to avoid data destruction | ||
# type: bool | required = no | default = false | ||
overwrite_output: True |
Oops, something went wrong.