-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig_template.py
78 lines (68 loc) · 2.96 KB
/
config_template.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""
This "module" provides a template for the "config.py" that a user should
create to specify the parameters needed for the tool to run.
"""
# What identifiers should we send with API requests sent to NCBI?
Tool='compendium'
Email='email_goes_here'
Key='12345' # API key obtained (for free!) from NCBI
# Where should we store the archived data?
archive_path = '/path/to_your/archive/'
# Where is the SQLite database file?
db_path = '/path_to_your/compendium.db'
# How many projects should we try to have running at one time?
max_projects = 8
# Should a user's input be required to reprocess and discard projects?
confirm_destruct = True
# Where does the pipeline code live?
snakemake_git = 'git@github.com:blekhmanlab/snakemake-compendium.git'
############### Web request settings ###############
# When building up metadata for each project, we need to make multiple calls
# to the NCBI's eUtils API. These settings dictate how this happens.
#
# How long should we wait to get a response from a web request before bailing?
timeout = 25
#
# What base URLs should be used when building up queries?
esearch_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?tool={Tool}&email={Email}&api_key={Key}&db=sra&usehistory=y&term=' # pylint: disable=line-too-long
efetch_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?tool={Tool}&email={Email}&db=sra&query_key=1'
#
##############################################################
############### Sample-level settings ###############
# When evaluating the proportion of reads retained through the entire pipeline,
# what thresholds should be used for the "warning" and "error" levels?
retained_worrisome = 0.69
retained_error = 0.59 # MINIMUM
#
# When evaluating the proportion of reads removed from a sample,
# what thresholds should be used for the "warning" and "error" levels?
chimera_worrisome = 0.10
chimera_error = 0.20 # MAXIMUM
#
# When evaluating the proportion of forward reads removed from a
# sample, what thresholds should be used for the "warning" and "error" levels?
merged_worrisome = 0.80
merged_error = 0.65 # MINIMUM
#
##############################################################
############### Project-level settings ###############
#
# When evaluating the proportion of SAMPLES flagged for issues,
# what thresholds should be used for determining whether a project
# should be reprocessed?
#
# (IMPORTANT: The thresholds here work slightly differently
# than the sample-level ones above: There is no "warning" level for projects.
# The "worrisome" threshold here is *what proportion of samples must be marked as
# worrisome for the project to be discarded*. The "error" threshold is what proportion
# of samples must be marked as errors for the project to be discarded.)
project_retained_worrisome = 0.70
project_retained_error = 0.20
#
project_merged_worrisome = 0.4
project_merged_error = 0.15
#
project_chimera_worrisome = 0.4
project_chimera_error = 0.2
#
##############################################################