Merge pull request #162 from tleonardi/release_1.0.1
Release 1.0.1
tleonardi authored Nov 27, 2020
2 parents 9f8effe + 252076c commit 3ca757e
Showing 13 changed files with 1,136 additions and 403 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -109,3 +109,4 @@ data
docs/demo/results
tmp
dev
tests/test_datasets
5 changes: 4 additions & 1 deletion .travis.yml
@@ -1,11 +1,14 @@
dist: xenial
dist:
- focal
- bionic
language: python

stage: test

python:
- 3.6
- 3.7
- 3.8

branches:
only:
14 changes: 11 additions & 3 deletions CHANGELOG.md
@@ -1,11 +1,19 @@
# Changelog

## [v1.0.0rc3-1]
## v1.0.1

### Fixed
- Fixed #120, #122, #138

### Added
- Improved logging

## v1.0.0rc3-1

### Fixed
- Fixed bug in CLI entrypoint

## [v1.0.0rc3]
## v1.0.0rc3

### Added
- Reads simulator now uses variability measured from the data
@@ -18,7 +26,7 @@
- Fixed errors with 0 pvalues (#87 and #90)
- Fixed error when passing a Whitelist object to SampComp (#91)

## [v1.0.0rc2]
## v1.0.0rc2

### Added
- Continuous testing with Travis CI
16 changes: 16 additions & 0 deletions CONTRIBUTING.md
@@ -74,6 +74,22 @@ We try to follow the [GitHub flow](https://guides.github.com/introduction/flow/)

5. Submit a [pull request](https://guides.github.com/activities/forking/#making-a-pull-request).

# Our release process

* Create a new branch called `release_x.y.z` from `devel`.
* Bump the version number
* Update the CHANGELOG
* Create a PR against `master`
* Merge the PR with a merge commit
* Tag the commit with the new version number
* Run `poetry build` and `poetry publish` to publish the new version to PyPI

# Our versioning system

The version number is stored _exclusively_ in `pyproject.toml` and loaded from the package metadata by `__init__.py` (see [here](https://github.com/python-poetry/poetry/issues/144#issuecomment-623927302)).
The version number is also referenced explicitly in the CHANGELOG.
The `master` branch only points to stable, semantically versioned releases (X.Y.Z): we use patch versions for bug fixes, minor versions for new features, and major versions for breaking changes.
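
For reference, the approach linked above amounts to reading the installed package's metadata at import time. Nanocompore's actual `__init__.py` is not part of this diff, so the following is only a minimal sketch of that pattern; the package name and the fallback value are assumptions, not the project's real code.

```python
# Minimal sketch (assumed, not nanocompore's actual __init__.py):
# read the version that Poetry wrote into the installed package metadata.
try:
    from importlib.metadata import version, PackageNotFoundError  # Python >= 3.8
except ImportError:
    from importlib_metadata import version, PackageNotFoundError  # backport for 3.6/3.7

try:
    __version__ = version("nanocompore")
except PackageNotFoundError:
    # Package not installed, e.g. running from a plain source checkout
    __version__ = "unknown"
```

With this setup `pyproject.toml` stays the single source of truth: bumping the version there (e.g. with `poetry version`) is enough for the code, and only the CHANGELOG needs a manual entry.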



---
35 changes: 16 additions & 19 deletions nanocompore/SampComp.py
@@ -2,7 +2,7 @@

#~~~~~~~~~~~~~~IMPORTS~~~~~~~~~~~~~~#
# Std lib
import logging
from loguru import logger
from collections import *
import shelve
import multiprocessing as mp
@@ -32,11 +32,8 @@
os.environ["OMP_NUM_THREADS"] = "1"
os.environ['OPENBLAS_NUM_THREADS'] = '1'

# Logger setup
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)
log_level_dict = {"debug":logging.DEBUG, "info":logging.INFO, "warning":logging.WARNING}

log_level_dict = {"debug":"DEBUG", "info":"INFO", "warning":"WARNING"}
logger.remove()
#~~~~~~~~~~~~~~MAIN CLASS~~~~~~~~~~~~~~#
class SampComp(object):
""" Init analysis and check args"""
@@ -124,10 +121,6 @@ def __init__(self,
if not i in ["self","whitelist"]:
option_d[i]=j

# Set logging level
logger.setLevel(log_level_dict.get(log_level, logging.WARNING))
logger.info("Initialising SampComp and checking options")

# Check if output folder already exists
try:
mkdir(fn=outpath, exist_ok=overwrite)
@@ -140,6 +133,11 @@ def __init__(self,
logger.debug("Writing log file")
json.dump(option_d, log_fp, indent=2)

# Set logging level
logger.add(sys.stderr, format="{time} {level} - {process.name} | {message}", enqueue=True, level=log_level_dict.get(log_level, "WARNING"))
logger.add(log_fn, format="{time} {level} - {process.name} | {message}", enqueue=True, level="TRACE")
logger.info("Initialising SampComp and checking options")

# If eventalign_fn_dict is not a dict try to load a YAML file instead
if type(eventalign_fn_dict) == str:
logger.debug("Parsing YAML file")
@@ -188,8 +186,7 @@ def __init__(self,
downsample_high_coverage = downsample_high_coverage,
max_invalid_kmers_freq = max_invalid_kmers_freq,
select_ref_id = select_ref_id,
exclude_ref_id = exclude_ref_id,
log_level = log_level)
exclude_ref_id = exclude_ref_id)
elif not isinstance(whitelist, Whitelist):
raise NanocomporeError("Whitelist is not valid")

@@ -243,11 +240,12 @@ def __call__(self):
ps.start()
# Monitor error queue
for tb in iter(error_q.get, None):
logger.trace("Error caught from error_q")
raise NanocomporeError(tb)

# Catch error and reraise it
except(BrokenPipeError, KeyboardInterrupt, NanocomporeError) as E:
logger.debug("An error occured. Killing all processes\n")
logger.error("An error occured. Killing all processes\n")
raise E

finally:
@@ -264,8 +262,7 @@ def __call__(self):
return SampCompDB(
db_fn=self.__db_fn,
fasta_fn=self.__fasta_fn,
bed_fn=self.__bed_fn,
log_level=self.__log_level)
bed_fn=self.__bed_fn)

#~~~~~~~~~~~~~~PRIVATE MULTIPROCESSING METHOD~~~~~~~~~~~~~~#
def __list_refid(self, in_q, error_q):
@@ -300,7 +297,6 @@ def __process_references(self, in_q, out_q, error_q):
# Process refid in input queue
for ref_id, ref_dict in iter(in_q.get, None):
logger.debug("Worker thread processing new item from in_q: {}".format(ref_id))

# Create an empty dict for all positions first
ref_pos_list = self.__make_ref_pos_list(ref_id)

@@ -360,6 +356,7 @@ def __process_references(self, in_q, out_q, error_q):
# Save previous position
prev_pos = pos

logger.debug("Data for {} loaded.".format(ref_id))
if self.__comparison_methods:
random_state=np.random.RandomState(seed=42)
ref_pos_list = txCompare(
@@ -371,7 +368,6 @@ def __process_references(self, in_q, out_q, error_q):
min_coverage= self.__min_coverage,
allow_warnings=self.__allow_warnings,
logit=self.__logit,
logger=logger,
random_state=random_state)

# Add the current read details to queue
@@ -384,8 +380,9 @@ def __process_references(self, in_q, out_q, error_q):
self.__eventalign_fn_close(fp_dict)

# Manage exceptions, deal poison pills and close files
except Exception:
logger.debug("Error in worker. Kill output queue")
except Exception as e:
logger.error("Error in worker. Kill output queue")
logger.error(e)
for i in range(self.__nthreads):
out_q.put(None)
self.__eventalign_fn_close(fp_dict)
25 changes: 9 additions & 16 deletions nanocompore/SampCompDB.py
@@ -2,7 +2,7 @@

#~~~~~~~~~~~~~~IMPORTS~~~~~~~~~~~~~~#
# Std lib
import logging
from loguru import logger
from collections import *
import shelve
from dbm import error as dbm_error
@@ -21,16 +21,13 @@
import seaborn as sns
from bedparse import bedline
from statsmodels.stats.multitest import multipletests
from sklearn.mixture.gaussian_mixture import GaussianMixture
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import scale as scale

# Local package
from nanocompore.common import *

# Logger setup
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)
log_level_dict = {"debug":logging.DEBUG, "info":logging.INFO, "warning":logging.WARNING}

#~~~~~~~~~~~~~~MAIN CLASS~~~~~~~~~~~~~~#
class SampCompDB(object):
@@ -41,8 +38,7 @@ def __init__(self,
db_fn:str,
fasta_fn:str,
bed_fn:str = None,
run_type:str = "RNA",
log_level:str = "info"):
run_type:str = "RNA"):
"""
Import a shelve db and a fasta reference file. Automatically returned by SampComp
Can also be manually created from an existing shelve db output
@@ -54,12 +50,8 @@
Path to a BED file containing the annotation of the transcriptome used as reference when mapping
* run_type
Define the run type model to import {RNA, DNA}
* log_level
Set the log level. {warning,info,debug}"
"""

# Set logging level
logger.setLevel(log_level_dict.get(log_level, logging.WARNING))
logger.info("Loading SampCompDB")

# Try to get ref_id list and metadata from shelve db
@@ -79,7 +71,8 @@ def __init__(self,
logger.debug("\tCannot find the ref_id_list in shelve. Try to build the list from entries")
self.ref_id_list = [k for k in db.keys() if k not in ['__metadata', '__ref_id_list']]
if not self.ref_id_list:
raise NanocomporeError("The result database is empty")
logger.info("The result database is empty")
return None
except dbm_error:
raise NanocomporeError("The result database cannot be opened")

@@ -329,19 +322,19 @@ def save_to_bed(self, output_fn=None, bedgraph=False, pvalue_field=None, pvalue_
else:
line=bedline([record.chr, record.genomicPos-(span-1), record.genomicPos+1, "%s_%s" % (record.ref_id, record.ref_kmer), pvalue, record.strand])

if convert is "ensembl_to_ucsc":
if convert == "ensembl_to_ucsc":
line=line.translateChr(assembly=assembly, target="ucsc", patches=True)
elif convert is "ucsc_to_ensembl":
elif convert == "ucsc_to_ensembl":
line=line.translateChr(assembly=assembly, target="ens", patches=True)
bed_file.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (line.chr, line.start, line.end, line.name, line.score, line.strand))
elif bedgraph:
if record.strand == "+":
line=bedline([record.chr, record.genomicPos+2, record.genomicPos+3, "%s_%s" % (record.ref_id, record.ref_kmer), pvalue, record.strand])
else:
line=bedline([record.chr, record.genomicPos-2, record.genomicPos-1, "%s_%s" % (record.ref_id, record.ref_kmer), pvalue, record.strand])
if convert is "ensembl_to_ucsc":
if convert == "ensembl_to_ucsc":
line=line.translateChr(assembly=assembly, target="ucsc", patches=True)
elif convert is "ucsc_to_ensembl":
elif convert == "ucsc_to_ensembl":
line=line.translateChr(assembly=assembly, target="ens", patches=True)
bed_file.write("%s\t%s\t%s\t%s\n" % (line.chr, line.start, line.end, line.score))

10 changes: 8 additions & 2 deletions nanocompore/TxComp.py
@@ -7,6 +7,7 @@


# Third party
from loguru import logger
from scipy.stats import mannwhitneyu, ttest_ind, chi2, f_oneway
from scipy.stats.mstats import ks_twosamp
import statsmodels.discrete.discrete_model as dm
@@ -27,12 +28,12 @@ def txCompare(
methods=None,
sequence_context=0,
min_coverage=20,
logger=None,
ref=None,
sequence_context_weights="uniform",
anova=True,
logit=False,
allow_warnings=False):
logger.debug("TxCompare")

if sequence_context_weights != "uniform" and sequence_context_weights != "harmonic":
raise NanocomporeError("Invalid sequence_context_weights (uniform or harmonic)")
@@ -44,6 +45,7 @@
anova=False
logit=True
for pos, pos_dict in enumerate(ref_pos_list):
logger.trace(f"Processing position {pos}")
# Filter out low coverage positions
lowcov = False
for cond_dict in pos_dict["data"].values():
@@ -54,6 +56,7 @@

# Perform stat tests if not low cov
if lowcov:
logger.trace(f"Position {pos} is low coverage, skipping")
n_lowcov+=1
else:
res = dict()
@@ -67,6 +70,7 @@
condition2_dwell = np.concatenate([ rep['dwell'] for rep in data[condition_labels[1]].values() ])

for met in methods:
logger.trace(f"Running {met} test on position {pos}")
if met in ["MW", "KS", "TT"] :
try:
pvalues = nonparametric_test(condition1_intensity, condition2_intensity, condition1_dwell, condition2_dwell, method=met)
@@ -92,10 +96,12 @@
tests.add("GMM_logit_pvalue")

# Calculate shift statistics
logger.trace(f"Calculatign shift stats for {pos}")
res['shift_stats'] = shift_stats(condition1_intensity, condition2_intensity, condition1_dwell, condition2_dwell)
# Save results in main
logger.trace(f"Saving test results for {pos}")
ref_pos_list[pos]['txComp'] = res
logger.debug("Skipping {} positions because not present in all samples with sufficient coverage".format(n_lowcov))
logger.debug("Skipped {} positions because not present in all samples with sufficient coverage".format(n_lowcov))

# Combine pvalue within a given sequence context
if sequence_context > 0: