diff --git a/.travis.yml b/.travis.yml index cfce93c..c47981e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,19 @@ +sudo: false language: python python: - - 2.6 - - 2.7 - - 3.3 + - "2.7" + - "3.3" + - "3.4" + - "3.5" +addons: + apt: + packages: + - libopenblas-dev + - liblapack-dev + - libatlas-dev before_install: - - sudo apt-get update -qq - - sudo apt-get install -qq libopenblas-dev liblapack-dev libatlas-dev - pip install numpy - pip install coveralls -install: - - python setup.py install script: - coverage run --source=pyDNase setup.py test after_success: diff --git a/CHANGES b/CHANGES index 57eab59..15bc043 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +0.2.5 - 2016-07-23 +================== +* IMPORTANT: Python 2.6 and 3.2 support dropped. 2.7, 3.3, 3.4, and 3.5 are where it's at. +* BUG: pyDNase now ignores unmapped reads in the BAM input file. NOTE: Will not ignore secondary alignments in paired reads. Filter these out beforehand if you need this behaviour for the time being. +* BUG: Fix `dnase_bias_estimator.py` script arguments. +* BUG: Fix `fdr_limit` flag not being able to be set to 0 in `wellington_footprints.py` and `wellington_bootstrap.py`. + 0.2.4 - 2016-05-30 ================== * BUG: Update author’s contact details diff --git a/docs/installation.rst b/docs/installation.rst index 6f1a3e4..5c5753d 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -42,8 +42,8 @@ In order to install :mod:`pyDNase`, the following software is required. Most peo .. note:: If you're using another \*NIX distro, I assume you know what you're doing. -#. Python_ >= 2.6 (including Python 3!) - * This will come installed with OS X or any respectable \*NIX distro. +#. Python_ 2.7, 3.3, 3.4, or 3.5 + * One of these will usually come installed with OS X or any respectable \*NIX distro. #. pip_: Used for automated installation of Python packages. 
If you don't already have pip_ installed, you can use the following command to install it :: diff --git a/docs/tutorial.rst b/docs/tutorial.rst index efd10f9..8a2ad01 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -100,7 +100,7 @@ You should really take some time to read through the settings in the documentati I often get the comment that footprints from are too stringent. This is a common question - if you have low read depths you might need to adjust the ``-fdrlimit`` parameter to something less stringent like ``"-10"`` or ``"-5"`` (the closer to 0, the more liberal), which sets the mimimum amount of evidence required to support the alternate hypothesis of there being a footprint present. -.. tip:: You can set ``-fdrlimit`` to ``-0.01`` if you want to disable this feature altogether, and then sort the footprints by their Wellington scores (e.g. ``sort -nk 5 > ``) and then visualise the footprints choose your threshold this way if you are unsure. +.. tip:: You can set ``-fdrlimit`` to ``0`` if you want to disable this feature altogether, and then sort the footprints by their Wellington scores (e.g. ``sort -nk 5 > ``) and then visualise the footprints and choose your threshold this way if you are unsure. 
Interpreting Wellington's Output diff --git a/pyDNase/__init__.py b/pyDNase/__init__.py index ebb7f12..8f98f8a 100644 --- a/pyDNase/__init__.py +++ b/pyDNase/__init__.py @@ -77,14 +77,15 @@ def __addCutsToCache(self,chrom,start,end): end (int): The end of the interval """ for alignedread in self.samfile.fetch(chrom, max(start, 0), end): - if alignedread.is_reverse: - a = int(alignedread.aend) - if a <= end +1: - self.cutCache[chrom]["-"][a] = self.cutCache[chrom]["-"].get(a, 0) + 1 - else: - a = int(alignedread.pos) -1 - if a >= start: - self.cutCache[chrom]["+"][a] = self.cutCache[chrom]["+"].get(a, 0) + 1 + if not alignedread.is_unmapped: + if alignedread.is_reverse: + a = int(alignedread.reference_end) + if a <= end +1: + self.cutCache[chrom]["-"][a] = self.cutCache[chrom]["-"].get(a, 0) + 1 + else: + a = int(alignedread.reference_start) -1 + if a >= start: + self.cutCache[chrom]["+"][a] = self.cutCache[chrom]["+"].get(a, 0) + 1 self.lookupCache[chrom].append(start) def __lookupReadsUsingCache(self,startbp,endbp,chrom): @@ -118,14 +119,15 @@ def __lookupReadsWithoutCache(self,startbp,endbp,chrom): tempcutf = {} tempcutr = {} for alignedread in self.samfile.fetch(chrom, max(startbp, 0), endbp): - if alignedread.is_reverse: - a = int(alignedread.aend) - if a <= endbp +1: - tempcutr[a] = tempcutr.get(a, 0) + 1 - else: - a = int(alignedread.pos) - 1 - if a >= startbp: - tempcutf[a] =tempcutf.get(a, 0) + 1 + if not alignedread.is_unmapped: + if alignedread.is_reverse: + a = int(alignedread.reference_end) + if a <= endbp +1: + tempcutr[a] = tempcutr.get(a, 0) + 1 + else: + a = int(alignedread.reference_start) - 1 + if a >= startbp: + tempcutf[a] = tempcutf.get(a, 0) + 1 fwCutArray = [tempcutf.get(i, 0) for i in range(startbp + self.loffset ,endbp + self.loffset)] revCutArray = [tempcutr.get(i, 0) for i in range(startbp + self.roffset, endbp + self.roffset)] return {"+":fwCutArray,"-":revCutArray} diff --git a/pyDNase/_version.py b/pyDNase/_version.py index 
7a17bdd..845be45 100644 --- a/pyDNase/_version.py +++ b/pyDNase/_version.py @@ -1 +1 @@ -__version__ = "0.2.4" \ No newline at end of file +__version__ = "0.2.5" \ No newline at end of file diff --git a/pyDNase/scripts/dnase_bias_estimator.py b/pyDNase/scripts/dnase_bias_estimator.py index f87ccd4..8b674f3 100755 --- a/pyDNase/scripts/dnase_bias_estimator.py +++ b/pyDNase/scripts/dnase_bias_estimator.py @@ -86,7 +86,7 @@ def genome_dic(g_file): parser.add_argument("regions", help="BED file of the regions you want to exclude from calculating the bias. This is usually the DHSs.") parser.add_argument("reads", help="The sorted, indexed BAM file containing the DNase-seq data") parser.add_argument("genome_sequence", help="The sorted, indexed FASTA file containing the genome sequence") - parser.add_argument("genomesize", help="The .chrom.sizes file containing chromosome sizes generated using something like \"mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \"select chrom, size from hg19.chromInfo\" > hg19.chrom.sizes\"") + parser.add_argument("genome_size", help="The .chrom.sizes file containing chromosome sizes generated using something like \"mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \"select chrom, size from hg19.chromInfo\" > hg19.chrom.sizes\"") parser.add_argument("output", help="output file prefix to write the observed/expected ratios to (will append .txt and .pickle)") args = parser.parse_args() @@ -137,6 +137,6 @@ def genome_dic(g_file): totalsum = float(sum(enriched.values())) whatdic = {key:{'forward':val/totalsum,'reverse':enriched[rev_comp(key)]/totalsum} for key,val in enriched.iteritems()} with open(outfile + ".pickle", "w") as bias_file: - pickle.dump(whatdic,bias_file) + pickle.dump(whatdic,bias_file) os.remove(bed_file_for_6mers) diff --git a/pyDNase/scripts/wellington_bootstrap.py b/pyDNase/scripts/wellington_bootstrap.py index 3ecb1aa..3b6cd78 100755 --- a/pyDNase/scripts/wellington_bootstrap.py +++ 
b/pyDNase/scripts/wellington_bootstrap.py @@ -171,7 +171,7 @@ def __call__(self): raise RuntimeError("Footprint sizes must be supplied as from,to,step") assert 0 < args.FDR_cutoff < 1, "FDR must be between 0 and 1" -assert args.FDR_limit < 0, "FDR limit must be less than 0" +assert args.FDR_limit <= 0, "FDR limit must be less than or equal to 0 (to disable)" # Treatment reads2 = pyDNase.BAMHandler(args.treatment_bam, caching=0, ATAC=args.A) diff --git a/pyDNase/scripts/wellington_footprints.py b/pyDNase/scripts/wellington_footprints.py index 266df3e..1fa7d84 100755 --- a/pyDNase/scripts/wellington_footprints.py +++ b/pyDNase/scripts/wellington_footprints.py @@ -85,7 +85,7 @@ def xrange_from_string(range_string): raise RuntimeError("p-value cutoffs must be supplied as a string of numbers separated by commas") assert 0 < clargs.FDR_cutoff < 1, "FDR must be between 0 and 1" -assert clargs.FDR_limit < 0, "FDR limit must be less than 0" +assert clargs.FDR_limit <= 0, "FDR limit must be less than or equal to 0 (to disable)" assert len([f for f in os.listdir(clargs.outputdir) if f[0] != "."]) == 0, "output directory {0} is not empty!".format(clargs.outputdir) if not clargs.output_prefix: diff --git a/setup.py b/setup.py index 70b8b57..2c40d76 100644 --- a/setup.py +++ b/setup.py @@ -25,10 +25,10 @@ ], install_requires=[ - # Not enforcing versions for numpy and matplotlib as they can be a bitch to upgrade + # Not enforcing versions for numpy as it can be a bitch to upgrade "numpy", # Tested on >=1.5.0 - "matplotlib", # Tested on >=1.2 - "pysam >= 0.7.5", + "matplotlib < 2.0.0", # mpl > 2.0 only works on py3 + "pysam >= 0.8.1", "clint >= 0.3.2", ],