Merge pull request #17 from jpiper/misc_fixes

Misc fixes
jpiper · Jul 23, 2016 · 1dbb5d6 · 1dbb5d6
2 parents 2416572 + 864fc2d
commit 1dbb5d6
Show file tree

Hide file tree

Showing 10 changed files with 47 additions and 34 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,15 +1,19 @@
+sudo: false
 language: python
 python:
-    - 2.6
-    - 2.7
-    - 3.3
+    - "2.7"
+    - "3.3"
+    - "3.4"
+    - "3.5"
+addons:
+  apt:
+    packages:
+    - libopenblas-dev
+    - liblapack-dev
+    - libatlas-dev
 before_install:
-    - sudo apt-get update -qq
-    - sudo apt-get install -qq libopenblas-dev liblapack-dev libatlas-dev
     - pip install numpy
     - pip install coveralls
-install:
-    - python setup.py install
 script:
     - coverage run --source=pyDNase setup.py test
 after_success:

diff --git a/CHANGES b/CHANGES
@@ -1,3 +1,10 @@
+0.2.5 - 2016-07-23
+==================
+* IMPORTANT: Python 2.6 and 3.2 support dropped. 2.7, 3.3, 3.4, and 3.5 are where it's at.
+* BUG: pyDNase now ignores unmapped reads in the BAM input file. NOTE: Secondary alignments in paired reads are not ignored. Filter these out beforehand if you need this behaviour for the time being.
+* BUG: Fix `dnase_bias_estimator.py` script arguments.
+* BUG: Fix the `-fdrlimit` flag not being able to be set to 0 in `wellington_footprints.py` and `wellington_bootstrap.py`.
+
 0.2.4 - 2016-05-30
 ==================
 * BUG: Update author’s contact details

diff --git a/docs/installation.rst b/docs/installation.rst
@@ -42,8 +42,8 @@ In order to install :mod:`pyDNase`, the following software is required. Most peo
    .. note::
         If you're using another \*NIX distro, I assume you know what you're doing.
 
-#. Python_ >= 2.6 (including Python 3!)
-    * This will come installed with OS X or any respectable \*NIX distro.
+#. Python_ 2.7, 3.3, 3.4, or 3.5
+    * One of these will usually come installed with OS X or any respectable \*NIX distro.
 
 #. pip_:
         Used for automated installation of Python packages. If you don't already have pip_ installed, you can use the following command to install it ::

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -100,7 +100,7 @@ You should really take some time to read through the settings in the documentati
 
 I often get the comment that the footprints are too stringent. This is a common question - if you have low read depths you might need to adjust the ``-fdrlimit`` parameter to something less stringent like ``"-10"`` or ``"-5"`` (the closer to 0, the more liberal), which sets the minimum amount of evidence required to support the alternate hypothesis of there being a footprint present.
 
-.. tip:: You can set ``-fdrlimit`` to ``-0.01`` if you want to disable this feature altogether, and then sort the footprints by their Wellington scores (e.g. ``sort -nk 5 <fp.bed> > <out.bed>``) and then visualise the footprints choose your threshold this way if you are unsure.
+.. tip:: You can set ``-fdrlimit`` to ``0`` if you want to disable this feature altogether, then sort the footprints by their Wellington scores (e.g. ``sort -nk 5 <fp.bed> > <out.bed>``), visualise the footprints, and choose your threshold this way if you are unsure.
 
 
 Interpreting Wellington's Output

diff --git a/pyDNase/__init__.py b/pyDNase/__init__.py
@@ -77,14 +77,15 @@ def __addCutsToCache(self,chrom,start,end):
             end (int): The end of the interval
         """
         for alignedread in self.samfile.fetch(chrom, max(start, 0), end):
-            if alignedread.is_reverse:
-                a = int(alignedread.aend)
-                if a <= end +1:
-                    self.cutCache[chrom]["-"][a] = self.cutCache[chrom]["-"].get(a, 0) + 1
-            else:
-                a = int(alignedread.pos) -1
-                if a >= start:
-                    self.cutCache[chrom]["+"][a] = self.cutCache[chrom]["+"].get(a, 0) + 1
+            if not alignedread.is_unmapped:
+                if alignedread.is_reverse:
+                    a = int(alignedread.reference_end)
+                    if a <= end +1:
+                        self.cutCache[chrom]["-"][a] = self.cutCache[chrom]["-"].get(a, 0) + 1
+                else:
+                    a = int(alignedread.reference_start) -1
+                    if a >= start:
+                        self.cutCache[chrom]["+"][a] = self.cutCache[chrom]["+"].get(a, 0) + 1
         self.lookupCache[chrom].append(start)
 
     def __lookupReadsUsingCache(self,startbp,endbp,chrom):
@@ -118,14 +119,15 @@ def __lookupReadsWithoutCache(self,startbp,endbp,chrom):
         tempcutf = {}
         tempcutr = {}
         for alignedread in self.samfile.fetch(chrom, max(startbp, 0), endbp):
-            if alignedread.is_reverse:
-                a = int(alignedread.aend)
-                if a <= endbp +1:
-                    tempcutr[a] = tempcutr.get(a, 0) + 1
-            else:
-                a = int(alignedread.pos) - 1
-                if a >= startbp:
-                    tempcutf[a] =tempcutf.get(a, 0) + 1
+            if not alignedread.is_unmapped:
+                if alignedread.is_reverse:
+                    a = int(alignedread.reference_end)
+                    if a <= endbp +1:
+                        tempcutr[a] = tempcutr.get(a, 0) + 1
+                else:
+                    a = int(alignedread.reference_start) - 1
+                    if a >= startbp:
+                        tempcutf[a] = tempcutf.get(a, 0) + 1
         fwCutArray  = [tempcutf.get(i, 0) for i in range(startbp + self.loffset ,endbp + self.loffset)]
         revCutArray = [tempcutr.get(i, 0) for i in range(startbp + self.roffset, endbp + self.roffset)]
         return {"+":fwCutArray,"-":revCutArray}

diff --git a/pyDNase/_version.py b/pyDNase/_version.py
@@ -1 +1 @@
-__version__ = "0.2.4"
+__version__ = "0.2.5"
diff --git a/pyDNase/scripts/dnase_bias_estimator.py b/pyDNase/scripts/dnase_bias_estimator.py
@@ -86,7 +86,7 @@ def genome_dic(g_file):
 	parser.add_argument("regions", help="BED file of the regions you want to exclude from calculating the bias. This is usually the DHSs.")
 	parser.add_argument("reads", help="The sorted, indexed BAM file containing the DNase-seq data")
 	parser.add_argument("genome_sequence", help="The sorted, indexed FASTA file containing the genome sequence")
-	parser.add_argument("genomesize", help="The .chrom.sizes file containing chromosome sizes generated using something like  \"mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \"select chrom, size from hg19.chromInfo\"  > hg19.chrom.sizes\"")
+	parser.add_argument("genome_size", help="The .chrom.sizes file containing chromosome sizes generated using something like  \"mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \"select chrom, size from hg19.chromInfo\"  > hg19.chrom.sizes\"")
 	parser.add_argument("output", help="output file prefix to write the observed/expected ratios to (will append .txt and .pickle)")
 	args  = parser.parse_args()
 
@@ -137,6 +137,6 @@ def genome_dic(g_file):
 	totalsum = float(sum(enriched.values()))
 	whatdic = {key:{'forward':val/totalsum,'reverse':enriched[rev_comp(key)]/totalsum} for key,val in enriched.iteritems()}
 	with open(outfile + ".pickle", "w") as bias_file:
-	    pickle.dump(whatdic,bias_file)
+		pickle.dump(whatdic,bias_file)
 
 	os.remove(bed_file_for_6mers)
diff --git a/pyDNase/scripts/wellington_bootstrap.py b/pyDNase/scripts/wellington_bootstrap.py
@@ -171,7 +171,7 @@ def __call__(self):
     raise RuntimeError("Footprint sizes must be supplied as from,to,step")
 
 assert 0 < args.FDR_cutoff < 1, "FDR must be between 0 and 1"
-assert args.FDR_limit < 0, "FDR limit must be less than 0"
+assert args.FDR_limit <= 0, "FDR limit must be less than or equal to 0 (to disable)"
 
 # Treatment
 reads2 = pyDNase.BAMHandler(args.treatment_bam, caching=0, ATAC=args.A)

diff --git a/pyDNase/scripts/wellington_footprints.py b/pyDNase/scripts/wellington_footprints.py
@@ -85,7 +85,7 @@ def xrange_from_string(range_string):
     raise RuntimeError("p-value cutoffs must be supplied as a string of numbers separated by commas")
 
 assert 0 < clargs.FDR_cutoff < 1, "FDR must be between 0 and 1"
-assert clargs.FDR_limit < 0, "FDR limit must be less than 0"
+assert clargs.FDR_limit <= 0, "FDR limit must be less than or equal to 0 (to disable)"
 assert len([f for f in os.listdir(clargs.outputdir) if f[0] != "."]) == 0, "output directory {0} is not empty!".format(clargs.outputdir)
 
 if not clargs.output_prefix:

diff --git a/setup.py b/setup.py
@@ -25,10 +25,10 @@
     ],
 
     install_requires=[
-        # Not enforcing versions for numpy and matplotlib as they can be a bitch to upgrade
+        # Not enforcing versions for numpy as it can be a bitch to upgrade
         "numpy", # Tested on >=1.5.0
-        "matplotlib", # Tested on >=1.2
-        "pysam >= 0.7.5",
+        "matplotlib < 2.0.0", # mpl > 2.0 only works on py3
+        "pysam >= 0.8.1",
         "clint >= 0.3.2",
     ],