erange version 4.0a dev release
[erange.git] / getallNRSE.py
index c2e639f5c09b1ad4660256f49fe3d4aab7cc9bab..c02bab11ccfcc53c059d4796b1029d796f55bae8 100755 (executable)
@@ -9,11 +9,12 @@ except:
 from cistematic.core import complement
 from cistematic.core.motif import Motif
 from cistematic.genomes import Genome
-from commoncode import readDataset, getMergedRegions, findPeak
+from commoncode import getMergedRegions, findPeak, getConfigParser, getConfigOption, getConfigBoolOption, getConfigFloatOption
+import ReadDataset
 from pylab import *
 import matplotlib
 
-print '%s: version 3.4' % sys.argv[0]
+print 'getallNRSE: version 3.5'
 
 def main(argv=None):
     if not argv:
@@ -21,21 +22,7 @@ def main(argv=None):
 
     usage = "usage: python %s genome regionfile siteOutfile [options]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--dataset", dest="chipfilename")
-    parser.add_option("--min", type="float", dest="minHeight")
-    parser.add_option("--minfraction", type="float", dest="minFraction")
-    parser.add_option("--plot", dest="plotname")
-    parser.add_option("--cache", action="store_true", dest="doCache")
-    parser.add_option("--raw", action="store_false", dest="normalize")
-    parser.add_option("--verbose", action="store_true", dest="doVerbose")
-    parser.add_option("--markov1", action="store_true", dest="doMarkov1")
-    parser.add_option("--peakdist", type="int", dest="maxpeakdist")
-    parser.add_option("--fullOnly", action="store_true", dest="fullOnly")
-    parser.add_option("--motifdir", dest="motifDir")
-    parser.set_defaults(chipfilename="", minHeight=-2., minFraction=-2., plotname="",
-                        doCache=False, normalize=True, doVerbose=False, doMarkov1=False,
-                        maxpeakdist=None, fullOnly=False, motifDir="./")
+    parser = getParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 3:
@@ -53,6 +40,42 @@ def main(argv=None):
                options.motifDir)
 
 
+def getParser(usage):
+
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--dataset", dest="chipfilename")
+    parser.add_option("--min", type="float", dest="minHeight")
+    parser.add_option("--minfraction", type="float", dest="minFraction")
+    parser.add_option("--plot", dest="plotname")
+    parser.add_option("--cache", action="store_true", dest="doCache")
+    parser.add_option("--raw", action="store_false", dest="normalize")
+    parser.add_option("--verbose", action="store_true", dest="doVerbose")
+    parser.add_option("--markov1", action="store_true", dest="doMarkov1")
+    parser.add_option("--peakdist", type="int", dest="maxpeakdist")
+    parser.add_option("--fullOnly", action="store_true", dest="fullOnly")
+    parser.add_option("--motifdir", dest="motifDir")
+
+    configParser = getConfigParser()
+    section = "getallNRSE"
+    chipfilename = getConfigOption(configParser, section, "chipfilename", "")
+    minHeight = getConfigFloatOption(configParser, section, "minHeight", -2.)
+    minFraction = getConfigFloatOption(configParser, section, "minFraction", -2.)
+    plotname = getConfigOption(configParser, section, "plotname", "")
+    doCache = getConfigBoolOption(configParser, section, "doCache", False)
+    normalize = getConfigBoolOption(configParser, section, "normalize", True)
+    doVerbose = getConfigBoolOption(configParser, section, "doVerbose", False)
+    doMarkov1 = getConfigBoolOption(configParser, section, "doMarkov1", False)
+    maxpeakdist = getConfigOption(configParser, section, "maxpeakdist", None)
+    fullOnly = getConfigBoolOption(configParser, section, "fullOnly", False)
+    motifDir = getConfigOption(configParser, section, "motifDir", "./")
+
+    parser.set_defaults(chipfilename=chipfilename, minHeight=minHeight, minFraction=minFraction, plotname=plotname,
+                        doCache=doCache, normalize=normalize, doVerbose=doVerbose, doMarkov1=doMarkov1,
+                        maxpeakdist=maxpeakdist, fullOnly=fullOnly, motifDir=motifDir)
+
+    return parser
+
+
 def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2.,
                minFraction=-2., plotname="", doCache=False, normalize=True,
                doVerbose=False, doMarkov1=False, maxpeakdist=None, fullOnly=False,
@@ -69,7 +92,7 @@ def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2.,
     doDataset = False
     normalizeBy = 1
     if chipfilename:
-        hitRDS = readDataset(chipfilename, verbose=doVerbose, cache=doCache)
+        hitRDS = ReadDataset.ReadDataset(chipfilename, verbose=doVerbose, cache=doCache)
         doDataset = True
         if normalize:
             normalizeBy = len(hitRDS) / 1000000.
@@ -107,8 +130,8 @@ def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2.,
         if "rand" in rchrom or "M" in rchrom or "hap" in rchrom:
             continue
 
-        for (start, stop, length) in regions[rchrom]:
-            regionList.append((rchrom, start, length))
+        for region in regions[rchrom]:
+            regionList.append((rchrom, region.start, region.length))
 
     notFoundIndex = 0
     currentChrom = ""
@@ -120,12 +143,14 @@ def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2.,
                 hitDict = hitRDS.getReadsDict(chrom=fullchrom, withWeight=True, doMulti=True)
                 currentChrom = rchrom
 
-            (topPos, numHits, smoothArray, numPlus) = findPeak(hitDict[rchrom], start, length, doWeight=True)
+            peak = findPeak(hitDict[rchrom], start, length, doWeight=True)
+            topPos = peak.topPos
+            numHits = peak.numHits
             if len(topPos) == 0:
                 print "topPos error"
 
             peakpos = topPos[0]
-            peakscore = smoothArray[peakpos]
+            peakscore = peak.smoothArray[peakpos]
             if peakscore == 0.:
                 peakscore = -1.