erange version 4.0a dev release
[erange.git] / getallsites.py
index 39335e481f3b30be49f46f4889bf2435cfa9b109..4d6707615d9d7ea95c5caf64a48c532e934c7e5f 100755 (executable)
@@ -8,9 +8,10 @@ except:
 from cistematic.core.motif import Motif, hasMotifExtension
 from cistematic.core import complement
 from cistematic.genomes import Genome
-from commoncode import readDataset, getMergedRegions, findPeak
+from commoncode import getMergedRegions, findPeak, getConfigParser, getConfigOption, getConfigBoolOption
+import ReadDataset
 
-print "%prog: version 2.4"
+print "getallsites: version 2.5"
 
 
 def main(argv=None):
@@ -19,21 +20,7 @@ def main(argv=None):
 
     usage = "usage: python %prog genome motifFile motThreshold regionfile siteOutfile [options]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--dataset", dest="chipfilename")
-    parser.add_option("--cache", action="store_true", dest="doCache")
-    parser.add_option("--best", action="store_true", dest="bestOnly",
-                      help="only report the best position for each region")
-    parser.add_option("--usepeak", action="store_true", dest="usePeak",
-                      help="use peak position and height from regions file")
-    parser.add_option("--printseq", action="store_true", dest="printSeq")
-    parser.add_option("--nomerge", action="store_true", dest="noMerge")
-    parser.add_option("--markov1", action="store_true", dest="doMarkov1")
-    parser.add_option("--rank", type="int", dest="useRank",
-                      help="return region ranking based on peak height ranking [requires --usepeak]")
-    parser.set_defaults(chipfilename="", doCache=False, bestOnly=False, usePeak=False,
-                        printSeq=False, doMarkov1=False, useRank=False, noMerge=False)
-
+    parser = getParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 5:
@@ -51,6 +38,37 @@ def main(argv=None):
                 options.useRank, options.noMerge)
 
 
+def getParser(usage):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--dataset", dest="chipfilename")
+    parser.add_option("--cache", action="store_true", dest="doCache")
+    parser.add_option("--best", action="store_true", dest="bestOnly",
+                      help="only report the best position for each region")
+    parser.add_option("--usepeak", action="store_true", dest="usePeak",
+                      help="use peak position and height from regions file")
+    parser.add_option("--printseq", action="store_true", dest="printSeq")
+    parser.add_option("--nomerge", action="store_true", dest="noMerge")
+    parser.add_option("--markov1", action="store_true", dest="doMarkov1")
+    parser.add_option("--rank", type="int", dest="useRank",
+                      help="return region ranking based on peak height ranking [requires --usepeak]")
+
+    configParser = getConfigParser()
+    section = "getallsites"
+    chipfilename = getConfigOption(configParser, section, "chipfilename", "")
+    doCache = getConfigBoolOption(configParser, section, "doCache", False)
+    bestOnly = getConfigBoolOption(configParser, section, "bestOnly", False)
+    usePeak = getConfigBoolOption(configParser, section, "usePeak", False)
+    printSeq = getConfigBoolOption(configParser, section, "printSeq", False)
+    doMarkov1 = getConfigBoolOption(configParser, section, "doMarkov1", False)
+    useRank = getConfigBoolOption(configParser, section, "useRank", False)
+    noMerge = getConfigBoolOption(configParser, section, "noMerge", False)
+
+    parser.set_defaults(chipfilename=chipfilename, doCache=doCache, bestOnly=bestOnly, usePeak=usePeak,
+                        printSeq=printSeq, doMarkov1=doMarkov1, useRank=useRank, noMerge=noMerge)
+
+    return parser
+
+
 def getallsites(genome, motfilename, motThreshold, infilename, outfilename, chipfilename="",
                 doCache=False, bestOnly=False, usePeak=False, printSeq=False, doMarkov1=False,
                 useRank=False, noMerge=False):
@@ -90,7 +108,7 @@ def getallsites(genome, motfilename, motThreshold, infilename, outfilename, chip
         doRDS = True
 
     if doRDS:
-        hitRDS = readDataset(chipfilename, verbose = True, cache=doCache)
+        hitRDS = ReadDataset.ReadDataset(chipfilename, verbose = True, cache=doCache)
 
     outfile = open(outfilename, "w")
 
@@ -101,11 +119,11 @@ def getallsites(genome, motfilename, motThreshold, infilename, outfilename, chip
             continue
 
         if usePeak:
-            for (start, stop, length, peakPos, peakHeight) in regions[chrom]:
-                regionList.append((peakHeight, chrom, start, length, peakPos))
+            for region in regions[chrom]:
+                regionList.append((region.peakHeight, chrom, region.start, region.length, region.peakPos))
         else:
-            for (start, stop, length) in regions[chrom]:
-                regionList.append((chrom, start, length))
+            for region in regions[chrom]:
+                regionList.append((chrom, region.start, region.length))
 
     if usePeak:
         regionList.sort()