erange version 4.0a dev release
[erange.git] / partition.py
index 89148fd8a5cfe1b075650e328d4c6e4180cdcada..cbaa64336f35ce02a2bd75e68c53a9be5bcbb1ff 100755 (executable)
@@ -2,7 +2,7 @@
 #  partition.py
 #  ENRAGE
 #
-""" usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [--minFeature bp] [--padregion bp] [--mergeregion bp] [--nomerge] [--log altlogfile] [--locid] [--ignorerandom] [--chromField fieldNum]
+""" usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]
            where the regionfiles must be comma-separated with no white space
            -minFeature controls the size of the smallest partition
 """
@@ -13,10 +13,12 @@ try:
 except:
     pass
 
-import sys, string, optparse
-from commoncode import getMergedRegions, writeLog
+import sys
+import string
+import optparse
+from commoncode import getMergedRegions, writeLog, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
 
-versionString = '%s: version 2.0' % sys.argv[0]
+versionString = "partition: version 2.1"
 print versionString
 
 
@@ -26,24 +28,7 @@ def main(argv=None):
 
     usage = "usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--minFeature", type="int", dest="minFeature",
-                      help="size of smallest partition")
-    parser.add_option("--chromField", type="int", dest="cField",
-                      help="num chromosome fields")
-    parser.add_option("--padregion", type="int", dest="padregion",
-                      help="padding on each side of region")
-    parser.add_option("--mergeregion", type="int", dest="mergeregion",
-                      help="bp threshold to merge regions")
-    parser.add_option("--nomerge", action="store_false", dest="merging",
-                      help="do not merge regions")
-    parser.add_option("--log", dest="logfilename",
-                      help="log file")
-    parser.add_option("--locID", action="store_true", dest="locID",
-                      help="use location as region ID")
-    parser.add_option("--norandom", action="store_true", dest="ignoreRandom",
-                      help="ignore 'random' chromosomes")
-    parser.set_defaults(minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log")
+    parser = getParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 3:
@@ -66,10 +51,51 @@ def main(argv=None):
     if options.ignoreRandom:
         print "ignoring 'random' chromosomes"
 
-    partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField, options.padregion, options.locID, options.ignoreRandom, options.mergeregion, options.merging, options.logfilename)
+    partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField,
+              options.padregion, options.locID, options.ignoreRandom, options.mergeregion,
+              options.merging, options.logfilename)
 
 
-def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log"):
+def getParser(usage):
+    """Return an optparse.OptionParser built from usage, carrying the partition script's options and defaults."""
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--minFeature", type="int", dest="minFeature",
+                      help="size of smallest partition")
+    parser.add_option("--chromField", type="int", dest="cField",
+                      help="num chromosome fields")
+    parser.add_option("--padregion", type="int", dest="padregion",
+                      help="padding on each side of region")
+    parser.add_option("--mergeregion", type="int", dest="mergeregion",
+                      help="bp threshold to merge regions")
+    parser.add_option("--nomerge", action="store_false", dest="merging",
+                      help="do not merge regions")
+    parser.add_option("--log", dest="logfilename", help="log file")
+    parser.add_option("--locID", action="store_true", dest="locID",
+                      help="use location as region ID")
+    parser.add_option("--norandom", action="store_true", dest="ignoreRandom",
+                      help="ignore 'random' chromosomes")
+    # Defaults are looked up under the "partition" config section; the last argument is presumably the fallback.
+    configParser = getConfigParser()
+    section = "partition"
+    minFeature = getConfigIntOption(configParser, section, "minFeature", 25)
+    cField = getConfigIntOption(configParser, section, "cField", 1)
+    padregion = getConfigIntOption(configParser, section, "padregion", 0)  # 0, same as partition()'s default
+    locID = getConfigBoolOption(configParser, section, "locID", False)
+    ignoreRandom = getConfigBoolOption(configParser, section, "ignoreRandom", False)
+    mergeregion = getConfigIntOption(configParser, section, "mergeregion", 0)
+    merging = getConfigBoolOption(configParser, section, "merging", True)
+    logfilename = getConfigOption(configParser, section, "logfilename", "partition.log")
+
+    parser.set_defaults(minFeature=minFeature, cField=cField, padregion=padregion, locID=locID,
+                        ignoreRandom=ignoreRandom, mergeregion=mergeregion, merging=merging,
+                        logfilename=logfilename)
+
+    return parser
+
+
+def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0,
+              locID=False, ignoreRandom=False, mergeregion=0, merging=True,
+              logfilename="partition.log"):
 
     writeLog(logfilename, versionString, string.join(sys.argv[1:]))
 
@@ -78,7 +104,10 @@ def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padreg
     numRegions = len(regionFileList)
     chromList = []
     for regionID in range(numRegions):
-        allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion,  minHits=-1, fullChrom = True, verbose = True, chromField = cField, doMerge=merging, pad=padregion)
+        allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion,
+                                                    minHits=-1, fullChrom=True, verbose=True, chromField=cField,
+                                                    doMerge=merging, pad=padregion)
+
         for achrom in allregionsDict[regionID]:
             if achrom not in chromList:
                 chromList.append(achrom)
@@ -88,16 +117,16 @@ def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padreg
     chromList = sorted(chromList)
 
     for chrom in chromList:
-        if ignoreRandom and 'random' in chrom:
+        if ignoreRandom and "random" in chrom:
             continue
 
         outregionDict[chrom] = []
         pointList = []
         for regionID in range(numRegions):
             if chrom in allregionsDict[regionID]:
-                for (rstart, rstop, rlength) in allregionsDict[regionID][chrom]:
-                    pointList.append(rstart)
-                    pointList.append(rstop)
+                for region in allregionsDict[regionID][chrom]:
+                    pointList.append(region.start)
+                    pointList.append(region.stop)
 
         pointList.sort()
         start = 0
@@ -106,20 +135,22 @@ def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padreg
                 outregionDict[chrom].append((start, point - 1, point - 1 - start))
                 start = point
 
-    outfile = open(outfilename, 'w')
+    outfile = open(outfilename, "w")
     if locID:
-        outfile.write('#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n')
+        outfile.write("#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n")
     else:
-        outfile.write('#labelID\tchrom\tstart\tstop\tlength_kb\n')
+        outfile.write("#labelID\tchrom\tstart\tstop\tlength_kb\n")
 
     index = 0
     for chrom in outregionDict:
         for (start, stop, length) in outregionDict[chrom]:
             index += 1
             if locID:
-                outfile.write("%s:%d-%d\t%s\t%d\t%d\t%.3f\n" % (chrom, start, stop, chrom, start, stop, length/1000.))
+                label = "%s:%d-%d" % (chrom, start, stop)
             else:
-                outfile.write("%s%d\t%s\t%d\t%d\t%.3f\n" % (mergeID, index, chrom, start, stop, length/1000.))
+                label = "%s%d" % (mergeID, index)
+
+            outfile.write("%s\t%s\t%d\t%d\t%.3f\n" % (label, chrom, start, stop, length/1000.))
 
     message = "%s was partitioned into %d regions" % (mergeID, index)
     print message