# partition.py
# ENRAGE
#
-""" usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [--minFeature bp] [--padregion bp] [--mergeregion bp] [--nomerge] [--log altlogfile] [--locid] [--ignorerandom] [--chromField fieldNum]
+""" usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]
where the regionfiles must be comma-separated with no white space
-minFeature controls the size of the smallest partition
"""
except:
pass
-import sys, string, optparse
-from commoncode import getMergedRegions, writeLog
+import sys
+import string
+import optparse
+from commoncode import getMergedRegions, writeLog, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
-versionString = '%s: version 2.0' % sys.argv[0]
+versionString = "partition: version 2.1"
print versionString
usage = "usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--minFeature", type="int", dest="minFeature",
- help="size of smallest partition")
- parser.add_option("--chromField", type="int", dest="cField",
- help="num chromosome fields")
- parser.add_option("--padregion", type="int", dest="padregion",
- help="padding on each side of region")
- parser.add_option("--mergeregion", type="int", dest="mergeregion",
- help="bp threshold to merge regions")
- parser.add_option("--nomerge", action="store_false", dest="merging",
- help="do not merge regions")
- parser.add_option("--log", dest="logfilename",
- help="log file")
- parser.add_option("--locID", action="store_true", dest="locID",
- help="use location as region ID")
- parser.add_option("--norandom", action="store_true", dest="ignoreRandom",
- help="ignore 'random' chromosomes")
- parser.set_defaults(minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log")
+ parser = getParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 3:
if options.ignoreRandom:
print "ignoring 'random' chromosomes"
- partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField, options.padregion, options.locID, options.ignoreRandom, options.mergeregion, options.merging, options.logfilename)
+ partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField,
+ options.padregion, options.locID, options.ignoreRandom, options.mergeregion,
+ options.merging, options.logfilename)
-def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log"):
+def getParser(usage):
+    """Build the optparse parser for the partition command line.
+
+    usage is passed unchanged to optparse.OptionParser.
+
+    Option defaults are looked up in the "partition" section of the object
+    returned by getConfigParser().  The last argument of each getConfig*Option
+    call is presumably the fallback used when the option is not configured
+    (confirm against commoncode); those fallbacks are kept equal to the
+    keyword defaults of partition().
+
+    Returns the configured optparse.OptionParser.
+    """
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--minFeature", type="int", dest="minFeature",
+                      help="size of smallest partition")
+    parser.add_option("--chromField", type="int", dest="cField",
+                      help="num chromosome fields")
+    parser.add_option("--padregion", type="int", dest="padregion",
+                      help="padding on each side of region")
+    parser.add_option("--mergeregion", type="int", dest="mergeregion",
+                      help="bp threshold to merge regions")
+    parser.add_option("--nomerge", action="store_false", dest="merging",
+                      help="do not merge regions")
+    parser.add_option("--log", dest="logfilename",
+                      help="log file")
+    parser.add_option("--locID", action="store_true", dest="locID",
+                      help="use location as region ID")
+    parser.add_option("--norandom", action="store_true", dest="ignoreRandom",
+                      help="ignore 'random' chromosomes")
+
+    configParser = getConfigParser()
+    section = "partition"
+    minFeature = getConfigIntOption(configParser, section, "minFeature", 25)
+    cField = getConfigIntOption(configParser, section, "cField", 1)
+    # Fallback is 0 (no padding), matching partition()'s padregion default;
+    # a fallback of 1 would silently pad every region by one bp.
+    padregion = getConfigIntOption(configParser, section, "padregion", 0)
+    locID = getConfigBoolOption(configParser, section, "locID", False)
+    ignoreRandom = getConfigBoolOption(configParser, section, "ignoreRandom", False)
+    mergeregion = getConfigIntOption(configParser, section, "mergeregion", 0)
+    merging = getConfigBoolOption(configParser, section, "merging", True)
+    logfilename = getConfigOption(configParser, section, "logfilename", "partition.log")
+
+    parser.set_defaults(minFeature=minFeature, cField=cField, padregion=padregion, locID=locID,
+                        ignoreRandom=ignoreRandom, mergeregion=mergeregion, merging=merging,
+                        logfilename=logfilename)
+
+    return parser
+
+
+def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0,
+ locID=False, ignoreRandom=False, mergeregion=0, merging=True,
+ logfilename="partition.log"):
writeLog(logfilename, versionString, string.join(sys.argv[1:]))
numRegions = len(regionFileList)
chromList = []
for regionID in range(numRegions):
- allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion, minHits=-1, fullChrom = True, verbose = True, chromField = cField, doMerge=merging, pad=padregion)
+ allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion,
+ minHits=-1, fullChrom=True, verbose=True, chromField=cField,
+ doMerge=merging, pad=padregion)
+
for achrom in allregionsDict[regionID]:
if achrom not in chromList:
chromList.append(achrom)
chromList = sorted(chromList)
for chrom in chromList:
- if ignoreRandom and 'random' in chrom:
+ if ignoreRandom and "random" in chrom:
continue
outregionDict[chrom] = []
pointList = []
for regionID in range(numRegions):
if chrom in allregionsDict[regionID]:
- for (rstart, rstop, rlength) in allregionsDict[regionID][chrom]:
- pointList.append(rstart)
- pointList.append(rstop)
+ for region in allregionsDict[regionID][chrom]:
+ pointList.append(region.start)
+ pointList.append(region.stop)
pointList.sort()
start = 0
outregionDict[chrom].append((start, point - 1, point - 1 - start))
start = point
- outfile = open(outfilename, 'w')
+ outfile = open(outfilename, "w")
if locID:
- outfile.write('#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n')
+ outfile.write("#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n")
else:
- outfile.write('#labelID\tchrom\tstart\tstop\tlength_kb\n')
+ outfile.write("#labelID\tchrom\tstart\tstop\tlength_kb\n")
index = 0
for chrom in outregionDict:
for (start, stop, length) in outregionDict[chrom]:
index += 1
if locID:
- outfile.write("%s:%d-%d\t%s\t%d\t%d\t%.3f\n" % (chrom, start, stop, chrom, start, stop, length/1000.))
+ label = "%s:%d-%d" % (chrom, start, stop)
else:
- outfile.write("%s%d\t%s\t%d\t%d\t%.3f\n" % (mergeID, index, chrom, start, stop, length/1000.))
+ label = "%s%d" % (mergeID, index)
+
+ outfile.write("%s\t%s\t%d\t%d\t%.3f\n" % (label, chrom, start, stop, length/1000.))
message = "%s was partitioned into %d regions" % (mergeID, index)
print message