X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=partition.py;h=cbaa64336f35ce02a2bd75e68c53a9be5bcbb1ff;hp=89148fd8a5cfe1b075650e328d4c6e4180cdcada;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/partition.py b/partition.py index 89148fd..cbaa643 100755 --- a/partition.py +++ b/partition.py @@ -2,7 +2,7 @@ # partition.py # ENRAGE # -""" usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [--minFeature bp] [--padregion bp] [--mergeregion bp] [--nomerge] [--log altlogfile] [--locid] [--ignorerandom] [--chromField fieldNum] +""" usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options] where the regionfiles must be comma-separated with no white space -minFeature controls the size of the smallest partition """ @@ -13,10 +13,12 @@ try: except: pass -import sys, string, optparse -from commoncode import getMergedRegions, writeLog +import sys +import string +import optparse +from commoncode import getMergedRegions, writeLog, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption -versionString = '%s: version 2.0' % sys.argv[0] +versionString = "partition: version 2.1" print versionString @@ -26,24 +28,7 @@ def main(argv=None): usage = "usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--minFeature", type="int", dest="minFeature", - help="size of smallest partition") - parser.add_option("--chromField", type="int", dest="cField", - help="num chromosome fields") - parser.add_option("--padregion", type="int", dest="padregion", - help="padding on each side of region") - parser.add_option("--mergeregion", type="int", dest="mergeregion", - help="bp threshold to merge regions") - parser.add_option("--nomerge", action="store_false", dest="merging", - help="do not merge regions") - parser.add_option("--log", dest="logfilename", - help="log file") - parser.add_option("--locID", action="store_true", dest="locID", - help="use location as region ID") - parser.add_option("--norandom", action="store_true", dest="ignoreRandom", - help="ignore 'random' chromosomes") - parser.set_defaults(minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log") + parser = getParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -66,10 +51,51 @@ def main(argv=None): if options.ignoreRandom: print "ignoring 'random' chromosomes" - partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField, options.padregion, options.locID, options.ignoreRandom, options.mergeregion, options.merging, options.logfilename) + partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField, + options.padregion, options.locID, options.ignoreRandom, options.mergeregion, + options.merging, options.logfilename) -def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log"): +def getParser(usage): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--minFeature", type="int", dest="minFeature", + help="size of smallest partition") + parser.add_option("--chromField", type="int", dest="cField", + help="num chromosome fields") + parser.add_option("--padregion", type="int", dest="padregion", + help="padding on each side of region") + parser.add_option("--mergeregion", type="int", dest="mergeregion", + help="bp threshold to merge regions") + parser.add_option("--nomerge", action="store_false", dest="merging", + help="do not merge regions") + parser.add_option("--log", dest="logfilename", + help="log file") + parser.add_option("--locID", action="store_true", dest="locID", + help="use location as region ID") + parser.add_option("--norandom", action="store_true", dest="ignoreRandom", + help="ignore 'random' chromosomes") + + configParser = getConfigParser() + section = "partition" + minFeature = getConfigIntOption(configParser, section, "minFeature", 25) + cField = getConfigIntOption(configParser, section, "cField", 1) + padregion = getConfigIntOption(configParser, section, "padregion", 1) + locID = getConfigBoolOption(configParser, section, "locID", False) + ignoreRandom = getConfigBoolOption(configParser, section, "ignoreRandom", False) + mergeregion = getConfigIntOption(configParser, section, "mergeregion", 0) + merging = getConfigBoolOption(configParser, section, "merging", True) + logfilename = getConfigOption(configParser, section, "logfilename", "partition.log") + + parser.set_defaults(minFeature=minFeature, cField=cField, padregion=padregion, locID=locID, + ignoreRandom=ignoreRandom, mergeregion=mergeregion, merging=merging, + logfilename=logfilename) + + return parser + + +def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0, + locID=False, ignoreRandom=False, mergeregion=0, merging=True, + logfilename="partition.log"): writeLog(logfilename, versionString, string.join(sys.argv[1:])) @@ -78,7 +104,10 @@ def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padreg numRegions = len(regionFileList) chromList = [] for regionID in range(numRegions): - allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion, minHits=-1, fullChrom = True, verbose = True, chromField = cField, doMerge=merging, pad=padregion) + allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion, + minHits=-1, fullChrom=True, verbose=True, chromField=cField, + doMerge=merging, pad=padregion) + for achrom in allregionsDict[regionID]: if achrom not in chromList: chromList.append(achrom) @@ -88,16 +117,16 @@ def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padreg chromList = sorted(chromList) for chrom in chromList: - if ignoreRandom and 'random' in chrom: + if ignoreRandom and "random" in chrom: continue outregionDict[chrom] = [] pointList = [] for regionID in range(numRegions): if chrom in allregionsDict[regionID]: - for (rstart, rstop, rlength) in allregionsDict[regionID][chrom]: - pointList.append(rstart) - pointList.append(rstop) + for region in allregionsDict[regionID][chrom]: + pointList.append(region.start) + pointList.append(region.stop) pointList.sort() start = 0 @@ -106,20 +135,22 @@ def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padreg outregionDict[chrom].append((start, point - 1, point - 1 - start)) start = point - outfile = open(outfilename, 'w') + outfile = open(outfilename, "w") if locID: - outfile.write('#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n') + outfile.write("#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n") else: - outfile.write('#labelID\tchrom\tstart\tstop\tlength_kb\n') + outfile.write("#labelID\tchrom\tstart\tstop\tlength_kb\n") index = 0 for chrom in outregionDict: for (start, stop, length) in outregionDict[chrom]: index += 1 if locID: - outfile.write("%s:%d-%d\t%s\t%d\t%d\t%.3f\n" % (chrom, start, stop, chrom, start, stop, length/1000.)) + label = "%s:%d-%d" % (chrom, start, stop) else: - outfile.write("%s%d\t%s\t%d\t%d\t%.3f\n" % (mergeID, index, chrom, start, stop, length/1000.)) + label = "%s%d" % (mergeID, index) + + outfile.write("%s\t%s\t%d\t%d\t%.3f\n" % (label, chrom, start, stop, length/1000.)) message = "%s was partitioned into %d regions" % (mergeID, index) print message