5 """ usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [--minFeature bp] [--padregion bp] [--mergeregion bp] [--nomerge] [--log altlogfile] [--locid] [--ignorerandom] [--chromField fieldNum]
6 where the regionfiles must be comma-separated with no white space
7 -minFeature controls the size of the smallest partition
16 import sys, string, optparse
17 from commoncode import getMergedRegions, writeLog
19 versionString = '%s: version 2.0' % sys.argv[0]
27 usage = "usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]"
29 parser = optparse.OptionParser(usage=usage)
30 parser.add_option("--minFeature", type="int", dest="minFeature",
31 help="size of smallest partition")
32 parser.add_option("--chromField", type="int", dest="cField",
33 help="num chromosome fields")
34 parser.add_option("--padregion", type="int", dest="padregion",
35 help="padding on each side of region")
36 parser.add_option("--mergeregion", type="int", dest="mergeregion",
37 help="bp threshold to merge regions")
38 parser.add_option("--nomerge", action="store_false", dest="merging",
39 help="do not merge regions")
40 parser.add_option("--log", dest="logfilename",
42 parser.add_option("--locID", action="store_true", dest="locID",
43 help="use location as region ID")
44 parser.add_option("--norandom", action="store_true", dest="ignoreRandom",
45 help="ignore 'random' chromosomes")
46 parser.set_defaults(minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log")
47 (options, args) = parser.parse_args(argv[1:])
58 print "padding %d bp on each side of a region" % options.padregion
60 if options.mergeregion:
61 print "merging regions closer than %d bp" % options.mergeregion
64 print "using locations as region ID"
66 if options.ignoreRandom:
67 print "ignoring 'random' chromosomes"
69 partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField, options.padregion, options.locID, options.ignoreRandom, options.mergeregion, options.merging, options.logfilename)
72 def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0, locID=False, ignoreRandom=False, mergeregion=0, merging=True, logfilename="partition.log"):
74 writeLog(logfilename, versionString, string.join(sys.argv[1:]))
77 regionFileList = regionfiles.split(',')
78 numRegions = len(regionFileList)
80 for regionID in range(numRegions):
81 allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion, minHits=-1, fullChrom = True, verbose = True, chromField = cField, doMerge=merging, pad=padregion)
82 for achrom in allregionsDict[regionID]:
83 if achrom not in chromList:
84 chromList.append(achrom)
88 chromList = sorted(chromList)
90 for chrom in chromList:
91 if ignoreRandom and 'random' in chrom:
94 outregionDict[chrom] = []
96 for regionID in range(numRegions):
97 if chrom in allregionsDict[regionID]:
98 for (rstart, rstop, rlength) in allregionsDict[regionID][chrom]:
99 pointList.append(rstart)
100 pointList.append(rstop)
104 for point in pointList:
105 if (point - start) > minFeature:
106 outregionDict[chrom].append((start, point - 1, point - 1 - start))
109 outfile = open(outfilename, 'w')
111 outfile.write('#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n')
113 outfile.write('#labelID\tchrom\tstart\tstop\tlength_kb\n')
116 for chrom in outregionDict:
117 for (start, stop, length) in outregionDict[chrom]:
120 outfile.write("%s:%d-%d\t%s\t%d\t%d\t%.3f\n" % (chrom, start, stop, chrom, start, stop, length/1000.))
122 outfile.write("%s%d\t%s\t%d\t%d\t%.3f\n" % (mergeID, index, chrom, start, stop, length/1000.))
124 message = "%s was partitioned into %d regions" % (mergeID, index)
126 writeLog(logfilename, versionString, message)
128 if __name__ == "__main__":