5 """ usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]
6 where the regionfiles must be comma-separated with no white space
7 -minFeature controls the size of the smallest partition
19 from commoncode import getMergedRegions, writeLog, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
21 versionString = "partition: version 2.1"
29 usage = "usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]"
31 parser = getParser(usage)
32 (options, args) = parser.parse_args(argv[1:])
43 print "padding %d bp on each side of a region" % options.padregion
45 if options.mergeregion:
46 print "merging regions closer than %d bp" % options.mergeregion
49 print "using locations as region ID"
51 if options.ignoreRandom:
52 print "ignoring 'random' chromosomes"
54 partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField,
55 options.padregion, options.locID, options.ignoreRandom, options.mergeregion,
56 options.merging, options.logfilename)
60 parser = optparse.OptionParser(usage=usage)
61 parser.add_option("--minFeature", type="int", dest="minFeature",
62 help="size of smallest partition")
63 parser.add_option("--chromField", type="int", dest="cField",
64 help="num chromosome fields")
65 parser.add_option("--padregion", type="int", dest="padregion",
66 help="padding on each side of region")
67 parser.add_option("--mergeregion", type="int", dest="mergeregion",
68 help="bp threshold to merge regions")
69 parser.add_option("--nomerge", action="store_false", dest="merging",
70 help="do not merge regions")
71 parser.add_option("--log", dest="logfilename",
73 parser.add_option("--locID", action="store_true", dest="locID",
74 help="use location as region ID")
75 parser.add_option("--norandom", action="store_true", dest="ignoreRandom",
76 help="ignore 'random' chromosomes")
78 configParser = getConfigParser()
80 minFeature = getConfigIntOption(configParser, section, "minFeature", 25)
81 cField = getConfigIntOption(configParser, section, "cField", 1)
82 padregion = getConfigIntOption(configParser, section, "padregion", 1)
83 locID = getConfigBoolOption(configParser, section, "locID", False)
84 ignoreRandom = getConfigBoolOption(configParser, section, "ignoreRandom", False)
85 mergeregion = getConfigIntOption(configParser, section, "mergeregion", 0)
86 merging = getConfigBoolOption(configParser, section, "merging", True)
87 logfilename = getConfigOption(configParser, section, "logfilename", "partition.log")
89 parser.set_defaults(minFeature=minFeature, cField=cField, padregion=padregion, locID=locID,
90 ignoreRandom=ignoreRandom, mergeregion=mergeregion, merging=merging,
91 logfilename=logfilename)
96 def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0,
97 locID=False, ignoreRandom=False, mergeregion=0, merging=True,
98 logfilename="partition.log"):
100 writeLog(logfilename, versionString, string.join(sys.argv[1:]))
103 regionFileList = regionfiles.split(',')
104 numRegions = len(regionFileList)
106 for regionID in range(numRegions):
107 allregionsDict[regionID] = getMergedRegions(regionFileList[regionID], maxDist = mergeregion,
108 minHits=-1, fullChrom=True, verbose=True, chromField=cField,
109 doMerge=merging, pad=padregion)
111 for achrom in allregionsDict[regionID]:
112 if achrom not in chromList:
113 chromList.append(achrom)
117 chromList = sorted(chromList)
119 for chrom in chromList:
120 if ignoreRandom and "random" in chrom:
123 outregionDict[chrom] = []
125 for regionID in range(numRegions):
126 if chrom in allregionsDict[regionID]:
127 for region in allregionsDict[regionID][chrom]:
128 pointList.append(region.start)
129 pointList.append(region.stop)
133 for point in pointList:
134 if (point - start) > minFeature:
135 outregionDict[chrom].append((start, point - 1, point - 1 - start))
138 outfile = open(outfilename, "w")
140 outfile.write("#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n")
142 outfile.write("#labelID\tchrom\tstart\tstop\tlength_kb\n")
145 for chrom in outregionDict:
146 for (start, stop, length) in outregionDict[chrom]:
149 label = "%s:%d-%d" % (chrom, start, stop)
151 label = "%s%d" % (mergeID, index)
153 outfile.write("%s\t%s\t%d\t%d\t%.3f\n" % (label, chrom, start, stop, length/1000.))
155 message = "%s was partitioned into %d regions" % (mergeID, index)
157 writeLog(logfilename, versionString, message)
159 if __name__ == "__main__":