13 from commoncode import getMergedRegions, findPeak, getConfigParser, getConfigOption, getConfigBoolOption
16 print "regionintersects: version 3.1"
22 usage = "usage: python %prog rdsfile1 regionfile1 rdsfile2 regionfile2 outfile [--reject1 File1] [--reject2 File2] [--union] [--cache] [--raw]"
24 parser = getParser(usage)
25 (options, args) = parser.parse_args(argv[1:])
32 regionOneName = args[1]
34 regionTwoName = args[3]
37 regionintersects(readOneName, regionOneName, readTwoName, regionTwoName,
38 outfilename, options.rejectOneName, options.rejectTwoName,
39 options.trackReject, options.doCache, options.normalize,
44 parser = optparse.OptionParser(usage=usage)
45 parser.add_option("--reject1", dest="rejectOneName")
46 parser.add_option("--reject2", dest="rejectTwoName")
47 parser.add_option("--union", action="store_true", dest="trackReject")
48 parser.add_option("--cache", action="store_true", dest="doCache")
49 parser.add_option("--raw", action="store_false", dest="normalize")
50 parser.add_option("--verbose", action="store_true", dest="doVerbose")
52 configParser = getConfigParser()
53 section = "regionintersects"
54 rejectOneName = getConfigOption(configParser, section, "rejectOneName", None)
55 rejectTwoName = getConfigOption(configParser, section, "rejectTwoName", None)
56 trackReject = getConfigBoolOption(configParser, section, "trackReject", False)
57 doCache = getConfigBoolOption(configParser, section, "doCache", False)
58 normalize = getConfigBoolOption(configParser, section, "normalize", True)
59 doVerbose = getConfigBoolOption(configParser, section, "doVerbose", False)
61 parser.set_defaults(rejectOneName=rejectOneName, rejectTwoName=rejectTwoName,
62 trackReject=trackReject, doCache=doCache, normalize=normalize,
68 def regionintersects(readOneName, regionOneName, readTwoName, regionTwoName,
69 outfilename, rejectOneName=None, rejectTwoName=None,
70 trackReject=False, doCache=False, normalize=True, doVerbose=False):
74 outfile = open(outfilename, "w")
77 if rejectOneName is not None:
80 rejectOne = open(rejectOneName, "w")
82 if rejectTwoName is not None:
85 rejectTwo = open(rejectTwoName, "w")
87 oneDict = getMergedRegions(regionOneName, mergedist, verbose=doVerbose)
88 twoDict = getMergedRegions(regionTwoName, mergedist, verbose=doVerbose)
90 oneRDS = ReadDataset.ReadDataset(readOneName, verbose=doVerbose, cache=doCache)
91 twoRDS = ReadDataset.ReadDataset(readTwoName, verbose=doVerbose, cache=doCache)
94 normalize1 = len(oneRDS) / 1000000.
95 normalize2 = len(twoRDS) / 1000000.
109 commonChromosomeList = set(oneDict.keys())
110 for rchrom in oneDict:
111 numRegionsOne += len(oneDict[rchrom])
113 for rchrom in twoDict:
114 commonChromosomeList.add(rchrom)
115 numRegionsTwo += len(twoDict[rchrom])
117 outfile.write("#%d\tregions in\t%s\n#%d\tregions in\t%s\n" % (numRegionsOne, regionOneName, numRegionsTwo, regionTwoName))
119 for chromosome in commonChromosomeList:
123 fullchrom = "chr%s" % chromosome
124 oneReads = oneRDS.getReadsDict(fullChrom=True, chrom=fullchrom, withWeight=True, doMulti=True)
125 dictLen1 = len(oneReads[fullchrom])
126 twoReads = twoRDS.getReadsDict(fullChrom=True, chrom=fullchrom, withWeight=True, doMulti=True)
127 dictLen2 = len(twoReads[fullchrom])
128 onePeaksDict[chromosome] = []
129 oneFoundDict[chromosome] = []
130 for region in oneDict[chromosome]:
133 length = region.length
135 for localIndex in xrange(rindex, dictLen1):
136 read = oneReads[fullchrom][localIndex]
137 if read["start"] < start:
139 elif start <= read["start"] <= stop:
140 readList.append(read)
144 if len(readList) < 1:
149 peak = findPeak(readList, start, length, doWeight=True)
150 onePeakScore = peak.smoothArray[peak.topPos[0]]
151 onePeaksDict[chromosome].append((peak.topPos[0] + start, length/2, start, stop, peak.numHits/normalize1, onePeakScore/normalize1))
153 for region in twoDict[chromosome]:
156 length = region.length
158 for localIndex in xrange(rindex2, dictLen2):
159 read = twoReads[fullchrom][localIndex]
160 if read["start"] < start:
162 elif start <= read["start"] <= stop:
163 readList2.append(read)
167 if len(readList2) < 1:
171 peak2 = findPeak(readList2, start, length, doWeight=True)
172 numHits = peak2.numHits
173 numHits /= normalize2
175 twoPeak = peak2.topPos[0] + start
177 twoPeakScore = peak2.smoothArray[peak2.topPos[0]] / normalize2
178 for (onePeak, oneRadius, ostart, ostop, ohits, opeakScore) in onePeaksDict[chromosome]:
179 if abs(twoPeak - onePeak) < (twoRadius + oneRadius):
180 if (onePeak, oneRadius, ostart, ostop, ohits) not in oneFoundDict:
181 oneFoundDict[chromosome].append((onePeak, oneRadius, ostart, ostop, ohits))
185 outline = "common%d\tchr%s\t%d\t%d\t%.1f\t%.1f\tchr%s\t%d\t%d\t%.1f\t%.1f" % (commonRegions, chromosome, ostart, ostop, ohits, opeakScore, chromosome, start, stop, numHits, twoPeakScore)
189 print >> outfile, outline
191 if trackReject and not twoIsCommon:
193 outline = "rejectTwo%d\tchr%s\t%d\t%d\t%.1f\t%.1f" % (twoRejectIndex, chromosome, start, stop, numHits, twoPeakScore)
195 print >> rejectTwo, outline
197 print >> outfile, outline
203 for (onePeak, oneRadius, ostart, ostop, ohits, opeakScore) in onePeaksDict[chromosome]:
204 if (onePeak, oneRadius, ostart, ostop, ohits) not in oneFoundDict[chromosome]:
206 outline = "rejectOne%d\tchr%s\t%d\t%d\t%.1f\t%.1f" % (oneRejectIndex, chromosome, ostart, ostop, ohits, opeakScore)
208 print >> rejectOne, outline
210 print >> outfile, outline
216 print "common: %d one-only: %d two-only: %d" % (commonRegions, oneRejectIndex, twoRejectIndex)
217 outfile.write("#common: %d\tone-only: %d\ttwo-only: %d\n" % (commonRegions, oneRejectIndex, twoRejectIndex))
219 print "common: %d" % commonRegions
220 outfile.write("#common: %d\n" % commonRegions)
225 if __name__ == "__main__":