12 from commoncode import readDataset, getMergedRegions, findPeak
14 print "%prog: version 3.0"
20 usage = "usage: python %prog rdsfile1 regionfile1 rdsfile2 regionfile2 outfile [--reject1 File1] [--reject2 File2] [--union] [--cache] [--raw]"
22 parser = optparse.OptionParser(usage=usage)
23 parser.add_option("--reject1", dest="rejectOneName")
24 parser.add_option("--reject2", dest="rejectTwoName")
25 parser.add_option("--union", action="store_true", dest="trackReject")
26 parser.add_option("--cache", action="store_true", dest="doCache")
27 parser.add_option("--raw", action="store_false", dest="normalize")
28 parser.add_option("--verbose", action="store_true", dest="doVerbose")
29 parser.set_defaults(rejectOneName=None, rejectTwoName=None, trackReject=False,
30 doCache=False, normalize=True, doVerbose=False)
32 (options, args) = parser.parse_args(argv[1:])
39 regionOneName = args[1]
41 regionTwoName = args[3]
44 regionintersects(readOneName, regionOneName, readTwoName, regionTwoName,
45 outfilename, options.rejectOneName, options.rejectTwoName,
46 options.trackReject, options.doCache, options.normalize,
50 def regionintersects(readOneName, regionOneName, readTwoName, regionTwoName,
51 outfilename, rejectOneName=None, rejectTwoName=None,
52 trackReject=False, doCache=False, normalize=True, doVerbose=False):
56 outfile = open(outfilename, "w")
59 if rejectOneName is not None:
62 rejectOne = open(rejectOneName, "w")
64 if rejectTwoName is not None:
67 rejectTwo = open(rejectTwoName, "w")
69 oneDict = getMergedRegions(regionOneName, mergedist, verbose=doVerbose)
70 twoDict = getMergedRegions(regionTwoName, mergedist, verbose=doVerbose)
72 oneRDS = readDataset(readOneName, verbose=doVerbose, cache=doCache)
73 twoRDS = readDataset(readTwoName, verbose=doVerbose, cache=doCache)
76 normalize1 = len(oneRDS) / 1000000.
77 normalize2 = len(twoRDS) / 1000000.
91 for rchrom in oneDict:
92 numRegionsOne += len(oneDict[rchrom])
94 for rchrom in twoDict:
95 numRegionsTwo += len(twoDict[rchrom])
97 outfile.write("#%d\tregions in\t%s\n#%d\tregions in\t%s\n" % (numRegionsOne, regionOneName, numRegionsTwo, regionTwoName))
99 for rchrom in oneDict:
100 if rchrom not in twoDict:
106 fullchrom = "chr" + rchrom
107 oneReads = oneRDS.getReadsDict(fullChrom=True, chrom=fullchrom, withWeight=True, doMulti=True)
108 dictLen1 = len(oneReads[fullchrom])
109 twoReads = twoRDS.getReadsDict(fullChrom=True, chrom=fullchrom, withWeight=True, doMulti=True)
110 dictLen2 = len(twoReads[fullchrom])
112 onePeaksDict[chrom] = []
113 oneFoundDict[chrom] = []
114 for (start, stop, length) in oneDict[chrom]:
116 for localIndex in xrange(rindex, dictLen1):
117 read = oneReads[fullchrom][localIndex]
120 elif start <= read[0] <= stop:
121 readList.append(read)
125 if len(readList) < 1:
130 (topPos, numHits, smoothArray, numPlus) = findPeak(readList, start, length, doWeight=True)
131 onePeakScore = smoothArray[topPos[0]]
132 onePeaksDict[chrom].append((topPos[0] + start, length/2, start, stop, numHits/normalize1, onePeakScore/normalize1))
134 for (start, stop, length) in twoDict[chrom]:
136 for localIndex in xrange(rindex2, dictLen2):
137 read = twoReads[fullchrom][localIndex]
140 elif start <= read[0] <= stop:
141 readList2.append(read)
145 if len(readList2) < 1:
149 (topPos, numHits, smoothArray, numPlus) = findPeak(readList2, start, length, doWeight=True)
150 numHits /= normalize2
152 twoPeak = topPos[0] + start
154 twoPeakScore = smoothArray[topPos[0]] / normalize2
155 for (onePeak, oneRadius, ostart, ostop, ohits, opeakScore) in onePeaksDict[chrom]:
156 if abs(twoPeak - onePeak) < (twoRadius + oneRadius):
157 if (onePeak, oneRadius, ostart, ostop, ohits) not in oneFoundDict:
158 oneFoundDict[chrom].append((onePeak, oneRadius, ostart, ostop, ohits))
162 outline = "common%d\tchr%s\t%d\t%d\t%.1f\t%.1f\tchr%s\t%d\t%d\t%.1f\t%.1f" % (commonRegions, chrom, ostart, ostop, ohits, opeakScore, chrom, start, stop, numHits, twoPeakScore)
166 outfile.write(outline + "\n")
168 if trackReject and not twoIsCommon:
170 outline = "rejectTwo%d\tchr%s\t%d\t%d\t%.1f\t%.1f" % (twoRejectIndex, chrom, start, stop, numHits, twoPeakScore)
172 rejectTwo.write(outline + "\n")
174 outfile.write(outline + "\n")
180 for (onePeak, oneRadius, ostart, ostop, ohits, opeakScore) in onePeaksDict[chrom]:
181 if (onePeak, oneRadius, ostart, ostop, ohits) not in oneFoundDict[chrom]:
183 outline = "rejectOne%d\tchr%s\t%d\t%d\t%.1f\t%.1f" % (oneRejectIndex, chrom, ostart, ostop, ohits, opeakScore)
185 rejectOne.write(outline + "\n")
187 outfile.write(outline + "\n")
193 print "common: %d one-only: %d two-only: %d" % (commonRegions, oneRejectIndex, twoRejectIndex)
194 outfile.write("#common: %d\tone-only: %d\ttwo-only: %d\n" % (commonRegions, oneRejectIndex, twoRejectIndex))
196 print "common: %d" % commonRegions
197 outfile.write("#common: %d\n" % commonRegions)
202 if __name__ == "__main__":