X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=distalPairs.py;fp=distalPairs.py;h=be403109110c3fb40f7bba831f6c2ea48e496536;hp=5bc25321b25b8a30e9b09511cabbee4355010b71;hb=77dccd7c98d8cdb60caaf178b1123df71ea662c9;hpb=bc30aca13e5ec397c92e67002fbf7a103130b828 diff --git a/distalPairs.py b/distalPairs.py index 5bc2532..be40310 100755 --- a/distalPairs.py +++ b/distalPairs.py @@ -85,33 +85,17 @@ def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=Fa print time.ctime() - if doSplices: - print "getting splices" - splicesDict = RDS.getSplicesDict(withChrom=True, withPairID=True, readIDDict=True, splitRead=True) - print "got splices" - print "getting uniq reads" uniqDict = RDS.getReadsDict(withChrom=True, withPairID=True, doUniqs=True, readIDDict=True) print "got uniqs" if doSplices: - for readID in splicesDict: - theRead = splicesDict[readID] - read0 = theRead[0] - del read0[1] - try: - uniqDict[readID].append(read0) - except: - if len(theRead) == 4: - read2 = theRead[2] - del read2[1] - uniqDict[readID] = [read0,read2] + addSplicesToUniqReads(RDS, uniqDict) if doVerbose: print len(uniqDict), time.ctime() outfile = open(outfilename,"w") - diffChrom = 0 distal = 0 total = 0 @@ -132,16 +116,15 @@ def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=Fa continue else: outline = "%s\t%s\t%d\t%s\t%s\t%d\t%s" % (readID, chrom1, start1, sense1, chrom2, start2, sense2) - outfile.write(outline + "\n") + print >> outfile, outline if doVerbose: print diffChrom, outline else: dist = abs(start1 - start2) - if minDist < dist < maxDist: distal += 1 outline = "%s\t%s\t%d\t%s\t%d\t%s\t%d" % (readID, chrom1, start1, sense1, start2, sense2, dist) - outfile.write(outline + "\n") + print >> outfile, outline if doVerbose: print distal, outline @@ -157,5 +140,22 @@ def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=Fa print time.ctime() +def addSplicesToUniqReads(RDS, uniqDict): + print "getting splices" + splicesDict = RDS.getSplicesDict(withChrom=True, withPairID=True, readIDDict=True, splitRead=True) + print "got splices" + for readID in splicesDict: + theRead = splicesDict[readID] + read0 = theRead[0] + del read0[1] + try: + uniqDict[readID].append(read0) + except: + if len(theRead) == 4: + read2 = theRead[2] + del read2[1] + uniqDict[readID] = [read0,read2] + + if __name__ == "__main__": main(sys.argv) \ No newline at end of file