5 # Created by Ali Mortazavi on 10/14/08.
19 from commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
26 print "distalPairs: version 3.4"
27 print "looks at all chromosomes simultaneously: is both slow and takes up large amount of RAM"
28 usage = "usage: python %prog minDist rdsfile outfile [--sameChrom] [--splices] [--maxDist bp] [--verbose] [--cache cachepages]"
30 parser = optparse.OptionParser(usage=usage)
31 parser.add_option("--sameChrom", action="store_true", dest="sameChromOnly")
32 parser.add_option("--splices", action="store_true", dest="doSplices")
33 parser.add_option("--verbose", action="store_true", dest="doVerbose")
34 parser.add_option("--maxDist", type="int", dest="maxDist")
35 parser.add_option("--cache", type="int", dest="cachePages")
36 parser.set_defaults(sameChromOnly=False, doSplices=False, doVerbose=False, maxDist=1000000000, cachePages=None)
37 (options, args) = parser.parse_args(argv[1:])
43 minDist = int(args[0])
47 distalPairs(minDist, rdsfile, outfilename, options.sameChromOnly, options.doSplices, options.doVerbose, options.maxDist, options.cachePages)
50 def makeParser(usage=""):
51 parser = optparse.OptionParser(usage=usage)
52 parser.add_option("--sameChrom", action="store_true", dest="sameChromOnly")
53 parser.add_option("--splices", action="store_true", dest="doSplices")
54 parser.add_option("--verbose", action="store_true", dest="doVerbose")
55 parser.add_option("--maxDist", type="int", dest="maxDist")
56 parser.add_option("--cache", type="int", dest="cachePages")
58 configParser = getConfigParser()
59 section = "distalPairs"
60 sameChromOnly = getConfigBoolOption(configParser, section, "sameChromOnly", False)
61 doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
62 doVerbose = getConfigBoolOption(configParser, section, "doVerbose", False)
63 maxDist = getConfigIntOption(configParser, section, "maxDist", 1000000000)
64 cachePages = getConfigOption(configParser, section, "cachePages", None)
66 parser.set_defaults(sameChromOnly=sameChromOnly, doSplices=doSplices, doVerbose=doVerbose, maxDist=maxDist, cachePages=cachePages)
71 def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=False, doVerbose=False, maxDist=1000000000, cachePages=None):
72 if cachePages is not None:
78 RDS = ReadDataset.ReadDataset(rdsfile, verbose = True, cache=doCache)
79 if not RDS.hasIndex():
80 print "Will not attempt to run on unIndexed dataset - please index with rdsmetadata.py and rerun"
83 if cachePages > RDS.getDefaultCacheSize():
84 RDS.setDBcache(cachePages)
89 print "getting splices"
90 splicesDict = RDS.getSplicesDict(withChrom=True, withPairID=True, readIDDict=True, splitRead=True)
93 print "getting uniq reads"
94 uniqDict = RDS.getReadsDict(withChrom=True, withPairID=True, doUniqs=True, readIDDict=True)
98 for readID in splicesDict:
99 theRead = splicesDict[readID]
103 uniqDict[readID].append(read0)
105 if len(theRead) == 4:
108 uniqDict[readID] = [read0,read2]
111 print len(uniqDict), time.ctime()
113 outfile = open(outfilename,"w")
118 for readID in uniqDict:
119 readList = uniqDict[readID]
120 if len(readList) == 2:
122 start1 = readList[0]["start"]
123 sense1 = readList[0]["sense"]
124 chrom1 = readList[0]["chrom"]
125 start2 = readList[1]["start"]
126 sense2 = readList[1]["sense"]
127 chrom2 = readList[1]["chrom"]
134 outline = "%s\t%s\t%d\t%s\t%s\t%d\t%s" % (readID, chrom1, start1, sense1, chrom2, start2, sense2)
135 outfile.write(outline + "\n")
137 print diffChrom, outline
139 dist = abs(start1 - start2)
141 if minDist < dist < maxDist:
143 outline = "%s\t%s\t%d\t%s\t%d\t%s\t%d" % (readID, chrom1, start1, sense1, start2, sense2, dist)
144 outfile.write(outline + "\n")
146 print distal, outline
148 outfile.write("#distal: %d\tdiffChrom: %d\tpossible: %d\n" % (distal, diffChrom, total))
153 outfile.write("#distal %2.2f pct\tdiffChrom %2.2f pct\n" % ((100. * distal/total), (100. * diffChrom/total)))
155 print "distal: %d\tdiffChrom: %d\tpossible: %d" % (distal, diffChrom, int(total))
156 print "distal: %2.2f pct\tdiffChrom: %2.2f pct\n" % ((100. * distal/total), (100. * diffChrom/total))
160 if __name__ == "__main__":