except:
pass
-from commoncode import readDataset, getMergedRegions, findPeak, getLocusByChromDict
+import sys
+import optparse
+from commoncode import getMergedRegions, findPeak, getLocusByChromDict
+import ReadDataset
from cistematic.genomes import Genome
-from cistematic.core.geneinfo import geneinfoDB
-import sys, optparse
+from commoncode import getGeneInfoDict, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
-print "%prog: version 2.0"
+
+print "geneLocusPeaks: version 2.1"
def main(argv=None):
if not argv:
usage = "usage: python %prog genome rdsfile outfilename [--up upstream] [--down downstream] [--regions acceptfile] [--raw]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--up", type="int", dest="upstream")
- parser.add_option("--down", type="int", dest="downstream")
- parser.add_option("--regions", dest="acceptfile")
- parser.add_option("--raw", action="store_false", dest="normalize")
- parser.add_option("--cache", action="store_true", dest="doCache")
- parser.set_defaults(upstream=0, downstream=0, acceptfile="", normalize=True, doCache=False)
+ parser = makeParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 3:
geneLocusPeaks(genome, hitfile, outfilename, options.upstream, options.downstream, options.acceptfile, options.normalize, options.doCache)
+def makeParser(usage=""):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--up", type="int", dest="upstream")
+ parser.add_option("--down", type="int", dest="downstream")
+ parser.add_option("--regions", dest="acceptfile")
+ parser.add_option("--raw", action="store_false", dest="normalize")
+ parser.add_option("--cache", action="store_true", dest="doCache")
+
+ configParser = getConfigParser()
+ section = "geneLocusPeaks"
+ upstream = getConfigIntOption(configParser, section, "upstream", 0)
+ downstream = getConfigIntOption(configParser, section, "downstream", 0)
+ acceptfile = getConfigOption(configParser, section, "acceptfile", "")
+ normalize = getConfigBoolOption(configParser, section, "normalize", True)
+ doCache = getConfigBoolOption(configParser, section, "doCache", False)
+
+ parser.set_defaults(upstream=upstream, downstream=downstream, acceptfile=acceptfile, normalize=normalize, doCache=doCache)
+
+ return parser
+
+
def geneLocusPeaks(genome, hitfile, outfilename, upstream=0, downstream=0, acceptfile="", normalize=True, doCache=False):
acceptDict = {}
print "upstream = %d downstream = %d" % (upstream, downstream)
- hitRDS = readDataset(hitfile, verbose = True, cache=doCache)
+ hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache)
readlen = hitRDS.getReadSize()
normalizationFactor = 1.0
if normalize:
hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
hg = Genome(genome)
- idb = geneinfoDB(cache=True)
-
gidCount = {}
gidPos = {}
- geneinfoDict = idb.getallGeneInfo(genome)
+ geneinfoDict = getGeneInfoDict(genome, cache=True)
locusByChromDict = getLocusByChromDict(hg, upstream, downstream, useCDS=True, additionalRegionsDict=acceptDict)
gidList = hg.allGIDs()
gidList.sort()
for chrom in acceptDict:
- for (label, start, stop, length) in acceptDict[chrom]:
- if label not in gidList:
- gidList.append(label)
+ for region in acceptDict[chrom]:
+ if region.label not in gidList:
+ gidList.append(region.label)
for gid in gidList:
gidCount[gid] = 0
print chrom
for (start, stop, gid, glen) in locusByChromDict[chrom]:
gidCount[gid] = 0.
- (topPos, numHits, smoothArray, numPlus) = findPeak(hitDict[chrom], start, glen, readlen)
- if len(topPos) > 0:
- gidCount[gid] = smoothArray[topPos[0]]
- gidPos[gid] = (chrom, start + topPos[0])
+ peak = findPeak(hitDict[chrom], start, glen, readlen)
+ if len(peak.topPos) > 0:
+ gidCount[gid] = peak.smoothArray[peak.topPos[0]]
+ gidPos[gid] = (chrom, start + peak.topPos[0])
else:
gidPos[gid] = (chrom, start)