X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=getallNRSE.py;fp=getallNRSE.py;h=c02bab11ccfcc53c059d4796b1029d796f55bae8;hp=c2e639f5c09b1ad4660256f49fe3d4aab7cc9bab;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/getallNRSE.py b/getallNRSE.py index c2e639f..c02bab1 100755 --- a/getallNRSE.py +++ b/getallNRSE.py @@ -9,11 +9,12 @@ except: from cistematic.core import complement from cistematic.core.motif import Motif from cistematic.genomes import Genome -from commoncode import readDataset, getMergedRegions, findPeak +from commoncode import getMergedRegions, findPeak, getConfigParser, getConfigOption, getConfigBoolOption, getConfigFloatOption +import ReadDataset from pylab import * import matplotlib -print '%s: version 3.4' % sys.argv[0] +print 'getallNRSE: version 3.5' def main(argv=None): if not argv: @@ -21,21 +22,7 @@ def main(argv=None): usage = "usage: python %s genome regionfile siteOutfile [options]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--dataset", dest="chipfilename") - parser.add_option("--min", type="float", dest="minHeight") - parser.add_option("--minfraction", type="float", dest="minFraction") - parser.add_option("--plot", dest="plotname") - parser.add_option("--cache", action="store_true", dest="doCache") - parser.add_option("--raw", action="store_false", dest="normalize") - parser.add_option("--verbose", action="store_true", dest="doVerbose") - parser.add_option("--markov1", action="store_true", dest="doMarkov1") - parser.add_option("--peakdist", type="int", dest="maxpeakdist") - parser.add_option("--fullOnly", action="store_true", dest="fullOnly") - parser.add_option("--motifdir", dest="motifDir") - parser.set_defaults(chipfilename="", minHeight=-2., minFraction=-2., plotname="", - doCache=False, normalize=True, doVerbose=False, doMarkov1=False, - maxpeakdist=None, fullOnly=False, motifDir="./") + parser = getParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -53,6 +40,42 @@ def main(argv=None): options.motifDir) +def getParser(usage): + + parser = optparse.OptionParser(usage=usage) + parser.add_option("--dataset", dest="chipfilename") + parser.add_option("--min", type="float", dest="minHeight") + parser.add_option("--minfraction", type="float", dest="minFraction") + parser.add_option("--plot", dest="plotname") + parser.add_option("--cache", action="store_true", dest="doCache") + parser.add_option("--raw", action="store_false", dest="normalize") + parser.add_option("--verbose", action="store_true", dest="doVerbose") + parser.add_option("--markov1", action="store_true", dest="doMarkov1") + parser.add_option("--peakdist", type="int", dest="maxpeakdist") + parser.add_option("--fullOnly", action="store_true", dest="fullOnly") + parser.add_option("--motifdir", dest="motifDir") + + configParser = getConfigParser() + section = "getallNRSE" + chipfilename = getConfigOption(configParser, section, "chipfilename", "") + minHeight = getConfigFloatOption(configParser, section, "minHeight", -2.) + minFraction = getConfigFloatOption(configParser, section, "minFraction", -2.) + plotname = getConfigOption(configParser, section, "plotname", "") + doCache = getConfigBoolOption(configParser, section, "doCache", False) + normalize = getConfigBoolOption(configParser, section, "normalize", True) + doVerbose = getConfigBoolOption(configParser, section, "doVerbose", False) + doMarkov1 = getConfigBoolOption(configParser, section, "doMarkov1", False) + maxpeakdist = getConfigOption(configParser, section, "maxpeakdist", None) + fullOnly = getConfigBoolOption(configParser, section, "fullOnly", False) + motifDir = getConfigOption(configParser, section, "motifDir", "./") + + parser.set_defaults(chipfilename=chipfilename, minHeight=minHeight, minFraction=minFraction, plotname=plotname, + doCache=doCache, normalize=normalize, doVerbose=doVerbose, doMarkov1=doMarkov1, + maxpeakdist=maxpeakdist, fullOnly=fullOnly, motifDir=motifDir) + + return parser + + def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2., minFraction=-2., plotname="", doCache=False, normalize=True, doVerbose=False, doMarkov1=False, maxpeakdist=None, fullOnly=False, @@ -69,7 +92,7 @@ def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2., doDataset = False normalizeBy = 1 if chipfilename: - hitRDS = readDataset(chipfilename, verbose=doVerbose, cache=doCache) + hitRDS = ReadDataset.ReadDataset(chipfilename, verbose=doVerbose, cache=doCache) doDataset = True if normalize: normalizeBy = len(hitRDS) / 1000000. @@ -107,8 +130,8 @@ def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2., if "rand" in rchrom or "M" in rchrom or "hap" in rchrom: continue - for (start, stop, length) in regions[rchrom]: - regionList.append((rchrom, start, length)) + for region in regions[rchrom]: + regionList.append((rchrom, region.start, region.length)) notFoundIndex = 0 currentChrom = "" @@ -120,12 +143,14 @@ def getallNRSE(genome, infilename, outfilename, chipfilename="", minHeight=-2., hitDict = hitRDS.getReadsDict(chrom=fullchrom, withWeight=True, doMulti=True) currentChrom = rchrom - (topPos, numHits, smoothArray, numPlus) = findPeak(hitDict[rchrom], start, length, doWeight=True) + peak = findPeak(hitDict[rchrom], start, length, doWeight=True) + topPos = peak.topPos + numHits = peak.numHits if len(topPos) == 0: print "topPos error" peakpos = topPos[0] - peakscore = smoothArray[peakpos] + peakscore = peak.smoothArray[peakpos] if peakscore == 0.: peakscore = -1.