X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=getallsites.py;fp=getallsites.py;h=4d6707615d9d7ea95c5caf64a48c532e934c7e5f;hp=39335e481f3b30be49f46f4889bf2435cfa9b109;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/getallsites.py b/getallsites.py index 39335e4..4d67076 100755 --- a/getallsites.py +++ b/getallsites.py @@ -8,9 +8,10 @@ except: from cistematic.core.motif import Motif, hasMotifExtension from cistematic.core import complement from cistematic.genomes import Genome -from commoncode import readDataset, getMergedRegions, findPeak +from commoncode import getMergedRegions, findPeak, getConfigParser, getConfigOption, getConfigBoolOption +import ReadDataset -print "%prog: version 2.4" +print "getallsites: version 2.5" def main(argv=None): @@ -19,21 +20,7 @@ def main(argv=None): usage = "usage: python %prog genome motifFile motThreshold regionfile siteOutfile [options]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--dataset", dest="chipfilename") - parser.add_option("--cache", action="store_true", dest="doCache") - parser.add_option("--best", action="store_true", dest="bestOnly", - help="only report the best position for each region") - parser.add_option("--usepeak", action="store_true", dest="usePeak", - help="use peak position and height from regions file") - parser.add_option("--printseq", action="store_true", dest="printSeq") - parser.add_option("--nomerge", action="store_true", dest="noMerge") - parser.add_option("--markov1", action="store_true", dest="doMarkov1") - parser.add_option("--rank", type="int", dest="useRank", - help="return region ranking based on peak height ranking [requires --usepeak]") - parser.set_defaults(chipfilename="", doCache=False, bestOnly=False, usePeak=False, - printSeq=False, doMarkov1=False, useRank=False, noMerge=False) - + parser = getParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 5: @@ -51,6 +38,37 @@ def main(argv=None): options.useRank, options.noMerge) +def getParser(usage): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--dataset", dest="chipfilename") + parser.add_option("--cache", action="store_true", dest="doCache") + parser.add_option("--best", action="store_true", dest="bestOnly", + help="only report the best position for each region") + parser.add_option("--usepeak", action="store_true", dest="usePeak", + help="use peak position and height from regions file") + parser.add_option("--printseq", action="store_true", dest="printSeq") + parser.add_option("--nomerge", action="store_true", dest="noMerge") + parser.add_option("--markov1", action="store_true", dest="doMarkov1") + parser.add_option("--rank", type="int", dest="useRank", + help="return region ranking based on peak height ranking [requires --usepeak]") + + configParser = getConfigParser() + section = "getallsites" + chipfilename = getConfigOption(configParser, section, "chipfilename", "") + doCache = getConfigBoolOption(configParser, section, "doCache", False) + bestOnly = getConfigBoolOption(configParser, section, "bestOnly", False) + usePeak = getConfigBoolOption(configParser, section, "usePeak", False) + printSeq = getConfigBoolOption(configParser, section, "printSeq", False) + doMarkov1 = getConfigBoolOption(configParser, section, "doMarkov1", False) + useRank = getConfigBoolOption(configParser, section, "useRank", False) + noMerge = getConfigBoolOption(configParser, section, "noMerge", False) + + parser.set_defaults(chipfilename=chipfilename, doCache=doCache, bestOnly=bestOnly, usePeak=usePeak, + printSeq=printSeq, doMarkov1=doMarkov1, useRank=useRank, noMerge=noMerge) + + return parser + + def getallsites(genome, motfilename, motThreshold, infilename, outfilename, chipfilename="", doCache=False, bestOnly=False, usePeak=False, printSeq=False, doMarkov1=False, useRank=False, noMerge=False): @@ -90,7 +108,7 @@ def getallsites(genome, motfilename, motThreshold, infilename, outfilename, chip doRDS = True if doRDS: - hitRDS = readDataset(chipfilename, verbose = True, cache=doCache) + hitRDS = ReadDataset.ReadDataset(chipfilename, verbose = True, cache=doCache) outfile = open(outfilename, "w") @@ -101,11 +119,11 @@ def getallsites(genome, motfilename, motThreshold, infilename, outfilename, chip continue if usePeak: - for (start, stop, length, peakPos, peakHeight) in regions[chrom]: - regionList.append((peakHeight, chrom, start, length, peakPos)) + for region in regions[chrom]: + regionList.append((region.peakHeight, chrom, region.start, region.length, region.peakPos)) else: - for (start, stop, length) in regions[chrom]: - regionList.append((chrom, start, length)) + for region in regions[chrom]: + regionList.append((chrom, region.start, region.length)) if usePeak: regionList.sort()