3 import chksnp, getSNPs, getSNPGeneInfo, analyzego, getNovelSNPs, makeSNPtrack, rnaAToIFilter
4 from commoncode import countDuplicatesInList, getConfigParser, getConfigOption
11 usage = "usage: python %prog dbfile snpsfile genome rpkmfile [options]"
13 parser = getParser(usage)
14 (options, args) = parser.parse_args(argv[1:])
23 rpkmfilename = args[3]
25 rnaEditing(dbfile, hitfile, genome, rpkmfilename, options)
29 parser = optparse.OptionParser(usage=usage)
30 parser.add_option("--goprefix", dest="prefix")
31 parser.add_option("--novelsnp", dest="novelsnpoutfilename")
32 parser.add_option("--bedfile", dest="bedoutfilename")
33 parser.add_option("--cache", type="int", dest="cachePages")
34 parser.add_option("--snpDB", action="append", dest="snpDBList",
35 help="additional snp db files to check will be searched in order given")
37 configParser = getConfigParser()
38 section = "rnaEditing"
39 prefix = getConfigOption(configParser, section, "prefix", None)
40 novelsnpoutfilename = getConfigOption(configParser, section, "novelsnpoutfilename", None)
41 bedoutfilename = getConfigOption(configParser, section, "bedoutfilename", None)
42 cachePages = getConfigOption(configParser, section, "cachePages", None)
44 parser.set_defaults(prefix=prefix, novelsnpoutfilename=novelsnpoutfilename, bedoutfilename=bedoutfilename,
45 cachePages=cachePages, snpDBList=[])
50 def rnaEditing(dbfile, hitfile, genome, rpkmfilename, options):
51 if options.cachePages is not None:
57 snpList = getSNPs.getSNPs(hitfile, 3, 0.25, doCache, options.cachePages, forceChr=True)
59 # check for existing SNPs
61 for dbFileName in options.snpDBList:
62 dbList.append(dbFileName)
64 snpPropertiesList = chksnp.chkSNP(dbList, snpList, options.cachePages)
66 # get the neighboring genes
67 geneInfoList = getSNPGeneInfo.getSNPGeneInfo(genome, snpPropertiesList, rpkmfilename, doCache, flankBP=10000)
69 # filter out for the A-to-I events in the same direction as the genes
70 filteredSNPs = rnaAToIFilter.rnaAToIFilter(geneInfoList)
72 # count the number of different bases that have been called for each gene
73 # pick a set of genes with a high number of sites (here 5)
74 geneList = getGenesWithMultipleSNPs(filteredSNPs, minCount=5)
76 if options.prefix is not None:
77 analyzego.analyzeGO(genome, geneList, options.prefix, translateGene=True, fieldID=1)
79 if options.novelsnpoutfilename is not None:
80 getNovelSNPs.writeNovelSNPFile(genome, filteredSNPs, options.novelsnpoutfilename)
82 if options.bedoutfilename is not None:
83 makeSNPtrack.writeSNPsBedfile(filteredSNPs, "rnaEdit_sample", options.bedoutfilename)
86 def getGenesWithMultipleSNPs(snpList, minCount=1):
88 for snpEntry in snpList:
89 geneList.append(snpEntry[11])
91 duplicateCountList = countDuplicatesInList(geneList)
94 for (gene, count) in duplicateCountList:
101 if __name__ == "__main__":