2 Based on shell script provided by Ali.
7 from Erange import chksnp, getSNPs, getSNPGeneInfo, analyzego, getNovelSNPs, makeSNPtrack, rnaAToIFilter
8 from Erange.commoncode import countDuplicatesInList
15 usage = "usage: python %prog dbfile snpsfile genome rpkmfile [options]"
17 parser = optparse.OptionParser(usage=usage)
18 parser.add_option("--goprefix", dest="prefix")
19 parser.add_option("--novelsnp", dest="novelsnpoutfilename")
20 parser.add_option("--bedfile", dest="bedoutfilename")
21 parser.add_option("--cache", type="int", dest="cachePages")
22 parser.add_option("--snpDB", action="append", dest="snpDBList",
23 help="additional snp db files to check will be searched in order given")
24 parser.set_defaults(prefix=None, novelsnpoutfilename=None, bedoutfilename=None, cachePages=None, snpDBList=[])
25 (options, args) = parser.parse_args(argv[1:])
34 rpkmfilename = args[3]
36 if options.cachePages is not None:
42 snpList = getSNPs.getSNPs(hitfile, 3, 0.25, doCache, options.cachePages, forceChr=True)
44 # check for existing SNPs
46 for dbFileName in options.snpDBList:
47 dbList.append(dbFileName)
49 snpPropertiesList = chksnp.chkSNP(dbList, snpList, options.cachePages)
51 # get the neighboring genes
52 geneInfoList = getSNPGeneInfo.getSNPGeneInfo(genome, snpPropertiesList, rpkmfilename, doCache, flankBP=10000)
54 # filter out for the A-to-I events in the same direction as the genes
55 filteredSNPs = rnaAToIFilter.rnaAToIFilter(geneInfoList)
57 # count the number of different bases that have been called for each gene
58 # pick a set of genes with a high number of sites (here 5)
59 geneList = getGenesWithMultipleSNPs(filteredSNPs, minCount=5)
61 if options.prefix is not None:
62 analyzego.analyzeGO(genome, geneList, options.prefix, translateGene=True, fieldID=1)
64 if options.novelsnpoutfilename is not None:
65 getNovelSNPs.writeNovelSNPFile(genome, filteredSNPs, options.novelsnpoutfilename)
67 if options.bedoutfilename is not None:
68 makeSNPtrack.writeSNPsBedfile(filteredSNPs, "rnaEdit_sample", options.bedoutfilename)
71 def getGenesWithMultipleSNPs(snpList, minCount=1):
73 for snpEntry in snpList:
74 geneList.append(snpEntry[11])
76 duplicateCountList = countDuplicatesInList(geneList)
79 for (gene, count) in duplicateCountList:
86 if __name__ == "__main__":