5 # This script attempts to annotate the novel sncs/snps from the snp summary file
6 # Written by: Wendy Lee
7 # Written on: Aug 7th, 2008
11 from cistematic.genomes import Genome
12 from commoncode import writeLog
14 print "%prog: version 1.5"
27 usage = "usage: python %s genome snpsfile nondbsnp_geneinfo_outfile" % argv[0]
37 getNovelSNPsFromFile(genome, snpfile, outfilename)
40 def getNovelSNPsFromFile(genome, snpfile, outfilename):
41 infile = file(snpfile, "r")
42 writeNovelSNPFile(genome, infile, outfilename)
43 writeLog("snp.log", sys.argv[0], "outputfile: %s" % outfilename)
47 def writeNovelSNPFile(genome, snpPropertiesList, outfilename):
50 outfile = open(outfilename, "w")
51 outfile.write("#Sl\tCl\tchrom\tmis pos\t\tmatch\tuniq_mis\ttot_mis\tbase_chg\tknown_snp\tfunction\tgene\tgeneId\trpkm\n")
52 for line in snpPropertiesList:
53 if doNotProcessLine(line):
56 outString = getNovelSNPInfo(genome, line, hg)
58 outfile.write(outString)
60 outfile.write("%s\n" % outString)
65 def doNotProcessLine(line):
69 def getNovelSNPInfo(genome, snpEntry, hg):
70 fields = snpEntry.split()
71 #TODO: refactor naming. is fields[8] rpkm?
72 if fields[8].find("N\A") == -1:
77 snc_start = int(fields[3])
78 featuresList = hg.getGeneFeatures((genome, gid))
80 for (ftype, chromosome, start, stop, orientation) in featuresList:
81 if int(start) <= snc_start <= int(stop):
85 for i in range (0, 9):
86 snpInfo = string.join([snpInfo, fields[i]], "\t")
88 snpInfo = string.join([snpInfo, func], "\t")
89 for i in range (10, 13):
90 snpInfo = string.join([snpInfo, fields[i]], "\t")
95 if __name__ == "__main__":