X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fgenomes%2Fxtropicalis.py;fp=cistematic%2Fgenomes%2Fxtropicalis.py;h=bda0dbef0cd472f9573a271124afac7bddad0023;hp=eb37500e68a5cfb6f54cf8d054ad47397d2c312b;hb=4522d28194e3d1c048bced84038760d394038285;hpb=4ad5495359e4322da39868020a7398676261679e diff --git a/cistematic/genomes/xtropicalis.py b/cistematic/genomes/xtropicalis.py index eb37500..bda0dbe 100644 --- a/cistematic/genomes/xtropicalis.py +++ b/cistematic/genomes/xtropicalis.py @@ -1,7 +1,7 @@ ########################################################################### # # # C O P Y R I G H T N O T I C E # -# Copyright (c) 2003-10 by: # +# Copyright (c) 2003-13 by: # # * California Institute of Technology # # # # All Rights Reserved. # @@ -33,51 +33,44 @@ from cistematic.genomes import Genome from os import environ if environ.get("CISTEMATIC_ROOT"): - cisRoot = environ.get("CISTEMATIC_ROOT") + cisRoot = environ.get("CISTEMATIC_ROOT") else: cisRoot = "/proj/genome" geneDB = "%s/X_tropicalis/xtropicalis.genedb" % cisRoot -def loadChromosome(db, chromPath, chromOutPath): - seqArray = [] - seqLen = 0 - xtGenome = Genome("xtropicalis", dbFile=db) - inFile = open(chromPath, "r") - header = inFile.readline() - while header != "": - seqArray = [] - seqLen = 0 - chromID = header.strip()[1:] - currentLine = inFile.readline() - while currentLine != "" and currentLine[0] != ">": - lineSeq = currentLine.strip() - seqLen += len(lineSeq) - seqArray.append(lineSeq) - currentLine = inFile.readline() +def buildXtropicalisDB(db=geneDB): + genePath = "%s/download/xt1/jgiFilteredModels.txt" % cisRoot + chromoPath = "%s/download/xt1/xenTro1.softmask2.fa" % cisRoot + chromoOutPath = "/X_tropicalis/" - seq = string.join(seqArray, "") - if seqLen < 500000: - print "Added contig %s to database" % chromID - xtGenome.addSequence(("xtropicalis", chromID), seq, "chromosome", str(seqLen)) - xtGenome.addChromosomeEntry(chromID, chromID, "db") - else: - outFileName = "%s%s.bin" % (chromOutPath, chromID) - outFile = open("%s%s" % (cisRoot, outFileName), "w") - outFile.write(seq) - outFile.close() - print "Added contig file %s to database" % outFileName - xtGenome.addChromosomeEntry(chromID, outFileName, "file") + print "Creating database %s" % db + createDBFile(db) - header = currentLine + print "Adding gene entries" + loadGeneEntries(db, genePath) - inFile.close() + print "Adding gene features" + loadGeneFeatures(db, genePath) + + print "Loading sequences" + loadChromosome(db, chromoPath, chromoOutPath) + + print "Creating Indices" + createDBindices(db) + + print "Finished creating database %s" % db + + +def createDBFile(db): + xtGenome = Genome("xtropicalis", dbFile=db) + xtGenome.createGeneDB(db) def loadGeneEntries(db, gFile): - """ FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES - """ + #TODO: - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES + geneEntries = [] xtGenome = Genome("xtropicalis", dbFile=db) geneFile = open(gFile, "r") @@ -233,34 +226,41 @@ def loadGeneOntology(db, goPath, goDefPath, annotPath): xtGenome.addGoInfoBatch(goArray) -def createDBFile(db): - xtGenome = Genome("xtropicalis", dbFile=db) - xtGenome.createGeneDB(db) - - -def createDBindices(db): +def loadChromosome(db, chromPath, chromOutPath): + seqArray = [] + seqLen = 0 xtGenome = Genome("xtropicalis", dbFile=db) - xtGenome.createIndices() - - -def buildXtropicalisDB(db=geneDB): - genePath = "%s/download/xt1/jgiFilteredModels.txt" % cisRoot - chromoPath = "%s/download/xt1/xenTro1.softmask2.fa" % cisRoot - chromoOutPath = "/X_tropicalis/" - - print "Creating database %s" % db - createDBFile(db) + inFile = open(chromPath, "r") + header = inFile.readline() + while header != "": + seqArray = [] + seqLen = 0 + chromID = header.strip()[1:] + currentLine = inFile.readline() + while currentLine != "" and currentLine[0] != ">": + lineSeq = currentLine.strip() + seqLen += len(lineSeq) + seqArray.append(lineSeq) + currentLine = inFile.readline() - print "Adding gene entries" - loadGeneEntries(db, genePath) + seq = string.join(seqArray, "") + if seqLen < 500000: + print "Added contig %s to database" % chromID + xtGenome.addSequence(("xtropicalis", chromID), seq, "chromosome", str(seqLen)) + xtGenome.addChromosomeEntry(chromID, chromID, "db") + else: + outFileName = "%s%s.bin" % (chromOutPath, chromID) + outFile = open("%s%s" % (cisRoot, outFileName), "w") + outFile.write(seq) + outFile.close() + print "Added contig file %s to database" % outFileName + xtGenome.addChromosomeEntry(chromID, outFileName, "file") - print "Adding gene features" - loadGeneFeatures(db, genePath) + header = currentLine - print "Loading sequences" - loadChromosome(db, chromoPath, chromoOutPath) + inFile.close() - print "Creating Indices" - createDBindices(db) - print "Finished creating database %s" % db \ No newline at end of file +def createDBindices(db): + xtGenome = Genome("xtropicalis", dbFile=db) + xtGenome.createIndices()