X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fgenomes%2Fcbriggsae.py;h=00468edd08ada512ed641255bc130fb0f4875b8d;hp=b913728a739bd5a43fb8c5a7447f272dfb65678c;hb=HEAD;hpb=4ad5495359e4322da39868020a7398676261679e diff --git a/cistematic/genomes/cbriggsae.py b/cistematic/genomes/cbriggsae.py index b913728..00468ed 100644 --- a/cistematic/genomes/cbriggsae.py +++ b/cistematic/genomes/cbriggsae.py @@ -1,7 +1,7 @@ ########################################################################### # # # C O P Y R I G H T N O T I C E # -# Copyright (c) 2003-10 by: # +# Copyright (c) 2003-13 by: # # * California Institute of Technology # # # # All Rights Reserved. # @@ -33,46 +33,39 @@ from cistematic.genomes import Genome from os import environ if environ.get("CISTEMATIC_ROOT"): - cisRoot = environ.get("CISTEMATIC_ROOT") + cisRoot = environ.get("CISTEMATIC_ROOT") else: cisRoot = "/proj/genome" geneDB = "%s/C_briggsae/cbriggsae.genedb" % cisRoot -def loadChromosome(db, chromPath, chromOutPath): - seqArray = [] - seqLen = 0 - cbGenome = Genome("cbriggsae", dbFile=db) - inFile = open(chromPath, "r") - header = inFile.readline() - while header != "": - seqArray = [] - seqLen = 0 - chromID = header.strip()[1:] - currentLine = inFile.readline() - while currentLine != "" and currentLine[0] != ">": - lineSeq = currentLine.strip() - seqLen += len(lineSeq) - seqArray.append(lineSeq) - currentLine = inFile.readline() +def buildCbriggsaeDB(db=geneDB): + gffPath = "%s/download/briggsae_25.WS132.gff" % cisRoot + chromoPath = "%s/download/briggsae_25.fa" % cisRoot + chromoOutPath = "/C_briggsae/" - seq = string.join(seqArray, "") - if seqLen < 900000: - print "Added contig %s to database" % chromID - cbGenome.addSequence(("cbriggsae", chromID), seq, "chromosome", str(seqLen)) - cbGenome.addChromosomeEntry(chromID, chromID, "db") - else: - outFileName = "%s%s.bin" % (chromOutPath, chromID) - outFile = open("%s%s" % (cisRoot, outFileName), "w") - outFile.write(seq) - outFile.close() - print "Added contig file %s to database" % outFileName - cbGenome.addChromosomeEntry(chromID, outFileName, "file") + print "Creating database %s" % db + createDBFile(db) - header = currentLine + print "Adding gene entries" + loadGeneEntries(db, gffPath) - inFile.close() + print "Adding feature entries" + loadFeatureEntries(db, gffPath) + + print "Loading genomic sequence" + loadChromosome(db, chromoPath, chromoOutPath) + + print "Creating Indices" + createDBindices(db) + + print "Finished creating database %s" % db + + +def createDBFile(db): + cbGenome = Genome("cbriggsae", version="CB25", dbFile=db) + cbGenome.createGeneDB(db) def loadGeneEntries(db, gffFile): @@ -125,7 +118,7 @@ def loadFeatureEntries(db, gffFile): featureEntries = [] seenFeatures = {} featureTranslation = {"coding_exon": "CDS", - "three_prime_UTR": "3UTR", + "three_prime_UTR": "3UTR", "five_prime_UTR": "5UTR" } @@ -161,34 +154,41 @@ def loadFeatureEntries(db, gffFile): cbGenome.addFeatureEntryBatch(featureEntries) -def createDBFile(db): - cbGenome = Genome("cbriggsae", version="CB25", dbFile=db) - cbGenome.createGeneDB(db) - - -def createDBindices(db): - cbGenome = Genome("cbriggsae", version="CB25", dbFile=db) - cbGenome.createIndices() - - -def buildCbriggsaeDB(db=geneDB): - gffPath = "%s/download/briggsae_25.WS132.gff" % cisRoot - chromoPath = "%s/download/briggsae_25.fa" % cisRoot - chromoOutPath = "/C_briggsae/" - - print "Creating database %s" % db - createDBFile(db) +def loadChromosome(db, chromPath, chromOutPath): + seqArray = [] + seqLen = 0 + cbGenome = Genome("cbriggsae", dbFile=db) + inFile = open(chromPath, "r") + header = inFile.readline() + while header != "": + seqArray = [] + seqLen = 0 + chromID = header.strip()[1:] + currentLine = inFile.readline() + while currentLine != "" and currentLine[0] != ">": + lineSeq = currentLine.strip() + seqLen += len(lineSeq) + seqArray.append(lineSeq) + currentLine = inFile.readline() - print "Adding gene entries" - loadGeneEntries(db, gffPath) + seq = string.join(seqArray, "") + if seqLen < 900000: + print "Added contig %s to database" % chromID + cbGenome.addSequence(("cbriggsae", chromID), seq, "chromosome", str(seqLen)) + cbGenome.addChromosomeEntry(chromID, chromID, "db") + else: + outFileName = "%s%s.bin" % (chromOutPath, chromID) + outFile = open("%s%s" % (cisRoot, outFileName), "w") + outFile.write(seq) + outFile.close() + print "Added contig file %s to database" % outFileName + cbGenome.addChromosomeEntry(chromID, outFileName, "file") - print "Adding feature entries" - loadFeatureEntries(db, gffPath) + header = currentLine - print "Loading genomic sequence" - loadChromosome(db, chromoPath, chromoOutPath) + inFile.close() - print "Creating Indices" - createDBindices(db) - print "Finished creating database %s" % db \ No newline at end of file +def createDBindices(db): + cbGenome = Genome("cbriggsae", version="CB25", dbFile=db) + cbGenome.createIndices()