X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fgenomes%2Fcremanei.py;h=3bc6b8f6a0dd9c7ab6aa61a8ce3bf8e00b21fd5c;hp=86e5991b112b69a095760b5eebf056de0621c0a1;hb=HEAD;hpb=4ad5495359e4322da39868020a7398676261679e diff --git a/cistematic/genomes/cremanei.py b/cistematic/genomes/cremanei.py index 86e5991..3bc6b8f 100644 --- a/cistematic/genomes/cremanei.py +++ b/cistematic/genomes/cremanei.py @@ -1,7 +1,7 @@ ########################################################################### # # # C O P Y R I G H T N O T I C E # -# Copyright (c) 2003-10 by: # +# Copyright (c) 2003-13 by: # # * California Institute of Technology # # # # All Rights Reserved. # @@ -33,51 +33,39 @@ from cistematic.genomes import Genome from os import environ if environ.get("CISTEMATIC_ROOT"): - cisRoot = environ.get("CISTEMATIC_ROOT") + cisRoot = environ.get("CISTEMATIC_ROOT") else: cisRoot = "/proj/genome" geneDB = "%s/C_remanei/cremanei.genedb" % cisRoot -def loadChromosomes(db, inPath, chromOutPath): - crGenome = Genome("cremanei", dbFile=db) - scontigList = os.listdir(inPath) - for scontig in scontigList: - seq = '' - seqArray = [] - seqLen = 0 - inFile = open("%s/%s" % (inPath, scontig), "r") - index = 0 - header = inFile.readline() - chromID = header.strip()[1:] - while header != "": - seqArray = [] - seqLen = 0 - currentLine = inFile.readline() - while currentLine != "" and currentLine[0] != ">": - lineSeq = currentLine.strip() - seqLen += len(lineSeq) - seqArray.append(lineSeq) - currentLine = inFile.readline() +def buildCremaneiDB(db=geneDB): + gffPath = "%s/download/cr01_wu_merged_gff" % cisRoot # using 20050824 version + chromoPath = "%s/download/sctg_masked_seqs/seqs" % cisRoot + chromoOutPath = "/C_remanei/" - seq = string.join(seqArray, "") - if seqLen < 100000: - print "Added contig %s to database" % chromID - crGenome.addSequence(("cremanei", chromID), seq, "chromosome", str(seqLen)) - crGenome.addChromosomeEntry(chromID, chromID, "db") - else: - outFileName = "%s%s.bin" % (chromOutPath, chromID) - outFile = open("%s%s" % (cisRoot, outFileName), "w") - outFile.write(seq) - outFile.close() - print "Added contig file %s to database" % outFileName - crGenome.addChromosomeEntry(chromID, outFileName, "file") + print "Creating database %s" % db + createDBFile(db) - index += 1 - header = currentLine + print "Adding gene entries" + loadGeneEntries(db, gffPath) - inFile.close() + print "Adding feature entries" + loadFeatureEntries(db, gffPath) + + print "Loading genomic sequence" + loadChromosomes(db, chromoPath, chromoOutPath) + + print "Creating Indices" + createDBindices(db) + + print "Finished creating database %s" % db + + +def createDBFile(db): + crGenome = Genome("cremanei", version="CR20050824", dbFile=db) + crGenome.createGeneDB(db) def loadGeneEntries(db, gffFile): @@ -155,34 +143,46 @@ def loadFeatureEntries(db, gffFile): crGenome.addFeatureEntryBatch(featureEntries) -def createDBFile(db): - crGenome = Genome("cremanei", version="CR20050824", dbFile=db) - crGenome.createGeneDB(db) - - -def createDBindices(db): - crGenome = Genome("cremanei", version="CR20050824", dbFile=db) - crGenome.createIndices() - - -def buildCremaneiDB(db=geneDB): - gffPath = "%s/download/cr01_wu_merged_gff" % cisRoot # using 20050824 version - chromoPath = "%s/download/sctg_masked_seqs/seqs" % cisRoot - chromoOutPath = "/C_remanei/" - - print "Creating database %s" % db - createDBFile(db) +def loadChromosomes(db, inPath, chromOutPath): + crGenome = Genome("cremanei", dbFile=db) + scontigList = os.listdir(inPath) + for scontig in scontigList: + seq = '' + seqArray = [] + seqLen = 0 + inFile = open("%s/%s" % (inPath, scontig), "r") + index = 0 + header = inFile.readline() + chromID = header.strip()[1:] + while header != "": + seqArray = [] + seqLen = 0 + currentLine = inFile.readline() + while currentLine != "" and currentLine[0] != ">": + lineSeq = currentLine.strip() + seqLen += len(lineSeq) + seqArray.append(lineSeq) + currentLine = inFile.readline() - print "Adding gene entries" - loadGeneEntries(db, gffPath) + seq = string.join(seqArray, "") + if seqLen < 100000: + print "Added contig %s to database" % chromID + crGenome.addSequence(("cremanei", chromID), seq, "chromosome", str(seqLen)) + crGenome.addChromosomeEntry(chromID, chromID, "db") + else: + outFileName = "%s%s.bin" % (chromOutPath, chromID) + outFile = open("%s%s" % (cisRoot, outFileName), "w") + outFile.write(seq) + outFile.close() + print "Added contig file %s to database" % outFileName + crGenome.addChromosomeEntry(chromID, outFileName, "file") - print "Adding feature entries" - loadFeatureEntries(db, gffPath) + index += 1 + header = currentLine - print "Loading genomic sequence" - loadChromosomes(db, chromoPath, chromoOutPath) + inFile.close() - print "Creating Indices" - createDBindices(db) - print "Finished creating database %s" % db \ No newline at end of file +def createDBindices(db): + crGenome = Genome("cremanei", version="CR20050824", dbFile=db) + crGenome.createIndices()