X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fgenomes%2Fdrerio.py;fp=cistematic%2Fgenomes%2Fdrerio.py;h=f26884eea87d0d463037a0a91f0ae768d974edbd;hp=7541db8d9043e2f4a082c73f6f938eb4ff639d9c;hb=4522d28194e3d1c048bced84038760d394038285;hpb=4ad5495359e4322da39868020a7398676261679e diff --git a/cistematic/genomes/drerio.py b/cistematic/genomes/drerio.py index 7541db8..f26884e 100644 --- a/cistematic/genomes/drerio.py +++ b/cistematic/genomes/drerio.py @@ -1,7 +1,7 @@ ########################################################################### # # # C O P Y R I G H T N O T I C E # -# Copyright (c) 2003-10 by: # +# Copyright (c) 2003-13 by: # # * California Institute of Technology # # # # All Rights Reserved. # @@ -34,48 +34,40 @@ from cistematic.core.geneinfo import geneinfoDB from os import environ if environ.get("CISTEMATIC_ROOT"): - cisRoot = environ.get("CISTEMATIC_ROOT") + cisRoot = environ.get("CISTEMATIC_ROOT") else: cisRoot = "/proj/genome" geneDB = "%s/D_rerio/drerio.genedb" % cisRoot -def loadChromosome(db, chromPath, chromOutPath): - seqArray = [] - seqLen = 0 - drGenome = Genome("drerio", dbFile=db) - files = os.listdir(chromPath) - for filename in files: - inFile = open("%s/%s" % (chromPath, filename), "r") - header = inFile.readline() - while header != "": - seqArray = [] - seqLen = 0 - chromID = header.strip()[1:] - currentLine = inFile.readline() - while currentLine != "" and currentLine[0] != ">": - lineSeq = currentLine.strip() - seqLen += len(lineSeq) - seqArray.append(lineSeq) - currentLine = inFile.readline() +def buildDrerioDB(db=geneDB): + """ genes and annotations are from UCSC (dr3). + """ + #genePath = "%s/download/xenoRefFlat.txt" % cisRoot + chromoPath = "%s/download/dr3" % cisRoot + chromoOutPath = "/D_rerio/" + print "Creating database %s" % db + createDBFile(db) - seq = string.join(seqArray, "") - if seqLen < 250000: - print "Added contig %s to database" % chromID - drGenome.addSequence(("drerio", chromID), seq, "chromosome", str(seqLen)) - drGenome.addChromosomeEntry(chromID, chromID, "db") - else: - outFileName = "%s%s.bin" % (chromOutPath, chromID) - outFile = open("%s%s" % (cisRoot, outFileName), "w") - outFile.write(seq) - outFile.close() - print "Added contig file %s to database" % outFileName - drGenome.addChromosomeEntry(chromID, outFileName, "file") + #print "Adding gene entries" + #loadGeneEntries(db, genePath) - header = currentLine + #print "Adding gene features" + #loadGeneFeatures(db, genePath) - inFile.close() + print "Loading chromosomes" + loadChromosome(db, chromoPath, chromoOutPath) + + print "Creating Indices" + createDBindices(db) + + print "Finished creating database %s" % db + + +def createDBFile(db): + drGenome = Genome("drerio", dbFile=db) + drGenome.createGeneDB(db) def loadGeneEntries(db, gFile): @@ -216,35 +208,43 @@ def loadGeneFeatures(db, gfile): drGenome.addFeatureEntryBatch(insertArray) -def createDBFile(db): - drGenome = Genome("drerio", dbFile=db) - drGenome.createGeneDB(db) - - -def createDBindices(db): +def loadChromosome(db, chromPath, chromOutPath): + seqArray = [] + seqLen = 0 drGenome = Genome("drerio", dbFile=db) - drGenome.createIndices() - - -def buildDrerioDB(db=geneDB): - """ genes and annotations are from UCSC (dr3). - """ - #genePath = "%s/download/xenoRefFlat.txt" % cisRoot - chromoPath = "%s/download/dr3" % cisRoot - chromoOutPath = "/D_rerio/" - print "Creating database %s" % db - createDBFile(db) + files = os.listdir(chromPath) + for filename in files: + inFile = open("%s/%s" % (chromPath, filename), "r") + header = inFile.readline() + while header != "": + seqArray = [] + seqLen = 0 + chromID = header.strip()[1:] + currentLine = inFile.readline() + while currentLine != "" and currentLine[0] != ">": + lineSeq = currentLine.strip() + seqLen += len(lineSeq) + seqArray.append(lineSeq) + currentLine = inFile.readline() - #print "Adding gene entries" - #loadGeneEntries(db, genePath) + seq = string.join(seqArray, "") + if seqLen < 250000: + print "Added contig %s to database" % chromID + drGenome.addSequence(("drerio", chromID), seq, "chromosome", str(seqLen)) + drGenome.addChromosomeEntry(chromID, chromID, "db") + else: + outFileName = "%s%s.bin" % (chromOutPath, chromID) + outFile = open("%s%s" % (cisRoot, outFileName), "w") + outFile.write(seq) + outFile.close() + print "Added contig file %s to database" % outFileName + drGenome.addChromosomeEntry(chromID, outFileName, "file") - #print "Adding gene features" - #loadGeneFeatures(db, genePath) + header = currentLine - print "Loading chromosomes" - loadChromosome(db, chromoPath, chromoOutPath) + inFile.close() - print "Creating Indices" - createDBindices(db) - print "Finished creating database %s" % db \ No newline at end of file +def createDBindices(db): + drGenome = Genome("drerio", dbFile=db) + drGenome.createIndices()