X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fgenomes%2Fscerevisiae.py;fp=cistematic%2Fgenomes%2Fscerevisiae.py;h=d670c148773fc86b95acbe6d42bf5ae47a1a375f;hp=5866f80963fac6914c81048fb1460548a4ef8a3b;hb=4522d28194e3d1c048bced84038760d394038285;hpb=4ad5495359e4322da39868020a7398676261679e diff --git a/cistematic/genomes/scerevisiae.py b/cistematic/genomes/scerevisiae.py index 5866f80..d670c14 100644 --- a/cistematic/genomes/scerevisiae.py +++ b/cistematic/genomes/scerevisiae.py @@ -1,7 +1,7 @@ ########################################################################### # # # C O P Y R I G H T N O T I C E # -# Copyright (c) 2003-10 by: # +# Copyright (c) 2003-13 by: # # * California Institute of Technology # # # # All Rights Reserved. # @@ -33,33 +33,44 @@ from cistematic.genomes import Genome from os import environ if environ.get("CISTEMATIC_ROOT"): - cisRoot = environ.get("CISTEMATIC_ROOT") + cisRoot = environ.get("CISTEMATIC_ROOT") else: cisRoot = "/proj/genome" geneDB = "%s/S_cerevisiae/scerevisiae.genedb" % cisRoot -def loadChromosome(db, chromID, chromPath, chromOut): - seqArray = [] - scGenome = Genome("scerevisiae", dbFile=db) - inFile = open(chromPath, "r") - line = inFile.readline() - for line in inFile: - seqArray.append(line.strip()) +def buildScerevisiaeDB(db=geneDB): + genePath = "%s/download/SGD_features.tab" % cisRoot + goDefPath = "%s/download/GO.terms_and_ids" % cisRoot + goPath = "%s/download/gene_association.sgd" % cisRoot - seq = string.join(seqArray, "") - seqLen = len(seq) - if seqLen < 1: - print "Problems reading sequence from file" + print "Creating database %s" % db + createDBFile(db) - print "writing to file %s" % chromOut - outFile = open("%s%s" % (cisRoot, chromOut), "w") - outFile.write(seq) - outFile.close() - seq = "" - print "calling scGenome()" - scGenome.addChromosomeEntry(chromID, chromOut, "file") + print "Adding gene entries" + loadGeneEntries(db, genePath) + + print "Adding gene annotations" + loadGeneAnnotations(db, genePath) + + print "Adding gene ontology" + loadGeneOntology(db, goPath, goDefPath) + + for chromID in ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16"]: + print "Loading chromosome %s" % chromID + chromPath = "%s/download/chr%s.fsa" % (cisRoot, chromID) + loadChromosome(db, chromID, chromPath, "/S_cerevisiae/chr%s.bin" % chromID) + + print "Creating Indices" + createDBindices(db) + + print "Finished creating database %s" % db + + +def createDBFile(db): + scGenome = Genome("scerevisiae", version="SGD1", dbFile=db) + scGenome.createGeneDB(db) def loadGeneEntries(db, gFile): @@ -105,7 +116,7 @@ def loadGeneEntries(db, gFile): def loadGeneAnnotations(db, annotPath): geneAnnotations = [] - annotFile = open(annotPath, "r") + annotFile = open(annotPath, "r") lines = annotFile.readlines() annotFile.close() scGenome = Genome("scerevisiae", dbFile=db) @@ -162,55 +173,28 @@ def loadGeneOntology(db, goPath, goDefPath): scGenome.addGoInfoBatch(goArray) -def createDBFile(db): - scGenome = Genome("scerevisiae", version="SGD1", dbFile=db) - scGenome.createGeneDB(db) +def loadChromosome(db, chromID, chromPath, chromOut): + seqArray = [] + scGenome = Genome("scerevisiae", dbFile=db) + inFile = open(chromPath, "r") + line = inFile.readline() + for line in inFile: + seqArray.append(line.strip()) + + seq = string.join(seqArray, "") + seqLen = len(seq) + if seqLen < 1: + print "Problems reading sequence from file" + + print "writing to file %s" % chromOut + outFile = open("%s%s" % (cisRoot, chromOut), "w") + outFile.write(seq) + outFile.close() + seq = "" + print "calling scGenome()" + scGenome.addChromosomeEntry(chromID, chromOut, "file") def createDBindices(db): scGenome = Genome("scerevisiae", version="SGD1", dbFile=db) scGenome.createIndices() - - -def buildScerevisiaeDB(db=geneDB): - genePath = "%s/download/SGD_features.tab" % cisRoot - goDefPath = "%s/download/GO.terms_and_ids" % cisRoot - goPath = "%s/download/gene_association.sgd" % cisRoot - chromos = {"1": "%s/download/chr01.fsa" % cisRoot, - "2": "%s/download/chr02.fsa" % cisRoot, - "3": "%s/download/chr03.fsa" % cisRoot, - "4": "%s/download/chr04.fsa" % cisRoot, - "5": "%s/download/chr05.fsa" % cisRoot, - "6": "%s/download/chr06.fsa" % cisRoot, - "7": "%s/download/chr07.fsa" % cisRoot, - "8": "%s/download/chr08.fsa" % cisRoot, - "9": "%s/download/chr09.fsa" % cisRoot, - "10": "%s/download/chr10.fsa" % cisRoot, - "11": "%s/download/chr11.fsa" % cisRoot, - "12": "%s/download/chr12.fsa" % cisRoot, - "13": "%s/download/chr13.fsa" % cisRoot, - "14": "%s/download/chr14.fsa" % cisRoot, - "15": "%s/download/chr15.fsa" % cisRoot, - "16": "%s/download/chr16.fsa" % cisRoot - } - - print "Creating database %s" % db - createDBFile(db) - - print "Adding gene entries" - loadGeneEntries(db, genePath) - - print "Adding gene annotations" - loadGeneAnnotations(db, genePath) - - print "Adding gene ontology" - loadGeneOntology(db, goPath, goDefPath) - - for chromID in ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16"]: - print "Loading chromosome %s" % chromID - loadChromosome(db, chromID, chromos[chromID], "/S_cerevisiae/chr%s.bin" % chromID) - - print "Creating Indices" - createDBindices(db) - - print "Finished creating database %s" % db \ No newline at end of file