first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / cistematic / genomes / xtropicalis.py
index eb37500e68a5cfb6f54cf8d054ad47397d2c312b..bda0dbef0cd472f9573a271124afac7bddad0023 100644 (file)
@@ -1,7 +1,7 @@
 ###########################################################################
 #                                                                         #
 # C O P Y R I G H T   N O T I C E                                         #
-#  Copyright (c) 2003-10 by:                                              #
+#  Copyright (c) 2003-13 by:                                              #
 #    * California Institute of Technology                                 #
 #                                                                         #
 #    All Rights Reserved.                                                 #
@@ -33,51 +33,44 @@ from cistematic.genomes import Genome
 from os import environ
 
 if environ.get("CISTEMATIC_ROOT"):
-    cisRoot = environ.get("CISTEMATIC_ROOT") 
+    cisRoot = environ.get("CISTEMATIC_ROOT")
 else:
     cisRoot = "/proj/genome"
 
 geneDB = "%s/X_tropicalis/xtropicalis.genedb" % cisRoot
 
 
-def loadChromosome(db, chromPath, chromOutPath):
-    seqArray = []
-    seqLen = 0
-    xtGenome = Genome("xtropicalis", dbFile=db)
-    inFile = open(chromPath, "r")
-    header = inFile.readline()
-    while header != "":
-        seqArray = []
-        seqLen = 0
-        chromID = header.strip()[1:]
-        currentLine = inFile.readline()
-        while currentLine != "" and currentLine[0] != ">":
-            lineSeq = currentLine.strip()
-            seqLen += len(lineSeq)
-            seqArray.append(lineSeq)
-            currentLine = inFile.readline()
+def buildXtropicalisDB(db=geneDB):
+    genePath = "%s/download/xt1/jgiFilteredModels.txt" % cisRoot
+    chromoPath = "%s/download/xt1/xenTro1.softmask2.fa" % cisRoot
+    chromoOutPath = "/X_tropicalis/"
 
-        seq = string.join(seqArray, "")
-        if seqLen < 500000:
-            print "Added contig %s to database" % chromID
-            xtGenome.addSequence(("xtropicalis", chromID), seq, "chromosome", str(seqLen))
-            xtGenome.addChromosomeEntry(chromID, chromID, "db")
-        else:
-            outFileName = "%s%s.bin" % (chromOutPath, chromID)
-            outFile = open("%s%s" % (cisRoot, outFileName), "w")
-            outFile.write(seq)
-            outFile.close()
-            print "Added contig file %s to database" % outFileName
-            xtGenome.addChromosomeEntry(chromID, outFileName, "file")
+    print "Creating database %s" % db
+    createDBFile(db)
 
-        header = currentLine
+    print "Adding gene entries"
+    loadGeneEntries(db, genePath)
 
-    inFile.close()
+    print "Adding gene features"
+    loadGeneFeatures(db, genePath)
+
+    print "Loading sequences"
+    loadChromosome(db, chromoPath, chromoOutPath)
+
+    print "Creating Indices"
+    createDBindices(db)
+
+    print "Finished creating database %s" % db
+
+
+def createDBFile(db):
+    xtGenome = Genome("xtropicalis",  dbFile=db)
+    xtGenome.createGeneDB(db)
 
 
 def loadGeneEntries(db, gFile):
-    """ FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
-    """
+    #TODO: - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
+
     geneEntries = []
     xtGenome = Genome("xtropicalis", dbFile=db)
     geneFile = open(gFile, "r")
@@ -233,34 +226,41 @@ def loadGeneOntology(db, goPath, goDefPath, annotPath):
     xtGenome.addGoInfoBatch(goArray)
 
 
-def createDBFile(db):
-    xtGenome = Genome("xtropicalis",  dbFile=db)
-    xtGenome.createGeneDB(db)
-
-
-def createDBindices(db):
+def loadChromosome(db, chromPath, chromOutPath):
+    seqArray = []
+    seqLen = 0
     xtGenome = Genome("xtropicalis", dbFile=db)
-    xtGenome.createIndices()
-
-
-def buildXtropicalisDB(db=geneDB):
-    genePath = "%s/download/xt1/jgiFilteredModels.txt" % cisRoot
-    chromoPath = "%s/download/xt1/xenTro1.softmask2.fa" % cisRoot
-    chromoOutPath = "/X_tropicalis/"
-
-    print "Creating database %s" % db
-    createDBFile(db)
+    inFile = open(chromPath, "r")
+    header = inFile.readline()
+    while header != "":
+        seqArray = []
+        seqLen = 0
+        chromID = header.strip()[1:]
+        currentLine = inFile.readline()
+        while currentLine != "" and currentLine[0] != ">":
+            lineSeq = currentLine.strip()
+            seqLen += len(lineSeq)
+            seqArray.append(lineSeq)
+            currentLine = inFile.readline()
 
-    print "Adding gene entries"
-    loadGeneEntries(db, genePath)
+        seq = string.join(seqArray, "")
+        if seqLen < 500000:
+            print "Added contig %s to database" % chromID
+            xtGenome.addSequence(("xtropicalis", chromID), seq, "chromosome", str(seqLen))
+            xtGenome.addChromosomeEntry(chromID, chromID, "db")
+        else:
+            outFileName = "%s%s.bin" % (chromOutPath, chromID)
+            outFile = open("%s%s" % (cisRoot, outFileName), "w")
+            outFile.write(seq)
+            outFile.close()
+            print "Added contig file %s to database" % outFileName
+            xtGenome.addChromosomeEntry(chromID, outFileName, "file")
 
-    print "Adding gene features"
-    loadGeneFeatures(db, genePath)
+        header = currentLine
 
-    print "Loading sequences" 
-    loadChromosome(db, chromoPath, chromoOutPath)
+    inFile.close()
 
-    print "Creating Indices"
-    createDBindices(db)
 
-    print "Finished creating database %s" % db
\ No newline at end of file
+def createDBindices(db):
+    xtGenome = Genome("xtropicalis", dbFile=db)
+    xtGenome.createIndices()