erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / experiments / fasta.py
diff --git a/cistematic/experiments/fasta.py b/cistematic/experiments/fasta.py
new file mode 100644 (file)
index 0000000..1caf031
--- /dev/null
@@ -0,0 +1,88 @@
+###########################################################################
+#                                                                         #
+# C O P Y R I G H T   N O T I C E                                         #
+#  Copyright (c) 2003-10 by:                                              #
+#    * California Institute of Technology                                 #
+#                                                                         #
+#    All Rights Reserved.                                                 #
+#                                                                         #
+# Permission is hereby granted, free of charge, to any person             #
+# obtaining a copy of this software and associated documentation files    #
+# (the "Software"), to deal in the Software without restriction,          #
+# including without limitation the rights to use, copy, modify, merge,    #
+# publish, distribute, sublicense, and/or sell copies of the Software,    #
+# and to permit persons to whom the Software is furnished to do so,       #
+# subject to the following conditions:                                    #
+#                                                                         #
+# The above copyright notice and this permission notice shall be          #
+# included in all copies or substantial portions of the Software.         #
+#                                                                         #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
+# SOFTWARE.                                                               #
+###########################################################################
+#
+# a simple use of the experiment class
+from experiment import Experiment
+from draw import Draw
+from analyzeMotifs import AnalyzeMotifs
+import sys
+
+class Fasta(Experiment, AnalyzeMotifs, Draw):
+    experimentType = "fasta"
+
+
+    def run(self, fastaFile):
+        self.loadFasta(fastaFile)
+        Experiment.run(self)
+        datasetID = self.genepoolID
+        for (prog, settingsID) in self.programs:
+            if 1:
+                prog.inputFile(fastaFile)
+                settings = self.getSettingsID(settingsID)[1]
+                prog.setSettings(eval(settings))
+                runID = self.setRun(prog.name(), datasetID, settingsID)
+                tag = str(runID)
+                prog.setTagID(tag)
+                prog.run()
+                theMotifs = prog.getMotifs()
+                for mot in theMotifs:
+                    self.appendResults(mot)
+            else:
+                self.mlog("Error running program %s with settings %s" % (prog, settingsID))
+
+
+    def loadFasta(self, ffile):
+        """ load fasta file into genepool
+        """
+        f=open(ffile, "r")
+        line = f.readline()
+        i = 1
+        while line != "":
+            seq = ""
+            templine = f.readline()
+            while templine != "" and templine[0] != ">":
+                seq = seq + templine[0:-1]
+                templine = f.readline()
+                name = "seq%s" % line.strip()[1:]
+                # "progress bar" of dots...
+                if (i % 10 == 0):
+                    sys.stderr.write(".")
+
+                if (i % 1000 ==  0):
+                    sys.stderr.write("%s\n" % i)
+                else:
+                    if (i % 100 == 0):
+                        sys.stderr.write(" ")
+
+                i = i + 1
+
+            self.genepool[(ffile, name)] = seq
+            line = templine
+            print 
+        f.close()
\ No newline at end of file