--- /dev/null
+###########################################################################
+# #
+# C O P Y R I G H T N O T I C E #
+# Copyright (c) 2003-10 by: #
+# * California Institute of Technology #
+# #
+# All Rights Reserved. #
+# #
+# Permission is hereby granted, free of charge, to any person #
+# obtaining a copy of this software and associated documentation files #
+# (the "Software"), to deal in the Software without restriction, #
+# including without limitation the rights to use, copy, modify, merge, #
+# publish, distribute, sublicense, and/or sell copies of the Software, #
+# and to permit persons to whom the Software is furnished to do so, #
+# subject to the following conditions: #
+# #
+# The above copyright notice and this permission notice shall be #
+# included in all copies or substantial portions of the Software. #
+# #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
+# SOFTWARE. #
+###########################################################################
+#
+# fastcomp.py
+from cistematic.programs import Program
+import os, time, tempfile
+from cistematic.core.motif import Motif
+from cistematic.core import complement, sanitize
+from os import environ
+
+if environ.get("CISTEMATIC_TEMP"):
+ cisTemp = environ.get("CISTEMATIC_TEMP")
+else:
+ cisTemp = "/tmp"
+
+tempfile.tempdir = cisTemp
+
+
+class Fastcomp(Program):
+ fastcompPath = Program.programRoot + "/fastcomp/"
+ motifs = []
+ argDict = {}
+ seq1name = ""
+ seq2name = ""
+ pairdir = ""
+ argLetter = {"windowsize": "-w", "threshold": "-t"}
+
+
+ def getSettings(self):
+ return self.argDict
+
+
+ def setSettings(self, settings):
+ self.motifs = []
+ self.argDict = settings
+
+
+ def buildInputFiles(self):
+ """ given a Fasta file (via the constructor or the inputFile() method), grab the
+ first two sequences and save them as two temporary sequences.
+ """
+ line = " "
+ seq1 = ""
+ seq2 = ""
+ inFile = open(self.inputFilePath, "r")
+ try:
+ while line[0] != ">":
+ line = inFile.readline()
+
+ self.seq1name = line[:-1]
+ line = inFile.readline()
+ while line[0] != ">":
+ seq1 += line.strip()
+ line = inFile.readline()
+
+ self.seq2name = line[:-1]
+ line = inFile.readline()
+ while line[0] != ">":
+ seq2 += line.strip()
+ line = inFile.readline()
+ except:
+ if len(seq2) > 0:
+ pass
+ else:
+ print "Error processing input file"
+ return ("", "")
+
+ inFile.close()
+ self.fastdir = tempfile.mktemp()
+ try:
+ os.mkdir(self.fastdir)
+ except:
+ self.fastdir = cisTemp
+
+ self.inputFile1 = self.fastdir + "/seq1.fsa"
+ self.inputFile2 = self.fastdir + "/seq2.fsa"
+ seq1 = sanitize(seq1)
+ seq2 = sanitize(seq2)
+ seq1File = open(self.inputFile1, "w")
+ seq1File.write(seq1 + "\n")
+ seq1File.close()
+ seq2File = open(self.inputFile2, "w")
+ seq2File.write(seq2 + "\n")
+ seq2File.close()
+
+ return (seq1, seq2)
+
+
+ def buildCommand(self):
+ self.buildInputFiles()
+ cmd = self.fastcompPath + "fastcomp -1 " + self.inputFile1 + " -2 " + self.inputFile2
+ for arg in ["windowsize", "threshold"]:
+ cmd = cmd + " " + self.argLetter[arg] + " " + str(self.argDict[arg])
+
+ cmd += " -o %s/fastcomp.out" % self.fastdir
+ print "cmd is %s" % (cmd)
+
+ return cmd
+
+
+ def setWindowSize(self, wsize):
+ self.argDict["windowsize"] = str(wsize)
+
+
+ def setThreshold(self, thresh):
+ self.argDict["threshold"] = thresh
+
+
+ def checkDefaults(self):
+ if "threshold" not in self.argDict:
+ self.argDict["threshold"] = 0.9
+
+ if "windowsize" not in self.argDict:
+ self.argDict["windowsize"] = 10
+
+
+ # run the program - preferrably after some of the other options have been set
+ def run(self):
+ startTime = time.time()
+ self.checkDefaults()
+ self.contents = os.popen(Fastcomp.buildCommand(self)).readlines()
+ outFile = open( self.fastdir + "/fastcomp.out")
+ self.contents = outFile.readlines()
+ outFile.close()
+ try:
+ filenames = os.listdir(self.fastdir)
+ for entry in filenames:
+ os.remove(self.fastdir + "/" + entry)
+ os.rmdir(self.fastdir)
+ except:
+ print "error cleaning up directory %s" % self.fastdir
+
+ stopTime = time.time()
+
+ print "\nThis run took %.3f - %.3f = %.3f seconds" % (startTime, stopTime, stopTime - startTime)
+
+
+ def getWindows(self, seqNum="1"):
+ results = []
+ for line in self.contents:
+ (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t")
+ if seqNum == "1":
+ results.append((seq1pos, seq2pos, matches, sense))
+ else:
+ results.append((seq2pos, seq1pos, matches, sense))
+
+ return results
+
+
+ def getMotifs(self):
+ self.motifs = []
+ thePWM = []
+ motSize = self.argDict["windowsize"]
+ matrixRow = {"A": 0, "C": 1, "G": 2, "T": 3}
+ (seq1, seq2) = self.buildInputFiles()
+ currentPos = -1
+ for line in self.contents:
+ (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t")
+ if int(seq1pos) > currentPos:
+ currentPos = int(seq1pos)
+ if len(thePWM) > 0:
+ for pos in range(len(thePWM)):
+ thePWM[pos][matrixRow["A"]] /= seqNum
+ thePWM[pos][matrixRow["C"]] /= seqNum
+ thePWM[pos][matrixRow["G"]] /= seqNum
+ thePWM[pos][matrixRow["T"]] /= seqNum
+
+ self.motifs.append(Motif(self.tagID + "-FASTCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"])))
+
+ thePWM = []
+ motseq1 = seq1[currentPos:currentPos+motSize]
+ seqNum = 1.0
+ for pos in range(motSize):
+ thePWM.append([0.0, 0.0, 0.0, 0.0])
+ NT = motseq1[pos]
+ thePWM[pos][matrixRow[NT]] += 1
+
+ if sense == "1":
+ motseq2 = seq2[int(seq2pos):int(seq2pos) + motSize]
+ else:
+ motseq2 = complement(seq2[int(seq2pos) + 1 - motSize:int(seq2pos) + 1], motSize)
+
+ for pos in range(motSize):
+ NT = motseq2[pos]
+ thePWM[pos][matrixRow[NT]] += 1
+
+ seqNum += 1.0
+
+ if len(thePWM) > 0:
+ for pos in range(len(thePWM)):
+ thePWM[pos][matrixRow["A"]] /= seqNum
+ thePWM[pos][matrixRow["C"]] /= seqNum
+ thePWM[pos][matrixRow["G"]] /= seqNum
+ thePWM[pos][matrixRow["T"]] /= seqNum
+
+ self.motifs.append(Motif(self.tagID + "-FASTCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"])))
+
+ return self.motifs
\ No newline at end of file