1 ###########################################################################
3 # C O P Y R I G H T N O T I C E #
4 # Copyright (c) 2003-10 by: #
5 # * California Institute of Technology #
7 # All Rights Reserved. #
9 # Permission is hereby granted, free of charge, to any person #
10 # obtaining a copy of this software and associated documentation files #
11 # (the "Software"), to deal in the Software without restriction, #
12 # including without limitation the rights to use, copy, modify, merge, #
13 # publish, distribute, sublicense, and/or sell copies of the Software, #
14 # and to permit persons to whom the Software is furnished to do so, #
15 # subject to the following conditions: #
17 # The above copyright notice and this permission notice shall be #
18 # included in all copies or substantial portions of the Software. #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, #
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #
22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS #
24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN #
25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN #
26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
28 ###########################################################################
31 from cistematic.programs import Program
32 import os, time, tempfile
33 from cistematic.core.motif import Motif
34 from cistematic.core import complement, sanitize
35 from os import environ
37 if environ.get("CISTEMATIC_TEMP"):
38 cisTemp = environ.get("CISTEMATIC_TEMP")
42 tempfile.tempdir = cisTemp
45 class Fastcomp(Program):
46 fastcompPath = Program.programRoot + "/fastcomp/"
52 argLetter = {"windowsize": "-w", "threshold": "-t"}
55 def getSettings(self):
59 def setSettings(self, settings):
61 self.argDict = settings
64 def buildInputFiles(self):
65 """ given a Fasta file (via the constructor or the inputFile() method), grab the
66 first two sequences and save them as two temporary sequences.
71 inFile = open(self.inputFilePath, "r")
74 line = inFile.readline()
76 self.seq1name = line[:-1]
77 line = inFile.readline()
80 line = inFile.readline()
82 self.seq2name = line[:-1]
83 line = inFile.readline()
86 line = inFile.readline()
91 print "Error processing input file"
95 self.fastdir = tempfile.mktemp()
97 os.mkdir(self.fastdir)
99 self.fastdir = cisTemp
101 self.inputFile1 = self.fastdir + "/seq1.fsa"
102 self.inputFile2 = self.fastdir + "/seq2.fsa"
103 seq1 = sanitize(seq1)
104 seq2 = sanitize(seq2)
105 seq1File = open(self.inputFile1, "w")
106 seq1File.write(seq1 + "\n")
108 seq2File = open(self.inputFile2, "w")
109 seq2File.write(seq2 + "\n")
115 def buildCommand(self):
116 self.buildInputFiles()
117 cmd = self.fastcompPath + "fastcomp -1 " + self.inputFile1 + " -2 " + self.inputFile2
118 for arg in ["windowsize", "threshold"]:
119 cmd = cmd + " " + self.argLetter[arg] + " " + str(self.argDict[arg])
121 cmd += " -o %s/fastcomp.out" % self.fastdir
122 print "cmd is %s" % (cmd)
127 def setWindowSize(self, wsize):
128 self.argDict["windowsize"] = str(wsize)
131 def setThreshold(self, thresh):
132 self.argDict["threshold"] = thresh
135 def checkDefaults(self):
136 if "threshold" not in self.argDict:
137 self.argDict["threshold"] = 0.9
139 if "windowsize" not in self.argDict:
140 self.argDict["windowsize"] = 10
143 # run the program - preferrably after some of the other options have been set
145 startTime = time.time()
147 self.contents = os.popen(Fastcomp.buildCommand(self)).readlines()
148 outFile = open( self.fastdir + "/fastcomp.out")
149 self.contents = outFile.readlines()
152 filenames = os.listdir(self.fastdir)
153 for entry in filenames:
154 os.remove(self.fastdir + "/" + entry)
155 os.rmdir(self.fastdir)
157 print "error cleaning up directory %s" % self.fastdir
159 stopTime = time.time()
161 print "\nThis run took %.3f - %.3f = %.3f seconds" % (startTime, stopTime, stopTime - startTime)
164 def getWindows(self, seqNum="1"):
166 for line in self.contents:
167 (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t")
169 results.append((seq1pos, seq2pos, matches, sense))
171 results.append((seq2pos, seq1pos, matches, sense))
179 motSize = self.argDict["windowsize"]
180 matrixRow = {"A": 0, "C": 1, "G": 2, "T": 3}
181 (seq1, seq2) = self.buildInputFiles()
183 for line in self.contents:
184 (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t")
185 if int(seq1pos) > currentPos:
186 currentPos = int(seq1pos)
188 for pos in range(len(thePWM)):
189 thePWM[pos][matrixRow["A"]] /= seqNum
190 thePWM[pos][matrixRow["C"]] /= seqNum
191 thePWM[pos][matrixRow["G"]] /= seqNum
192 thePWM[pos][matrixRow["T"]] /= seqNum
194 self.motifs.append(Motif(self.tagID + "-FASTCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"])))
197 motseq1 = seq1[currentPos:currentPos+motSize]
199 for pos in range(motSize):
200 thePWM.append([0.0, 0.0, 0.0, 0.0])
202 thePWM[pos][matrixRow[NT]] += 1
205 motseq2 = seq2[int(seq2pos):int(seq2pos) + motSize]
207 motseq2 = complement(seq2[int(seq2pos) + 1 - motSize:int(seq2pos) + 1], motSize)
209 for pos in range(motSize):
211 thePWM[pos][matrixRow[NT]] += 1
216 for pos in range(len(thePWM)):
217 thePWM[pos][matrixRow["A"]] /= seqNum
218 thePWM[pos][matrixRow["C"]] /= seqNum
219 thePWM[pos][matrixRow["G"]] /= seqNum
220 thePWM[pos][matrixRow["T"]] /= seqNum
222 self.motifs.append(Motif(self.tagID + "-FASTCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"])))