1 ###########################################################################
3 # C O P Y R I G H T N O T I C E #
4 # Copyright (c) 2003-10 by: #
5 # * California Institute of Technology #
7 # All Rights Reserved. #
9 # Permission is hereby granted, free of charge, to any person #
10 # obtaining a copy of this software and associated documentation files #
11 # (the "Software"), to deal in the Software without restriction, #
12 # including without limitation the rights to use, copy, modify, merge, #
13 # publish, distribute, sublicense, and/or sell copies of the Software, #
14 # and to permit persons to whom the Software is furnished to do so, #
15 # subject to the following conditions: #
17 # The above copyright notice and this permission notice shall be #
18 # included in all copies or substantial portions of the Software. #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, #
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #
22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS #
24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN #
25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN #
26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
28 ###########################################################################
31 from cistematic.programs import Program
32 import os, time, tempfile
33 from cistematic.core.motif import Motif
34 from cistematic.core import complement
35 from os import environ
37 if environ.get("CISTEMATIC_TEMP"):
38 cisTemp = environ.get("CISTEMATIC_TEMP")
42 tempfile.tempdir = cisTemp
45 class Paircomp(Program):
46 paircompPath = Program.programRoot + "/paircomp/"
54 def getSettings(self):
58 def setSettings(self, settings):
60 self.argDict = settings
63 def buildInputFiles(self):
64 """ given a Fasta file (via the constructor or the inputFile() method), grab the
65 first two sequences and save them as two temporary sequences.
70 inFile = open(self.inputFilePath, "r")
73 line = inFile.readline()
75 self.seq1name = line[:-1]
76 line = inFile.readline()
79 line = inFile.readline()
81 self.seq2name = line[:-1]
82 line = inFile.readline()
85 line = inFile.readline()
90 print "Error processing input file"
94 self.pairdir = tempfile.mktemp()
96 os.mkdir(self.pairdir)
98 self.pairdir = cisTemp
100 self.inputFile1 = self.pairdir + "/seq1.fsa"
101 self.inputFile2 = self.pairdir + "/seq2.fsa"
103 seq1File = open(self.inputFile1, "w")
104 seq1File.write(self.seq1name + "\n")
105 seq1File.write(seq1 + "\n")
108 seq2File = open(self.inputFile2, "w")
109 seq2File.write(self.seq2name + "\n")
110 seq2File.write(seq2 + "\n")
116 def buildCommand(self):
117 self.buildInputFiles()
118 cmd = self.paircompPath + "paircomp " + self.inputFile1 + " " + self.inputFile2
119 for arg in ["windowsize", "threshold"]:
120 cmd = cmd + " " + str(self.argDict[arg])
122 cmd += " %s/paircomp.out" % self.pairdir
123 print "cmd is %s" % (cmd)
129 def setWindowSize(self, wsize):
130 self.argDict["windowsize"] = str(wsize)
134 def setThreshold(self, thresh):
135 self.argDict["threshold"] = thresh
138 def checkDefaults(self):
139 if "threshold" not in self.argDict:
140 self.argDict["threshold"] = 0.9
142 if "windowsize" not in self.argDict:
143 self.argDict["windowsize"] = 10
147 startTime = time.time()
149 self.contents = os.popen(Paircomp.buildCommand(self)).readlines()
150 outFile = open( self.pairdir + "/paircomp.out")
151 self.contents = outFile.readlines()
154 filenames = os.listdir(self.pairdir)
155 for entry in filenames:
156 os.remove(self.pairdir + "/" + entry)
157 os.rmdir(self.pairdir)
159 print "error cleaning up directory %s" % self.pairdir
161 stopTime = time.time()
162 print "\nThis run took %.3f - %.3f = %.3f seconds" % (startTime, stopTime, stopTime - startTime)
165 def getWindows(self, seqNum="1"):
167 for line in self.contents:
168 (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t")
170 results.append((seq1pos, seq2pos, matches, sense))
172 results.append((seq2pos, seq1pos, matches, sense))
180 motSize = self.argDict["windowsize"]
181 matrixRow = {"A": 0, "C": 1, "G": 2, "T": 3}
182 (seq1, seq2) = self.buildInputFiles()
184 for line in self.contents:
185 (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t")
186 if int(seq1pos) > currentPos:
187 currentPos = int(seq1pos)
189 for pos in range(len(thePWM)):
190 thePWM[pos][matrixRow["A"]] /= seqNum
191 thePWM[pos][matrixRow["C"]] /= seqNum
192 thePWM[pos][matrixRow["G"]] /= seqNum
193 thePWM[pos][matrixRow["T"]] /= seqNum
195 self.motifs.append(Motif(self.tagID + "-PAIRCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"])))
198 motseq1 = seq1[currentPos:currentPos+motSize]
200 for pos in range(motSize):
201 thePWM.append([0.0, 0.0, 0.0, 0.0])
203 thePWM[pos][matrixRow[NT]] += 1
206 motseq2 = seq2[int(seq2pos):int(seq2pos) + motSize]
208 motseq2 = complement(seq2[int(seq2pos) + 1 - motSize:int(seq2pos) + 1], motSize)
210 for pos in range(motSize):
212 thePWM[pos][matrixRow[NT]] += 1
217 for pos in range(len(thePWM)):
218 thePWM[pos][matrixRow["A"]] /= seqNum
219 thePWM[pos][matrixRow["C"]] /= seqNum
220 thePWM[pos][matrixRow["G"]] /= seqNum
221 thePWM[pos][matrixRow["T"]] /= seqNum
223 self.motifs.append(Motif(self.tagID + "-PAIRCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"])))