X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fprograms%2Fpaircomp.py;fp=cistematic%2Fprograms%2Fpaircomp.py;h=3793311137b713d83ece3f43f227c14d3a5ec570;hp=0000000000000000000000000000000000000000;hb=bc30aca13e5ec397c92e67002fbf7a103130b828;hpb=0d3e3112fd04c2e6b44a25cacef1d591658ad181 diff --git a/cistematic/programs/paircomp.py b/cistematic/programs/paircomp.py new file mode 100644 index 0000000..3793311 --- /dev/null +++ b/cistematic/programs/paircomp.py @@ -0,0 +1,225 @@ +########################################################################### +# # +# C O P Y R I G H T N O T I C E # +# Copyright (c) 2003-10 by: # +# * California Institute of Technology # +# # +# All Rights Reserved. # +# # +# Permission is hereby granted, free of charge, to any person # +# obtaining a copy of this software and associated documentation files # +# (the "Software"), to deal in the Software without restriction, # +# including without limitation the rights to use, copy, modify, merge, # +# publish, distribute, sublicense, and/or sell copies of the Software, # +# and to permit persons to whom the Software is furnished to do so, # +# subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be # +# included in all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +########################################################################### +# +# paircomp.py +from cistematic.programs import Program +import os, time, tempfile +from cistematic.core.motif import Motif +from cistematic.core import complement +from os import environ + +if environ.get("CISTEMATIC_TEMP"): + cisTemp = environ.get("CISTEMATIC_TEMP") +else: + cisTemp = "/tmp" + +tempfile.tempdir = cisTemp + + +class Paircomp(Program): + paircompPath = Program.programRoot + "/paircomp/" + motifs = [] + argDict = {} + seq1name = "" + seq2name = "" + pairdir = "" + + + def getSettings(self): + return self.argDict + + + def setSettings(self, settings): + self.motifs = [] + self.argDict = settings + + + def buildInputFiles(self): + """ given a Fasta file (via the constructor or the inputFile() method), grab the + first two sequences and save them as two temporary sequences. + """ + line = " " + seq1 = "" + seq2 = "" + inFile = open(self.inputFilePath, "r") + try: + while line[0] != ">": + line = inFile.readline() + + self.seq1name = line[:-1] + line = inFile.readline() + while line[0] != ">": + seq1 += line.strip() + line = inFile.readline() + + self.seq2name = line[:-1] + line = inFile.readline() + while line[0] != ">": + seq2 += line.strip() + line = inFile.readline() + except: + if len(seq2) > 0: + pass + else: + print "Error processing input file" + return ("", "") + + inFile.close() + self.pairdir = tempfile.mktemp() + try: + os.mkdir(self.pairdir) + except: + self.pairdir = cisTemp + + self.inputFile1 = self.pairdir + "/seq1.fsa" + self.inputFile2 = self.pairdir + "/seq2.fsa" + + seq1File = open(self.inputFile1, "w") + seq1File.write(self.seq1name + "\n") + seq1File.write(seq1 + "\n") + seq1File.close() + + seq2File = open(self.inputFile2, "w") + seq2File.write(self.seq2name + "\n") + seq2File.write(seq2 + "\n") + seq2File.close() + + return (seq1, seq2) + + + def buildCommand(self): + self.buildInputFiles() + cmd = self.paircompPath + "paircomp " + self.inputFile1 + " " + self.inputFile2 + for arg in ["windowsize", "threshold"]: + cmd = cmd + " " + str(self.argDict[arg]) + + cmd += " %s/paircomp.out" % self.pairdir + print "cmd is %s" % (cmd) + + return cmd + + + #set window size + def setWindowSize(self, wsize): + self.argDict["windowsize"] = str(wsize) + + + # set threshold + def setThreshold(self, thresh): + self.argDict["threshold"] = thresh + + + def checkDefaults(self): + if "threshold" not in self.argDict: + self.argDict["threshold"] = 0.9 + + if "windowsize" not in self.argDict: + self.argDict["windowsize"] = 10 + + + def run(self): + startTime = time.time() + self.checkDefaults() + self.contents = os.popen(Paircomp.buildCommand(self)).readlines() + outFile = open( self.pairdir + "/paircomp.out") + self.contents = outFile.readlines() + outFile.close() + try: + filenames = os.listdir(self.pairdir) + for entry in filenames: + os.remove(self.pairdir + "/" + entry) + os.rmdir(self.pairdir) + except: + print "error cleaning up directory %s" % self.pairdir + + stopTime = time.time() + print "\nThis run took %.3f - %.3f = %.3f seconds" % (startTime, stopTime, stopTime - startTime) + + + def getWindows(self, seqNum="1"): + results = [] + for line in self.contents: + (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t") + if seqNum == "1": + results.append((seq1pos, seq2pos, matches, sense)) + else: + results.append((seq2pos, seq1pos, matches, sense)) + + return results + + + def getMotifs(self): + self.motifs = [] + thePWM = [] + motSize = self.argDict["windowsize"] + matrixRow = {"A": 0, "C": 1, "G": 2, "T": 3} + (seq1, seq2) = self.buildInputFiles() + currentPos = -1 + for line in self.contents: + (seq1pos, seq2pos, matches, sense) = line[:-1].split("\t") + if int(seq1pos) > currentPos: + currentPos = int(seq1pos) + if len(thePWM) > 0: + for pos in range(len(thePWM)): + thePWM[pos][matrixRow["A"]] /= seqNum + thePWM[pos][matrixRow["C"]] /= seqNum + thePWM[pos][matrixRow["G"]] /= seqNum + thePWM[pos][matrixRow["T"]] /= seqNum + + self.motifs.append(Motif(self.tagID + "-PAIRCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"]))) + + thePWM = [] + motseq1 = seq1[currentPos:currentPos+motSize] + seqNum = 1.0 + for pos in range(motSize): + thePWM.append([0.0, 0.0, 0.0, 0.0]) + NT = motseq1[pos] + thePWM[pos][matrixRow[NT]] += 1 + + if sense == "1": + motseq2 = seq2[int(seq2pos):int(seq2pos) + motSize] + else: + motseq2 = complement(seq2[int(seq2pos) + 1 - motSize:int(seq2pos) + 1], motSize) + + for pos in range(motSize): + NT = motseq2[pos] + thePWM[pos][matrixRow[NT]] += 1 + + seqNum += 1.0 + + if len(thePWM) > 0: + for pos in range(len(thePWM)): + thePWM[pos][matrixRow["A"]] /= seqNum + thePWM[pos][matrixRow["C"]] /= seqNum + thePWM[pos][matrixRow["G"]] /= seqNum + thePWM[pos][matrixRow["T"]] /= seqNum + + self.motifs.append(Motif(self.tagID + "-PAIRCOMP-" + str(len(self.motifs) + 1), "", thePWM, [], 1.0, str(self.argDict["threshold"]))) + + return self.motifs \ No newline at end of file