cistematic/programs/Gibbs.py

   1 ##Sarah Aerni
   2 ##Created:  June 25, 2005
   3 ##Modified: July 11, 2005
   4 ##Motif Finder using Gibbs sampling
   5 ##convergence is measured when the difference in the sequences is negligible
   6
   7 import random
   8 import math
   9 import sys
  10 import copy
  11 from time import time
  12
  13 from math import ceil
  14
  15 Frequency = {}
  16 ConsensusScore= {True:2, False:1}
  17
  18 NTDIndices = {"A": 0, "C": 1, "G": 2, "T": 3}
  19 IndexNTDs  = {0: "A", 1: "C", 2: "G", 3: "T"}
  20 INSERTION_N = "N"
  21 global minSize
  22 global maxSize
  23 global thresholdPercent
  24 global sequences
  25 global sizeOfMotif
  26 global numOfMismatches
  27 global maxIterations
  28 global maxLoopsWithoutImprovement
  29
  30 """
  31 markov size - 1 = markov model
  32 example:
  33 if you want a markov 1 you want to check 2 ntds total
  34 markov_size = 2
  35 MARKOV_SIZE = 2
  36
  37 AlignmentScore
  38 input:     sequenceToCompare   List of sequences whose substrings will be
  39            sizeOfMotif         aligned integer size of the motif being found
  40                                (length of the subseqeunce that will be aligned
  41                                 from each sequence in above list (window size)
  42            startLocs           start locations of the motifs to be aligned to
  43                                each other
  44            CompareList         the indices of sequencesToCompare to be aligned
  45            numSeqs             the number of sequences -1 from
  46                                sequencesToCompare to be aligned, the indices
  47                                of sequenceToCompare stored in the first
  48                                 numSeqs indices of in CompareList.
  49            Frequency           markov model being used to calculate background
  50            originalSeqs        contain unmasked sequences used for checking
  51                                the markov score
  52 output:    integer             Score indicating the consensus score of these
  53                                sequences
  54            2D-List             contains the PSFM
  55            2D-List             contains the log markov scores
  56
  57 will be calculated as an ungapped consensus between those elements in the
  58 CompareList Consensus score is calculated by choosing the largest number of
  59 the same elements in each column, and adding all these numbers up across all
  60 columns
  61 """
  62
  63
  64 def AlignmentScore(sequencesToCompare, sizeOfMotif, startLocs, CompareList, numSeqs):
  65     TotalScore = 0;
  66     PWM = []
  67     maxScores=[]
  68     len(sequencesToCompare)
  69     for i in range (sizeOfMotif):
  70         PWMi = [0.0, 0.0, 0.0, 0.0]
  71         for j in range(numSeqs+1):
  72             SequenceIndex = CompareList[j]
  73             CurrSeq = sequencesToCompare[SequenceIndex]
  74
  75             #some sequences may not contain the motifs, if so you do not want
  76             #to include them in the consensus. These have uninitialized start
  77             #locations (ie startLocs would be -1
  78             if startLocs[SequenceIndex] != -1:
  79                 if sequencesToCompare[SequenceIndex]\
  80                     [startLocs[SequenceIndex]+i] == "N":
  81                     print sequencesToCompare
  82                     print "\nBAD HERE!"
  83                     print CurrSeq
  84                     print startLocs
  85                     print startLocs[SequenceIndex]
  86                     print CompareList
  87                     print j
  88                     print SequenceIndex
  89                     print numSeqs
  90                     print sizeOfMotif
  91                     print CurrSeq[startLocs[SequenceIndex]:startLocs[SequenceIndex]+sizeOfMotif]
  92                 PWMi[NTDIndices[CurrSeq[startLocs[SequenceIndex]+i]]] += 1.0
  93         maxHere=max(PWMi)
  94         TotalScore += maxHere
  95         maxScores.append(maxHere)
  96         PWM.append(PWMi)
  97
  98     return (TotalScore, PWM,maxScores)
  99
 100
 101 def MarkovFreq (prefix, actualNTD, Frequency,MARKOV_WINDOW):
 102     """ MarkovFreq
 103         input:     prefix      string of length MARKV_SIZE - 1 prefix used for model
 104                    actualNTD   character NTD at the en dof the prefix being calculated
 105                    Frequency   Markov model for calculations
 106         output:    float that gives the markov score for this specific sequence
 107
 108         The helper function will run through and find all possible words with the
 109         prefix and determine the markov score based on this
 110     """
 111     denominator = 0.0
 112     numerator = 0.0
 113     for NTD in ["A", "C", "G", "T"]:
 114         value = M_Score(prefix+NTD, Frequency, False, MARKOV_WINDOW)
 115         if NTD == actualNTD :
 116             numerator = value
 117         denominator += value
 118     retVal = numerator/denominator
 119
 120     return retVal
 121
 122
 123 def revComp (sequence):
 124     """ revComp
 125         input:     sequence    DNA sequence to be converted to reverse complement
 126         output:    string      reverse complement of input sequence
 127
 128         obtains the reverse complement of an input sequence
 129     """
 130     RevDict={"A": "T",
 131              "T": "A",
 132              "C": "G",
 133              "G": "C",
 134              "N": "N"
 135     }
 136     reverse = ""
 137     for i in range(len(sequence)):
 138         reverse = RevDict[sequence[i].upper()]+reverse
 139
 140     return reverse
 141
 142
 143 def Markov(sequences, IncludeRC, MARKOV):
 144     """ Markov3
 145         input:     sequences   list that are being used to create the background
 146         output:    dictionary of all 6mers (reverse complement also) and their -log2
 147                    proportion seen
 148
 149         background will build a markov model of the background in order to be able
 150         to differentiate the motifs from the pure background of a certain size
 151         they will be stored as -log(fraction)
 152     """
 153     MARKOV_WINDOW = MARKOV + 1
 154     WordDict = {}
 155     totalWindows = 0
 156     for i in sequences:
 157         totalWindows += (len(i)-MARKOV_WINDOW+1)*2
 158
 159     for seq in sequences:
 160         for index in range(len(seq)-MARKOV_WINDOW+1):
 161             subseq = seq[index:index+MARKOV_WINDOW].upper()
 162             if subseq not in WordDict:
 163                 WordDict[subseq] = 0.0
 164
 165             WordDict[subseq] += 1.0
 166             if IncludeRC:
 167                 RC = revComp(subseq)
 168                 if RC not in WordDict:
 169                     WordDict[RC] = 0.0
 170
 171                 WordDict[RC] += 1.0
 172
 173     for key in WordDict:
 174         WordDict[key] = 1.0*WordDict[key]/totalWindows
 175
 176     return WordDict
 177
 178
 179 def Average_M (sequence, Model, l, MARKOV_WINDOW):
 180     """ Average_M
 181         input:     sequences   List of sequences on which to find the average
 182                                markov score
 183                    Model       Dictionary containing pvalues for seeing 3mers
 184                    l           integer designating the word sizes from which to
 185                                determine average pvalue
 186         output:    average probability of all input lmers in sequences in the Model
 187
 188         finds the probability of seeing all subsequence in the total strings
 189         using the markov model created using the background. Markov3 is used
 190         (window size of 3) and from this determine the average. This function will
 191         also screen the background model
 192     """
 193     totalSum = 0.0;
 194     totalWords = 0.0;
 195     for seq in sequence:
 196         for i in range(MARKOV_WINDOW-1,len(seq)-l+1):
 197             totalWords += 1.0
 198             PVal = M_Score(seq[i-MARKOV_WINDOW+1:i+l], Model, True, MARKOV_WINDOW)
 199             totalSum += PVal
 200
 201     retVal = totalSum/totalWords
 202     print totalWords
 203     return retVal
 204
 205
 206 def M_Score (sequence, Model, check, MARKOV_WINDOW):
 207     """ M_Score
 208         input:     sequence    string for which the Pvalue is to be determined
 209                    Model       Dictionary containing log2 pvalues for seeing 6mers
 210                    check       Boolean which determines whether to also check for
 211                                completeness of markov model
 212         output:    log2 probability of seeing the input seqeunce in the Model
 213
 214         gives the probability of seeing the given subsequence in the total strings
 215         using the markov model created using the background. Markov6 is used
 216         (window size of 3)
 217     """
 218     PVal = 0.0
 219     for j in range(len(sequence)-MARKOV_WINDOW+1):
 220         if sequence[j:j+MARKOV_WINDOW] not in Model:
 221             if check:
 222                 print "The Markov Model is inadequate for your input",
 223                 print "sequences\n %s is"%sequence[j:j+MARKOV_WINDOW],
 224                 print "not contained in model provided\n",
 225                 print "Please revise your provided model or consider",
 226                 print "using Background Modelling provided"
 227                 sys.exit(0)
 228
 229             continue
 230
 231         PVal += -math.log(Model[sequence[j:j+MARKOV_WINDOW]],math.e)
 232
 233     return PVal
 234
 235
 236 def LogOdds(PWM, LogsM, sequence, Frequency, MARKOV_WINDOW):
 237     """ LogOdds
 238         input:     sequence    relevant part of the sequence being added to the PWM
 239                    PWM         information on sequences already in the motif
 240                    LogsM       frequency information on sequences already in motif
 241                    Frequency   markov model for background
 242                    sizeOfMotif size f the motif being found
 243         output     returns the log odds score for the consensus
 244         the equation used is as follow:
 245         S(j = 1 to sizeOfMotif (S(i = [A,C,G,T]) f_ij * ln(S(Prob each path))))
 246     """
 247     Score = 0
 248     PWMout = copy.deepcopy(PWM)
 249     LogsMout = copy.deepcopy(LogsM)
 250     #since each column of the PWM must add up to the total umber of sequences
 251     #in that PWM, in addition one must be added for the current sequence
 252     totalSeqs = PWM[0][0]+PWM[0][1]+PWM[0][2]+PWM[0][3] + 1
 253     for j in range(len(PWMout)):
 254         for i in ['A', 'C', 'G', 'T']:
 255             if i == sequence[j+MARKOV_WINDOW-1]:
 256                 PWMout[j][NTDIndices[i]] += 1.0
 257                 word = sequence[j:j+MARKOV_WINDOW]
 258                 LogsMout[j][NTDIndices[i]] += Frequency[word]
 259
 260             if PWMout[j][NTDIndices[i]]> 0:
 261                 Score += PWMout[j][NTDIndices[i]]/totalSeqs*math.log(PWMout[j][NTDIndices[i]]/(totalSeqs*LogsMout[j][NTDIndices[i]]/PWMout[j][NTDIndices[i]]),math.e)
 262
 263     return Score, PWMout, LogsMout
 264
 265
 266 def convert2motif(sequences, size):
 267     """ convert2motif
 268         input:     sequences   list containing the sequences in A,C,G,T alphabet
 269                                comprising the motif to be converted into symbols
 270                    size        size of the motif
 271         maybe add threshold?!?!
 272         output:    string      motif converted into descriptive symbols
 273
 274         takes in a list of motifs that were found at each point and converts them to
 275         an actual motif
 276     """
 277     #column composition is replaced by symbols
 278     SymbolDict = {'CGT':'B','AGT':'D','ACT':'H','GT':'K','AC':'M', 'ACGT':'N','AG':'R','CG':'S','ACG':'V','AT':'W', 'CT':'Y','A':'A','C':'C','G':'G','T':'T'}
 279     Motif = ""
 280     for i in range(size):
 281         A = 0
 282         C = 0
 283         G = 0
 284         T = 0
 285         for seq in sequences:
 286             if seq[i].upper() == "A":
 287                 A += 1
 288             elif seq[i].upper() == "C":
 289                 C += 1
 290             elif seq[i].upper() == "G":
 291                 G += 1
 292             else:
 293                 T += 1
 294
 295         characterCode = ""
 296
 297         #translate column composition into symbols
 298         ###########should we use percentages?! ie. A >certain percent##################
 299         if (A > 0):
 300             characterCode += "A"
 301
 302         if (C > 0):
 303             characterCode += "C"
 304
 305         if (G > 0):
 306             characterCode += "G"
 307
 308         if (T > 0):
 309             characterCode += "T"
 310
 311         Motif += SymbolDict[characterCode]
 312
 313     return Motif
 314
 315
 316 def convert2PSFM (sequences, NumOfSeqs):
 317     """ convert2PSFM
 318         input:     sequences   list containing the sequences in A,C,G,T alphabet
 319                                comprising the motif to be converted into symbols
 320                    size        size of the motif
 321         output:    2Darray     will contain the PSFM where indices 0-3 of each list
 322                                will be A,C,G,T respectively
 323
 324         takes in a list of motifs that were found at each point and converts them to
 325         a PSFM
 326     """
 327     PSFM = []
 328     PWM = convert2PWM(sequences, len(sequences[0]))
 329     for i in xrange(len(PWM)):
 330         index = []
 331         for j in [0,1,2,3]:
 332             index.append(PWM[i][j]/NumOfSeqs)
 333
 334         PSFM.append(index)
 335
 336     return PSFM
 337
 338
 339 def convert2PWM (sequences, size):
 340     """ convert2PWM
 341         input:     sequences   list containing the sequences in A,C,G,T alphabet
 342                                comprising the motif to be converted into symbols
 343                    size        size of the motif
 344         output:    2Darray     will contain the PSFM where indices 0-3 of each list
 345                                will be A,C,G,T respectively
 346
 347         takes in a list of motifs that were found at each point and converts them to
 348         a PWM
 349     """
 350     PWM = []
 351     for i in range(size):
 352         indices = [0.0, 0.0, 0.0, 0.0]
 353         for seq in sequences:
 354             indices[NTDIndices[seq[i].upper()]] += 1.0
 355
 356         PWM.append(indices)
 357
 358     return PWM
 359
 360
 361 def add2PWM(sequence, PWM):
 362     """ add2PWM
 363         input:     sequence    sequence to be added to PWM
 364                    PWM         PWM being modiifed
 365
 366         takes in a sequence and adds it to the PWM
 367     """
 368     #determine the composition to add
 369     for i in range(len(PWM)):
 370         PWM[i][NTDIndices[sequence[i].upper()]] += 1.0
 371
 372
 373 def Align2PWM(Motifi,PWM):
 374     """ Align2PWM
 375         input:     Motifi      Sequence being aligned to PWM
 376                    PWM         PWM to which the sequence is being aligned
 377         output:    float       alignment score to the matrix
 378
 379         takes in a PWM and aligns the sequence to this PWM and returns
 380         the consensus scoreo
 381     """
 382     Score = 0.0
 383     for i in range(len(PWM)):
 384         Score += PWM[i][NTDIndices[Motifi[i]]]
 385     return Score
 386
 387
 388 def Factorial(n):
 389     """ Factorial
 390         input:     n       Number for which we will calculate the factorial
 391         output:    float   factorial of input number
 392
 393         calculates the factorial of input number n
 394     """
 395     #by definition factorial should be 1
 396     Fact = 1
 397     for i in range(2,n+1):
 398         Fact *= i
 399     return Fact
 400
 401
 402 def Choose(n, k):
 403     """ Choose
 404         input:     n       integer for total number of elements in a set
 405                    k       integer for total number of elements to be chose from set
 406         output:    float   the binomial coefficient of the above number, ie nCk
 407
 408         calculates the number of ways to choose k elements from a set of n
 409     """
 410     return Factorial(n)/Factorial(n-k)/Factorial(k)
 411
 412
 413 getNTDVals = {0:'A',1:'C',2:'G', 3:'T'}
 414
 415 getNTDIndex = {'A':0,'C':1,'G':2, 'T':3}
 416
 417
 418 def MotifFinder(InputSequences, minS, maxS, numberOfMotifs, Markov_Size,
 419                         UseRC, Frequency,excludeBelowAve, percID,
 420                         maxIter, maxLoopsW_outImprovement):
 421     global minSize
 422     minSize=minS
 423     global maxSize
 424     maxSize=maxS
 425     global thresholdPercent
 426     thresholdPercent=percID
 427     global sequences
 428     global sizeOfMotif
 429     global numOfMismatches
 430     global maxIterations
 431     maxIterations=maxIter
 432
 433     global maxLoopsWithoutImprovement
 434     maxLoopsWithoutImprovement=maxLoopsW_outImprovement
 435
 436     print "Running Sampler with %i sequences"%(len(InputSequences))
 437     print "Finding %i motifs of size %i to %i using markov size %i" % (numberOfMotifs,minSize,maxSize,Markov_Size)
 438
 439     sequences = [InputSequences[i].upper() for i in range(len(InputSequences))]
 440     PSFMs = []
 441     if UseRC:
 442         for seqIndex in xrange(len(sequences)):
 443             RCSeq = revComp(sequences[seqIndex])
 444             sequences[seqIndex] += INSERTION_N+ RCSeq
 445
 446     #this will track the movement of each sequence
 447     #if a movement exceeds a certain threshold we are not finished
 448     for motifNum in range(numberOfMotifs):
 449
 450         #to improve speed shrink sequences by replacing strings of Ns by
 451         #a single N
 452         for i in xrange(len(sequences)):
 453             splitByN=sequences[i].split('N')
 454             j = 0;
 455             finalSequence=""
 456             max_j=len(splitByN)
 457             while j  < max_j :
 458                 if len(splitByN[j])==0:
 459                     finalSequence="".join([finalSequence,'N'])
 460                     while len(splitByN[j])==0:
 461                         j+=1
 462                         if j==max_j:
 463                             break
 464                 else:
 465                     finalSequence="".join([finalSequence,splitByN[j]])
 466                     j+=1
 467             sequences[i]=finalSequence
 468
 469         print "MOTIF NUMBER %i" %motifNum
 470         empty=min([len(sequences[i]) for i in xrange(len(sequences))])
 471         #pick motif size randomly within the range provided by the user
 472         sizeOfMotif = random.randint(minSize,maxSize)
 473         if empty < maxSize:
 474             return ALLPSFMS
 475
 476         numOfMismatches=sizeOfMotif-ceil(thresholdPercent/100.0*sizeOfMotif)
 477         (PWM,PWMScores,startLocs)=GibbsRunner(100)
 478         MaxVals=[0 for i in xrange(len(PWM))]
 479         for ConsI in xrange(len(PWM)):
 480             MaxVals[ConsI] = max(PWM[ConsI])
 481         PWMScores = [0 for i in range(len(sequences))]
 482         for SIndex in range(len(sequences)):
 483             subseq = sequences[SIndex][startLocs[SIndex]:startLocs[SIndex]+sizeOfMotif]
 484             PWMScores[SIndex] = 0
 485             #######################start here##########
 486             for subIndex in range(len(subseq)):
 487                 PWMScores[SIndex] += PWM[subIndex][NTDIndices[subseq[subIndex]]]
 488
 489         maxScore = max(PWMScores)
 490         #get rid of all the sequences that do not achieve a certain consensus
 491         #score defined by the top one
 492         thresh = thresholdPercent/100.0 * maxScore
 493         FinalPWMSeqs = []
 494         for SIndex in range(len(PWMScores)):
 495             if PWMScores[SIndex] > thresh:
 496                 FinalPWMSeqs.append(sequences[SIndex][startLocs[SIndex]:startLocs[SIndex]+sizeOfMotif])
 497             else:
 498                 startLocs[SIndex] = -1
 499
 500         FinalPSFM= convert2PSFM (FinalPWMSeqs, len(FinalPWMSeqs))
 501         PSFMs.append(FinalPSFM)
 502         for i in xrange(len(sequences)):
 503             if startLocs[i] != -1:
 504                 sequences[i] = sequences[i][:startLocs[i]]+INSERTION_N*sizeOfMotif+sequences[i][startLocs[i]+sizeOfMotif:]
 505                 sequences[i] = sequences[i][:len(sequences[i])-startLocs[i]-sizeOfMotif]+INSERTION_N*sizeOfMotif+sequences[i][len(sequences[i])-startLocs[i]:]
 506     return PSFMs
 507
 508
 509 def GibbsRunner(iterIn):
 510     iterAll = 0
 511     BestPWM=[]
 512     BestScore=0
 513     BestLocs=[]
 514     global minSize
 515     global maxSize
 516     global thresholdPercent
 517     global sequences
 518     global sizeOfMotif
 519     global numOfMismatches
 520     global maxIterations
 521     global maxLoopsWithoutImprovement
 522     maxScore=sizeOfMotif*len(sequences)
 523     st=time()
 524
 525     while iterAll < iterIn:
 526         en=time()
 527         print "%.03f\t"%(en-st),
 528         st=time()
 529         iterAll+=1
 530         startLocs = [-1 ] * len(sequences)
 531
 532         for i in range(len(sequences)):
 533             startLocs[i] = random.randint(0,len(sequences[i])-sizeOfMotif)
 534             while "N" in sequences[i][startLocs[i]:startLocs[i]+sizeOfMotif]:
 535                 startLocs[i] = random.randint(0,len(sequences[i])-sizeOfMotif)
 536
 537         (TotalScore, PWM,dummy)= AlignmentScore(sequences, sizeOfMotif, startLocs, [i for i in range(len(sequences))], len(sequences)-1)
 538         PWMScore=[Align2PWM(sequences[i][startLocs[i]:startLocs[i]+sizeOfMotif],PWM) for i in range(len(sequences))]
 539         print "PWM is right now"
 540         print PWM
 541         print "scores for each"
 542         print PWMScore
 543         SOi = -1
 544         ConsensusScore = 0
 545         PreviousBestScore = 0
 546         PreviousBestTime = -1
 547         iterations = 0
 548         while iterations < maxIterations and (ConsensusScore > PreviousBestScore or PreviousBestTime <= maxLoopsWithoutImprovement):
 549             iterations += 1
 550             SOi = random.randint(0,len(sequences)-1)
 551             SeqMotifs = []
 552             locs=startLocs[:]
 553             for i in range(len(sequences)):
 554                 if(SOi == i):
 555                     locs[i]=-1
 556                     continue
 557                 SeqMotifs.append(sequences[i][startLocs[i]:startLocs[i]+sizeOfMotif])
 558
 559             (TotalScore, PWM,maxScores)= AlignmentScore(sequences, sizeOfMotif, locs, [i for i in range(len(sequences))], len(sequences)-1)
 560             startLocsProb = []
 561             startLocsI= []
 562             SOSeq = sequences[SOi]
 563             total = 0
 564             start = 0
 565             endloc=len(SOSeq)-sizeOfMotif
 566             while(start<=endloc):
 567                 Motif = SOSeq[start:start+sizeOfMotif]
 568                 locOfN=Motif.rfind("N")
 569                 if locOfN>=0:
 570                     start+=locOfN+1
 571                     continue
 572                 probAtPosn=0
 573                 j=0
 574                 mmNum=0
 575                 while (j<sizeOfMotif):
 576                     letterScore=PWM[j][NTDIndices[Motif[j]]]
 577                     probAtPosn+=letterScore
 578                     mmNum+=int(letterScore!=maxScores[j])
 579                     if mmNum>numOfMismatches:
 580                         probAtPosn=0
 581                         break
 582                     j+=1
 583
 584                 if probAtPosn == 0:
 585                     start+=1
 586                     continue
 587                 startLocsI.append(start)
 588                 startLocsProb.append(probAtPosn)
 589                 total += probAtPosn
 590                 start+=1
 591
 592             if len(startLocsProb) == 0:
 593                 continue
 594
 595             choice = random.random()
 596             choiceLoc = choice*total
 597             totalToHere = 0
 598             for PrefI in range(len(startLocsProb)):
 599                 if totalToHere+startLocsProb[PrefI] == 0:
 600                     continue
 601                 if choiceLoc < totalToHere+startLocsProb[PrefI]:
 602                     break
 603                 totalToHere += startLocsProb[PrefI]
 604
 605             startLocs[SOi] = startLocsI[PrefI]
 606             PWMScore[SOi] = startLocsProb[PrefI]
 607             newMotif=SOSeq[startLocs[SOi]:startLocs[SOi]+sizeOfMotif]
 608             add2PWM (newMotif, PWM)
 609
 610             NewScores=[]
 611             PercentChange = []
 612             for i in range(len(sequences)):
 613                 NewScore = 0
 614                 Motif_i=sequences[i][startLocs[i]:startLocs[i]+sizeOfMotif]
 615                 for j in xrange(sizeOfMotif):
 616                     NewScore+=PWM[j][NTDIndices[Motif_i[j]]]
 617
 618                 NewScores.append(NewScore)
 619                 PercentChange.append(math.fabs(NewScore - PWMScore[i])/PWMScore[i])
 620
 621             TotConsensusScore = sum(NewScores)
 622             AveConsensusScore = TotConsensusScore/(len(sequences))
 623             if AveConsensusScore > PreviousBestScore:
 624                 PreviousBestScore = AveConsensusScore
 625                 PreviousBestTime = 0
 626             else:
 627                 PreviousBestTime += 1
 628
 629             PWMScore=NewScores[:]
 630
 631         Consensus=sum([max(PWM[i]) for i in xrange(len(PWM))])
 632         if Consensus> BestScore:
 633             BestScore=Consensus
 634             BestPWM=PWM[:]
 635             BestLocs=startLocs[:]
 636             if BestScore==maxScore:
 637                 break
 638
 639     print "iterated %i times to find"%iterAll
 640     print BestPWM
 641
 642     return(BestPWM,BestScore,BestLocs)
 643
 644
 645 def probFromPSFM(sequence, PSFM):
 646     probability = 1
 647     for i in range(len (sequence)):
 648         probability *= PSFM[i][getNTDIndex[sequence[i]]]
 649
 650     return probability