cistematic/cisstat/score.py

   1 ###########################################################################
   2 #                                                                         #
   3 # C O P Y R I G H T   N O T I C E                                         #
   4 #  Copyright (c) 2003-10 by:                                              #
   5 #    * California Institute of Technology                                 #
   6 #                                                                         #
   7 #    All Rights Reserved.                                                 #
   8 #                                                                         #
   9 # Permission is hereby granted, free of charge, to any person             #
  10 # obtaining a copy of this software and associated documentation files    #
  11 # (the "Software"), to deal in the Software without restriction,          #
  12 # including without limitation the rights to use, copy, modify, merge,    #
  13 # publish, distribute, sublicense, and/or sell copies of the Software,    #
  14 # and to permit persons to whom the Software is furnished to do so,       #
  15 # subject to the following conditions:                                    #
  16 #                                                                         #
  17 # The above copyright notice and this permission notice shall be          #
  18 # included in all copies or substantial portions of the Software.         #
  19 #                                                                         #
  20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
  21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
  22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
  23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
  24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
  25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
  26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
  27 # SOFTWARE.                                                               #
  28 ###########################################################################
  29 #
  30 # Score formulas
  31 #
  32 from math import log, sqrt, exp
  33
  34 try:
  35     import _stat
  36     hasStatExtension = True
  37 except:
  38     hasStatExtension = False
  39
  40
  41 def llScore(observed, expected):
  42     """ returns log likelihood for one pair of observed vs. expected.
  43     """
  44     score = float(observed) * log(float(observed) / float(expected))
  45     return score
  46
  47
  48 def zScore(observed, expected, prob):
  49     """ returns (observed - expected)/(standard deviation), where the standard deviation
  50         is calculated assuming normality as sqrt(obs * prob * (1 - prob)).
  51     """
  52     score = (float(observed) - float(expected)) / sqrt(float(observed) * prob * (1 - prob))
  53     return score
  54
  55
  56 def expected(length, prob):
  57     return length * prob
  58
  59
  60 def pvalue(zvalue):
  61     """ approximation for converting from zvalue to P-value using Hamaker's
  62         formula as described in Zar (App18). This is dependable down to
  63         abs(z) ~ 0.2
  64     """
  65     z = abs(zvalue)
  66     c = 0.806 * z * (1 - 0.018 * z)
  67     pval = (1 - sqrt(1 - exp(-1 * (c * c))))/2
  68
  69     if zvalue < 0:
  70         pval = 1 - pval
  71
  72     return pval
  73
  74
  75 def pearsonCorrelation(colA, colB):
  76     if hasStatExtension:
  77         return _stat.pearsonCorrelation(colA, colB)
  78     else:
  79         return localPearsonCorrelation(colA, colB)
  80
  81
  82 def localPearsonCorrelation(colA, colB):
  83     meanA = 0.0
  84     meanB = 0.0
  85
  86     length = len(colA)
  87     if length > len(colB):
  88         length = len(colB)
  89
  90     for index in range(length):
  91         meanA += colA[index]
  92         meanB += colB[index]
  93
  94     meanA /= length
  95     meanB /= length
  96
  97     numerator = 0.0
  98     denominatorA = 0.0
  99     denominatorB = 0.0
 100
 101     for index in range(length):
 102         numerator += (colA[index] - meanA) * (colB[index] - meanB)
 103         denominatorA += (colA[index] - meanA) * (colA[index] - meanA)
 104         denominatorB += (colB[index] - meanB) * (colB[index] - meanB)
 105
 106     if denominatorA == 0.0 or denominatorB == 0.0:
 107         return 0.0
 108
 109     return (numerator / sqrt(denominatorA * denominatorB))