X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fcisstat%2Fscore.py;fp=cistematic%2Fcisstat%2Fscore.py;h=36a747b9ceaa41c5b661e8b00accac4b26c248a4;hp=0000000000000000000000000000000000000000;hb=bc30aca13e5ec397c92e67002fbf7a103130b828;hpb=0d3e3112fd04c2e6b44a25cacef1d591658ad181 diff --git a/cistematic/cisstat/score.py b/cistematic/cisstat/score.py new file mode 100644 index 0000000..36a747b --- /dev/null +++ b/cistematic/cisstat/score.py @@ -0,0 +1,109 @@ +########################################################################### +# # +# C O P Y R I G H T N O T I C E # +# Copyright (c) 2003-10 by: # +# * California Institute of Technology # +# # +# All Rights Reserved. # +# # +# Permission is hereby granted, free of charge, to any person # +# obtaining a copy of this software and associated documentation files # +# (the "Software"), to deal in the Software without restriction, # +# including without limitation the rights to use, copy, modify, merge, # +# publish, distribute, sublicense, and/or sell copies of the Software, # +# and to permit persons to whom the Software is furnished to do so, # +# subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be # +# included in all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +########################################################################### +# +# Score formulas +# +from math import log, sqrt, exp + +try: + import _stat + hasStatExtension = True +except: + hasStatExtension = False + + +def llScore(observed, expected): + """ returns log likelihood for one pair of observed vs. expected. + """ + score = float(observed) * log(float(observed) / float(expected)) + return score + + +def zScore(observed, expected, prob): + """ returns (observed - expected)/(standard deviation), where the standard deviation + is calculated assuming normality as sqrt(obs * prob * (1 - prob)). + """ + score = (float(observed) - float(expected)) / sqrt(float(observed) * prob * (1 - prob)) + return score + + +def expected(length, prob): + return length * prob + + +def pvalue(zvalue): + """ approximation for converting from zvalue to P-value using Hamaker's + formula as described in Zar (App18). This is dependable down to + abs(z) ~ 0.2 + """ + z = abs(zvalue) + c = 0.806 * z * (1 - 0.018 * z) + pval = (1 - sqrt(1 - exp(-1 * (c * c))))/2 + + if zvalue < 0: + pval = 1 - pval + + return pval + + +def pearsonCorrelation(colA, colB): + if hasStatExtension: + return _stat.pearsonCorrelation(colA, colB) + else: + return localPearsonCorrelation(colA, colB) + + +def localPearsonCorrelation(colA, colB): + meanA = 0.0 + meanB = 0.0 + + length = len(colA) + if length > len(colB): + length = len(colB) + + for index in range(length): + meanA += colA[index] + meanB += colB[index] + + meanA /= length + meanB /= length + + numerator = 0.0 + denominatorA = 0.0 + denominatorB = 0.0 + + for index in range(length): + numerator += (colA[index] - meanA) * (colB[index] - meanB) + denominatorA += (colA[index] - meanA) * (colA[index] - meanA) + denominatorB += (colB[index] - meanB) * (colB[index] - meanB) + + if denominatorA == 0.0 or denominatorB == 0.0: + return 0.0 + + return (numerator / sqrt(denominatorA * denominatorB)) \ No newline at end of file