erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / cisstat / fisher.py
diff --git a/cistematic/cisstat/fisher.py b/cistematic/cisstat/fisher.py
new file mode 100644 (file)
index 0000000..9708c73
--- /dev/null
@@ -0,0 +1,96 @@
+###########################################################################
+#                                                                         #
+# C O P Y R I G H T   N O T I C E                                         #
+#  Copyright (c) 2003-10 by:                                              #
+#    * California Institute of Technology                                 #
+#                                                                         #
+#    All Rights Reserved.                                                 #
+#                                                                         #
+# Permission is hereby granted, free of charge, to any person             #
+# obtaining a copy of this software and associated documentation files    #
+# (the "Software"), to deal in the Software without restriction,          #
+# including without limitation the rights to use, copy, modify, merge,    #
+# publish, distribute, sublicense, and/or sell copies of the Software,    #
+# and to permit persons to whom the Software is furnished to do so,       #
+# subject to the following conditions:                                    #
+#                                                                         #
+# The above copyright notice and this permission notice shall be          #
+# included in all copies or substantial portions of the Software.         #
+#                                                                         #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
+# SOFTWARE.                                                               #
+###########################################################################
+#
+#
+# Fisher's exact test, as described in Gopal Kanji's 100 Statistical Tests
+# Table has form:
+#                               A   |    B      | A + B
+#                           -----------------------------
+#                               C   |    D      | C + D
+#                           -----------------------------
+#                           A + C | B + D   | A + B + C + D
+#
+
+from math import exp
+from helper import factln
+
+def fisher(a, b, c, d):
+    lowest = 0
+    if a < b and a < c and a < d:
+        lowest = "a"
+        lowestNum = a
+    elif b < c and b < d:
+        lowest = "b"
+        lowestNum = b
+    elif c < d:
+        lowest = "c"
+        lowestNum = c
+    else:
+        lowest = "d"
+        lowestNum = d
+
+    origAB = a + b
+    origCD = c + d
+    origAC = a + c
+    origBD = b + d
+    N = a + b + c + d
+
+    first = factln(origAB) + factln(origCD) + factln(origAC) + factln(origBD) - factln(N)
+
+    prob = 0.0
+    for x in range(lowestNum + 1):
+        if lowest == "a":
+            newA = x
+            newC = origAC - newA
+            newB = origAB - newA
+            newD = origCD - newC
+        elif lowest == "b":
+            newB = x
+            newA = origAB - newB
+            newD = origBD - newB
+            newC = origAC - newA
+        elif lowest == "c":
+            newC = x
+            newA = origAC - newC
+            newD = origCD - newC
+            newB = origAB - newA
+        else:
+            newD = x
+            newB = origBD - newD
+            newC = origCD - newD
+            newA = origAB - newB
+
+        currentProb =  exp(first - ((factln(newA) + factln(newB) + factln(newC) + factln(newD))))
+        #print 'did %f %f %f %f: %f' % (newA, newB, newC, newD, currentProb)
+        prob += currentProb
+
+    if prob > 1.0:
+        prob = 1.0
+
+    return prob
\ No newline at end of file