erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / experiments / randomset.py
diff --git a/cistematic/experiments/randomset.py b/cistematic/experiments/randomset.py
new file mode 100644 (file)
index 0000000..a71e551
--- /dev/null
@@ -0,0 +1,78 @@
+###########################################################################
+#                                                                         #
+# C O P Y R I G H T   N O T I C E                                         #
+#  Copyright (c) 2003-10 by:                                              #
+#    * California Institute of Technology                                 #
+#                                                                         #
+#    All Rights Reserved.                                                 #
+#                                                                         #
+# Permission is hereby granted, free of charge, to any person             #
+# obtaining a copy of this software and associated documentation files    #
+# (the "Software"), to deal in the Software without restriction,          #
+# including without limitation the rights to use, copy, modify, merge,    #
+# publish, distribute, sublicense, and/or sell copies of the Software,    #
+# and to permit persons to whom the Software is furnished to do so,       #
+# subject to the following conditions:                                    #
+#                                                                         #
+# The above copyright notice and this permission notice shall be          #
+# included in all copies or substantial portions of the Software.         #
+#                                                                         #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
+# SOFTWARE.                                                               #
+###########################################################################
+#
+from cistematic.core import getGenomeEntries
+import random
+
+
+def randomEntry(genome):
+    """ randomEntry() - pick a single entry at random from a genome
+
+        Returns an (org, entry) tuple: org is the first item returned by
+        getGenomeEntries(genome) and entry is one randomly chosen element
+        of the second item, or the empty string when there are no entries.
+    """
+    org, candidates = getGenomeEntries(genome)
+    if len(candidates) == 0:
+        # nothing to choose from; hand back the empty-string placeholder
+        return (org, "")
+
+    return (org, random.choice(candidates))
+
+def randomSet(genomeList, number):
+    """ randomSet() - returns a number of random sequence identifiers for one or more genomes
+
+        genomeList is an iterable of genomes; each one is passed to
+        getGenomeEntries() and used as a key of the result.
+        number is the maximum count of entries picked per genome.  A genome
+        with fewer entries contributes all of them (in random order) and a
+        negative number selects nothing.
+
+        Returns a dictionary mapping each genome to a list of its randomly
+        chosen entries; genomes without entries map to an empty list.
+    """
+    # A negative count would make the slice below trim entries off the end
+    # instead of selecting none, so clamp it to zero.  None is left alone:
+    # slicing with None keeps every entry, exactly as before.
+    if number is not None and number < 0:
+        number = 0
+
+    setResults = {}
+    for genome in genomeList:
+        (_org, entries) = getGenomeEntries(genome)
+        if len(entries) > 0:
+            # Shuffle a copy so the list handed back by getGenomeEntries()
+            # keeps its order for anyone else holding a reference to it.
+            shuffled = list(entries)
+            random.shuffle(shuffled)
+            setResults[genome] = shuffled[:number]
+        else:
+            setResults[genome] = []
+
+    return setResults
+
+
+def randomSetPercentage(genomeList, percentage):
+    """ randomSetPercentage() - returns a percentage of random sequence identifiers
+        for one or more genomes
+
+        genomeList is an iterable of genomes; each one is passed to
+        getGenomeEntries() and used as a key of the result.
+        percentage is a fraction between 0 and 1 inclusive.  The count picked
+        for a genome is round(len(entries) * percentage).  Any value outside
+        that range, including NaN, yields an empty dictionary.
+
+        Returns a dictionary mapping each genome to a list of its randomly
+        chosen entries; genomes without entries map to an empty list.
+    """
+    setResults = {}
+    # Written as a positive range test so that NaN, which compares false
+    # against everything, is rejected here rather than blowing up later
+    # inside int(round(...)).
+    if not (0 <= percentage <= 1):
+        return setResults
+
+    for genome in genomeList:
+        (_org, entries) = getGenomeEntries(genome)
+        if len(entries) > 0:
+            number = int(round(len(entries) * percentage))
+            # Shuffle a copy so the list handed back by getGenomeEntries()
+            # keeps its order for anyone else holding a reference to it.
+            shuffled = list(entries)
+            random.shuffle(shuffled)
+            setResults[genome] = shuffled[:number]
+        else:
+            setResults[genome] = []
+
+    return setResults
\ No newline at end of file