erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / experiments / randomset.py
1 ###########################################################################
2 #                                                                         #
3 # C O P Y R I G H T   N O T I C E                                         #
4 #  Copyright (c) 2003-10 by:                                              #
5 #    * California Institute of Technology                                 #
6 #                                                                         #
7 #    All Rights Reserved.                                                 #
8 #                                                                         #
9 # Permission is hereby granted, free of charge, to any person             #
10 # obtaining a copy of this software and associated documentation files    #
11 # (the "Software"), to deal in the Software without restriction,          #
12 # including without limitation the rights to use, copy, modify, merge,    #
13 # publish, distribute, sublicense, and/or sell copies of the Software,    #
14 # and to permit persons to whom the Software is furnished to do so,       #
15 # subject to the following conditions:                                    #
16 #                                                                         #
17 # The above copyright notice and this permission notice shall be          #
18 # included in all copies or substantial portions of the Software.         #
19 #                                                                         #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
27 # SOFTWARE.                                                               #
28 ###########################################################################
29 #
30 from cistematic.core import getGenomeEntries
31 import random
32
33
def randomEntry(genome):
    """ randomEntry() - returns an (organism, entry) tuple holding one randomly
        chosen entry for the genome; the entry is an empty string when the genome
        has no entries
    """
    organism, candidates = getGenomeEntries(genome)
    # Guard clause: random.choice() raises on an empty sequence.
    if len(candidates) == 0:
        return (organism, "")

    return (organism, random.choice(candidates))
44
45  
def randomSet(genomeList, number):
    """ randomSet() - returns up to number random sequence identifiers for each of
        one or more genomes

        genomeList is an iterable of genome identifiers; each one is passed to
        getGenomeEntries(). The result is a dictionary keyed by genome whose
        values are lists of entries drawn at random without replacement. A genome
        gets fewer than number entries when it does not have that many, and an
        empty list when it has no entries or when number is zero or negative.
    """
    setResults = {}
    for genome in genomeList:
        (_org, entries) = getGenomeEntries(genome)
        # Clamp the request to [0, len(entries)]: a negative number previously
        # sliced from the end of the list and returned almost every entry.
        count = max(0, min(number, len(entries)))
        # random.sample() builds a new list, so the list handed back by
        # getGenomeEntries() is no longer reordered as a side effect.
        setResults[genome] = random.sample(entries, count)

    return setResults
59
60
def randomSetPercentage(genomeList, percentage):
    """ randomSetPercentage() - returns a random fraction of the sequence
        identifiers of one or more genomes

        percentage is a fraction between 0 and 1 inclusive; a value below 0 or
        above 1 yields an empty dictionary. Otherwise the result maps every
        genome in genomeList to a list of randomly ordered entries whose length
        is the rounded product of percentage and the genome's entry count.
        The entry list obtained from getGenomeEntries() is shuffled in place.
    """
    if percentage < 0 or percentage > 1:
        return {}

    picked = {}
    for genome in genomeList:
        _, entries = getGenomeEntries(genome)
        if len(entries) == 0:
            picked[genome] = []
            continue

        # Shuffle first, then keep the leading slice as the random subset.
        count = int(round(len(entries) * percentage))
        random.shuffle(entries)
        picked[genome] = entries[:count]

    return picked