X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fexperiments%2Frandomset.py;fp=cistematic%2Fexperiments%2Frandomset.py;h=a71e551b2ab3f2b7fe45f1dedc8e8a6051f56006;hp=0000000000000000000000000000000000000000;hb=bc30aca13e5ec397c92e67002fbf7a103130b828;hpb=0d3e3112fd04c2e6b44a25cacef1d591658ad181 diff --git a/cistematic/experiments/randomset.py b/cistematic/experiments/randomset.py new file mode 100644 index 0000000..a71e551 --- /dev/null +++ b/cistematic/experiments/randomset.py @@ -0,0 +1,78 @@ +########################################################################### +# # +# C O P Y R I G H T N O T I C E # +# Copyright (c) 2003-10 by: # +# * California Institute of Technology # +# # +# All Rights Reserved. # +# # +# Permission is hereby granted, free of charge, to any person # +# obtaining a copy of this software and associated documentation files # +# (the "Software"), to deal in the Software without restriction, # +# including without limitation the rights to use, copy, modify, merge, # +# publish, distribute, sublicense, and/or sell copies of the Software, # +# and to permit persons to whom the Software is furnished to do so, # +# subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be # +# included in all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
#
###########################################################################
#
from cistematic.core import getGenomeEntries
import random


def _randomSubset(entries, count):
    """ _randomSubset() - return up to count entries in random order, without repeats

    A private copy is shuffled so that the sequence handed back by
    getGenomeEntries() keeps its original order (it may be shared by the
    caller).  A negative count is treated as zero instead of being used as
    a from-the-end slice bound.
    """
    if not entries:
        return []

    shuffled = list(entries)
    random.shuffle(shuffled)

    return shuffled[:max(count, 0)]


def randomEntry(genome):
    """ randomEntry() - pick one random entry from a genome

    Returns an (org, entry) tuple: org is the first element returned by
    getGenomeEntries() for the genome, and entry is one randomly chosen
    element of its entry list, or "" when that list is empty.
    """
    (org, entries) = getGenomeEntries(genome)
    if entries:
        entry = random.choice(entries)
    else:
        entry = ""

    return (org, entry)


def randomSet(genomeList, number):
    """ randomSet() - returns a number of random sequence identifiers for one or more genomes

    Returns a dictionary keyed by genome.  Each value is a list of at most
    number distinct entries of that genome in random order; a genome with
    fewer entries than number contributes all of them, and a genome with
    no entries contributes an empty list.
    """
    setResults = {}
    for genome in genomeList:
        (_, entries) = getGenomeEntries(genome)
        setResults[genome] = _randomSubset(entries, number)

    return setResults


def randomSetPercentage(genomeList, percentage):
    """ randomSetPercentage() - returns a percentage of random sequence identifiers
        for one or more genomes

    percentage is a fraction between 0 and 1 inclusive.  Returns a
    dictionary keyed by genome whose values are lists holding
    round(len(entries) * percentage) randomly chosen entries.  A percentage
    outside [0, 1] yields an empty dictionary without consulting any genome.
    """
    setResults = {}
    if percentage < 0 or percentage > 1:
        # out-of-range requests are answered with an empty result, not an error
        return setResults

    for genome in genomeList:
        (_, entries) = getGenomeEntries(genome)
        if entries:
            number = int(round(len(entries) * percentage))
        else:
            number = 0

        setResults[genome] = _randomSubset(entries, number)

    return setResults