Initial port to python3
[htsworkflow.git] / htsworkflow / frontend / reports / utils.py
1 def unique(s):
2   """Return a list of the elements in s, but without duplicates.                                                                                                                                                                                             
3   For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],                                                                                                                                                                                         
4   unique("abcabc") some permutation of ["a", "b", "c"], and                                                                                                                                                                                                  
5   unique(([1, 2], [2, 3], [1, 2])) some permutation of                                                                                                                                                                                                       
6   [[2, 3], [1, 2]].                                                                                                                                                                                                                                          
7   For best speed, all sequence elements should be hashable.  Then                                                                                                                                                                                            
8   unique() will usually work in linear time.                                                                                                                                                                                                                 
9   If not possible, the sequence elements should enjoy a total                                                                                                                                                                                                
10   ordering, and if list(s).sort() doesn't raise TypeError it's                                                                                                                                                                                               
11   assumed that they do enjoy a total ordering.  Then unique() will                                                                                                                                                                                           
12   usually work in O(N*log2(N)) time.                                                                                                                                                                                                                         
13   If that's not possible either, the sequence elements must support                                                                                                                                                                                          
14   equality-testing.  Then unique() will usually work in quadratic                                                                                                                                                                                            
15   time.                                                                                                                                                                                                                                                      
16   """
17
18   n = len(s)
19   if n == 0:
20       return []
21
22   # Try using a dict first, as that's the fastest and will usually                                                                                                                                                                                           
23   # work.  If it doesn't work, it will usually fail quickly, so it                                                                                                                                                                                           
24   # usually doesn't cost much to *try* it.  It requires that all the                                                                                                                                                                                         
25   # sequence elements be hashable, and support equality comparison.                                                                                                                                                                                          
26   u = {}
27   try:
28       for x in s:
29           u[x] = 1
30   except TypeError:
31       del u  # move on to the next method                                                                                                                                                                                                                    
32   else:
33       return list(u.keys())
34   # We can't hash all the elements.  Second fastest is to sort,                                                                                                                                                                                              
35   # which brings the equal elements together; then duplicates are                                                                                                                                                                                            
36   # easy to weed out in a single pass.                                                                                                                                                                                                                       
37   # NOTE:  Python's list.sort() was designed to be efficient in the                                                                                                                                                                                          
38   # presence of many duplicate elements.  This isn't true of all                                                                                                                                                                                             
39   # sort functions in all languages or libraries, so this approach                                                                                                                                                                                           
40   # is more effective in Python than it may be elsewhere.                                                                                                                                                                                                    
41   try:
42       t = list(s)
43       t.sort()
44   except TypeError:
45       del t  # move on to the next method                                                                                                                                                                                                                    
46   else:
47       assert n > 0
48       last = t[0]
49       lasti = i = 1
50       while i < n:
51           if t[i] != last:
52               t[lasti] = last = t[i]
53               lasti += 1
54           i += 1
55       return t[:lasti]
56   # Brute force is all that's left.                                                                                                                                                                                                                          
57   u = []
58   for x in s:
59       if x not in u:
60           u.append(x)
61   return u