-#
-# The Alphanum Algorithm is an improved sorting algorithm for strings
-# containing numbers. Instead of sorting numbers in ASCII order like
-# a standard sort, this algorithm sorts numbers in numeric order.
-#
-# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
-#
-#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com)
-#* Distributed under same license as original
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-#
+# from http://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort
+# modified by Diane Trout
import re
-import types
-#
-# TODO: Make decimal points be considered in the same class as digits
-#
-
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a sort key that orders embedded numbers numerically.

    A string is split into alternating text pieces and runs of ASCII
    digits.  Text pieces are lower-cased and digit runs are converted to
    ``int``, so ``'b3'`` sorts before ``'b10'``.  An integer is wrapped in
    a one-element list.

    :param s: value to build a key for; a string or an integer.
    :param _nsre: pre-compiled pattern with one capturing group around a
        run of ASCII digits.  Bound as a default so it is compiled once.
    :returns: list suitable as the result of a ``key=`` function.
    :raises ValueError: if ``s`` is neither a string nor an integer.
    """
    # type("") / type(u"") keeps the check valid on both Python 2 and 3.
    if isinstance(s, (type(""), type(u""))):
        # Splitting on a capturing group always yields
        # [text, digits, text, digits, ..., text]: the captured digit runs
        # sit at the odd indexes.  Deciding by position (instead of
        # str.isdigit) keeps characters such as u'\u00b2' -- which isdigit()
        # accepts but the pattern never isolates -- as text, so int() is
        # only ever applied to what the pattern matched.
        return [int(text) if index % 2 else text.lower()
                for index, text in enumerate(_nsre.split(s))]
    elif isinstance(s, int):
        return [s]
    else:
        raise ValueError("Unsupported type %s for input %s" % (type(s), s))
import os
from unittest import TestCase
-from htsworkflow.util.alphanum import alphanum
+from htsworkflow.util.alphanum import natural_sort_key
class testAlphanum(TestCase):
    """Exercise ``natural_sort_key`` as a ``key=`` function for ``list.sort``."""

    def test_string(self):
        """Short strings with embedded numbers sort in natural order."""
        unsorted = ['z5', 'b3', 'b10', 'a001', 'a2']
        expected = ['a001', 'a2', 'b3', 'b10', 'z5']
        scratch = list(unsorted)
        scratch.sort(key=natural_sort_key)

        # The fixture is chosen so that every position changes when sorted.
        for after, before in zip(scratch, unsorted):
            self.assertNotEqual(after, before)
        self.assertEqual(scratch, expected)

    def test_numbers(self):
        """Plain integers sort numerically."""
        unsorted = [5, 7, 10, 18, -1, 3]
        expected = [-1, 3, 5, 7, 10, 18]
        scratch = list(unsorted)
        scratch.sort(key=natural_sort_key)

        # The fixture is chosen so that every position changes when sorted.
        for after, before in zip(scratch, unsorted):
            self.assertNotEqual(after, before)
        self.assertEqual(scratch, expected)

    def test_long_names(self):
        """Longer names with several numeric fields sort in natural order."""
        unsorted = [
            "1000X Radonius Maximus", "10X Radonius", "200X Radonius",
            "20X Radonius", "20X Radonius Prime", "30X Radonius",
            "40X Radonius", "Allegia 50 Clasteron", "Allegia 500 Clasteron",
            "Allegia 51 Clasteron", "Allegia 51B Clasteron",
            "Allegia 52 Clasteron", "Allegia 60 Clasteron", "Alpha 100",
            "Alpha 2", "Alpha 200", "Alpha 2A", "Alpha 2A-8000",
            "Alpha 2A-900", "Callisto Morphamax", "Callisto Morphamax 500",
            "Callisto Morphamax 5000", "Callisto Morphamax 600",
            "Callisto Morphamax 700", "Callisto Morphamax 7000",
            "Callisto Morphamax 7000 SE", "Callisto Morphamax 7000 SE2",
            "QRS-60 Intrinsia Machine", "QRS-60F Intrinsia Machine",
            "QRS-62 Intrinsia Machine", "QRS-62F Intrinsia Machine",
            "Xiph Xlater 10000", "Xiph Xlater 2000", "Xiph Xlater 300",
            "Xiph Xlater 40", "Xiph Xlater 5", "Xiph Xlater 50",
            "Xiph Xlater 500", "Xiph Xlater 5000", "Xiph Xlater 58",
        ]
        expected = [
            '10X Radonius', '20X Radonius', '20X Radonius Prime',
            '30X Radonius', '40X Radonius', '200X Radonius',
            '1000X Radonius Maximus', 'Allegia 50 Clasteron',
            'Allegia 51 Clasteron', 'Allegia 51B Clasteron',
            'Allegia 52 Clasteron', 'Allegia 60 Clasteron',
            'Allegia 500 Clasteron', 'Alpha 2', 'Alpha 2A', 'Alpha 2A-900',
            'Alpha 2A-8000', 'Alpha 100', 'Alpha 200', 'Callisto Morphamax',
            'Callisto Morphamax 500', 'Callisto Morphamax 600',
            'Callisto Morphamax 700', 'Callisto Morphamax 5000',
            'Callisto Morphamax 7000', 'Callisto Morphamax 7000 SE',
            'Callisto Morphamax 7000 SE2', 'QRS-60 Intrinsia Machine',
            'QRS-60F Intrinsia Machine', 'QRS-62 Intrinsia Machine',
            'QRS-62F Intrinsia Machine', 'Xiph Xlater 5', 'Xiph Xlater 40',
            'Xiph Xlater 50', 'Xiph Xlater 58', 'Xiph Xlater 300',
            'Xiph Xlater 500', 'Xiph Xlater 2000', 'Xiph Xlater 5000',
            'Xiph Xlater 10000',
        ]

        s = unsorted[:]
        s.sort(key=natural_sort_key)
        self.assertEqual(s, expected)

    def test_bad_input(self):
        """Values that are neither strings nor integers raise ValueError."""
        unsorted = [object(), (1, 3j)]
        s = unsorted[:]
        self.assertRaises(ValueError, s.sort, key=natural_sort_key)
def suite():