From e5101a9d6759a988099e9fa25e846c035e555ebd Mon Sep 17 00:00:00 2001
From: Diane Trout <diane@caltech.edu>
Date: Mon, 22 Dec 2008 20:44:15 +0000
Subject: [PATCH] Fix the multi-eland parser to strip the filename extension
 rather than blindly dropping the last 3 characters of the filename.

---
 htsworkflow/util/makebed.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/htsworkflow/util/makebed.py b/htsworkflow/util/makebed.py
index 4f2b17f..eeb4e88 100755
--- a/htsworkflow/util/makebed.py
+++ b/htsworkflow/util/makebed.py
@@ -30,18 +30,18 @@ def make_bed_from_eland_stream(instream, outstream, name, description, chromosom
   SENSE = 8
 
   write_bed_header(outstream, name, description)
+  prefix_len = len(chromosome_prefix)
 
   for line in instream:
     fields = line.split()
     # we need more than the CHR field, and it needs to match a chromosome
-    if len(fields) <= CHR or \
-          (chromosome_prefix is not None and \
-             fields[CHR][:3] != chromosome_prefix):
+    if len(fields) <= CHR or fields[CHR][:prefix_len] != chromosome_prefix:
       continue
     start = fields[START]
     stop = int(start) + len(fields[SEQ])
-    chromosome, extension = fields[CHR].split('.')
-    assert extension == "fa"
+    # strip off filename extension
+    chromosome = fields[CHR].split('.')[0]
+
     outstream.write('%s %s %d read 0 %s - - %s%s' % (
       chromosome,
       start,
@@ -85,7 +85,9 @@ def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
 
       for token in split_re.finditer(compressed_reads):
         if token.group('chr') is not None:
-          cur_chr =  token.group('chr')[:-3] # strip off .fa
+          cur_chr =  token.group('chr')
+	  # strip off extension if present
+	  cur_chr = os.path.splitext(cur_chr)[0] 
         elif token.group('fullloc') is not None:
           matches = int(token.group('count'))
           # only emit a bed line if 
-- 
2.30.2