12 from htsworkflow.pipelines.runfolder import ElementTree, LANE_LIST
13 from htsworkflow.util.ethelp import indent, flatten
14 from htsworkflow.util.opener import autoopen
16 LOGGER = logging.getLogger(__name__)
18 SAMPLE_NAME = 'SampleName'
23 GENOME_MAP = 'GenomeMap'
24 GENOME_ITEM = 'GenomeItem'
25 MAPPED_READS = 'MappedReads'
26 MAPPED_ITEM = 'MappedItem'
27 MATCH_CODES = 'MatchCodes'
36 class ResultLane(object):
38 Base class for result lanes
43 def __init__(self, pathnames=None, lane_id=None, end=None, xml=None):
44 self.pathnames = pathnames
45 self._sample_name = None
46 self.lane_id = lane_id
51 self.set_elements(xml)
55 Actually read the file and actually count the reads
59 def _update_name(self):
60 # extract the sample name
61 if self.pathnames is None or len(self.pathnames) == 0:
65 for pathname in self.pathnames:
66 path, name = os.path.split(pathname)
67 split_name = name.split('_')
68 sample_names.add(split_name[0])
69 if len(sample_names) > 1:
70 errmsg = "Attempting to update from more than one sample %s"
71 raise RuntimeError(errmsg % (",".join(sample_names)))
72 self._sample_name = sample_names.pop()
73 return self._sample_name
75 def _get_sample_name(self):
76 if self._sample_name is None:
78 return self._sample_name
79 sample_name = property(_get_sample_name)
82 if self._reads is None:
85 reads = property(_get_reads)
87 def get_elements(self):
90 class ElandLane(ResultLane):
92 Process an eland result file
96 MATCH_COUNTS_RE = re.compile("([\d]+):([\d]+):([\d]+)")
97 DESCRIPTOR_MISMATCH_RE = re.compile("[AGCT]")
98 DESCRIPTOR_INDEL_RE = re.compile("^[\dAGCT]$")
99 SCORE_UNRECOGNIZED = 0
103 def __init__(self, pathnames=None, lane_id=None, end=None, genome_map=None, eland_type=None, xml=None):
104 super(ElandLane, self).__init__(pathnames, lane_id, end)
106 self._mapped_reads = None
107 self._match_codes = None
108 if genome_map is None:
110 self.genome_map = genome_map
111 self.eland_type = None
114 self.set_elements(xml)
116 def _guess_eland_type(self, pathname):
117 if self.eland_type is None:
118 # attempt autodetect eland file type
119 pathn, name = os.path.split(pathname)
120 if re.search('result', name):
121 self.eland_type = ELAND_SINGLE
122 elif re.search('multi', name):
123 self.eland_type = ELAND_MULTI
124 elif re.search('extended', name):
125 self.eland_type = ELAND_EXTENDED
126 elif re.search('export', name):
127 self.eland_type = ELAND_EXPORT
129 self.eland_type = ELAND_SINGLE
133 Actually read the file and actually count the reads
135 # can't do anything if we don't have a file to process
136 if self.pathnames is None or len(self.pathnames) == 0:
138 pathname = self.pathnames[-1]
139 self._guess_eland_type(pathname)
141 if os.stat(pathname)[stat.ST_SIZE] == 0:
142 raise RuntimeError("Eland isn't done, try again later.")
144 LOGGER.debug("summarizing results for %s" % (pathname))
145 self._match_codes = MatchCodes()
146 self._mapped_reads = MappedReads()
149 for pathname in self.pathnames:
150 stream = autoopen(pathname, 'r')
151 if self.eland_type == ELAND_SINGLE:
152 result = self._update_eland_result(stream)
153 elif self.eland_type == ELAND_MULTI or \
154 self.eland_type == ELAND_EXTENDED:
155 result = self._update_eland_multi(stream)
156 elif self.eland_type == ELAND_EXPORT:
157 result = self._update_eland_export(stream)
159 errmsg = "Only support single/multi/extended eland files"
160 raise NotImplementedError(errmsg)
163 match, mapped, reads = result
164 self._match_codes += match
165 self._mapped_reads += mapped
168 def _update_eland_result(self, instream):
170 mapped_reads = MappedReads()
171 match_codes = MatchCodes()
173 for line in instream:
175 fields = line.split()
177 # match_codes[code] = match_codes.setdefault(code, 0) + 1
178 # the QC/NM etc codes are in the 3rd field and always present
179 match_codes[fields[2]] += 1
180 # ignore lines that don't have a fasta filename
183 fasta = self.genome_map.get(fields[6], fields[6])
184 mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
185 return match_codes, mapped_reads, reads
187 def _update_eland_multi(self, instream):
188 """Summarize an eland_extend."""
192 mapped_reads = MappedReads()
193 match_codes = MatchCodes()
195 for line in instream:
197 fields = line.split()
198 # fields[2] = QC/NM/or number of matches
199 score_type = self._score_mapped_mismatches(fields[MATCH_INDEX],
201 if score_type == ElandLane.SCORE_READ:
202 # when there are too many hits, eland writes a - where
203 # it would have put the list of hits.
204 # or in a different version of eland, it just leaves
205 # that column blank, and only outputs 3 fields.
206 if len(fields) < 4 or fields[LOCATION_INDEX] == '-':
209 self._count_mapped_multireads(mapped_reads, fields[LOCATION_INDEX])
211 return match_codes, mapped_reads, reads
213 def _update_eland_export(self, instream):
214 """Summarize a gerald export file."""
219 mapped_reads = MappedReads()
220 match_codes = MatchCodes()
222 for line in instream:
224 fields = line.split()
225 # fields[2] = QC/NM/or number of matches
226 score_type = self._score_mapped_mismatches(fields[MATCH_INDEX],
228 if score_type == ElandLane.SCORE_UNRECOGNIZED:
229 # export files have three states for the match field
230 # QC code, count of multi-reads, or a single
231 # read location. The score_mapped_mismatches function
232 # only understands the first two types.
233 # if we get unrecognized, that implies the field is probably
235 code = self._count_mapped_export(mapped_reads,
236 fields[LOCATION_INDEX],
237 fields[DESCRIPTOR_INDEX])
238 match_codes[code] += 1
240 return match_codes, mapped_reads, reads
243 def _score_mapped_mismatches(self, match, match_codes):
244 """Update match_codes with eland map counts, or failure code.
246 Returns True if the read mapped, false if it was an error code.
248 groups = ElandLane.MATCH_COUNTS_RE.match(match)
250 # match is not of the form [\d]+:[\d]+:[\d]+
251 if match in match_codes:
252 # match is one quality control codes QC/NM etc
253 match_codes[match] += 1
254 return ElandLane.SCORE_QC
256 return ElandLane.SCORE_UNRECOGNIZED
258 # match is of the form [\d]+:[\d]+:[\d]+
260 zero_mismatches = int(groups.group(1))
261 one_mismatches = int(groups.group(2))
262 two_mismatches = int(groups.group(3))
264 if zero_mismatches == 1:
265 match_codes['U0'] += 1
266 elif zero_mismatches < 255:
267 match_codes['R0'] += zero_mismatches
269 if one_mismatches == 1:
270 match_codes['U1'] += 1
271 elif one_mismatches < 255:
272 match_codes['R1'] += one_mismatches
274 if two_mismatches == 1:
275 match_codes['U2'] += 1
276 elif two_mismatches < 255:
277 match_codes['R2'] += two_mismatches
279 return ElandLane.SCORE_READ
282 def _count_mapped_multireads(self, mapped_reads, match_string):
284 for match in match_string.split(','):
285 match_fragment = match.split(':')
286 if len(match_fragment) == 2:
287 chromo = match_fragment[0]
288 pos = match_fragment[1]
290 fasta = self.genome_map.get(chromo, chromo)
291 assert fasta is not None
292 mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
295 def _count_mapped_export(self, mapped_reads, match_string, descriptor):
296 """Count a read as defined in an export file
298 match_string contains the chromosome
299 descriptor contains the an ecoding of bases that match, mismatch,
301 returns the "best" match code
303 Currently "best" match code is ignoring the possibility of in-dels
305 chromo = match_string
306 fasta = self.genome_map.get(chromo, chromo)
307 assert fasta is not None
308 mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
310 mismatch_bases = ElandLane.DESCRIPTOR_MISMATCH_RE.findall(descriptor)
311 if len(mismatch_bases) == 0:
313 elif len(mismatch_bases) == 1:
319 def _get_mapped_reads(self):
320 if self._mapped_reads is None:
322 return self._mapped_reads
323 mapped_reads = property(_get_mapped_reads)
325 def _get_match_codes(self):
326 if self._match_codes is None:
328 return self._match_codes
329 match_codes = property(_get_match_codes)
331 def _get_no_match(self):
332 if self._mapped_reads is None:
334 return self._match_codes['NM']
335 no_match = property(_get_no_match,
336 doc="total reads that didn't match the target genome.")
338 def _get_no_match_percent(self):
339 return float(self.no_match)/self.reads * 100
340 no_match_percent = property(_get_no_match_percent,
341 doc="no match reads as percent of total")
343 def _get_qc_failed(self):
344 if self._mapped_reads is None:
346 return self._match_codes['QC']
347 qc_failed = property(_get_qc_failed,
348 doc="total reads that didn't match the target genome.")
350 def _get_qc_failed_percent(self):
351 return float(self.qc_failed)/self.reads * 100
352 qc_failed_percent = property(_get_qc_failed_percent,
353 doc="QC failed reads as percent of total")
355 def _get_unique_reads(self):
356 if self._mapped_reads is None:
359 for code in ['U0','U1','U2']:
360 sum += self._match_codes[code]
362 unique_reads = property(_get_unique_reads,
363 doc="total unique reads")
365 def _get_repeat_reads(self):
366 if self._mapped_reads is None:
369 for code in ['R0','R1','R2']:
370 sum += self._match_codes[code]
372 repeat_reads = property(_get_repeat_reads,
373 doc="total repeat reads")
375 def get_elements(self):
376 lane = ElementTree.Element(ElandLane.LANE,
378 unicode(ElandLane.XML_VERSION)})
379 sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
380 sample_tag.text = self.sample_name
381 lane_tag = ElementTree.SubElement(lane, LANE_ID)
382 lane_tag.text = str(self.lane_id)
383 if self.end is not None:
384 end_tag = ElementTree.SubElement(lane, END)
385 end_tag.text = str(self.end)
386 genome_map = ElementTree.SubElement(lane, GENOME_MAP)
387 for k, v in self.genome_map.items():
388 item = ElementTree.SubElement(
389 genome_map, GENOME_ITEM,
390 {'name':k, 'value':unicode(v)})
391 mapped_reads = ElementTree.SubElement(lane, MAPPED_READS)
392 for k, v in self.mapped_reads.items():
393 item = ElementTree.SubElement(
394 mapped_reads, MAPPED_ITEM,
395 {'name':k, 'value':unicode(v)})
396 match_codes = ElementTree.SubElement(lane, MATCH_CODES)
397 for k, v in self.match_codes.items():
398 item = ElementTree.SubElement(
399 match_codes, MATCH_ITEM,
400 {'name':k, 'value':unicode(v)})
401 reads = ElementTree.SubElement(lane, READS)
402 reads.text = unicode(self.reads)
406 def set_elements(self, tree):
407 if tree.tag != ElandLane.LANE:
408 raise ValueError('Exptecting %s' % (ElandLane.LANE,))
411 self._mapped_reads = {}
412 self._match_codes = {}
415 tag = element.tag.lower()
416 if tag == SAMPLE_NAME.lower():
417 self._sample_name = element.text
418 elif tag == LANE_ID.lower():
419 self.lane_id = int(element.text)
420 elif tag == END.lower():
421 self.end = int(element.text)
422 elif tag == GENOME_MAP.lower():
423 for child in element:
424 name = child.attrib['name']
425 value = child.attrib['value']
426 self.genome_map[name] = value
427 elif tag == MAPPED_READS.lower():
428 for child in element:
429 name = child.attrib['name']
430 value = child.attrib['value']
431 self._mapped_reads[name] = int(value)
432 elif tag == MATCH_CODES.lower():
433 for child in element:
434 name = child.attrib['name']
435 value = int(child.attrib['value'])
436 self._match_codes[name] = value
437 elif tag == READS.lower():
438 self._reads = int(element.text)
440 LOGGER.warn("ElandLane unrecognized tag %s" % (element.tag,))
443 class MatchCodes(collections.MutableMapping):
444 """Mapping to hold match counts -
445 supports combining two match count sets together
447 def __init__(self, initializer=None):
448 self.match_codes = {'NM':0, 'QC':0, 'RM':0,
449 'U0':0, 'U1':0, 'U2':0,
450 'R0':0, 'R1':0, 'R2':0,
453 if initializer is not None:
454 if not isinstance(initializer, collections.Mapping):
455 raise ValueError("Expected dictionary like class")
456 for key in initializer:
457 if key not in self.match_codes:
458 errmsg = "Initializer can only contain: %s"
459 raise ValueError(errmsg % (",".join(self.match_codes.keys())))
460 self.match_codes[key] += initializer[key]
463 return iter(self.match_codes)
465 def __delitem__(self, key):
466 raise RuntimeError("delete not allowed")
468 def __getitem__(self, key):
469 return self.match_codes[key]
471 def __setitem__(self, key, value):
472 if key not in self.match_codes:
473 errmsg = "Unrecognized key, allowed values are: %s"
474 raise ValueError(errmsg % (",".join(self.match_codes.keys())))
475 self.match_codes[key] = value
478 return len(self.match_codes)
480 def __add__(self, other):
481 if not isinstance(other, MatchCodes):
482 raise ValueError("Expected a MatchCodes, got %s", str(type(other)))
484 newobj = MatchCodes(self)
485 for key, value in other.items():
486 newobj[key] = self.get(key, 0) + other[key]
491 class MappedReads(collections.MutableMapping):
492 """Mapping to hold mapped reads -
493 supports combining two mapped read sets together
495 def __init__(self, initializer=None):
496 self.mapped_reads = {}
498 if initializer is not None:
499 if not isinstance(initializer, collections.Mapping):
500 raise ValueError("Expected dictionary like class")
501 for key in initializer:
502 self[key] = self.setdefault(key, 0) + initializer[key]
505 return iter(self.mapped_reads)
507 def __delitem__(self, key):
508 del self.mapped_reads[key]
510 def __getitem__(self, key):
511 return self.mapped_reads[key]
513 def __setitem__(self, key, value):
514 self.mapped_reads[key] = value
517 return len(self.mapped_reads)
519 def __add__(self, other):
520 if not isinstance(other, MappedReads):
521 raise ValueError("Expected a MappedReads, got %s", str(type(other)))
523 newobj = MappedReads(self)
525 newobj[key] = self.get(key, 0) + other[key]
529 class SequenceLane(ResultLane):
531 LANE = 'SequenceLane'
532 SEQUENCE_TYPE = 'SequenceType'
537 SEQUENCE_DESCRIPTION = { NONE_TYPE: 'None', SCARF_TYPE: 'SCARF', FASTQ_TYPE: 'FASTQ' }
539 def __init__(self, pathname=None, lane_id=None, end=None, xml=None):
540 self.sequence_type = None
541 super(SequenceLane, self).__init__(pathname, lane_id, end, xml)
543 def _guess_sequence_type(self, pathname):
545 Determine if we have a scarf or fastq sequence file
547 f = open(pathname,'r')
552 # fastq starts with a @
553 self.sequence_type = SequenceLane.FASTQ_TYPE
555 self.sequence_type = SequenceLane.SCARF_TYPE
556 return self.sequence_type
560 Actually read the file and actually count the reads
562 # can't do anything if we don't have a file to process
563 if self.pathnames is None:
566 pathname = self.pathnames[-1]
567 if os.stat(pathname)[stat.ST_SIZE] == 0:
568 raise RuntimeError("Sequencing isn't done, try again later.")
570 self._guess_sequence_type(pathname)
572 LOGGER.info("summarizing results for %s" % (pathname))
575 for l in f.xreadlines():
579 if self.sequence_type == SequenceLane.SCARF_TYPE:
581 elif self.sequence_type == SequenceLane.FASTQ_TYPE:
582 self._reads = lines / 4
584 errmsg = "This only supports scarf or fastq squence files"
585 raise NotImplementedError(errmsg)
587 def get_elements(self):
588 lane = ElementTree.Element(SequenceLane.LANE,
590 unicode(SequenceLane.XML_VERSION)})
591 sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
592 sample_tag.text = self.sample_name
593 lane_tag = ElementTree.SubElement(lane, LANE_ID)
594 lane_tag.text = str(self.lane_id)
595 if self.end is not None:
596 end_tag = ElementTree.SubElement(lane, END)
597 end_tag.text = str(self.end)
598 reads = ElementTree.SubElement(lane, READS)
599 reads.text = unicode(self.reads)
600 sequence_type = ElementTree.SubElement(lane, SequenceLane.SEQUENCE_TYPE)
601 sequence_type.text = unicode(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type])
605 def set_elements(self, tree):
606 if tree.tag != SequenceLane.LANE:
607 raise ValueError('Exptecting %s' % (SequenceLane.LANE,))
608 lookup_sequence_type = dict([ (v,k) for k,v in SequenceLane.SEQUENCE_DESCRIPTION.items()])
611 tag = element.tag.lower()
612 if tag == SAMPLE_NAME.lower():
613 self._sample_name = element.text
614 elif tag == LANE_ID.lower():
615 self.lane_id = int(element.text)
616 elif tag == END.lower():
617 self.end = int(element.text)
618 elif tag == READS.lower():
619 self._reads = int(element.text)
620 elif tag == SequenceLane.SEQUENCE_TYPE.lower():
621 self.sequence_type = lookup_sequence_type.get(element.text, None)
623 LOGGER.warn("SequenceLane unrecognized tag %s" % (element.tag,))
627 Summarize information from eland files
631 ELAND = 'ElandCollection'
636 def __init__(self, xml=None):
637 # we need information from the gerald config.xml
638 self.results = [{},{}]
641 self.set_elements(xml)
643 if len(self.results[0]) == 0:
644 # Initialize our eland object with meaningless junk
646 self.results[0][l] = ResultLane(lane_id=l, end=0)
649 def get_elements(self):
650 root = ElementTree.Element(ELAND.ELAND,
651 {'version': unicode(ELAND.XML_VERSION)})
652 for end in range(len(self.results)):
653 end_results = self.results[end]
654 for lane_id, lane in end_results.items():
655 eland_lane = lane.get_elements()
656 if eland_lane is not None:
657 eland_lane.attrib[ELAND.END] = unicode (end)
658 eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
659 root.append(eland_lane)
662 def set_elements(self, tree):
663 if tree.tag.lower() != ELAND.ELAND.lower():
664 raise ValueError('Expecting %s', ELAND.ELAND)
665 for element in list(tree):
666 lane_id = int(element.attrib[ELAND.LANE_ID])
667 end = int(element.attrib.get(ELAND.END, 0))
668 if element.tag.lower() == ElandLane.LANE.lower():
669 lane = ElandLane(xml=element)
670 elif element.tag.lower() == SequenceLane.LANE.lower():
671 lane = SequenceLane(xml=element)
673 self.results[end][lane_id] = lane
675 def check_for_eland_file(basedir, pattern, lane_id, end):
677 eland_pattern = pattern % (lane_id, end)
678 eland_re = re.compile(eland_pattern)
679 #LOGGER.debug("Eland pattern: %s" %(eland_pattern,))
680 for filename in os.listdir(basedir):
681 if eland_re.match(filename):
682 LOGGER.info('found eland file %s' % (filename,))
683 eland_files.append(os.path.join(basedir, filename))
687 def update_result_with_eland(gerald, results, lane_id, end, pathnames, genome_maps):
688 # yes the lane_id is also being computed in ElandLane._update
689 # I didn't want to clutter up my constructor
690 # but I needed to persist the sample_name/lane_id for
691 # runfolder summary_report
692 names = [ os.path.split(p)[1] for p in pathnames]
693 LOGGER.info("Adding eland files %s" %(",".join(names),))
696 if genome_maps is not None:
697 genome_map = genome_maps[lane_id]
698 elif gerald is not None:
699 genome_dir = gerald.lanes[lane_id].eland_genome
700 if genome_dir is not None:
701 genome_map = build_genome_fasta_map(genome_dir)
703 lane = ElandLane(pathnames, lane_id, end, genome_map)
708 effective_end = end - 1
710 results[effective_end][lane_id] = lane
712 def update_result_with_sequence(gerald, results, lane_id, end, pathname):
713 result = SequenceLane(pathname, lane_id, end)
718 effective_end = end - 1
720 results[effective_end][lane_id] = result
723 def eland(gerald_dir, gerald=None, genome_maps=None):
726 lane_ids = range(1,9)
729 basedirs = [gerald_dir]
731 # if there is a basedir/Temp change basedir to point to the temp
732 # directory, as 1.1rc1 moves most of the files we've historically
733 # cared about to that subdirectory.
734 # we should look into what the official 'result' files are.
735 # and 1.3 moves them back
736 basedir_temp = os.path.join(gerald_dir, 'Temp')
737 if os.path.isdir(basedir_temp):
738 basedirs.append(basedir_temp)
740 # So how about scanning for Project*/Sample* directories as well
741 sample_pattern = os.path.join(gerald_dir, 'Project_*', 'Sample_*')
742 basedirs.extend(glob(sample_pattern))
744 # the order in patterns determines the preference for what
749 ('(?P<sampleId>[^_]+)_(?P<index>(NoIndex|[AGCT])+)_L00%s(_R%s)_(?P<part>[\d]+)_export.txt(?P<ext>(\.bz2|\.gz)?)', MAPPED_ELAND),
750 ('s_(?P<lane>%s)(_(?P<end>%s))?_eland_result.txt(?P<ext>(\.bz2|\.gz)?)',
752 ('s_(?P<lane>%s)(_(?P<end>%s))?_eland_extended.txt(?P<ext>(\.bz2|\.gz)?)',
754 ('s_(?P<lane>%s)(_(?P<end>%s))?_eland_multi.txt(?P<ext>(\.bz2|\.gz)?)',
756 ('s_(?P<lane>%s)(_(?P<end>%s))?_export.txt(?P<ext>(\.bz2|\.gz)?)',
758 ('s_(?P<lane>%s)(_(?P<end>%s))?_sequence.txt(?P<ext>(\.bz2|\.gz)?)',
761 #('s_%s_eland_result.txt', MAPPED_ELAND),
762 #('s_%s_eland_result.txt.bz2', MAPPED_ELAND),
763 #('s_%s_eland_result.txt.gz', MAPPED_ELAND),
764 #('s_%s_eland_extended.txt', MAPPED_ELAND),
765 #('s_%s_eland_extended.txt.bz2', MAPPED_ELAND),
766 #('s_%s_eland_extended.txt.gz', MAPPED_ELAND),
767 #('s_%s_eland_multi.txt', MAPPED_ELAND),
768 #('s_%s_eland_multi.txt.bz2', MAPPED_ELAND),
769 #('s_%s_eland_multi.txt.gz', MAPPED_ELAND),
770 #('s_%s_export.txt', MAPPED_ELAND),
771 #('s_%s_export.txt.bz2', MAPPED_ELAND),
772 #('s_%s_export.txt.gz', MAPPED_ELAND),
773 #('s_%s_sequence.txt', SEQUENCE),
776 for basedir in basedirs:
778 for lane_id in lane_ids:
780 pathnames = check_for_eland_file(basedir, p[0], lane_id, end)
781 if len(pathnames) > 0:
782 if p[1] == MAPPED_ELAND:
783 update_result_with_eland(gerald, e.results, lane_id, end, pathnames, genome_maps)
784 elif p[1] == SEQUENCE:
785 update_result_with_sequence(gerald, e.results, lane_id, end, pathnames)
788 LOGGER.debug("No eland file found in %s for lane %s and end %s" %(basedir, lane_id, end))
793 def build_genome_fasta_map(genome_dir):
794 # build fasta to fasta file map
795 LOGGER.info("Building genome map")
796 genome = genome_dir.split(os.path.sep)[-1]
798 for vld_file in glob(os.path.join(genome_dir, '*.vld')):
800 if os.path.islink(vld_file):
802 vld_file = os.path.realpath(vld_file)
803 path, vld_name = os.path.split(vld_file)
804 name, ext = os.path.splitext(vld_name)
806 fasta_map[name] = name
808 fasta_map[name] = os.path.join(genome, name)
812 def extract_eland_sequence(instream, outstream, start, end):
814 Extract a chunk of sequence out of an eland file
816 for line in instream:
817 record = line.split()
819 result = [record[0], record[1][start:end]]
821 result = [record[0][start:end]]
822 outstream.write("\t".join(result))
823 outstream.write(os.linesep)
826 def main(cmdline=None):
827 """Run eland extraction against the specified gerald directory"""
828 from optparse import OptionParser
829 parser = OptionParser("%prog: <gerald dir>+")
830 opts, args = parser.parse_args(cmdline)
831 logging.basicConfig(level=logging.DEBUG)
833 LOGGER.info("Starting scan of %s" % (a,))
835 print e.get_elements()
840 if __name__ == "__main__":