import sys
import types
-from htsworkflow.pipelines.runfolder import ElementTree, LANE_LIST
+from htsworkflow.pipelines import ElementTree, LANE_LIST
from htsworkflow.pipelines.samplekey import SampleKey
+from htsworkflow.pipelines.genomemap import GenomeMap
from htsworkflow.util.ethelp import indent, flatten
from htsworkflow.util.opener import autoopen
self._mapped_reads = None
self._match_codes = None
self._reads = None
- if genome_map is None:
- genome_map = {}
- self.genome_map = genome_map
+ self.genome_map = GenomeMap(genome_map)
self.eland_type = None
if xml is not None:
def get_elements(self):
lane = ElementTree.Element(ElandLane.LANE,
{'version':
- unicode(ElandLane.XML_VERSION)})
+ str(ElandLane.XML_VERSION)})
sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
sample_tag.text = self.sample_name
lane_tag = ElementTree.SubElement(lane, LANE_ID)
end_tag = ElementTree.SubElement(lane, END)
end_tag.text = str(self.end)
genome_map = ElementTree.SubElement(lane, GENOME_MAP)
for k, v in self.genome_map.items():
item = ElementTree.SubElement(
genome_map, GENOME_ITEM,
- {'name':k, 'value':unicode(v)})
+ {'name':k, 'value':str(v)})
mapped_reads = ElementTree.SubElement(lane, MAPPED_READS)
for k, v in self.mapped_reads.items():
item = ElementTree.SubElement(
mapped_reads, MAPPED_ITEM,
- {'name':k, 'value':unicode(v)})
+ {'name':k, 'value':str(v)})
match_codes = ElementTree.SubElement(lane, MATCH_CODES)
for k, v in self.match_codes.items():
item = ElementTree.SubElement(
match_codes, MATCH_ITEM,
- {'name':k, 'value':unicode(v)})
+ {'name':k, 'value':str(v)})
reads = ElementTree.SubElement(lane, READS)
- reads.text = unicode(self.reads)
+ reads.text = str(self.reads)
return lane
for key in initializer:
if key not in self.match_codes:
errmsg = "Initializer can only contain: %s"
- raise ValueError(errmsg % (",".join(self.match_codes.keys())))
+ raise ValueError(errmsg % (",".join(list(self.match_codes.keys()))))
self.match_codes[key] += initializer[key]
def __iter__(self):
def __setitem__(self, key, value):
if key not in self.match_codes:
errmsg = "Unrecognized key, allowed values are: %s"
raise ValueError(errmsg % (",".join(self.match_codes.keys())))
self.match_codes[key] = value
def __len__(self):
raise ValueError("Expected a MatchCodes, got %s", str(type(other)))
newobj = MatchCodes(self)
- for key, value in other.items():
+ for key, value in list(other.items()):
newobj[key] = self.get(key, 0) + other[key]
return newobj
LOGGER.info("summarizing results for %s" % (pathname))
lines = 0
f = open(pathname)
- for l in f.xreadlines():
+ for l in f:
lines += 1
f.close()
def get_elements(self):
lane = ElementTree.Element(SequenceLane.LANE,
{'version':
- unicode(SequenceLane.XML_VERSION)})
+ str(SequenceLane.XML_VERSION)})
sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
sample_tag.text = self.sample_name
lane_tag = ElementTree.SubElement(lane, LANE_ID)
end_tag = ElementTree.SubElement(lane, END)
end_tag.text = str(self.end)
reads = ElementTree.SubElement(lane, READS)
- reads.text = unicode(self.reads)
+ reads.text = str(self.reads)
sequence_type = ElementTree.SubElement(lane, SequenceLane.SEQUENCE_TYPE)
- sequence_type.text = unicode(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type])
+ sequence_type.text = str(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type])
return lane
def set_elements(self, tree):
if tree.tag != SequenceLane.LANE:
raise ValueError('Exptecting %s' % (SequenceLane.LANE,))
- lookup_sequence_type = dict([ (v,k) for k,v in SequenceLane.SEQUENCE_DESCRIPTION.items()])
+ lookup_sequence_type = dict([ (v,k) for k,v in list(SequenceLane.SEQUENCE_DESCRIPTION.items())])
for element in tree:
tag = element.tag.lower()
del self.result[key]
def __iter__(self):
- keys = self.results.iterkeys()
+ keys = self.results.keys()
for k in sorted(keys):
yield k
raise ValueError("Key must be a %s" % (str(type(SampleKey))))
if not search.iswild:
yield self[search]
- for key in self.keys():
+ for key in list(self.keys()):
if key.matches(search): yield key
def get_elements(self):
root = ElementTree.Element(ELAND.ELAND,
- {'version': unicode(ELAND.XML_VERSION)})
+ {'version': str(ELAND.XML_VERSION)})
for key in self:
eland_lane = self[key].get_elements()
- eland_lane.attrib[ELAND.END] = unicode(self[key].end-1)
- eland_lane.attrib[ELAND.LANE_ID] = unicode(self[key].lane_id)
- eland_lane.attrib[ELAND.SAMPLE] = unicode(self[key].sample_name)
+ eland_lane.attrib[ELAND.END] = str(self[key].end-1)
+ eland_lane.attrib[ELAND.LANE_ID] = str(self[key].lane_id)
+ eland_lane.attrib[ELAND.SAMPLE] = str(self[key].sample_name)
root.append(eland_lane)
return root
- return root
# runfolder summary_report
names = [ os.path.split(p)[1] for p in pathnames]
LOGGER.info("Adding eland files %s" %(",".join(names),))
+ basedir = os.path.split(pathnames[0])[0]
+ gs_template = "{0}_*_L{1:03}_genomesize.xml"
+ genomesize = glob(
+ os.path.join(basedir,
+ gs_template.format(key.sample, key.lane)))
- genome_map = {}
+
+ genome_map = GenomeMap()
if genome_maps is not None:
- genome_map = genome_maps[key.lane]
+ genome_map = GenomeMap(genome_maps[key.lane])
+ elif len(genomesize) > 0:
+ LOGGER.info("Found {0}".format(genomesize))
+ genome_map.parse_genomesize(genomesize[0])
elif gerald is not None:
- genome_dir = gerald.lanes[key.lane].eland_genome
+ genome_dir = gerald.lanes[key].eland_genome
if genome_dir is not None:
- genome_map = build_genome_fasta_map(genome_dir)
+ genome_map.scan_genome_dir(genome_dir)
lane = ElandLane(pathnames, key.sample, key.lane, key.read, genome_map)
if self._part is not None: name.append('P%s' % (self.part,))
return '<ElandMatch(' + "_".join(name) + ')>'
-def build_genome_fasta_map(genome_dir):
- # build fasta to fasta file map
- LOGGER.info("Building genome map")
- genome = genome_dir.split(os.path.sep)[-1]
- fasta_map = {}
- for vld_file in glob(os.path.join(genome_dir, '*.vld')):
- is_link = False
- if os.path.islink(vld_file):
- is_link = True
- vld_file = os.path.realpath(vld_file)
- path, vld_name = os.path.split(vld_file)
- name, ext = os.path.splitext(vld_name)
- if is_link:
- fasta_map[name] = name
- else:
- fasta_map[name] = os.path.join(genome, name)
- return fasta_map
-
def extract_eland_sequence(instream, outstream, start, end):
"""
for a in args:
LOGGER.info("Starting scan of %s" % (a,))
e = eland(a)
- print ElementTree.tostring(e.get_elements())
+ print(ElementTree.tostring(e.get_elements()))
return