From: Diane Trout Date: Wed, 24 Dec 2008 23:39:31 +0000 (+0000) Subject: Handle paired-end eland files. X-Git-Tag: stanford.caltech-merged-database-2009-jan-15~4 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=121e3f07342d1a9889dff2d0f2bacf16e65f0d66 Handle paired-end eland files. This required changing the ELAND class to hold a list of dictionaries from its previous implementation where it was exporting an internal dictionary of the lanes. I decided to directly show the internal list and to remove the previous dictionary methods to make it more obvious when code was expecting the previous behavior. Also a saved runfolder will now have eland files of the form s_<lane>_<end>. Internally the end is 0 or 1; I tried to make the display show 1 or 2 for the user's benefit though. --- diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py index a010e1d..864f8ca 100644 --- a/htsworkflow/pipelines/eland.py +++ b/htsworkflow/pipelines/eland.py @@ -16,10 +16,11 @@ class ElandLane(object): """ Process an eland result file """ - XML_VERSION = 1 + XML_VERSION = 2 LANE = 'ElandLane' SAMPLE_NAME = 'SampleName' LANE_ID = 'LaneID' + END = 'End' GENOME_MAP = 'GenomeMap' GENOME_ITEM = 'GenomeItem' MAPPED_READS = 'MappedReads' @@ -33,10 +34,11 @@ class ElandLane(object): ELAND_EXTENDED = 2 ELAND_EXPORT = 3 - def __init__(self, pathname=None, genome_map=None, eland_type=None, xml=None): + def __init__(self, pathname=None, lane_id=None, end=None, genome_map=None, eland_type=None, xml=None): self.pathname = pathname self._sample_name = None - self._lane_id = None + self.lane_id = lane_id + self.end = end self._reads = None self._mapped_reads = None self._match_codes = None @@ -167,7 +169,6 @@ class ElandLane(object): path, name = os.path.split(self.pathname) split_name = name.split('_') self._sample_name = split_name[0] - self._lane_id = int(split_name[1]) def _get_sample_name(self): if self._sample_name is None: @@ -175,12 +176,6 @@ class 
ElandLane(object): return self._sample_name sample_name = property(_get_sample_name) - def _get_lane_id(self): - if self._lane_id is None: - self._update_name() - return self._lane_id - lane_id = property(_get_lane_id) - def _get_reads(self): if self._reads is None: self._update() @@ -207,6 +202,9 @@ class ElandLane(object): sample_tag.text = self.sample_name lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID) lane_tag.text = str(self.lane_id) + if self.end is not None: + end_tag = ElementTree.SubElement(lane, ElandLane.END) + end_tag.text = str(self.end) genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP) for k, v in self.genome_map.items(): item = ElementTree.SubElement( @@ -240,7 +238,9 @@ class ElandLane(object): if tag == ElandLane.SAMPLE_NAME.lower(): self._sample_name = element.text elif tag == ElandLane.LANE_ID.lower(): - self._lane_id = int(element.text) + self.lane_id = int(element.text) + elif tag == ElandLane.END.lower(): + self.end = int(element.text) elif tag == ElandLane.GENOME_MAP.lower(): for child in element: name = child.attrib['name'] @@ -265,41 +265,30 @@ class ELAND(object): """ Summarize information from eland files """ - XML_VERSION = 1 + XML_VERSION = 2 ELAND = 'ElandCollection' LANE = 'Lane' LANE_ID = 'id' + END = 'end' def __init__(self, xml=None): # we need information from the gerald config.xml - self.results = {} + self.results = [{},{}] if xml is not None: self.set_elements(xml) - def __len__(self): - return len(self.results) - - def keys(self): - return self.results.keys() - - def values(self): - return self.results.values() - - def items(self): - return self.results.items() - - def __getitem__(self, key): - return self.results[key] - def get_elements(self): root = ElementTree.Element(ELAND.ELAND, {'version': unicode(ELAND.XML_VERSION)}) - for lane_id, lane in self.results.items(): - eland_lane = lane.get_elements() - eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id) - root.append(eland_lane) + for end in 
range(len(self.results)): + end_results = self.results[end] + for lane_id, lane in end_results.items(): + eland_lane = lane.get_elements() + eland_lane.attrib[ELAND.END] = unicode (end) + eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id) + root.append(eland_lane) return root def set_elements(self, tree): @@ -307,11 +296,17 @@ class ELAND(object): raise ValueError('Expecting %s', ELAND.ELAND) for element in list(tree): lane_id = int(element.attrib[ELAND.LANE_ID]) + end = int(element.attrib.get(ELAND.END, 0)) lane = ElandLane(xml=element) - self.results[lane_id] = lane + self.results[end][lane_id] = lane + +def check_for_eland_file(basedir, pattern, lane_id, end): + if end is None: + full_lane_id = lane_id + else: + full_lane_id = "%d_%d" % ( lane_id, end ) -def check_for_eland_file(basedir, lane_id, pattern): - basename = pattern % (lane_id,) + basename = pattern % (full_lane_id,) pathname = os.path.join(basedir, basename) if os.path.exists(pathname): return pathname @@ -321,50 +316,57 @@ def check_for_eland_file(basedir, lane_id, pattern): def eland(basedir, gerald=None, genome_maps=None): e = ELAND() - file_list = glob(os.path.join(basedir, "*_eland_result.txt")) - if len(file_list) == 0: - # lets handle compressed eland files too - file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2")) + #file_list = glob(os.path.join(basedir, "*_eland_result.txt")) + #if len(file_list) == 0: + # # lets handle compressed eland files too + # file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2")) lane_ids = range(1,9) + ends = [None, 1, 2] + # the order in patterns determines the preference for what # will be found. 
- patterns = ['s_%d_eland_result.txt', - 's_%d_eland_result.txt.bz2', - 's_%d_eland_result.txt.gz', - 's_%d_eland_extended.txt', - 's_%d_eland_extended.txt.bz2', - 's_%d_eland_extended.txt.gz', - 's_%d_eland_multi.txt', - 's_%d_eland_multi.txt.bz2', - 's_%d_eland_multi.txt.gz',] - - for lane_id in lane_ids: - for p in patterns: - pathname = check_for_eland_file(basedir, lane_id, p) - if pathname is not None: - break - else: - continue - # yes the lane_id is also being computed in ElandLane._update - # I didn't want to clutter up my constructor - # but I needed to persist the sample_name/lane_id for - # runfolder summary_report - path, name = os.path.split(pathname) - logging.info("Adding eland file %s" %(name,)) - split_name = name.split('_') - lane_id = int(split_name[1]) - - if genome_maps is not None: - genome_map = genome_maps[lane_id] - elif gerald is not None: - genome_dir = gerald.lanes[lane_id].eland_genome - genome_map = build_genome_fasta_map(genome_dir) - else: - genome_map = {} + patterns = ['s_%s_eland_result.txt', + 's_%s_eland_result.txt.bz2', + 's_%s_eland_result.txt.gz', + 's_%s_eland_extended.txt', + 's_%s_eland_extended.txt.bz2', + 's_%s_eland_extended.txt.gz', + 's_%s_eland_multi.txt', + 's_%s_eland_multi.txt.bz2', + 's_%s_eland_multi.txt.gz',] + + for end in ends: + for lane_id in lane_ids: + for p in patterns: + pathname = check_for_eland_file(basedir, p, lane_id, end) + if pathname is not None: + break + else: + continue + # yes the lane_id is also being computed in ElandLane._update + # I didn't want to clutter up my constructor + # but I needed to persist the sample_name/lane_id for + # runfolder summary_report + path, name = os.path.split(pathname) + logging.info("Adding eland file %s" %(name,)) + # split_name = name.split('_') + # lane_id = int(split_name[1]) + + if genome_maps is not None: + genome_map = genome_maps[lane_id] + elif gerald is not None: + genome_dir = gerald.lanes[lane_id].eland_genome + genome_map = 
build_genome_fasta_map(genome_dir) + else: + genome_map = {} - eland_result = ElandLane(pathname, genome_map) - e.results[lane_id] = eland_result + eland_result = ElandLane(pathname, lane_id, end, genome_map) + if end is None: + effective_end = 0 + else: + effective_end = end - 1 + e.results[effective_end][lane_id] = eland_result return e def build_genome_fasta_map(genome_dir): diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py index 20df0b2..ba99c4c 100644 --- a/htsworkflow/pipelines/runfolder.py +++ b/htsworkflow/pipelines/runfolder.py @@ -216,28 +216,30 @@ def summarize_mapped_reads(mapped_reads): def summarize_lane(gerald, lane_id): report = [] summary_results = gerald.summary.lane_results - eland_result = gerald.eland_results.results[lane_id] - report.append("Sample name %s" % (eland_result.sample_name)) - report.append("Lane id %s" % (eland_result.lane_id,)) - cluster = summary_results[eland_result.lane_id].cluster - report.append("Clusters %d +/- %d" % (cluster[0], cluster[1])) - report.append("Total Reads: %d" % (eland_result.reads)) - mc = eland_result._match_codes - nm = mc['NM'] - nm_percent = float(nm)/eland_result.reads * 100 - qc = mc['QC'] - qc_percent = float(qc)/eland_result.reads * 100 - - report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent)) - report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent)) - report.append('Unique (0,1,2 mismatches) %d %d %d' % \ - (mc['U0'], mc['U1'], mc['U2'])) - report.append('Repeat (0,1,2 mismatches) %d %d %d' % \ - (mc['R0'], mc['R1'], mc['R2'])) - report.append("Mapped Reads") - mapped_reads = summarize_mapped_reads(eland_result.mapped_reads) - for name, counts in mapped_reads.items(): - report.append(" %s: %d" % (name, counts)) + for end in range(len(summary_results)): + eland_result = gerald.eland_results.results[end][lane_id] + report.append("Sample name %s" % (eland_result.sample_name)) + report.append("Lane id %s end %s" % (eland_result.lane_id, end)) + cluster = 
summary_results[end][eland_result.lane_id].cluster + report.append("Clusters %d +/- %d" % (cluster[0], cluster[1])) + report.append("Total Reads: %d" % (eland_result.reads)) + mc = eland_result._match_codes + nm = mc['NM'] + nm_percent = float(nm)/eland_result.reads * 100 + qc = mc['QC'] + qc_percent = float(qc)/eland_result.reads * 100 + + report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent)) + report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent)) + report.append('Unique (0,1,2 mismatches) %d %d %d' % \ + (mc['U0'], mc['U1'], mc['U2'])) + report.append('Repeat (0,1,2 mismatches) %d %d %d' % \ + (mc['R0'], mc['R1'], mc['R2'])) + report.append("Mapped Reads") + mapped_reads = summarize_mapped_reads(eland_result.mapped_reads) + for name, counts in mapped_reads.items(): + report.append(" %s: %d" % (name, counts)) + report.append('') return report def summary_report(runs): @@ -249,7 +251,7 @@ def summary_report(runs): # print a run name? report.append('Summary for %s' % (run.name,)) # sort the report - eland_keys = run.gerald.eland_results.results.keys() + eland_keys = run.gerald.eland_results.results[0].keys() eland_keys.sort(alphanum) for lane_id in eland_keys: @@ -320,22 +322,23 @@ def extract_results(runs, output_base_dir=None): tar.wait() # copy & bzip eland files - for eland_lane in g.eland_results.values(): - source_name = eland_lane.pathname - path, name = os.path.split(eland_lane.pathname) - dest_name = os.path.join(cycle_dir, name) - if is_compressed(name): - logging.info('Already compressed, Saving to %s' % (dest_name, )) - shutil.copy(source_name, dest_name) - else: - # not compressed - dest_name += '.bz2' - args = ['bzip2', '-9', '-c', source_name] - logging.info('Running: %s' % ( " ".join(args) )) - bzip_dest = open(dest_name, 'w') - bzip = subprocess.Popen(args, stdout=bzip_dest) - logging.info('Saving to %s' % (dest_name, )) - bzip.wait() + for lanes_dictionary in g.eland_results.results: + for eland_lane in lanes_dictionary.values(): + 
source_name = eland_lane.pathname + path, name = os.path.split(eland_lane.pathname) + dest_name = os.path.join(cycle_dir, name) + if is_compressed(name): + logging.info('Already compressed, Saving to %s' % (dest_name, )) + shutil.copy(source_name, dest_name) + else: + # not compressed + dest_name += '.bz2' + args = ['bzip2', '-9', '-c', source_name] + logging.info('Running: %s' % ( " ".join(args) )) + bzip_dest = open(dest_name, 'w') + bzip = subprocess.Popen(args, stdout=bzip_dest) + logging.info('Saving to %s' % (dest_name, )) + bzip.wait() def clean_runs(runs): """ diff --git a/htsworkflow/pipelines/test/simulate_runfolder.py b/htsworkflow/pipelines/test/simulate_runfolder.py index f6f3742..cc59602 100644 --- a/htsworkflow/pipelines/test/simulate_runfolder.py +++ b/htsworkflow/pipelines/test/simulate_runfolder.py @@ -1912,15 +1912,28 @@ def make_eland_results(gerald_dir): f.write(eland_result) f.close() -def make_eland_multi(gerald_dir): - eland_multi = """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM +def make_eland_multi(gerald_dir, paired=False): + eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0 >HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0 >HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R -""" - for i in range(1,9): - pathname = os.path.join(gerald_dir, - 's_%d_eland_multi.txt' % (i,)) - f = open(pathname, 'w') - 
f.write(eland_multi) - f.close() +""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM +>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC +>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0 +>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R +>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1"""] + if paired: + for e in [1,2]: + for i in range(1,9): + pathname = os.path.join(gerald_dir, + 's_%d_%d_eland_multi.txt' % (i,e)) + f = open(pathname, 'w') + f.write(eland_multi[e-1]) + f.close() + else: + for i in range(1,9): + pathname = os.path.join(gerald_dir, + 's_%d_eland_multi.txt' % (i,)) + f = open(pathname, 'w') + f.write(eland_multi[0]) + f.close() diff --git a/htsworkflow/pipelines/test/test_runfolder026.py b/htsworkflow/pipelines/test/test_runfolder026.py index 656b281..fac7257 100644 --- a/htsworkflow/pipelines/test/test_runfolder026.py +++ b/htsworkflow/pipelines/test/test_runfolder026.py @@ -381,20 +381,22 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.keys(): - self.failUnlessEqual(g_eland[lane].reads, - g2_eland[lane].reads) - self.failUnlessEqual(len(g_eland[lane].mapped_reads), - len(g2_eland[lane].mapped_reads)) - for k in g_eland[lane].mapped_reads.keys(): - self.failUnlessEqual(g_eland[lane].mapped_reads[k], - g2_eland[lane].mapped_reads[k]) - - self.failUnlessEqual(len(g_eland[lane].match_codes), - 
len(g2_eland[lane].match_codes)) - for k in g_eland[lane].match_codes.keys(): - self.failUnlessEqual(g_eland[lane].match_codes[k], - g2_eland[lane].match_codes[k]) + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) def test_eland(self): @@ -406,7 +408,7 @@ class RunfolderTests(unittest.TestCase): eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[i] + lane = eland.results[0][i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") self.failUnlessEqual(lane.lane_id, i) @@ -422,8 +424,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[i] - l2 = e2[i] + l1 = eland.results[0][i] + l2 = e2.results[0][i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git a/htsworkflow/pipelines/test/test_runfolder030.py b/htsworkflow/pipelines/test/test_runfolder030.py index 6e97ec6..4e3ddf6 100644 --- a/htsworkflow/pipelines/test/test_runfolder030.py +++ b/htsworkflow/pipelines/test/test_runfolder030.py @@ -805,20 +805,22 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.keys(): - self.failUnlessEqual(g_eland[lane].reads, - g2_eland[lane].reads) - self.failUnlessEqual(len(g_eland[lane].mapped_reads), - len(g2_eland[lane].mapped_reads)) - for k in 
g_eland[lane].mapped_reads.keys(): - self.failUnlessEqual(g_eland[lane].mapped_reads[k], - g2_eland[lane].mapped_reads[k]) - - self.failUnlessEqual(len(g_eland[lane].match_codes), - len(g2_eland[lane].match_codes)) - for k in g_eland[lane].match_codes.keys(): - self.failUnlessEqual(g_eland[lane].match_codes[k], - g2_eland[lane].match_codes[k]) + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) def test_eland(self): @@ -830,7 +832,7 @@ class RunfolderTests(unittest.TestCase): eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[i] + lane = eland.results[0][i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") self.failUnlessEqual(lane.lane_id, i) @@ -846,8 +848,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[i] - l2 = e2[i] + l1 = eland.results[0][i] + l2 = e2.results[0][i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git a/htsworkflow/pipelines/test/test_runfolder110.py b/htsworkflow/pipelines/test/test_runfolder110.py index fba2981..1c42a78 100644 --- a/htsworkflow/pipelines/test/test_runfolder110.py +++ b/htsworkflow/pipelines/test/test_runfolder110.py @@ -194,20 +194,22 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.keys(): - 
self.failUnlessEqual(g_eland[lane].reads, - g2_eland[lane].reads) - self.failUnlessEqual(len(g_eland[lane].mapped_reads), - len(g2_eland[lane].mapped_reads)) - for k in g_eland[lane].mapped_reads.keys(): - self.failUnlessEqual(g_eland[lane].mapped_reads[k], - g2_eland[lane].mapped_reads[k]) - - self.failUnlessEqual(len(g_eland[lane].match_codes), - len(g2_eland[lane].match_codes)) - for k in g_eland[lane].match_codes.keys(): - self.failUnlessEqual(g_eland[lane].match_codes[k], - g2_eland[lane].match_codes[k]) + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) def test_eland(self): @@ -222,7 +224,7 @@ class RunfolderTests(unittest.TestCase): eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[i] + lane = eland.results[0][i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") self.failUnlessEqual(lane.lane_id, i) @@ -243,8 +245,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[i] - l2 = e2[i] + l1 = eland.results[0][i] + l2 = e2.results[0][i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git a/htsworkflow/pipelines/test/test_runfolder_ipar100.py b/htsworkflow/pipelines/test/test_runfolder_ipar100.py index 76f1e64..4f07eff 100644 --- a/htsworkflow/pipelines/test/test_runfolder_ipar100.py +++ 
b/htsworkflow/pipelines/test/test_runfolder_ipar100.py @@ -189,20 +189,22 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.keys(): - self.failUnlessEqual(g_eland[lane].reads, - g2_eland[lane].reads) - self.failUnlessEqual(len(g_eland[lane].mapped_reads), - len(g2_eland[lane].mapped_reads)) - for k in g_eland[lane].mapped_reads.keys(): - self.failUnlessEqual(g_eland[lane].mapped_reads[k], - g2_eland[lane].mapped_reads[k]) - - self.failUnlessEqual(len(g_eland[lane].match_codes), - len(g2_eland[lane].match_codes)) - for k in g_eland[lane].match_codes.keys(): - self.failUnlessEqual(g_eland[lane].match_codes[k], - g2_eland[lane].match_codes[k]) + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) def test_eland(self): @@ -217,7 +219,7 @@ class RunfolderTests(unittest.TestCase): eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[i] + lane = eland.results[0][i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") self.failUnlessEqual(lane.lane_id, i) @@ -238,8 +240,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[i] - l2 = e2[i] + l1 = eland.results[0][i] + l2 = e2.results[0][i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git 
a/htsworkflow/pipelines/test/test_runfolder_pair.py b/htsworkflow/pipelines/test/test_runfolder_pair.py index 783a5af..9d8530e 100644 --- a/htsworkflow/pipelines/test/test_runfolder_pair.py +++ b/htsworkflow/pipelines/test/test_runfolder_pair.py @@ -45,7 +45,7 @@ def make_runfolder(obj=None): os.mkdir(gerald_dir) make_gerald_config(gerald_dir) make_summary_paired_htm(gerald_dir) - make_eland_multi(gerald_dir) + make_eland_multi(gerald_dir, paired=True) if obj is not None: obj.temp_dir = temp_dir @@ -197,20 +197,22 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.keys(): - self.failUnlessEqual(g_eland[lane].reads, - g2_eland[lane].reads) - self.failUnlessEqual(len(g_eland[lane].mapped_reads), - len(g2_eland[lane].mapped_reads)) - for k in g_eland[lane].mapped_reads.keys(): - self.failUnlessEqual(g_eland[lane].mapped_reads[k], - g2_eland[lane].mapped_reads[k]) - - self.failUnlessEqual(len(g_eland[lane].match_codes), - len(g2_eland[lane].match_codes)) - for k in g_eland[lane].match_codes.keys(): - self.failUnlessEqual(g_eland[lane].match_codes[k], - g2_eland[lane].match_codes[k]) + for lane in g_eland.results[end].keys(): + g_results = g_eland.results[end][lane] + g2_results = g_eland.results[end][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) def test_eland(self): @@ -224,8 +226,9 @@ class RunfolderTests(unittest.TestCase): 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + # check first end for i in range(1,9): - lane = 
eland[i] + lane = eland.results[0][i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") self.failUnlessEqual(lane.lane_id, i) @@ -240,28 +243,46 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(lane.match_codes['NM'], 1) self.failUnlessEqual(lane.match_codes['QC'], 0) + # check second end + for i in range(1,9): + lane = eland.results[1][i] + self.failUnlessEqual(lane.reads, 5) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(len(lane.mapped_reads), 15) + self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) + self.failUnlessEqual(lane.match_codes['U0'], 1) + self.failUnlessEqual(lane.match_codes['R0'], 2) + self.failUnlessEqual(lane.match_codes['U1'], 1) + self.failUnlessEqual(lane.match_codes['R1'], 9) + self.failUnlessEqual(lane.match_codes['U2'], 0) + self.failUnlessEqual(lane.match_codes['R2'], 12) + self.failUnlessEqual(lane.match_codes['NM'], 1) + self.failUnlessEqual(lane.match_codes['QC'], 1) + xml = eland.get_elements() # just make sure that element tree can serialize the tree xml_str = ElementTree.tostring(xml) e2 = gerald.ELAND(xml=xml) - for i in range(1,9): - l1 = eland[i] - l2 = e2[i] - self.failUnlessEqual(l1.reads, l2.reads) - self.failUnlessEqual(l1.sample_name, l2.sample_name) - self.failUnlessEqual(l1.lane_id, l2.lane_id) - self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) - self.failUnlessEqual(len(l1.mapped_reads), 15) - for k in l1.mapped_reads.keys(): - self.failUnlessEqual(l1.mapped_reads[k], - l2.mapped_reads[k]) - - self.failUnlessEqual(len(l1.match_codes), 9) - self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) - for k in l1.match_codes.keys(): - self.failUnlessEqual(l1.match_codes[k], - l2.match_codes[k]) + for end in [0, 1]: + for i in range(1,9): + l1 = eland.results[end][i] + l2 = e2.results[end][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + 
self.failUnlessEqual(l1.lane_id, l2.lane_id) + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 15) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) def test_runfolder(self): runs = runfolder.get_runs(self.runfolder_dir)