From: Diane Trout Date: Wed, 17 Jun 2009 19:36:00 +0000 (+0000) Subject: Add support for extracting data out of Illumina's new RTA runfolder. X-Git-Tag: 0.2.5~4 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=193a2a857d668b5c909a1e580031778f35f7560e Add support for extracting data out of Illumina's new RTA runfolder. I dropped extraction of matrix & phasing for the moment; no one was looking for them, and I need to go back to one matrix per lane for RTA Intensity directories. I'm also not sure where the phasing information might be stored. Another weakness is that if there is a firecrest directory, the run scanning wasn't finding the intensities directory. --- diff --git a/htsworkflow/pipelines/bustard.py b/htsworkflow/pipelines/bustard.py index f1d73eb..85b1d4c 100644 --- a/htsworkflow/pipelines/bustard.py +++ b/htsworkflow/pipelines/bustard.py @@ -158,19 +158,22 @@ def crosstalk_matrix_from_bustard_config(bustard_path, bustard_config_tree): matrix_auto_flag = int(matrix.find('AutoFlag').text) matrix_auto_lane = int(matrix.find('AutoLane').text) + crosstalk = None if matrix_auto_flag: # we estimated the matrix from something in this run. # though we don't really care which lane it was matrix_path = os.path.join(bustard_path, 'Matrix', 's_02_matrix.txt') - matrix = CrosstalkMatrix(matrix_path) + crosstalk = CrosstalkMatrix(matrix_path) else: - # the matrix was provided matrix_elements = call_parameters.find('MatrixElements') - if matrix_elements is None: - raise RuntimeError('Expected to find MatrixElements in Bustard BaseCallParameters') - matrix = CrosstalkMatrix(xml=matrix_elements) + # the matrix was provided + if matrix_elements is not None: + crosstalk = CrosstalkMatrix(xml=matrix_elements) + else: + # we have no crosstalk matrix? 
+ pass - return matrix + return crosstalk class Bustard(object): XML_VERSION = 2 @@ -185,7 +188,7 @@ class Bustard(object): def __init__(self, xml=None): self.version = None - self.date = date.today() + self.date = None self.user = None self.phasing = {} self.crosstalk = None @@ -196,6 +199,8 @@ class Bustard(object): self.set_elements(xml) def _get_time(self): + if self.date is None: + return None return time.mktime(self.date.timetuple()) time = property(_get_time, doc='return run time as seconds since epoch') @@ -208,15 +213,18 @@ class Bustard(object): {'version': str(Bustard.XML_VERSION)}) version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION) version.text = self.version - run_date = ElementTree.SubElement(root, Bustard.DATE) - run_date.text = str(self.time) - user = ElementTree.SubElement(root, Bustard.USER) - user.text = self.user + if self.time is not None: + run_date = ElementTree.SubElement(root, Bustard.DATE) + run_date.text = str(self.time) + if self.user is not None: + user = ElementTree.SubElement(root, Bustard.USER) + user.text = self.user params = ElementTree.SubElement(root, Bustard.PARAMETERS) # add phasing parameters for lane in LANE_LIST: - params.append(self.phasing[lane].get_elements()) + if self.phasing.has_key(lane): + params.append(self.phasing[lane].get_elements()) # add crosstalk matrix if it exists if self.crosstalk is not None: @@ -265,11 +273,18 @@ def bustard(pathname): pathname = os.path.abspath(pathname) path, name = os.path.split(pathname) groups = name.split("_") - version = re.search(VERSION_RE, groups[0]) - b.version = version.group(1) - t = time.strptime(groups[1], EUROPEAN_STRPTIME) - b.date = date(*t[0:3]) - b.user = groups[2] + if groups[0].lower().startswith('bustard'): + version = re.search(VERSION_RE, groups[0]) + b.version = version.group(1) + t = time.strptime(groups[1], EUROPEAN_STRPTIME) + b.date = date(*t[0:3]) + b.user = groups[2] + elif groups[0] == 'BaseCalls': + # stub values + b.version = None + b.date 
= None + b.user = None + b.pathname = pathname bustard_config_filename = os.path.join(pathname, 'config.xml') paramfiles = glob(os.path.join(pathname, "params?.xml")) @@ -289,6 +304,9 @@ def bustard(pathname): bustard_config_root = ElementTree.parse(bustard_config_filename) b.bustard_config = bustard_config_root.getroot() b.crosstalk = crosstalk_matrix_from_bustard_config(b.pathname, b.bustard_config) + software = bustard_config_root.find('*/Software') + b.version = software.attrib['Version'] + #b.version = software.attrib['Name'] + "-" + software.attrib['Version'] return b diff --git a/htsworkflow/pipelines/gerald.py b/htsworkflow/pipelines/gerald.py index cbc5fcb..980c661 100644 --- a/htsworkflow/pipelines/gerald.py +++ b/htsworkflow/pipelines/gerald.py @@ -173,6 +173,7 @@ class Gerald(object): xml_version = int(tree.attrib.get('version', 0)) if xml_version > Gerald.XML_VERSION: logging.warn('XML tree is a higher version than this class') + self.eland_results = ELAND() for element in list(tree): tag = element.tag.lower() if tag == Gerald.RUN_PARAMETERS.lower(): @@ -184,18 +185,17 @@ class Gerald(object): else: logging.warn("Unrecognized tag %s" % (element.tag,)) - def gerald(pathname): g = Gerald() - g.pathname = pathname - path, name = os.path.split(pathname) + g.pathname = os.path.expanduser(pathname) + path, name = os.path.split(g.pathname) logging.info("Parsing gerald config.xml") - config_pathname = os.path.join(pathname, 'config.xml') + config_pathname = os.path.join(g.pathname, 'config.xml') g.tree = ElementTree.parse(config_pathname).getroot() # parse Summary.htm file logging.info("Parsing Summary.htm") - summary_pathname = os.path.join(pathname, 'Summary.htm') + summary_pathname = os.path.join(g.pathname, 'Summary.htm') g.summary = Summary(summary_pathname) # parse eland files g.eland_results = eland(g.pathname, g) diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py index 239239e..b7d5a54 100644 --- a/htsworkflow/pipelines/ipar.py 
+++ b/htsworkflow/pipelines/ipar.py @@ -24,6 +24,8 @@ from htsworkflow.pipelines.runfolder import \ VERSION_RE, \ EUROPEAN_STRPTIME +SOFTWARE_NAMES = ('IPAR_1.01', 'IPAR_1.3', 'Intensities') + class Tiles(object): def __init__(self, tree): self.tree = tree.find("TileSelection") @@ -82,8 +84,9 @@ class IPAR(object): def _get_cycles(self): if self.tree is None: - return None + raise RuntimeError("get cycles called before xml tree initalized") cycles = self.tree.find("Cycles") + assert cycles is not None if cycles is None: return None return cycles.attrib @@ -178,23 +181,24 @@ def load_ipar_param_tree(paramfile): tree = ElementTree.parse(paramfile).getroot() run = tree.find('Run') - if run.attrib.has_key('Name') and run.attrib['Name'].startswith("IPAR"): + if run.attrib.has_key('Name') and run.attrib['Name'] in SOFTWARE_NAMES: return run - - return None + else: + logging.info("No run found") + return None def ipar(pathname): """ Examine the directory at pathname and initalize a IPAR object """ - logging.info("Searching IPAR directory") + logging.info("Searching IPAR directory %s" % (pathname,)) i = IPAR() i.pathname = pathname # parse firecrest directory name path, name = os.path.split(pathname) groups = name.split('_') - if groups[0] != 'IPAR': + if not (groups[0] == 'IPAR' or groups[0] == 'Intensities'): raise ValueError('ipar can only process IPAR directories') bustard_pattern = os.path.join(pathname, 'Bustard*') @@ -206,15 +210,18 @@ def ipar(pathname): elif glob(bustard_pattern) > 0: i.matrix = None # its still live. 
- else: - return None # look for parameter xml file - paramfile = os.path.join(path, '.params') - if os.path.exists(paramfile): - i.tree = load_ipar_param_tree(paramfile) - mtime_local = os.stat(paramfile)[stat.ST_MTIME] - i.time = mtime_local + paramfiles = [os.path.join(pathname, 'config.xml'), + os.path.join(path, '.params')] + for paramfile in paramfiles: + if os.path.exists(paramfile): + logging.info("Found IPAR Config file at: %s" % ( paramfile, )) + i.tree = load_ipar_param_tree(paramfile) + mtime_local = os.stat(paramfile)[stat.ST_MTIME] + i.time = mtime_local + return i + return i def fromxml(tree): @@ -225,15 +232,15 @@ def fromxml(tree): f.set_elements(tree) return f -if __name__ == "__main__": - i = ipar(os.path.expanduser('~/gec/081021_HWI-EAS229_0063_30HKUAAXX/Data/IPAR_1.01')) - x = i.get_elements() - j = fromxml(x) +#if __name__ == "__main__": + #i = ipar(os.path.expanduser('~/gec/081021_HWI-EAS229_0063_30HKUAAXX/Data/IPAR_1.01')) + #x = i.get_elements() + #j = fromxml(x) #ElementTree.dump(x) - print j.date - print j.start - print j.stop - print i.tiles.keys() - print j.tiles.keys() - print j.tiles.items() - print j.file_list() + #print j.date + #print j.start + #print j.stop + #print i.tiles.keys() + #print j.tiles.keys() + #print j.tiles.items() + #print j.file_list() diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py index 11795c2..14b7e07 100644 --- a/htsworkflow/pipelines/runfolder.py +++ b/htsworkflow/pipelines/runfolder.py @@ -21,6 +21,7 @@ EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})" VERSION_RE = "([0-9\.]+)" USER_RE = "([a-zA-Z0-9]+)" LANES_PER_FLOWCELL = 8 +LANE_LIST = range(1, LANES_PER_FLOWCELL+1) from htsworkflow.util.alphanum import alphanum from htsworkflow.util.ethelp import indent, flatten @@ -165,8 +166,10 @@ def get_runs(runfolder): def scan_post_image_analysis(runs, runfolder, image_analysis, pathname): logging.info("Looking for bustard directories in %s" % (pathname,)) - bustard_glob 
= os.path.join(pathname, "Bustard*") - for bustard_pathname in glob(bustard_glob): + bustard_dirs = glob(os.path.join(pathname, "Bustard*")) + # RTA BaseCalls looks enough like Bustard. + bustard_dirs.extend(glob(os.path.join(pathname, "BaseCalls"))) + for bustard_pathname in bustard_dirs: logging.info("Found bustard directory %s" % (bustard_pathname,)) b = bustard.bustard(bustard_pathname) gerald_glob = os.path.join(bustard_pathname, 'GERALD*') @@ -192,22 +195,25 @@ def get_runs(runfolder): logging.info('Found firecrest in ' + datadir) image_analysis = firecrest.firecrest(firecrest_pathname) if image_analysis is None: - logging.warn( + logging.warn( "%s is an empty or invalid firecrest directory" % (firecrest_pathname,) ) - else: + else: scan_post_image_analysis( runs, runfolder, image_analysis, firecrest_pathname ) # scan for IPAR directories - for ipar_pathname in glob(os.path.join(datadir,"IPAR_*")): + ipar_dirs = glob(os.path.join(datadir, "IPAR_*")) + # The Intensities directory from the RTA software looks a lot like IPAR + ipar_dirs.extend(glob(os.path.join(datadir, 'Intensities'))) + for ipar_pathname in ipar_dirs: logging.info('Found ipar directories in ' + datadir) image_analysis = ipar.ipar(ipar_pathname) if image_analysis is None: - logging.warn( + logging.warn( "%s is an empty or invalid IPAR directory" %(ipar_pathname,) ) - else: + else: scan_post_image_analysis( runs, runfolder, image_analysis, ipar_pathname ) @@ -227,6 +233,7 @@ def get_specific_run(gerald_dir): from htsworkflow.pipelines import bustard from htsworkflow.pipelines import gerald + gerald_dir = os.path.expanduser(gerald_dir) bustard_dir = os.path.abspath(os.path.join(gerald_dir, '..')) image_dir = os.path.abspath(os.path.join(gerald_dir, '..', '..')) @@ -252,6 +259,9 @@ def get_specific_run(gerald_dir): image_run = firecrest.firecrest(image_dir) elif re.search('IPAR', short_image_dir, re.IGNORECASE) is not None: image_run = ipar.ipar(image_dir) + elif re.search('Intensities', 
short_image_dir, re.IGNORECASE) is not None: + image_run = ipar.ipar(image_dir) + # if we din't find a run, report the error and return if image_run is None: msg = '%s does not contain an image processing step' % (image_dir,) diff --git a/htsworkflow/pipelines/test/simulate_runfolder.py b/htsworkflow/pipelines/test/simulate_runfolder.py index 2e340d2..8de0a7e 100644 --- a/htsworkflow/pipelines/test/simulate_runfolder.py +++ b/htsworkflow/pipelines/test/simulate_runfolder.py @@ -50,6 +50,33 @@ def make_bustard_config132(gerald_dir): destination = os.path.join(gerald_dir, 'config.xml') shutil.copy(source, destination) +def make_rta_intensities_1460(data_dir, version='1.4.6.0'): + """ + Construct an artificial RTA Intensities parameter file and directory + """ + intensities_dir = os.path.join(data_dir, 'Intensities') + if not os.path.exists(intensities_dir): + os.mkdir(intensities_dir) + + param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config.xml') + shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml')) + + return intensities_dir + +def make_rta_basecalls_1460(intensities_dir): + """ + Construct an artificial RTA Intensities parameter file and directory + """ + basecalls_dir = os.path.join(intensities_dir, 'BaseCalls') + if not os.path.exists(basecalls_dir): + os.mkdir(basecalls_dir) + + param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config.xml') + shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml')) + + return basecalls_dir + + def make_matrix(matrix_filename): contents = """# Auto-generated frequency response matrix > A diff --git a/htsworkflow/pipelines/test/test_runfolder_rta.py b/htsworkflow/pipelines/test/test_runfolder_rta.py new file mode 100644 index 0000000..08db8e2 --- /dev/null +++ b/htsworkflow/pipelines/test/test_runfolder_rta.py @@ -0,0 +1,308 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from htsworkflow.pipelines import 
eland +from htsworkflow.pipelines import ipar +from htsworkflow.pipelines import bustard +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +from htsworkflow.pipelines.test.simulate_runfolder import * + + +def make_runfolder(obj=None): + """ + Make a fake runfolder, attach all the directories to obj if defined + """ + # make a fake runfolder directory + temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + runfolder_dir = os.path.join(temp_dir, + '090608_HWI-EAS229_0117_4286GAAXX') + os.mkdir(runfolder_dir) + + data_dir = os.path.join(runfolder_dir, 'Data') + os.mkdir(data_dir) + + intensities_dir = make_rta_intensities_1460(data_dir) + + basecalls_dir = make_rta_basecalls_1460(intensities_dir) + + #make_phasing_params(bustard_dir) + #make_bustard_config132(bustard_dir) + + gerald_dir = os.path.join(basecalls_dir, + 'GERALD_16-06-2009_diane') + os.mkdir(gerald_dir) + make_gerald_config_100(gerald_dir) + make_summary_ipar130_htm(gerald_dir) + make_eland_multi(gerald_dir, lane_list=[1,2,3,4,5,6,]) + make_scarf(gerald_dir, lane_list=[7,]) + make_fastq(gerald_dir, lane_list=[8,]) + + if obj is not None: + obj.temp_dir = temp_dir + obj.runfolder_dir = runfolder_dir + obj.data_dir = data_dir + obj.image_analysis_dir = intensities_dir + obj.bustard_dir = basecalls_dir + obj.gerald_dir = gerald_dir + + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # attaches all the directories to the object passed in + make_runfolder(self) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_ipar(self): + """ + Construct a firecrest object + """ + i = ipar.ipar(self.image_analysis_dir) + self.failUnlessEqual(i.version, '1.4.6.0') + self.failUnlessEqual(i.start, 1) + self.failUnlessEqual(i.stop, 38) + + xml = i.get_elements() + # just make sure that 
element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + + i2 = ipar.IPAR(xml=xml) + self.failUnlessEqual(i.version, i2.version) + self.failUnlessEqual(i.start, i2.start) + self.failUnlessEqual(i.stop, i2.stop) + self.failUnlessEqual(i.date, i2.date) + + def test_bustard(self): + """ + construct a bustard object + """ + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.4.6.0') + self.failUnlessEqual(b.date, None) + self.failUnlessEqual(b.user, None) + self.failUnlessEqual(len(b.phasing), 0) + + xml = b.get_elements() + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + + def test_gerald(self): + # need to update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.171 2008/05/19 17:36:14 mzerara Exp') + self.failUnlessEqual(g.date, datetime(2009,2,22,21,15,59)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. 
+ # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, + '/g/mm9', + '/g/mm9', + '/g/elegans190', + '/g/arabidopsis01222004', + '/g/mm9', + '/g/mm9', + '/g/mm9', + '/g/mm9', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[i] + self.failUnlessEqual(cur_lane.analysis, 'eland_extended') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '37') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*37) + + # I want to be able to use a simple iterator + for l in g.lanes.values(): + self.failUnlessEqual(l.analysis, 'eland_extended') + self.failUnlessEqual(l.read_length, '37') + self.failUnlessEqual(l.use_bases, 'Y'*37) + + # test data extracted from summary file + clusters = [None, + (126910, 4300), (165739, 6792), + (196565, 8216), (153897, 8501), + (135536, 3908), (154083, 9315), + (159991, 9292), (198479, 17671),] + + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + summary_lane = g.summary[0][i] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, i) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + return + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, 
g2_lane.use_bases) + + # test (some) summary elements + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + if isinstance(g_results, eland.ElandLane): + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) + + + def test_eland(self): + return + hg_map = {'Lambda.fa': 'Lambda.fa'} + for i in range(1,22): + short_name = 'chr%d.fa' % (i,) + long_name = 'hg18/chr%d.fa' % (i,) + hg_map[short_name] = long_name + + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } + eland_container = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + # I added sequence lanes to the last 2 lanes of this test case + for i in range(1,7): + lane = eland_container.results[0][i] + self.failUnlessEqual(lane.reads, 6) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(len(lane.mapped_reads), 17) + self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) + self.failUnlessEqual(lane.match_codes['U0'], 3) + self.failUnlessEqual(lane.match_codes['R0'], 2) + self.failUnlessEqual(lane.match_codes['U1'], 1) + self.failUnlessEqual(lane.match_codes['R1'], 9) + self.failUnlessEqual(lane.match_codes['U2'], 0) + 
self.failUnlessEqual(lane.match_codes['R2'], 12) + self.failUnlessEqual(lane.match_codes['NM'], 1) + self.failUnlessEqual(lane.match_codes['QC'], 0) + + # test scarf + lane = eland_container.results[0][7] + self.failUnlessEqual(lane.reads, 5) + self.failUnlessEqual(lane.sample_name, 's') + self.failUnlessEqual(lane.lane_id, 7) + self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.SCARF_TYPE) + + # test fastq + lane = eland_container.results[0][8] + self.failUnlessEqual(lane.reads, 3) + self.failUnlessEqual(lane.sample_name, 's') + self.failUnlessEqual(lane.lane_id, 8) + self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.FASTQ_TYPE) + + xml = eland_container.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland_container.results[0][i] + l2 = e2.results[0][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + if isinstance(l1, eland.ElandLane): + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 17) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + elif isinstance(l1, eland.SequenceLane): + self.failUnlessEqual(l1.sequence_type, l2.sequence_type) + + def test_runfolder(self): + return + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? 
+ self.failUnlessEqual(len(runs), 1) + name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + self.failUnlessEqual(runs[0].name, name) + + # do we get the flowcell id from the FlowcellId.xml file + make_flowcell_id(self.runfolder_dir, '207BTAAXY') + runs = runfolder.get_runs(self.runfolder_dir) + self.failUnlessEqual(len(runs), 1) + name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + self.failUnlessEqual(runs[0].name, name) + + r1 = runs[0] + xml = r1.get_elements() + xml_str = ElementTree.tostring(xml) + + r2 = runfolder.PipelineRun(xml=xml) + self.failUnlessEqual(r1.name, r2.name) + self.failIfEqual(r2.image_analysis, None) + self.failIfEqual(r2.bustard, None) + self.failIfEqual(r2.gerald, None) + + +def suite(): + return unittest.makeSuite(RunfolderTests,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") + diff --git a/htsworkflow/pipelines/test/testdata/rta_basecalls_config.xml b/htsworkflow/pipelines/test/testdata/rta_basecalls_config.xml new file mode 100644 index 0000000..462df1b --- /dev/null +++ b/htsworkflow/pipelines/test/testdata/rta_basecalls_config.xml @@ -0,0 +1,94 @@ + + + + + 0.6 + + 0 + 0 + 0 + 0 + 1 + 38 + 1 + + + 0 + 0 + 0 + 0 + 1 + 38 + 1 + + 0 + failed-chastity + le + 1.0 + + + + + 0 + 0 + 0 + 0 + 0 + + 1 + 38 + D:\Runs\090608_HWI-EAS229_0117_4286GAAXX + + HWI-EAS229 + 0 + 0 + 0 + 0 + + 1 + 38 + D:\Runs\090608_HWI-EAS229_0117_4286GAAXX + + D:\Runs\090608_HWI-EAS229_0117_4286GAAXX + 0 + 0 + + + + + s + + + + s + + + + s + + + + s + + + + s + + + + s + + + + s + + + + s + + + + + \ No newline at end of file diff --git a/htsworkflow/pipelines/test/testdata/rta_intensities_config.xml b/htsworkflow/pipelines/test/testdata/rta_intensities_config.xml new file mode 100644 index 0000000..dee1f4f --- /dev/null +++ b/htsworkflow/pipelines/test/testdata/rta_intensities_config.xml @@ -0,0 +1,866 @@ + + + + + + 0 + 0 + 0 + 0 + 0 + + + 0 + 0 + 0 + 0 + 0 + + 1 + 38 + 
D:\Runs\090608_HWI-EAS229_0117_4286GAAXX + + HWI-EAS229 + 0 + 0 + 0 + 0 + + 1 + 38 + D:\Runs\090608_HWI-EAS229_0117_4286GAAXX + + 090608 + 117 + + + + + s + 1 + 2 + 4 + 3 + 5 + 7 + 6 + 8 + 10 + 9 + 11 + 13 + 12 + 14 + 16 + 15 + 17 + 19 + 18 + 20 + 22 + 21 + 23 + 25 + 24 + 26 + 28 + 27 + 29 + 31 + 30 + 32 + 34 + 33 + 35 + 37 + 36 + 38 + 40 + 39 + 41 + 43 + 42 + 44 + 46 + 45 + 47 + 49 + 48 + 50 + 52 + 51 + 53 + 55 + 54 + 56 + 58 + 57 + 59 + 61 + 60 + 62 + 64 + 63 + 65 + 67 + 66 + 68 + 70 + 69 + 71 + 73 + 72 + 74 + 76 + 75 + 77 + 79 + 78 + 80 + 82 + 81 + 83 + 85 + 84 + 86 + 88 + 87 + 89 + 91 + 90 + 92 + 94 + 93 + 95 + 97 + 96 + 98 + 100 + 99 + + + s + 2 + 1 + 3 + 5 + 4 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 + 100 + + + s + 2 + 3 + 1 + 5 + 6 + 4 + 8 + 9 + 7 + 11 + 12 + 10 + 14 + 15 + 13 + 17 + 18 + 16 + 20 + 21 + 19 + 23 + 24 + 22 + 26 + 27 + 25 + 29 + 30 + 28 + 32 + 33 + 31 + 35 + 36 + 34 + 38 + 39 + 37 + 41 + 42 + 40 + 44 + 45 + 43 + 47 + 48 + 46 + 50 + 51 + 53 + 49 + 54 + 56 + 52 + 57 + 59 + 55 + 60 + 62 + 58 + 63 + 65 + 61 + 66 + 68 + 64 + 69 + 71 + 67 + 72 + 74 + 70 + 75 + 77 + 73 + 78 + 80 + 76 + 81 + 83 + 79 + 84 + 86 + 82 + 87 + 89 + 85 + 90 + 92 + 88 + 93 + 95 + 91 + 96 + 94 + 98 + 99 + 97 + 100 + + + s + 2 + 3 + 5 + 6 + 1 + 9 + 8 + 4 + 12 + 11 + 7 + 15 + 14 + 10 + 18 + 17 + 13 + 21 + 20 + 16 + 24 + 23 + 19 + 27 + 26 + 22 + 30 + 29 + 25 + 33 + 32 + 28 + 36 + 35 + 31 + 39 + 38 + 34 + 42 + 41 + 37 + 45 + 44 + 40 + 48 + 47 + 43 + 51 + 50 + 46 + 54 + 53 + 49 + 57 + 56 + 52 + 60 + 59 + 55 + 63 + 62 + 58 + 66 + 65 + 61 + 69 + 68 + 64 + 72 + 71 + 
67 + 75 + 74 + 70 + 78 + 77 + 73 + 81 + 80 + 76 + 84 + 83 + 79 + 87 + 86 + 82 + 90 + 89 + 85 + 93 + 92 + 88 + 96 + 95 + 91 + 99 + 98 + 94 + 97 + 100 + + + s + 3 + 2 + 6 + 5 + 9 + 8 + 1 + 12 + 11 + 4 + 15 + 14 + 7 + 18 + 17 + 10 + 21 + 20 + 13 + 24 + 23 + 16 + 27 + 26 + 19 + 30 + 29 + 22 + 33 + 32 + 25 + 36 + 35 + 28 + 39 + 38 + 31 + 42 + 41 + 34 + 45 + 44 + 37 + 48 + 47 + 40 + 51 + 50 + 43 + 54 + 53 + 46 + 57 + 56 + 49 + 60 + 59 + 52 + 63 + 62 + 55 + 66 + 65 + 58 + 69 + 68 + 61 + 72 + 71 + 64 + 75 + 74 + 67 + 78 + 77 + 70 + 81 + 80 + 73 + 84 + 83 + 76 + 87 + 86 + 79 + 90 + 89 + 82 + 93 + 92 + 85 + 96 + 95 + 88 + 99 + 98 + 91 + 94 + 97 + 100 + + + s + 3 + 2 + 6 + 5 + 9 + 8 + 12 + 11 + 1 + 15 + 14 + 4 + 18 + 17 + 7 + 21 + 20 + 10 + 24 + 23 + 13 + 27 + 26 + 16 + 30 + 29 + 19 + 33 + 32 + 22 + 36 + 35 + 25 + 39 + 38 + 28 + 31 + 34 + 37 + 42 + 45 + 48 + 41 + 51 + 44 + 54 + 47 + 57 + 50 + 60 + 53 + 63 + 56 + 66 + 59 + 69 + 62 + 72 + 65 + 75 + 68 + 78 + 71 + 81 + 40 + 74 + 84 + 43 + 77 + 87 + 46 + 80 + 90 + 49 + 83 + 93 + 52 + 86 + 96 + 55 + 89 + 99 + 58 + 92 + 61 + 95 + 64 + 98 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 3 + 6 + 9 + 2 + 12 + 5 + 15 + 8 + 18 + 11 + 21 + 14 + 24 + 17 + 27 + 20 + 30 + 23 + 33 + 26 + 36 + 29 + 39 + 32 + 42 + 35 + 45 + 1 + 38 + 48 + 4 + 41 + 51 + 7 + 44 + 54 + 10 + 47 + 57 + 13 + 50 + 60 + 16 + 53 + 63 + 19 + 56 + 66 + 22 + 59 + 69 + 25 + 62 + 72 + 28 + 65 + 75 + 31 + 68 + 78 + 34 + 71 + 81 + 37 + 74 + 84 + 40 + 77 + 87 + 43 + 80 + 90 + 46 + 83 + 93 + 49 + 86 + 96 + 52 + 89 + 99 + 55 + 92 + 58 + 95 + 61 + 98 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 3 + 6 + 9 + 2 + 12 + 5 + 15 + 8 + 18 + 11 + 21 + 14 + 24 + 17 + 27 + 20 + 30 + 23 + 33 + 26 + 36 + 29 + 39 + 32 + 42 + 35 + 45 + 38 + 48 + 1 + 41 + 51 + 4 + 44 + 54 + 47 + 7 + 57 + 50 + 10 + 60 + 53 + 13 + 63 + 56 + 16 + 66 + 59 + 19 + 69 + 62 + 22 + 72 + 65 + 25 + 75 + 68 + 28 + 78 + 71 + 31 + 81 + 74 + 34 + 84 + 77 + 37 + 87 + 80 + 40 + 
90 + 83 + 43 + 93 + 86 + 46 + 96 + 89 + 49 + 99 + 92 + 52 + 95 + 55 + 98 + 58 + 61 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + + \ No newline at end of file