Support scanning HiSeq runs with multiple analyses.
authorDiane Trout <diane@caltech.edu>
Wed, 16 Jan 2013 01:00:22 +0000 (17:00 -0800)
committerDiane Trout <diane@caltech.edu>
Wed, 16 Jan 2013 01:00:22 +0000 (17:00 -0800)
It extends the previous C1-100 directory name with the concept of
a suffix extension to that name. The suffix is gathered from whatever the
user came up with their own Aligned/Unaligned directory names.

I discovered I'd previously been calling the
"run_{flowcell id}_{timestamp}.xml" filename the runfolder name. Which is
dumb, as that's a filename, not a name. So this patch renamed it.
(Since I needed to clean up some of the names to implement the above
"run_dirname" functionality.

since I was testing the run.name in the test_runfolders* I needed
to fix those. (And I'm really regretting my cut-n-paste programming).

13 files changed:
htsworkflow/pipelines/runfolder.py
htsworkflow/pipelines/test/test_runfolder026.py
htsworkflow/pipelines/test/test_runfolder030.py
htsworkflow/pipelines/test/test_runfolder110.py
htsworkflow/pipelines/test/test_runfolder_casava_1_7.py
htsworkflow/pipelines/test/test_runfolder_ipar100.py
htsworkflow/pipelines/test/test_runfolder_ipar130.py
htsworkflow/pipelines/test/test_runfolder_pair.py
htsworkflow/pipelines/test/test_runfolder_rta.py
htsworkflow/pipelines/test/test_runfolder_rta160.py
htsworkflow/pipelines/test/test_runfolder_rta180.py
htsworkflow/pipelines/test/test_runfolder_rta1_12.py
htsworkflow/pipelines/test/test_runfolder_utils.py

index b072dd9cb8ee135236721384240a14e364a5910a..5d1e16bce460da443da7bebea56220e388770c88 100644 (file)
@@ -32,7 +32,7 @@ class PipelineRun(object):
     
     :Variables:
       - `pathname` location of the root of this runfolder
     
     :Variables:
       - `pathname` location of the root of this runfolder
-      - `name` read only property containing name of run xml file
+      - `serialization_filename` read only property containing name of run xml file
       - `flowcell_id` read-only property containing flowcell id (bar code)
       - `datadir` location of the runfolder data dir.
       - `image_analysis` generic name for Firecrest or IPAR image analysis
       - `flowcell_id` read-only property containing flowcell id (bar code)
       - `datadir` location of the runfolder data dir.
       - `image_analysis` generic name for Firecrest or IPAR image analysis
@@ -63,6 +63,7 @@ class PipelineRun(object):
         self._name = None
         self._flowcell_id = flowcell_id
         self.datadir = None
         self._name = None
         self._flowcell_id = flowcell_id
         self.datadir = None
+        self.suffix = None
         self.image_analysis = None
         self.bustard = None
         self.gerald = None
         self.image_analysis = None
         self.bustard = None
         self.gerald = None
@@ -133,16 +134,25 @@ class PipelineRun(object):
             return self.gerald.runfolder_name
     runfolder_name = property(_get_runfolder_name)
 
             return self.gerald.runfolder_name
     runfolder_name = property(_get_runfolder_name)
 
-    def _get_run_id(self):
-        """Return a identifer for a run.
+    def _get_run_dirname(self):
+        """Return name of directory to hold result files from one analysis
         
         For pre-multiplexing runs this is just the cycle range C1-123
         For post-multiplexing runs the "suffix" that we add to 
         differentiate runs will be added to the range.
         E.g. Unaligned_6mm may produce C1-200_6mm
         """
         
         For pre-multiplexing runs this is just the cycle range C1-123
         For post-multiplexing runs the "suffix" that we add to 
         differentiate runs will be added to the range.
         E.g. Unaligned_6mm may produce C1-200_6mm
         """
-        pass
-        
+        if self.image_analysis is None:
+            raise ValueError("Not initialized yet")
+        start = self.image_analysis.start
+        stop = self.image_analysis.stop
+        cycle_fragment = "C%d-%d" % (start, stop)
+        if self.suffix:
+            cycle_fragment += self.suffix
+
+        return cycle_fragment
+    run_dirname = property(_get_run_dirname)
+
     def get_elements(self):
         """make one master xml file from all of our sub-components.
         
     def get_elements(self):
         """make one master xml file from all of our sub-components.
         
@@ -187,21 +197,22 @@ class PipelineRun(object):
           else:
             LOGGER.warn('PipelineRun unrecognized tag %s' % (tag,))
 
           else:
             LOGGER.warn('PipelineRun unrecognized tag %s' % (tag,))
 
-    def _get_run_name(self):
-        """Compute the run name for the run xml file
+    def _get_serialization_filename(self):
+        """Compute the filename for the run xml file
         
         Attempts to find the latest date from all of the run 
         components.
         
         
         Attempts to find the latest date from all of the run 
         components.
         
-        :return: run xml name
+        :return: filename run_{flowcell id}_{timestamp}.xml
         :rtype: str
         """
         if self._name is None:
         :rtype: str
         """
         if self._name is None:
-          tmax = max(self.image_analysis.time, self.bustard.time, self.gerald.time)
+          components = [self.image_analysis, self.bustard, self.gerald]
+          tmax = max([ c.time for c in components if c ])
           timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax))
           self._name = 'run_' + self.flowcell_id + "_" + timestamp + '.xml'
         return self._name
           timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax))
           self._name = 'run_' + self.flowcell_id + "_" + timestamp + '.xml'
         return self._name
-    name = property(_get_run_name)
+    serialization_filename = property(_get_serialization_filename)
 
     def save(self, destdir=None):
         """Save a run xml file.
 
     def save(self, destdir=None):
         """Save a run xml file.
@@ -212,10 +223,10 @@ class PipelineRun(object):
         """
         if destdir is None:
             destdir = ''
         """
         if destdir is None:
             destdir = ''
-        LOGGER.info("Saving run report " + self.name)
+        LOGGER.info("Saving run report " + self.serialization_filename)
         xml = self.get_elements()
         indent(xml)
         xml = self.get_elements()
         indent(xml)
-        dest_pathname = os.path.join(destdir, self.name)
+        dest_pathname = os.path.join(destdir, self.serialization_filename)
         ElementTree.ElementTree(xml).write(dest_pathname)
 
     def load(self, filename):
         ElementTree.ElementTree(xml).write(dest_pathname)
 
     def load(self, filename):
@@ -338,17 +349,18 @@ def build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id):
     matched_paths = hiseq_match_aligned_unaligned(aligned_paths, unaligned_paths)
     LOGGER.debug("Matched HiSeq analysis: %s", str(matched_paths))
 
     matched_paths = hiseq_match_aligned_unaligned(aligned_paths, unaligned_paths)
     LOGGER.debug("Matched HiSeq analysis: %s", str(matched_paths))
 
-    for aligned, unaligned in matched_paths:
+    for aligned, unaligned, suffix in matched_paths:
         if unaligned is None:
             LOGGER.warn("Aligned directory %s without matching unalinged, skipping", aligned)
             continue
 
         if unaligned is None:
             LOGGER.warn("Aligned directory %s without matching unalinged, skipping", aligned)
             continue
 
-        print "scan for aligned then remove them from unaligned list"
         try:
             p = PipelineRun(runfolder, flowcell_id)
             p.datadir = datadir
         try:
             p = PipelineRun(runfolder, flowcell_id)
             p.datadir = datadir
+            p.suffix = suffix
             p.image_analysis = image_analysis
             p.bustard = bustard.bustard(unaligned)
             p.image_analysis = image_analysis
             p.bustard = bustard.bustard(unaligned)
+            assert p.bustard
             if aligned:
                 p.gerald = gerald.gerald(aligned)
             runs.append(p)
             if aligned:
                 p.gerald = gerald.gerald(aligned)
             runs.append(p)
@@ -370,7 +382,7 @@ def hiseq_match_aligned_unaligned(aligned, unaligned):
     for key in keys:
         a = aligned_by_suffix.get(key)
         u = unaligned_by_suffix.get(key)
     for key in keys:
         a = aligned_by_suffix.get(key)
         u = unaligned_by_suffix.get(key)
-        matches.append((a, u))
+        matches.append((a, u, key))
     return matches
 
 def build_dir_dict_by_suffix(prefix, dirnames):
     return matches
 
 def build_dir_dict_by_suffix(prefix, dirnames):
@@ -524,11 +536,16 @@ def summary_report(runs):
     Summarize cluster numbers and mapped read counts for a runfolder
     """
     report = []
     Summarize cluster numbers and mapped read counts for a runfolder
     """
     report = []
+    eland_keys = []
     for run in runs:
         # print a run name?
     for run in runs:
         # print a run name?
-        report.append('Summary for %s' % (run.name,))
+        report.append('Summary for %s' % (run.serialization_filename,))
        # sort the report
        # sort the report
-       eland_keys = sorted(run.gerald.eland_results.keys())
+       if run.gerald:
+            eland_keys = sorted(run.gerald.eland_results.keys())
+        else:
+            report.append("Alignment not done, no report possible")
+
     for lane_id in eland_keys:
         report.extend(summarize_lane(run.gerald, lane_id))
         report.append('---')
     for lane_id in eland_keys:
         report.extend(summarize_lane(run.gerald, lane_id))
         report.append('---')
@@ -543,14 +560,14 @@ def is_compressed(filename):
     else:
         return False
 
     else:
         return False
 
-def save_flowcell_reports(data_dir, cycle_dir):
+def save_flowcell_reports(data_dir, run_dirname):
     """
     Save the flowcell quality reports
     """
     data_dir = os.path.abspath(data_dir)
     status_file = os.path.join(data_dir, 'Status.xml')
     reports_dir = os.path.join(data_dir, 'reports')
     """
     Save the flowcell quality reports
     """
     data_dir = os.path.abspath(data_dir)
     status_file = os.path.join(data_dir, 'Status.xml')
     reports_dir = os.path.join(data_dir, 'reports')
-    reports_dest = os.path.join(cycle_dir, 'flowcell-reports.tar.bz2')
+    reports_dest = os.path.join(run_dirname, 'flowcell-reports.tar.bz2')
     if os.path.exists(reports_dir):
         cmd_list = [ 'tar', 'cjvf', reports_dest, 'reports/' ]
         if os.path.exists(status_file):
     if os.path.exists(reports_dir):
         cmd_list = [ 'tar', 'cjvf', reports_dest, 'reports/' ]
         if os.path.exists(status_file):
@@ -563,21 +580,21 @@ def save_flowcell_reports(data_dir, cycle_dir):
         os.chdir(cwd)
 
 
         os.chdir(cwd)
 
 
-def save_summary_file(pipeline, cycle_dir):
+def save_summary_file(pipeline, run_dirname):
     # Copy Summary.htm
     gerald_object = pipeline.gerald
     gerald_summary = os.path.join(gerald_object.pathname, 'Summary.htm')
     status_files_summary = os.path.join(pipeline.datadir, 'Status_Files', 'Summary.htm')
     if os.path.exists(gerald_summary):
     # Copy Summary.htm
     gerald_object = pipeline.gerald
     gerald_summary = os.path.join(gerald_object.pathname, 'Summary.htm')
     status_files_summary = os.path.join(pipeline.datadir, 'Status_Files', 'Summary.htm')
     if os.path.exists(gerald_summary):
-        LOGGER.info('Copying %s to %s' % (gerald_summary, cycle_dir))
-        shutil.copy(gerald_summary, cycle_dir)
+        LOGGER.info('Copying %s to %s' % (gerald_summary, run_dirname))
+        shutil.copy(gerald_summary, run_dirname)
     elif os.path.exists(status_files_summary):
     elif os.path.exists(status_files_summary):
-        LOGGER.info('Copying %s to %s' % (status_files_summary, cycle_dir))
-        shutil.copy(status_files_summary, cycle_dir)
+        LOGGER.info('Copying %s to %s' % (status_files_summary, run_dirname))
+        shutil.copy(status_files_summary, run_dirname)
     else:
         LOGGER.info('Summary file %s was not found' % (summary_path,))
 
     else:
         LOGGER.info('Summary file %s was not found' % (summary_path,))
 
-def save_ivc_plot(bustard_object, cycle_dir):
+def save_ivc_plot(bustard_object, run_dirname):
     """
     Save the IVC page and its supporting images
     """
     """
     Save the IVC page and its supporting images
     """
@@ -585,12 +602,12 @@ def save_ivc_plot(bustard_object, cycle_dir):
     plot_image_path = os.path.join(bustard_object.pathname, 'Plots')
     plot_images = os.path.join(plot_image_path, 's_?_[a-z]*.png')
 
     plot_image_path = os.path.join(bustard_object.pathname, 'Plots')
     plot_images = os.path.join(plot_image_path, 's_?_[a-z]*.png')
 
-    plot_target_path = os.path.join(cycle_dir, 'Plots')
+    plot_target_path = os.path.join(run_dirname, 'Plots')
 
     if os.path.exists(plot_html):
         LOGGER.debug("Saving %s" % (plot_html,))
         LOGGER.debug("Saving %s" % (plot_images,))
 
     if os.path.exists(plot_html):
         LOGGER.debug("Saving %s" % (plot_html,))
         LOGGER.debug("Saving %s" % (plot_images,))
-        shutil.copy(plot_html, cycle_dir)
+        shutil.copy(plot_html, run_dirname)
         if not os.path.exists(plot_target_path):
             os.mkdir(plot_target_path)
         for plot_file in glob(plot_images):
         if not os.path.exists(plot_target_path):
             os.mkdir(plot_target_path)
         for plot_file in glob(plot_images):
@@ -599,7 +616,7 @@ def save_ivc_plot(bustard_object, cycle_dir):
         LOGGER.warning('Missing IVC.html file, not archiving')
 
 
         LOGGER.warning('Missing IVC.html file, not archiving')
 
 
-def compress_score_files(bustard_object, cycle_dir):
+def compress_score_files(bustard_object, run_dirname):
     """
     Compress score files into our result directory
     """
     """
     Compress score files into our result directory
     """
@@ -617,7 +634,7 @@ def compress_score_files(bustard_object, cycle_dir):
 
     tar_cmd = ['tar', 'c'] + score_files
     bzip_cmd = [ 'bzip2', '-9', '-c' ]
 
     tar_cmd = ['tar', 'c'] + score_files
     bzip_cmd = [ 'bzip2', '-9', '-c' ]
-    tar_dest_name = os.path.join(cycle_dir, 'scores.tar.bz2')
+    tar_dest_name = os.path.join(run_dirname, 'scores.tar.bz2')
     tar_dest = open(tar_dest_name, 'w')
     LOGGER.info("Compressing score files from %s" % (scores_path,))
     LOGGER.info("Running tar: " + " ".join(tar_cmd[:10]))
     tar_dest = open(tar_dest_name, 'w')
     LOGGER.info("Compressing score files from %s" % (scores_path,))
     LOGGER.info("Running tar: " + " ".join(tar_cmd[:10]))
@@ -631,7 +648,7 @@ def compress_score_files(bustard_object, cycle_dir):
     tar.wait()
 
 
     tar.wait()
 
 
-def compress_eland_results(gerald_object, cycle_dir, num_jobs=1):
+def compress_eland_results(gerald_object, run_dirname, num_jobs=1):
     """
     Compress eland result files into the archive directory
     """
     """
     Compress eland result files into the archive directory
     """
@@ -646,7 +663,7 @@ def compress_eland_results(gerald_object, cycle_dir, num_jobs=1):
                 "Lane ID %s does not have a filename." % (eland_lane.lane_id,))
             else:
               path, name = os.path.split(source_name)
                 "Lane ID %s does not have a filename." % (eland_lane.lane_id,))
             else:
               path, name = os.path.split(source_name)
-              dest_name = os.path.join(cycle_dir, name)
+              dest_name = os.path.join(run_dirname, name)
               LOGGER.info("Saving eland file %s to %s" % \
                          (source_name, dest_name))
 
               LOGGER.info("Saving eland file %s to %s" % \
                          (source_name, dest_name))
 
@@ -687,52 +704,56 @@ def extract_results(runs, output_base_dir=None, site="individual", num_jobs=1, r
         if not os.path.exists(result_dir):
             os.mkdir(result_dir)
 
         if not os.path.exists(result_dir):
             os.mkdir(result_dir)
 
-        # create cycle_dir
-        cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop)
-        LOGGER.info("Filling in %s" % (cycle,))
-        cycle_dir = os.path.join(result_dir, cycle)
-        cycle_dir = os.path.abspath(cycle_dir)
-        if os.path.exists(cycle_dir):
-            LOGGER.error("%s already exists, not overwriting" % (cycle_dir,))
+        # create directory to add this runs results to
+        LOGGER.info("Filling in %s" % (r.run_dirname,))
+        run_dirname = os.path.join(result_dir, r.run_dirname)
+        run_dirname = os.path.abspath(run_dirname)
+        if os.path.exists(run_dirname):
+            LOGGER.error("%s already exists, not overwriting" % (run_dirname,))
             continue
         else:
             continue
         else:
-            os.mkdir(cycle_dir)
+            os.mkdir(run_dirname)
 
         # save run file
 
         # save run file
-        r.save(cycle_dir)
+        r.save(run_dirname)
 
         # save illumina flowcell status report
         save_flowcell_reports(os.path.join(r.image_analysis.pathname, '..'),
 
         # save illumina flowcell status report
         save_flowcell_reports(os.path.join(r.image_analysis.pathname, '..'),
-                              cycle_dir)
+                              run_dirname)
 
         # save stuff from bustard
         # grab IVC plot
 
         # save stuff from bustard
         # grab IVC plot
-        save_ivc_plot(r.bustard, cycle_dir)
+        save_ivc_plot(r.bustard, run_dirname)
 
         # build base call saving commands
         if site is not None:
 
         # build base call saving commands
         if site is not None:
-            save_raw_data(num_jobs, r, site, raw_format, cycle_dir)
+            save_raw_data(num_jobs, r, site, raw_format, run_dirname)
 
         # save stuff from GERALD
         # copy stuff out of the main run
 
         # save stuff from GERALD
         # copy stuff out of the main run
-        g = r.gerald
+        if r.gerald:
+            g = r.gerald
 
 
-        # save summary file
-        save_summary_file(r, cycle_dir)
+            # save summary file
+            save_summary_file(r, run_dirname)
 
 
-        # compress eland result files
-        compress_eland_results(g, cycle_dir, num_jobs)
+            # compress eland result files
+            compress_eland_results(g, run_dirname, num_jobs)
 
         # md5 all the compressed files once we're done
 
         # md5 all the compressed files once we're done
-        md5_commands = srf.make_md5_commands(cycle_dir)
-        srf.run_commands(cycle_dir, md5_commands, num_jobs)
+        md5_commands = srf.make_md5_commands(run_dirname)
+        srf.run_commands(run_dirname, md5_commands, num_jobs)
 
 
-def save_raw_data(num_jobs, r, site, raw_format, cycle_dir):
+def save_raw_data(num_jobs, r, site, raw_format, run_dirname):
     lanes = []
     lanes = []
-    for lane in r.gerald.lanes:
-        lane_parameters = r.gerald.lanes.get(lane, None)
-        if lane_parameters is not None:
-            lanes.append(lane)
+    if r.gerald:
+        for lane in r.gerald.lanes:
+            lane_parameters = r.gerald.lanes.get(lane, None)
+            if lane_parameters is not None:
+                lanes.append(lane)
+    else:
+        # assume default list of lanes
+        lanes = LANE_LIST
 
     run_name = srf.pathname_to_run_name(r.pathname)
     seq_cmds = []
 
     run_name = srf.pathname_to_run_name(r.pathname)
     seq_cmds = []
@@ -741,13 +762,14 @@ def save_raw_data(num_jobs, r, site, raw_format, cycle_dir):
 
     LOGGER.info("Raw Format is: %s" % (raw_format, ))
     if raw_format == 'fastq':
 
     LOGGER.info("Raw Format is: %s" % (raw_format, ))
     if raw_format == 'fastq':
-        rawpath = os.path.join(r.pathname, r.gerald.runfolder_name)
+        LOGGER.info("Reading fastq files from %s", r.bustard.pathname)
+        rawpath = os.path.join(r.pathname, r.bustard.pathname)
         LOGGER.info("raw data = %s" % (rawpath,))
         LOGGER.info("raw data = %s" % (rawpath,))
-        srf.copy_hiseq_project_fastqs(run_name, rawpath, site, cycle_dir)
+        srf.copy_hiseq_project_fastqs(run_name, rawpath, site, run_dirname)
     elif raw_format == 'qseq':
     elif raw_format == 'qseq':
-        seq_cmds = srf.make_qseq_commands(run_name, r.bustard.pathname, lanes, site, cycle_dir)
+        seq_cmds = srf.make_qseq_commands(run_name, r.bustard.pathname, lanes, site, run_dirname)
     elif raw_format == 'srf':
     elif raw_format == 'srf':
-        seq_cmds = srf.make_srf_commands(run_name, r.bustard.pathname, lanes, site, cycle_dir, 0)
+        seq_cmds = srf.make_srf_commands(run_name, r.bustard.pathname, lanes, site, run_dirname, 0)
     else:
         raise ValueError('Unknown --raw-format=%s' % (raw_format))
     srf.run_commands(r.bustard.pathname, seq_cmds, num_jobs)
     else:
         raise ValueError('Unknown --raw-format=%s' % (raw_format))
     srf.run_commands(r.bustard.pathname, seq_cmds, num_jobs)
index 970dbc27e1dac8c9e91d07098f570305f11c2c83..de68a2505cbb2e1e6f83726e23780008d6e90b83 100644 (file)
@@ -451,20 +451,20 @@ class RunfolderTests(TestCase):
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
+        self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXX_2008-04-19.xml')
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
+        self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXY_2008-04-19.xml')
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index cb2bf95b3e048e27ceb20df1414f67cf2ffcfb3a..cd631bf6070a4f7c5bdd105ca56631462b364d70 100644 (file)
@@ -875,20 +875,20 @@ class RunfolderTests(TestCase):
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
+        self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXX_2008-04-19.xml')
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
+        self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXY_2008-04-19.xml')
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index c215f557671c4192e49447ef10e524ab8f0d02ee..27e66499684b0860b08925ff24558b7959aafeb5 100644 (file)
@@ -279,21 +279,21 @@ class RunfolderTests(TestCase):
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_30J55AAXX_2009-02-22.xml'
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_30J55AAXX_2009-02-22.xml'
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '30J55AAXX')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_30J55AAXX_2009-02-22.xml'
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '30J55AAXX')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_30J55AAXX_2009-02-22.xml'
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index ae8899eeda02df677ca79360035d1b3c4a3c09df..96a5bcf1102a1a558c00ac93876cfb870cc50677 100644 (file)
@@ -268,21 +268,21 @@ class RunfolderTests(TestCase):
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_%s_%s.xml' % ( FCID, date.today().strftime('%Y-%m-%d'),)
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_%s_%s.xml' % ( FCID, date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, FCID)
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_%s_%s.xml' % ( FCID, date.today().strftime('%Y-%m-%d'),)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, FCID)
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_%s_%s.xml' % ( FCID, date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index 04e802642446298fd9fd4a4ce0e473573e99be86..34a9c8a04e8ce85dfd97f4b46d77b02ee9256fd9 100644 (file)
@@ -278,21 +278,21 @@ class RunfolderTests(TestCase):
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index 617b917ffa00c87a585356e7b840bf213640a3a8..c23ed9730808416484500e37c866a29e984e9ce0 100644 (file)
@@ -316,21 +316,21 @@ class RunfolderTests(TestCase):
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_3021JAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_3021JAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index 3ed091d351c8174d0badc58050da4e7b137ff80b..dce07884228bdb154058af15e838a5926bca63d7 100644 (file)
@@ -304,21 +304,21 @@ class RunfolderTests(TestCase):
         self.failUnlessEqual(len(runs), 1)
         # firecrest's date depends on filename not the create time.
         name = 'run_207BTAAXX_2009-02-22.xml'
         self.failUnlessEqual(len(runs), 1)
         # firecrest's date depends on filename not the create time.
         name = 'run_207BTAAXX_2009-02-22.xml'
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_2009-02-22.xml'
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_2009-02-22.xml'
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index 0503ea5916c998eb2a63b06f12c161764dcd5671..497fe7b33181cd6896fb0cac05c0ce19d79c0b24 100644 (file)
@@ -278,21 +278,21 @@ class RunfolderTests(TestCase):
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index 25ab00f8a23ad4624c794c350e3a04ddcfcc2884..8d8f2f104744672fc5ceba1ec3379f970dc093ac 100644 (file)
@@ -249,14 +249,14 @@ class RunfolderTests(TestCase):
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         bustard_dir = os.path.join(self.runfolder_dir, 'Data',
                                    'Intensities', 'BaseCalls')
 
         bustard_dir = os.path.join(self.runfolder_dir, 'Data',
                                    'Intensities', 'BaseCalls')
@@ -269,7 +269,7 @@ class RunfolderTests(TestCase):
                              '090220_HWI-EAS229_0093_30VR0AAXX')
 
         r2 = runfolder.PipelineRun(xml=xml)
                              '090220_HWI-EAS229_0093_30VR0AAXX')
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index a4039e7991537a01368914cce186e40310524be9..63b4a8df0c7176b8ced6aa1a022a8b22f7c6a7bb 100644 (file)
@@ -263,21 +263,21 @@ class RunfolderTests(TestCase):
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
         name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
 
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index b04dfe63affe387ca61f78e45dfd2ddaaa13a755..8396bd0351265fe940a4b75dff3c421345272f56 100644 (file)
@@ -257,7 +257,7 @@ class RunfolderTests(TestCase):
         self.assertEqual(runs[0].flowcell_id, self.flowcell_id)
         name = 'run_%s_%s.xml' % ( self.flowcell_id,
                                    date.today().strftime('%Y-%m-%d'),)
         self.assertEqual(runs[0].flowcell_id, self.flowcell_id)
         name = 'run_%s_%s.xml' % ( self.flowcell_id,
                                    date.today().strftime('%Y-%m-%d'),)
-        self.failUnlessEqual(runs[0].name, name)
+        self.failUnlessEqual(runs[0].serialization_filename, name)
 
         bustard_dir = os.path.join(self.runfolder_dir, 'Unaligned')
         r1 = runs[0]
 
         bustard_dir = os.path.join(self.runfolder_dir, 'Unaligned')
         r1 = runs[0]
@@ -269,7 +269,7 @@ class RunfolderTests(TestCase):
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
         xml_str = ElementTree.tostring(xml)
 
         r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
+        self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
         self.failIfEqual(r2.image_analysis, None)
         self.failIfEqual(r2.bustard, None)
         self.failIfEqual(r2.gerald, None)
index 59ee418928f6b84b7317c60a62f074894701d430..e7c0c3807d950e18cdb854c86932c8247e4b45f1 100644 (file)
@@ -10,10 +10,10 @@ class TestRunfolderUtilities(TestCase):
 
         matches = set(runfolder.hiseq_match_aligned_unaligned(aligned, unaligned))
         self.assertEqual(len(matches), 4)
 
         matches = set(runfolder.hiseq_match_aligned_unaligned(aligned, unaligned))
         self.assertEqual(len(matches), 4)
-        self.assertTrue(('/a/b/c/Aligned', '/a/b/c/Unaligned') in matches )
-        self.assertTrue(('/a/b/c/Aligned1234', None) in matches )
-        self.assertTrue(('/a/b/c/Aligned_3mm', '/a/b/c/Unaligned_3mm') in matches )
-        self.assertTrue((None, '/a/b/c/Unaligned_6index') in matches )
+        self.assertTrue(('/a/b/c/Aligned', '/a/b/c/Unaligned', '') in matches )
+        self.assertTrue(('/a/b/c/Aligned1234', None, '1234') in matches )
+        self.assertTrue(('/a/b/c/Aligned_3mm', '/a/b/c/Unaligned_3mm', '_3mm') in matches )
+        self.assertTrue((None, '/a/b/c/Unaligned_6index', '_6index') in matches )
 
     def test_match_aligned_unaligned_relpath(self):
         aligned = ['./Aligned', './Aligned1234', './Aligned_3mm']
 
     def test_match_aligned_unaligned_relpath(self):
         aligned = ['./Aligned', './Aligned1234', './Aligned_3mm']
@@ -21,10 +21,10 @@ class TestRunfolderUtilities(TestCase):
 
         matches = set(runfolder.hiseq_match_aligned_unaligned(aligned, unaligned))
         self.assertEqual(len(matches), 4)
 
         matches = set(runfolder.hiseq_match_aligned_unaligned(aligned, unaligned))
         self.assertEqual(len(matches), 4)
-        self.assertTrue(('./Aligned', './Unaligned') in matches )
-        self.assertTrue(('./Aligned1234', None) in matches )
-        self.assertTrue(('./Aligned_3mm', './Unaligned_3mm') in matches )
-        self.assertTrue((None, './Unaligned_6index') in matches )
+        self.assertTrue(('./Aligned', './Unaligned', '') in matches )
+        self.assertTrue(('./Aligned1234', None, '1234') in matches )
+        self.assertTrue(('./Aligned_3mm', './Unaligned_3mm', '_3mm') in matches )
+        self.assertTrue((None, './Unaligned_6index', '_6index') in matches )
 
 def suite():
     suite = TestSuite()
 
 def suite():
     suite = TestSuite()