- eland_keys = run.gerald.eland_results.results.keys()
- eland_keys.sort(alphanum)
-
- lane_results = run.gerald.summary.lane_results
- for lane_id in eland_keys:
- result = run.gerald.eland_results.results[lane_id]
- report.append("Sample name %s" % (result.sample_name))
- report.append("Lane id %s" % (result.lane_id,))
- cluster = lane_results[result.lane_id].cluster
- report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
- report.append("Total Reads: %d" % (result.reads))
- mc = result._match_codes
- nm = mc['NM']
- nm_percent = float(nm)/result.reads * 100
- qc = mc['QC']
- qc_percent = float(qc)/result.reads * 100
-
- report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
- report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
- report.append('Unique (0,1,2 mismatches) %d %d %d' % \
- (mc['U0'], mc['U1'], mc['U2']))
- report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
- (mc['R0'], mc['R1'], mc['R2']))
- report.append("Mapped Reads")
- mapped_reads = summarize_mapped_reads(result.mapped_reads)
- for name, counts in mapped_reads.items():
- report.append(" %s: %d" % (name, counts))
- report.append('---')
- report.append('')
- return os.linesep.join(report)
-
-def extract_results(runs, output_base_dir=None):
+ if run.gerald:
+ eland_keys = sorted(run.gerald.eland_results.keys())
+ else:
+ report.append("Alignment not done, no report possible")
+
+ for lane_id in eland_keys:
+ report.extend(summarize_lane(run.gerald, lane_id))
+ report.append('---')
+ report.append('')
+ return os.linesep.join(report)
+
def is_compressed(filename):
    """
    Report whether filename carries a compressed-file extension.

    Only the last extension, as split off by os.path.splitext, is
    examined and the comparison is case-sensitive.

    :param filename: file name or path to inspect
    :returns: True when the extension is '.gz' or '.bz2', otherwise False
    """
    # split the name once and test the suffix against both known extensions
    return os.path.splitext(filename)[1] in ('.gz', '.bz2')
+
def save_flowcell_reports(data_dir, run_dirname):
    """
    Save the flowcell quality reports

    Archives the 'reports' directory found in data_dir into
    flowcell-reports.tar.bz2 inside run_dirname. Status.xml and Status.xsl
    are added to the archive when Status.xml exists. Nothing is done when
    data_dir has no 'reports' directory.

    :param data_dir: directory holding 'reports' (and optionally Status.xml)
    :param run_dirname: directory that receives flowcell-reports.tar.bz2
    """
    data_dir = os.path.abspath(data_dir)
    status_file = os.path.join(data_dir, 'Status.xml')
    reports_dir = os.path.join(data_dir, 'reports')
    # The tar command is run after a chdir into data_dir, so pin the
    # destination to an absolute path while the caller's cwd is still active;
    # otherwise a relative run_dirname would be resolved against data_dir.
    reports_dest = os.path.abspath(
        os.path.join(run_dirname, 'flowcell-reports.tar.bz2'))
    if not os.path.exists(reports_dir):
        return

    # member names are relative because the command runs from inside data_dir
    cmd_list = ['tar', 'cjvf', reports_dest, 'reports/']
    if os.path.exists(status_file):
        cmd_list.extend(['Status.xml', 'Status.xsl'])
    LOGGER.info("Saving reports from " + reports_dir)
    cwd = os.getcwd()
    os.chdir(data_dir)
    try:
        q = QueueCommands([" ".join(cmd_list)])
        q.run()
    finally:
        # always return to the caller's directory, even if the command fails
        os.chdir(cwd)
+
+
def save_summary_file(pipeline, run_dirname):
    """
    Copy the run's Summary.htm into run_dirname

    The copy inside the gerald directory is preferred; when it is absent the
    one under <datadir>/Status_Files is used instead. If neither exists the
    miss is logged and nothing is copied.

    :param pipeline: object providing .gerald.pathname and .datadir
    :param run_dirname: destination directory for the copy
    """
    gerald_object = pipeline.gerald
    gerald_summary = os.path.join(gerald_object.pathname, 'Summary.htm')
    status_files_summary = os.path.join(pipeline.datadir, 'Status_Files', 'Summary.htm')

    # candidates in order of preference; the first one that exists wins
    for summary_path in (gerald_summary, status_files_summary):
        if os.path.exists(summary_path):
            LOGGER.info('Copying %s to %s' % (summary_path, run_dirname))
            shutil.copy(summary_path, run_dirname)
            return

    # report both locations that were searched
    LOGGER.info('Summary file was not found in %s or %s' %
                (gerald_summary, status_files_summary))
+
def save_ivc_plot(bustard_object, run_dirname):
    """
    Archive IVC.htm together with its plot images

    IVC.htm is copied from the bustard directory into run_dirname, and every
    file in the bustard 'Plots' directory matching s_?_[a-z]*.png is copied
    into run_dirname/Plots, which is created when it does not exist yet.
    When IVC.htm is missing a warning is logged and nothing is copied.

    :param bustard_object: object whose .pathname is the bustard directory
    :param run_dirname: directory that receives the page and the Plots folder
    """
    bustard_dir = bustard_object.pathname
    ivc_page = os.path.join(bustard_dir, 'IVC.htm')
    image_pattern = os.path.join(bustard_dir, 'Plots', 's_?_[a-z]*.png')
    image_dest = os.path.join(run_dirname, 'Plots')

    # without the page itself there is nothing to archive
    if not os.path.exists(ivc_page):
        LOGGER.warning('Missing IVC.html file, not archiving')
        return

    LOGGER.debug("Saving %s" % (ivc_page,))
    LOGGER.debug("Saving %s" % (image_pattern,))
    shutil.copy(ivc_page, run_dirname)

    if not os.path.exists(image_dest):
        os.mkdir(image_dest)
    for image in glob(image_pattern):
        shutil.copy(image, image_dest)
+
+
def compress_score_files(bustard_object, run_dirname):
    """
    Compress score files into our result directory

    Every file in the bustard directory (or in its 'Temp' subdirectory when
    one exists) whose name matches '.*_score.txt' is streamed through
    ``tar c | bzip2 -9 -c`` into run_dirname/scores.tar.bz2. The function
    returns only after both child processes have finished.

    :param bustard_object: object whose .pathname is the bustard directory
    :param run_dirname: directory that receives scores.tar.bz2
    """
    # check for g.pathname/Temp a new feature of 1.1rc1
    scores_path = bustard_object.pathname
    scores_path_temp = os.path.join(scores_path, 'Temp')
    if os.path.isdir(scores_path_temp):
        scores_path = scores_path_temp

    # hopefully we have a directory that contains s_*_score files
    score_files = [f for f in os.listdir(scores_path)
                   if re.match('.*_score.txt', f)]
    if not score_files:
        # nothing to archive; skip rather than invoking tar with an empty
        # member list and leaving a meaningless scores.tar.bz2 behind
        LOGGER.warning("No score files found in %s, not archiving" %
                       (scores_path,))
        return

    tar_cmd = ['tar', 'c'] + score_files
    bzip_cmd = ['bzip2', '-9', '-c']
    tar_dest_name = os.path.join(run_dirname, 'scores.tar.bz2')
    LOGGER.info("Compressing score files from %s" % (scores_path,))
    # only the first few arguments are logged to keep the line readable
    LOGGER.info("Running tar: " + " ".join(tar_cmd[:10]))
    LOGGER.info("Running bzip2: " + " ".join(bzip_cmd))
    LOGGER.info("Writing to %s" % (tar_dest_name,))

    # NOTE(review): env replaces the whole environment of the tar process;
    # kept exactly as in the original.
    env = {'BZIP': '-9'}
    # bzip2 emits compressed bytes, so open in binary mode; the with block
    # guarantees the file is closed once the pipeline has finished
    with open(tar_dest_name, 'wb') as tar_dest:
        tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False,
                               env=env, cwd=scores_path)
        bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
        # drop the parent's copy of the pipe so that tar receives SIGPIPE
        # if bzip2 exits early
        tar.stdout.close()
        tar.wait()
        # the archive is only complete once bzip2 has flushed and exited
        bzip.wait()
+
+
def compress_eland_results(gerald_object, run_dirname, num_jobs=1):
    """
    Compress eland result files into the archive directory

    Files that is_compressed() recognises are copied to run_dirname
    unchanged. For every other file a ``bzip2 -9 -c source > dest.bz2``
    command line is queued; the queued commands are then run through
    QueueCommands with num_jobs. Lanes whose pathname entry is None are
    logged and skipped.

    :param gerald_object: object whose .eland_results maps keys to lanes
        exposing .pathnames and .lane_id
    :param run_dirname: directory that receives the copies / .bz2 files
    :param num_jobs: passed through to QueueCommands
    """
    # shlex.quote on Python 3, pipes.quote on Python 2
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # copy & bzip eland files
    bz_commands = []

    for key in gerald_object.eland_results:
        eland_lane = gerald_object.eland_results[key]
        for source_name in eland_lane.pathnames:
            if source_name is None:
                LOGGER.info(
                    "Lane ID %s does not have a filename." % (eland_lane.lane_id,))
                continue

            name = os.path.basename(source_name)
            dest_name = os.path.join(run_dirname, name)
            LOGGER.info("Saving eland file %s to %s" %
                        (source_name, dest_name))

            if is_compressed(name):
                LOGGER.info('Already compressed, Saving to %s' % (dest_name,))
                shutil.copy(source_name, dest_name)
            else:
                # not compressed
                dest_name += '.bz2'
                # The command relies on '>' redirection, so it is presumably
                # interpreted by a shell; quote the paths so that spaces or
                # shell metacharacters in them cannot split or alter it.
                args = ['bzip2', '-9', '-c', quote(source_name),
                        '>', quote(dest_name)]
                bz_commands.append(" ".join(args))

    if bz_commands:
        q = QueueCommands(bz_commands, num_jobs)
        q.run()
+
+
+def extract_results(runs, output_base_dir=None, site="individual", num_jobs=1, raw_format=None):
+ """
+ Iterate over runfolders in runs extracting the most useful information.
+ * run parameters (in run-*.xml)
+ * eland_result files
+ * score files
+ * Summary.htm
+ * srf files (raw sequence & qualities)
+ """