+++ /dev/null
-#!/usr/bin/env python
-import os
-import sys
-import re
-from htsworkflow.pipelines.configure_run import *
-from htsworkflow.pipelines import retrieve_config as _rc
-from htsworkflow.pipelines.run_status import startCmdLineStatusMonitor
-
-s_fc = re.compile('FC[0-9]+')
-
-#Turn on built-in command-line parsing.
-_rc.DISABLE_CMDLINE = False
-
-GENOME_DIR = '/data-store01/compbio/genomes/'
-
-
-
-def main(args=None):
- ci = ConfigInfo()
- ci.analysis_dir = os.getcwd()
- ci.base_analysis_dir, junk = os.path.split(ci.analysis_dir)
-
- #FIXME: make a better command line tool
- skip_retrieve_config = False
- if len(args) == 1:
- arg = args[0]
-
- #If FC##### found
- if s_fc.search(arg):
- cfg_filepath = os.path.abspath('config32auto.txt')
- flowcell = arg
- #else, config file provide
- else:
- cfg_filepath = os.path.abspath(args[0])
- skip_retrieve_config = True
- else:
- print "usage:\n" \
- " configure_pipeline FC#####\n" \
- " or:\n" \
- " configure_pipeline <conf_filepath>\n"
- return 3
-
- genome_dir = GENOME_DIR
-
- if not skip_retrieve_config:
- status_retrieve_cfg = retrieve_config(ci, flowcell, cfg_filepath, genome_dir)
- if status_retrieve_cfg:
- print "Retrieve config file successful"
- else:
- print "Failed to retrieve config file"
- else:
- print "Config file %s provided from command-line" % (cfg_filepath)
- ci.config_filepath = cfg_filepath
- status_retrieve_cfg = True
-
- if status_retrieve_cfg:
- status = configure(ci)
- if status:
- print "Configure success"
- else:
- print "Configure failed"
-
- print 'Run Dir:', ci.run_path
- print 'Bustard Dir:', ci.bustard_path
-
- if status:
- # Setup status cmdline status monitor
- startCmdLineStatusMonitor(ci)
-
- print 'Running pipeline now!'
- run_status = run_pipeline(ci)
- if run_status is True:
- print 'Pipeline ran successfully.'
- return 0
- else:
- print 'Pipeline run failed.'
- return 1
-
- return 2
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.DEBUG,
- format='%(asctime)s %(levelname)-8s %(message)s',
- datefmt='%a, %d %b %Y %H:%M:%S',
- #filename='pipeline_main.log',
- filemode='w')
-
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.copier import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines.eland import extract_eland_sequence
-
-def make_parser():
- usage = "usage: %prog [options] infile [outfile]"
-
- parser = optparse.OptionParser(usage)
- parser.add_option("-e", "--extract", dest="slice",
- default=":",
- help="provide a python slice operator to select a portion of an eland file")
- return parser
-
-def main(argv):
- parser = make_parser()
-
- (opt, args) = parser.parse_args(argv)
-
- if len(args) not in (0, 1, 2):
- parser.error('incorrect number of arguments')
-
- # get our slice coordinates
- start, end = opt.slice.split(':')
- if len(start) > 0:
- start = int(start)
- else:
- start = None
- if len(end) > 0:
- end = int(end)
- else:
- end = None
-
- # open infile
- if len(args) > 0:
- instream = open(args[0],'r')
- else:
- instream = sys.stdin
-
- if len(args) > 1:
- outstream = open(args[1],'w')
- else:
- outstream = sys.stdout
-
- extract_eland_sequence(instream, outstream, start, end)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/python
-"""
-Convert a group of eland_result files from a sequencer run to bed files.
-"""
-from glob import glob
-import logging
-import optparse
-import sys
-import os
-
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
-
-def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
- """
- convert s_[1-8]_eland_result.txt to corresponding bed files
- """
- eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
- out_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.bed'))
- if len(out_files) > 0:
- raise RuntimeError("please move old bedfiles")
-
- logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell))
- for pathname in eland_files:
- path, name = os.path.split(pathname)
- lane = int(name[2])
- outname = 's_%d_eland_result.bed' %(lane,)
- logging.info('Converting lane %d to %s' % (lane, outname))
-
- outpathname = os.path.join(eland_dir, outname)
- # look up descriptions
- bed_name, description = make_description(database, flowcell, lane)
-
- # open files
- instream = open(pathname,'r')
- outstream = open(outpathname,'w')
-
- make_bed_from_eland_stream(
- instream, outstream, name, description, prefix
- )
-
-def make_parser():
- usage = """%prog: --flowcell <flowcell id> directory_name
-
-directory should contain a set of 8 eland result files named like
-s_[12345678]_eland_result.txt"""
-
-
- parser = optparse.OptionParser(usage)
-
- parser.add_option('-o', '--output', dest='output',
- help="destination directory for our bed files" \
- "defaults to eland directory",
- default=None)
- parser.add_option('--chromosome', dest='prefix',
- help='Set the chromosome prefix name. defaults to "chr"',
- default='chr')
- parser.add_option("--database", dest='database',
- help="specify location of fctracker database",
- default=None)
- parser.add_option("--flowcell", dest='flowcell',
- help="specify the flowcell id for this run",
- default=None)
- parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
- help='increase verbosity',
- default=False)
- return parser
-
-def main(command_line=None):
- logging.basicConfig(level=logging.WARNING)
- if command_line is None:
- command_line = sys.argv[1:]
-
- parser = make_parser()
- (opts, args) = parser.parse_args(command_line)
-
- if len(args) != 1:
- parser.error('Directory name required')
-
- eland_dir = args[0]
- if not os.path.isdir(eland_dir):
- parser.error('%s must be a directory' % (eland_dir,))
-
- if opts.flowcell is None:
- parser.error('Flowcell ID required')
-
- if opts.verbose:
- logger = logging.getLogger()
- logger.setLevel(logging.INFO)
-
- make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell)
-
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
--- /dev/null
+#!/usr/bin/env python
+import sys
+from htsworkflow.automation.copier import main
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/python
+import optparse
+import sys
+import os
+
+from htsworkflow.util.opener import autoopen
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
+
+def make_parser():
+ parser = optparse.OptionParser()
+ parser.add_option('-e', '--eland', dest='inname',
+ help='specify input eland filename')
+ parser.add_option('-b', '--bed', dest='outname',
+                      help='specify output bed filename')
+ parser.add_option('-n', '--name', dest='name',
+ help='specify the track (short) name.',
+ default=None)
+ parser.add_option('-d', '--description', dest='description',
+ help='specify the track description',
+ default=None)
+ parser.add_option('--chromosome', dest='prefix',
+ help='Set the chromosome prefix name. defaults to "chr"',
+ default='chr')
+ parser.add_option("--database", dest='database',
+ help="specify location of fctracker database",
+ default=None)
+ parser.add_option("--flowcell", dest='flowcell',
+ help="compute name and description from database using flowcell id",
+ default=None)
+ parser.add_option("--lane", dest='lane',
+ help='specify which lane to use when retrieving description from database',
+ default=None)
+
+ multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')
+
+ multi.add_option('-m', '--multi', action='store_true',
+ help='Enable parsing multi-read eland files',
+ default=False)
+ multi.add_option('--reads', type='int',
+ help='limit reporting multi reads to this many reads'
+ '(most usefully --reads=1 will turn a multi-read '
+ 'file into a single read file)',
+ default=255)
+ parser.add_option_group(multi)
+
+ return parser
+
+def main(command_line=None):
+ instream = None
+ outstream = None
+
+ if command_line is None:
+ command_line = sys.argv[1:]
+
+ parser = make_parser()
+ (options, args) = parser.parse_args(command_line)
+
+ if options.inname is None:
+ parser.error("Need eland input file name")
+ return 1
+
+ if options.inname == '-':
+ instream = sys.stdin
+ elif os.path.exists(options.inname):
+ instream = autoopen(options.inname, 'r')
+ else:
+ parser.error('%s was not found' % (options.inname))
+ return 1
+
+ # figure out name for output file
+ if options.outname is None:
+ # if outname wasn't defined, and we're reading from stdout
+ if instream is sys.stdin:
+ # write to stdout
+ outstream = sys.stdout
+ else:
+ # if there's a name write to name.bed
+ options.outname = os.path.splitext(options.inname)[0]+'.bed'
+ print >>sys.stderr, "defaulting to outputname", options.outname
+ elif options.outname == '-':
+ outstream = sys.stdout
+
+ if outstream is None:
+ if os.path.exists(options.outname):
+ parser.error("not overwriting %s" % (options.outname))
+ return 1
+ else:
+ outstream = open(options.outname, 'w')
+
+ if options.flowcell is not None and options.lane is not None:
+ # get our name/description out of the database
+ name, description = make_description(
+ options.database, options.flowcell, options.lane
+ )
+ else:
+ name = options.name
+ description = options.description
+
+ if options.multi:
+ make_bed_from_multi_eland_stream(instream, outstream,
+ name, description,
+ options.prefix,
+ options.reads)
+
+ else:
+ make_bed_from_eland_stream(instream, outstream,
+ name, description,
+ options.prefix)
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+#!/usr/bin/env python
+import optparse
+import os
+import sys
+
+from htsworkflow.pipelines.eland import extract_eland_sequence
+
+def make_parser():
+ usage = "usage: %prog [options] infile [outfile]"
+
+ parser = optparse.OptionParser(usage)
+ parser.add_option("-e", "--extract", dest="slice",
+ default=":",
+ help="provide a python slice operator to select a portion of an eland file")
+ return parser
+
+def main(argv):
+ parser = make_parser()
+
+ (opt, args) = parser.parse_args(argv)
+
+ if len(args) not in (0, 1, 2):
+ parser.error('incorrect number of arguments')
+
+ # get our slice coordinates
+ start, end = opt.slice.split(':')
+ if len(start) > 0:
+ start = int(start)
+ else:
+ start = None
+ if len(end) > 0:
+ end = int(end)
+ else:
+ end = None
+
+ # open infile
+ if len(args) > 0:
+ instream = open(args[0],'r')
+ else:
+ instream = sys.stdin
+
+ if len(args) > 1:
+ outstream = open(args[1],'w')
+ else:
+ outstream = sys.stdout
+
+ extract_eland_sequence(instream, outstream, start, end)
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+#!/usr/bin/python
+"""
+Convert a group of eland_result files from a sequencer run to bed files.
+"""
+from glob import glob
+import logging
+import optparse
+import sys
+import os
+
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
+
+def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
+ """
+ convert s_[1-8]_eland_result.txt to corresponding bed files
+ """
+ eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
+ out_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.bed'))
+ if len(out_files) > 0:
+ raise RuntimeError("please move old bedfiles")
+
+ logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell))
+ for pathname in eland_files:
+ path, name = os.path.split(pathname)
+ lane = int(name[2])
+ outname = 's_%d_eland_result.bed' %(lane,)
+ logging.info('Converting lane %d to %s' % (lane, outname))
+
+ outpathname = os.path.join(eland_dir, outname)
+ # look up descriptions
+ bed_name, description = make_description(database, flowcell, lane)
+
+ # open files
+ instream = open(pathname,'r')
+ outstream = open(outpathname,'w')
+
+ make_bed_from_eland_stream(
+ instream, outstream, name, description, prefix
+ )
+
+def make_parser():
+ usage = """%prog: --flowcell <flowcell id> directory_name
+
+directory should contain a set of 8 eland result files named like
+s_[12345678]_eland_result.txt"""
+
+
+ parser = optparse.OptionParser(usage)
+
+ parser.add_option('-o', '--output', dest='output',
+ help="destination directory for our bed files" \
+ "defaults to eland directory",
+ default=None)
+ parser.add_option('--chromosome', dest='prefix',
+ help='Set the chromosome prefix name. defaults to "chr"',
+ default='chr')
+ parser.add_option("--database", dest='database',
+ help="specify location of fctracker database",
+ default=None)
+ parser.add_option("--flowcell", dest='flowcell',
+ help="specify the flowcell id for this run",
+ default=None)
+ parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
+ help='increase verbosity',
+ default=False)
+ return parser
+
+def main(command_line=None):
+ logging.basicConfig(level=logging.WARNING)
+ if command_line is None:
+ command_line = sys.argv[1:]
+
+ parser = make_parser()
+ (opts, args) = parser.parse_args(command_line)
+
+ if len(args) != 1:
+ parser.error('Directory name required')
+
+ eland_dir = args[0]
+ if not os.path.isdir(eland_dir):
+ parser.error('%s must be a directory' % (eland_dir,))
+
+ if opts.flowcell is None:
+ parser.error('Flowcell ID required')
+
+ if opts.verbose:
+ logger = logging.getLogger()
+ logger.setLevel(logging.INFO)
+
+ make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell)
+
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+#!/usr/bin/env python
+import logging
+import sys
+from htsworkflow.pipelines.retrieve_config import *
+from htsworkflow.pipelines import retrieve_config
+
+#Turn on built-in command-line parsing.
+retrieve_config.DISABLE_CMDLINE = False
+
+def main(argv=None):
+ if argv is None:
+ argv = sys.argv
+
+ #Display help if no args are presented
+ options = getCombinedOptions(argv)
+
+ if options.verbose:
+ logging.basicConfig(level=logging.DEBUG)
+ else:
+ logging.basicConfig(level=logging.INFO)
+
+ msg_list = ['ERROR MESSAGES:']
+ if options.flowcell is None:
+ msg_list.append(" Flow cell argument required. -f <flowcell> or --flowcell=<flowcell>")
+
+ if options.url is None:
+ msg_list.append(" URL argument required (-u <url> or --url=<url>), or entry\n" \
+ " in /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
+ if options.genome_dir is None:
+ msg_list.append(" genome_dir argument required (-g <genome_dir> or \n" \
+ " --genome_dir=<genome_dir>, or entry in \n" \
+ " /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
+
+ if len(msg_list) > 1:
+ print '\n'.join(msg_list)
+ return 1
+
+ saveConfigFile(options)
+
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/python
+import sys
+from htsworkflow.pipelines.qseq2fastq import main
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/env python
+
+from htsworkflow.util.hdquery import get_hd_serial_num
+from htsworkflow.frontend import settings
+
+from optparse import OptionParser
+import os
+import re
+import sys
+import urllib2
+import urlparse
+
+runfolder_pattern = re.compile(r'[0-9]{6}_[-A-Za-z\d]+_\d+_(?P<flowcell>[A-Z\d]+)\.tgz')
+
+def extract_flowcell(runfolder_name):
+ path, basename = os.path.split(runfolder_name)
+ match = runfolder_pattern.match(basename)
+ if match is not None:
+ return match.group('flowcell')
+ else:
+ return None
+
+def construct_parser():
+ """
+ """
+    msg = "usage: %prog [-d </dev/sdX> | -s <serial_number>] [-f <flowcell>] [archived dirs]"
+ parser = OptionParser()
+ parser.add_option('-u', '--url', default=None,
+ help="Alternate url for marking archived flowcells")
+ parser.add_option("-f", "--flowcell", type="string", help="flowcell being archived")
+ parser.add_option("-d", "--device", type="string",
+ help="device flowcell is being archived to")
+ parser.add_option("-s", "--serial", type="string", help="serial num. of archive device")
+ parser.add_option("-v", "--verbose", action="store_true", default=False)
+
+ return parser
+
+
+def update_db(root_url, flowcells, serial, debug=False):
+ """
+ Creates link between flowcell and storage device over http
+ """
+ for fc in flowcells:
+ url = urlparse.urljoin(root_url, '%s/%s/' % (fc, serial))
+
+ req = urllib2.Request(url)
+ try:
+ response = urllib2.urlopen(req)
+ except urllib2.URLError, e:
+ print 'ERROR - HTTP OUTPUT (Return Code: %s); use -v/--verbose for more details.' % (e.code)
+ if debug:
+ print e.read()
+ sys.exit(e.code)
+
+ print "DB Update of %s & %s succeeded" % (fc, serial)
+ print response.read()
+
+
+def process_args(parser):
+ """
+ returns flowcell and serial#
+ """
+ options, args = parser.parse_args()
+
+ msg = []
+
+ # Only provide device or serial
+ if options.device is not None and options.serial is not None:
+ parser.error("Please provide only --device or --serial.\n"\
+ "The serial number is extracted automatically if the"\
+ "device is provided.")
+
+ # allow user to override the default destination URL
+ if options.url is not None:
+ root_url = options.url
+ else:
+ root_url = settings.LINK_FLOWCELL_STORAGE_DEVICE_URL
+
+ # if device and serial missing:
+ if options.device is None and options.serial is None:
+ parser.error('One of --device or --serial is required')
+
+ flowcells = []
+
+ # sanitize args
+ for runfolder in args:
+ flowcell_id = extract_flowcell(runfolder)
+ if flowcell_id is None:
+ parser.error('archive names must look like YYMMDD_MACHINE_RUN_FLOWCELLID.tgz\n'\
+ '(got %s)' % (runfolder,))
+ else:
+ flowcells.append(flowcell_id)
+
+ if options.flowcell is not None:
+ flowcells.append(options.flowcell)
+
+ if len(flowcells) == 0:
+ parser.error('please specify a --flowcell or list of runfolder archives\n'\
+ 'for archival. I need something to do.')
+
+ # Update db records
+ if options.device is not None:
+ serial = get_hd_serial_num(options.device)
+ update_db(root_url, flowcells, serial=serial, debug=options.verbose)
+ elif options.serial is not None:
+ update_db(root_url, flowcells, serial=options.serial, debug=options.verbose)
+ else:
+        msg = "FATAL should not happen error occurred; i.e. the best kind!"
+ raise ValueError, msg
+
+
+
+def main():
+ """
+ """
+ parser = construct_parser()
+ process_args(parser)
+
+ #print "Database Updated."
+ sys.exit(0)
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+#!/usr/bin/env python
+"""
+Runfolder.py can generate a xml file capturing all the 'interesting' parameters from a finished pipeline run. (using the -a option). The information currently being captured includes:
+
+ * Flowcell ID
+ * run dates
+ * start/stop cycle numbers
+ * Firecrest, bustard, gerald version numbers
+ * Eland analysis types, and everything in the eland configuration file.
+ * cluster numbers and other values from the Summary.htm
+ LaneSpecificParameters table.
+ * How many reads mapped to a genome from an eland file
+
+The ELAND "mapped reads" counter will also check for eland squashed file
+that were symlinked from another directory. This is so I can track how
+many reads landed on the genome of interest and on the spike ins.
+
+Basically my subdirectories something like:
+
+genomes/hg18
+genomes/hg18/chr*.2bpb <- files for hg18 genome
+genomes/hg18/chr*.vld
+genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins
+genomes/spikein
+
+runfolder.py can also spit out a simple summary report (-s option)
+that contains the per lane post filter cluster numbers and the mapped
+read counts. (The report isn't currently very pretty)
+"""
+from glob import glob
+import logging
+import optparse
+import os
+import sys
+
+from htsworkflow.pipelines import runfolder
+from htsworkflow.pipelines.runfolder import ElementTree
+
+def make_parser():
+ usage = 'usage: %prog [options] runfolder_root_dir'
+ parser = optparse.OptionParser(usage)
+
+ parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
+ default=False,
+ help='turn on verbose mode')
+ parser.add_option('--dry-run', action='store_true', default=False,
+ help="Don't delete anything (in clean mode)")
+
+ commands = optparse.OptionGroup(parser, 'Commands')
+
+ commands.add_option('-s', '--summary', dest='summary', action='store_true',
+ default=False,
+ help='produce summary report')
+ commands.add_option('-a', '--archive', dest='archive', action='store_true',
+ default=False,
+ help='generate run configuration archive')
+ commands.add_option('--extract-results', action='store_true',
+ default=False,
+ help='create run-xml summary, compress the eland result files, build srf files and '
+ 'copy all that and the Summary.htm file into an archival directory.')
+ commands.add_option('-c', '--clean', action='store_true', default=False,
+ help='Clean runfolder, preparing it for long-term storage')
+ parser.add_option_group(commands)
+
+ parser.add_option('-j', '--max-jobs', default=1,
+                      help='specify the maximum number of processes to run '
+ '(used in extract-results)')
+ parser.add_option('-o', '--output-dir', default=None,
+ help="specify the default output directory for extract results")
+ parser.add_option('--run-xml', dest='run_xml',
+ default=None,
+ help='specify a run_<FlowCell>.xml file for summary reports')
+ parser.add_option('--site', default=None,
+ help='create srf files tagged with the provided site name')
+ parser.add_option('-u', '--use-run', dest='use_run', default=None,
+ help='Specify which run to use instead of autoscanning '
+ 'the runfolder. You do this by providing the final '
+ ' GERALD directory, and it assumes the parent '
+ 'directories are the bustard and image processing '
+ 'directories.')
+
+ return parser
+
+def main(cmdlist=None):
+ parser = make_parser()
+ opt, args = parser.parse_args(cmdlist)
+
+ logging.basicConfig()
+ if opt.verbose:
+ root_log = logging.getLogger()
+ root_log.setLevel(logging.INFO)
+
+ logging.info('Starting htsworkflow illumina runfolder processing tool.')
+ runs = []
+ if opt.run_xml:
+ # handle ~ shortcut
+ opt.run_xml = os.path.expanduser(opt.run_xml)
+ tree = ElementTree.parse(opt.run_xml).getroot()
+ runs.append(runfolder.PipelineRun(xml=tree))
+
+ # look for manually specified run
+ if opt.use_run is not None:
+ specific_run = runfolder.get_specific_run(opt.use_run)
+ if specific_run is not None:
+ runs.append(specific_run)
+ else:
+ logging.warn("Couldn't find a run in %s" % (opt.use_run,))
+
+ # scan runfolders for runs
+ for run_pattern in args:
+ # expand args on our own if needed
+ for run_dir in glob(run_pattern):
+ runs.extend(runfolder.get_runs(run_dir))
+
+ if len(runs) > 0:
+ command_run = False
+ if opt.summary:
+ print runfolder.summary_report(runs)
+ command_run = True
+ if opt.archive:
+ runfolder.extract_run_parameters(runs)
+ command_run = True
+ if opt.extract_results:
+ if opt.dry_run:
+ parser.error("Dry-run is not supported for extract-results")
+ runfolder.extract_results(runs,
+ opt.output_dir,
+ opt.site,
+ opt.max_jobs)
+ command_run = True
+ if opt.clean:
+ runfolder.clean_runs(runs, opt.dry_run)
+ command_run = True
+
+ if command_run == False:
+ print "You need to specify a command."+os.linesep
+ parser.print_help()
+ else:
+ print "You need to specify some run folders to process..."+os.linesep
+ parser.print_help()
+
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/env python
+import sys
+from htsworkflow.automation.runner import main
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/env python
+import sys
+from htsworkflow.automation.spoolwatcher import main
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/python
+
+import logging
+import optparse
+import os
+import sys
+
+from htsworkflow.pipelines import runfolder
+from htsworkflow.pipelines.srf import make_srf_commands, make_qseq_commands, \
+ run_commands, pathname_to_run_name
+from htsworkflow.pipelines.srf import ILLUMINA2SRF10, ILLUMINA2SRF11, SOLEXA2SRF
+
+def make_parser():
+ usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
+
+ parser = optparse.OptionParser(usage)
+ parser.add_option('--dry-run', action='store_true',
+ help='print what would be done',
+ default=False)
+
+ parser.add_option('-d', '--dest-dir', dest='dest_dir',
+ help='location to write srf files to',
+ default='.')
+ parser.add_option('-s', '--site',
+ help='site name',
+ default='Individual')
+ parser.add_option('-l', '--lanes', dest='lanes', action="append",
+ default=[],
+                      help='comma separated list of lanes to add to srf'
+ )
+ parser.add_option('-j', '--jobs', default=1, type='int',
+ help='how many jobs to run simultaneously')
+ parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
+ help='Which class of srf file should we attempt to create\n'
+ '0 = Solexa pipeline 0.2.6 - 0.3\n'
+ '1 = illumina pipeline 1.0\n'
+ '2 = illumina pipeline 1.1rc1 and later \n')
+
+ parser.add_option('-v', '--verbose', dest='verbose',
+ default=False, action='store_true',
+ help='report more about internals (INFO)')
+ parser.add_option('--debug', dest='debug',
+ default=False, action='store_true',
+ help='report even more about internals (DEBUG)')
+
+ return parser
+
+def parse_lane_arg(lane_arg):
+ """
+    Convert comma separated list of lane ids to a list of integers
+ """
+ lanes = []
+ for lane in lane_arg.split(','):
+ try:
+ lane = int(lane)
+ if lane < 1 or lane > 8:
+ parser.error('Lanes must be in range [1..8]')
+ lanes.append(lane)
+ except ValueError:
+ parser.error('Lane selections must be integers')
+ return lanes
+
+def main(cmdline=None):
+ parser = make_parser()
+ opts, args = parser.parse_args(cmdline)
+
+ if opts.debug:
+ logging.basicConfig(level=logging.DEBUG)
+ elif opts.verbose:
+ logging.basicConfig(level=logging.INFO)
+ else:
+ logging.basicConfig(level=logging.WARNING)
+
+ if len(args) == 0:
+ parser.error('need runfolder arguments')
+
+    # parse lane arguments
+ lanes_list = []
+ if len(opts.lanes) == 0:
+ lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
+ elif len(opts.lanes) == len(args):
+ for lane_arg in opts.lanes:
+ lanes_list.append(parse_lane_arg(lane_arg))
+ else:
+ parser.error(
+ "Number of lane arguments must match number of runfolders"
+ )
+
+ make_commands = make_qseq_commands
+ # build list of commands
+ cmds = {}
+ for runfolder_path, lanes in zip(args, lanes_list):
+ # normalize paths, either relative to home dirs or current dir
+ runfolder_path = os.path.abspath(runfolder_path)
+ run_name = pathname_to_run_name(runfolder_path)
+        # are there any bustard directories?
+ runs = runfolder.get_runs(runfolder_path)
+ # give up if there are anything other than 1 run
+ if len(runs) > 1:
+ print 'ERROR: Too many run directories in %s' %(runfolder_path,)
+ return 1
+ elif len(runs) == 1:
+ bustard_dir = runs[0].bustard.pathname
+ cmds[bustard_dir] = make_commands(run_name,
+ bustard_dir,
+ lanes,
+ opts.site,
+ opts.dest_dir,
+ opts.runfolder_version)
+ else:
+ print "ERROR: Couldn't find a bustard directory in", runfolder_path
+ return 1
+
+ if not opts.dry_run:
+ for cwd, cmd_list in cmds.items():
+ run_commands(cwd, cmd_list, opts.jobs)
+ else:
+ for cwd, cmd_list in cmds.items():
+ print cwd
+ print cmd_list
+ print 'jobs: ', opts.jobs
+
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/python
+import sys
+from htsworkflow.pipelines.srf2fastq import main
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
--- /dev/null
+#!/usr/bin/env python
+
+from ConfigParser import SafeConfigParser
+
+import logging
+import os
+from optparse import OptionParser
+import stat
+import shelve
+
+from htsworkflow.util import api
+from htsworkflow.pipelines.sequences import scan_for_sequences
+
+def build_flowcell_db(fcdb_filename, sequences, baseurl, apiid, apikey):
+ """
+ compare our flowcell database with our list of sequences and return
+ a fully populated database
+ """
+ fcdb = shelve.open(fcdb_filename)
+ libdb = {}
+ apidata = {'apiid': apiid, 'apikey': apikey}
+ for seq in sequences:
+ flowcell = seq.flowcell
+ flowcell_info = None
+
+ # get info about flowcell from server or shelf
+ if not fcdb.has_key(flowcell):
+ url = api.flowcell_url(baseurl, flowcell)
+ flowcell_info = api.retrieve_info(url, apidata)
+ if flowcell_info is not None:
+ fcdb[flowcell] = flowcell_info
+ else:
+ flowcell_info = fcdb[flowcell]
+
+ # make library id db
+ if flowcell_info is not None:
+ seq_library_id = flowcell_info['lane_set'][unicode(seq.lane)]['library_id']
+ libdb.setdefault(seq_library_id, []).append(seq)
+
+ fcdb.sync()
+ return fcdb, libdb
+
+def carefully_make_hardlink(source, destination, dry_run=False):
+ """
+ Make a hard link, failing if a different link already exists
+
+ Checking to see if the link already exists and is
+ the same as the link we want to make.
+ If the link already exists and is different, throw an error.
+
+ If we didn't update anything return 0, if we did update
+ return 1.
+ """
+ logging.debug("CHECKING: %s -> %s", source, destination)
+
+ if not os.path.exists(source):
+ logging.warning("%s doesn't exist", source)
+ return 0
+
+ if os.path.exists(destination):
+ if os.path.samefile(source, destination):
+ logging.debug('SAME: %s -> %s' % (source, destination))
+ return 0
+ else:
+ logging.error('%s and %s are different files, skipping' % \
+ (source, destination))
+ return 0
+ logging.debug('Linking: %s -> %s' % (source, destination))
+
+ # we would do something by this part
+ if dry_run: return 1
+
+ os.link(source, destination)
+ os.chmod(destination,
+ stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
+ return 1
+
+def make_library_links(root, library_db, dry_run=False):
+ """
+ Make a tree of sequencer roots organized by library id
+
+ Root is the root of the library tree
+ library_db is a dictionary of SequenceFiles organized by library id
+ """
+ count = 0
+ root = os.path.abspath(root)
+ for lib_id, sequences in library_db.items():
+ target_dir = os.path.join(root, lib_id)
+ if not os.path.exists(target_dir):
+ logging.info("mkdir %s" % (target_dir,))
+ if not dry_run:
+ os.mkdir(target_dir)
+
+ for s in sequences:
+ count += carefully_make_hardlink(s.path,
+ s.make_target_name(target_dir),
+ dry_run=dry_run)
+ return count
+
+def configure_logging(opts):
+ # setup logging
+ level = logging.WARN
+ if opts.verbose:
+ level = logging.INFO
+ if opts.debug:
+ level = logging.DEBUG
+ logging.basicConfig(level=level)
+
+
+def configure_opts(opts):
+ """
+ Load in options from config file
+ """
+ SECTION_NAME = 'sequence_archive'
+ ARCHIVE_OPT = 'sequence_archive'
+ CACHE_OPT = 'cache'
+ HOST_OPT = 'host'
+ APIID_OPT = 'apiid'
+ APIKEY_OPT = 'apikey'
+
+ # figure out what config file to read
+ config_path = [os.path.expanduser('~/.htsworkflow.ini'),
+ '/etc/htsworkflow.ini']
+ if opts.config is not None:
+ config_path = [opts.config]
+ # parse options from config file
+ config_file = SafeConfigParser()
+ config_file.read(config_path)
+
+ # load defaults from config file if not overriden by the command line
+ if opts.cache is None:
+ if config_file.has_option(SECTION_NAME, CACHE_OPT):
+                opts.cache = config_file.get(SECTION_NAME, CACHE_OPT)
+ else:
+ opts.cache = os.path.expanduser('~/.flowcelldb.shelve')
+
+ if opts.sequence_archive is None and \
+ config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
+ opts.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
+ opts.sequence_archive = os.path.expanduser(opts.sequence_archive)
+
+ opts.sequence_archive = os.path.abspath(opts.sequence_archive)
+ opts.library_tree = os.path.join(opts.sequence_archive, 'libraries')
+ opts.flowcells = os.path.join(opts.sequence_archive, 'flowcells')
+ opts.srfs = os.path.join(opts.sequence_archive, 'srfs')
+
+ if opts.host is None and config_file.has_option(SECTION_NAME, HOST_OPT):
+ opts.host = config_file.get(SECTION_NAME, HOST_OPT)
+
+ if opts.apiid is None and config_file.has_option(SECTION_NAME, APIID_OPT):
+ opts.apiid = config_file.get(SECTION_NAME, APIID_OPT)
+
+ if opts.apikey is None and config_file.has_option(SECTION_NAME, APIKEY_OPT):
+ opts.apikey = config_file.get(SECTION_NAME, APIKEY_OPT)
+
+ return opts
+
+def make_parser():
+ """
+ Make parser
+ """
+ parser = OptionParser()
+ parser.add_option('-c', '--config', default=None,
+ help='path to a configuration file containing a '
+ 'sequence archive section')
+ parser.add_option('--cache', default=None,
+ help="default flowcell cache")
+
+    parser.add_option('--host', default=None,
+                      help="specify http://host for querying flowcell information")
+    parser.add_option('--apiid', default=None,
+                      help="API ID to use when retrieving information")
+    parser.add_option("--apikey", default=None,
+                      help="API Key for when retrieving information")
+
+ parser.add_option('-a', '--sequence-archive', default=None,
+ help='path to where the sequence archive lives')
+
+ parser.add_option('-v', '--verbose', action='store_true', default=False,
+ help='be more verbose')
+ parser.add_option('-d', '--debug', action='store_true', default=False,
+ help='report everything')
+
+ parser.add_option("--dry-run", dest="dry_run", action="store_true",
+ default=False,
+ help="Don't modify the filesystem")
+ return parser
+
+def main(cmdline=None):
+ parser = make_parser()
+ opts, args = parser.parse_args(cmdline)
+
+ configure_logging(opts)
+ opts = configure_opts(opts)
+
+ # complain if critical things are missing
+ if opts.cache is None:
+ parser.error('Need location of htsworkflow frontend database')
+
+ if opts.sequence_archive is None:
+ parser.error('Need the root path for the sequence archive')
+
+ seq_dirs = [ opts.flowcells, opts.srfs ]
+ if len(args) > 0:
+ seq_dirs = [os.path.abspath(f) for f in args]
+
+ seqs = scan_for_sequences(seq_dirs)
+ fcdb, libdb = build_flowcell_db(opts.cache, seqs, opts.host, opts.apiid, opts.apikey)
+ updates = make_library_links(opts.library_tree, libdb, dry_run=opts.dry_run)
+
+ logging.warn("%s flowcells in database" % (len(fcdb),))
+ logging.warn("found %s sequence files" % (len(seqs),))
+ logging.warn("%s libraries being checked" % (len(libdb),))
+ logging.warn("%s sequence files were linked" % (updates,))
+
+ return 0
+
+if __name__ == "__main__":
+ main()
+++ /dev/null
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-from optparse import OptionParser
-import sys
-
-from htsworkflow.util import fctracker
-
-def make_parser():
- """
- Make parser
- """
- parser = OptionParser()
- parser.add_option("-d", "--database", dest="database",
- help="path to the fctracker.db",
- default=None)
- parser.add_option("-w", "--where", dest="where",
- help="add a where clause",
- default=None)
- return parser
-
-def main(argv=None):
- if argv is None:
- argv = []
- parser = make_parser()
-
- opt, args = parser.parse_args(argv)
-
- fc = fctracker.fctracker(opt.database)
- cells = fc._get_flowcells(opt.where)
-
- print fctracker.recoverable_drive_report(cells)
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-
-from ConfigParser import SafeConfigParser
-
-import logging
-import os
-from optparse import OptionParser
-import stat
-import shelve
-
-from htsworkflow.util import api
-from htsworkflow.pipelines.sequences import scan_for_sequences
-
-def build_flowcell_db(fcdb_filename, sequences, baseurl, apiid, apikey):
- """
- compare our flowcell database with our list of sequences and return
- a fully populated database
- """
- fcdb = shelve.open(fcdb_filename)
- libdb = {}
- apidata = {'apiid': apiid, 'apikey': apikey}
- for seq in sequences:
- flowcell = seq.flowcell
- flowcell_info = None
-
- # get info about flowcell from server or shelf
- if not fcdb.has_key(flowcell):
- url = api.flowcell_url(baseurl, flowcell)
- flowcell_info = api.retrieve_info(url, apidata)
- if flowcell_info is not None:
- fcdb[flowcell] = flowcell_info
- else:
- flowcell_info = fcdb[flowcell]
-
- # make library id db
- if flowcell_info is not None:
- seq_library_id = flowcell_info['lane_set'][unicode(seq.lane)]['library_id']
- libdb.setdefault(seq_library_id, []).append(seq)
-
- fcdb.sync()
- return fcdb, libdb
-
-def carefully_make_hardlink(source, destination, dry_run=False):
- """
- Make a hard link, failing if a different link already exists
-
- Checking to see if the link already exists and is
- the same as the link we want to make.
- If the link already exists and is different, throw an error.
-
- If we didn't update anything return 0, if we did update
- return 1.
- """
- logging.debug("CHECKING: %s -> %s", source, destination)
-
- if not os.path.exists(source):
- logging.warning("%s doesn't exist", source)
- return 0
-
- if os.path.exists(destination):
- if os.path.samefile(source, destination):
- logging.debug('SAME: %s -> %s' % (source, destination))
- return 0
- else:
- logging.error('%s and %s are different files, skipping' % \
- (source, destination))
- return 0
- logging.debug('Linking: %s -> %s' % (source, destination))
-
- # we would do something by this part
- if dry_run: return 1
-
- os.link(source, destination)
- os.chmod(destination,
- stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
- return 1
-
-def make_library_links(root, library_db, dry_run=False):
- """
- Make a tree of sequencer roots organized by library id
-
- Root is the root of the library tree
- library_db is a dictionary of SequenceFiles organized by library id
- """
- count = 0
- root = os.path.abspath(root)
- for lib_id, sequences in library_db.items():
- target_dir = os.path.join(root, lib_id)
- if not os.path.exists(target_dir):
- logging.info("mkdir %s" % (target_dir,))
- if not dry_run:
- os.mkdir(target_dir)
-
- for s in sequences:
- count += carefully_make_hardlink(s.path,
- s.make_target_name(target_dir),
- dry_run=dry_run)
- return count
-
-def configure_logging(opts):
- # setup logging
- level = logging.WARN
- if opts.verbose:
- level = logging.INFO
- if opts.debug:
- level = logging.DEBUG
- logging.basicConfig(level=level)
-
-
-def configure_opts(opts):
- """
- Load in options from config file
- """
- SECTION_NAME = 'sequence_archive'
- ARCHIVE_OPT = 'sequence_archive'
- CACHE_OPT = 'cache'
- HOST_OPT = 'host'
- APIID_OPT = 'apiid'
- APIKEY_OPT = 'apikey'
-
- # figure out what config file to read
- config_path = [os.path.expanduser('~/.htsworkflow.ini'),
- '/etc/htsworkflow.ini']
- if opts.config is not None:
- config_path = [opts.config]
- # parse options from config file
- config_file = SafeConfigParser()
- config_file.read(config_path)
-
- # load defaults from config file if not overriden by the command line
- if opts.cache is None:
- if config_file.has_option(SECTION_NAME, CACHE_OPT):
- opts.cache = config_file.get(FRONTEND_NAME, CACHE_OPT)
- else:
- opts.cache = os.path.expanduser('~/.flowcelldb.shelve')
-
- if opts.sequence_archive is None and \
- config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
- opts.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
- opts.sequence_archive = os.path.expanduser(opts.sequence_archive)
-
- opts.sequence_archive = os.path.abspath(opts.sequence_archive)
- opts.library_tree = os.path.join(opts.sequence_archive, 'libraries')
- opts.flowcells = os.path.join(opts.sequence_archive, 'flowcells')
- opts.srfs = os.path.join(opts.sequence_archive, 'srfs')
-
- if opts.host is None and config_file.has_option(SECTION_NAME, HOST_OPT):
- opts.host = config_file.get(SECTION_NAME, HOST_OPT)
-
- if opts.apiid is None and config_file.has_option(SECTION_NAME, APIID_OPT):
- opts.apiid = config_file.get(SECTION_NAME, APIID_OPT)
-
- if opts.apikey is None and config_file.has_option(SECTION_NAME, APIKEY_OPT):
- opts.apikey = config_file.get(SECTION_NAME, APIKEY_OPT)
-
- return opts
-
-def make_parser():
- """
- Make parser
- """
- parser = OptionParser()
- parser.add_option('-c', '--config', default=None,
- help='path to a configuration file containing a '
- 'sequence archive section')
- parser.add_option('--cache', default=None,
- help="default flowcell cache")
-
- parser.add_option('--host', default=None,
- help="specify http://host for quering flowcell information")
- parser.add_option('--apiid', default=None,
- help="API ID to use when retriving information")
- parser.add_option("--apikey", default=None,
- help="API Key for when retriving information")
-
- parser.add_option('-a', '--sequence-archive', default=None,
- help='path to where the sequence archive lives')
-
- parser.add_option('-v', '--verbose', action='store_true', default=False,
- help='be more verbose')
- parser.add_option('-d', '--debug', action='store_true', default=False,
- help='report everything')
-
- parser.add_option("--dry-run", dest="dry_run", action="store_true",
- default=False,
- help="Don't modify the filesystem")
- return parser
-
-def main(cmdline=None):
- parser = make_parser()
- opts, args = parser.parse_args(cmdline)
-
- configure_logging(opts)
- opts = configure_opts(opts)
-
- # complain if critical things are missing
- if opts.cache is None:
- parser.error('Need location of htsworkflow frontend database')
-
- if opts.sequence_archive is None:
- parser.error('Need the root path for the sequence archive')
-
- seq_dirs = [ opts.flowcells, opts.srfs ]
- if len(args) > 0:
- seq_dirs = [os.path.abspath(f) for f in args]
-
- seqs = scan_for_sequences(seq_dirs)
- fcdb, libdb = build_flowcell_db(opts.cache, seqs, opts.host, opts.apiid, opts.apikey)
- updates = make_library_links(opts.library_tree, libdb, dry_run=opts.dry_run)
-
- logging.warn("%s flowcells in database" % (len(fcdb),))
- logging.warn("found %s sequence files" % (len(seqs),))
- logging.warn("%s libraries being checked" % (len(libdb),))
- logging.warn("%s sequence files were linked" % (updates,))
-
- return 0
-
-if __name__ == "__main__":
- main()
+++ /dev/null
-#!/usr/bin/python
-import optparse
-import sys
-import os
-
-from htsworkflow.util.opener import autoopen
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
-
-def make_parser():
- parser = optparse.OptionParser()
- parser.add_option('-e', '--eland', dest='inname',
- help='specify input eland filename')
- parser.add_option('-b', '--bed', dest='outname',
- help='specify output befilename')
- parser.add_option('-n', '--name', dest='name',
- help='specify the track (short) name.',
- default=None)
- parser.add_option('-d', '--description', dest='description',
- help='specify the track description',
- default=None)
- parser.add_option('--chromosome', dest='prefix',
- help='Set the chromosome prefix name. defaults to "chr"',
- default='chr')
- parser.add_option("--database", dest='database',
- help="specify location of fctracker database",
- default=None)
- parser.add_option("--flowcell", dest='flowcell',
- help="compute name and description from database using flowcell id",
- default=None)
- parser.add_option("--lane", dest='lane',
- help='specify which lane to use when retrieving description from database',
- default=None)
-
- multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')
-
- multi.add_option('-m', '--multi', action='store_true',
- help='Enable parsing multi-read eland files',
- default=False)
- multi.add_option('--reads', type='int',
- help='limit reporting multi reads to this many reads'
- '(most usefully --reads=1 will turn a multi-read '
- 'file into a single read file)',
- default=255)
- parser.add_option_group(multi)
-
- return parser
-
-def main(command_line=None):
- instream = None
- outstream = None
-
- if command_line is None:
- command_line = sys.argv[1:]
-
- parser = make_parser()
- (options, args) = parser.parse_args(command_line)
-
- if options.inname is None:
- parser.error("Need eland input file name")
- return 1
-
- if options.inname == '-':
- instream = sys.stdin
- elif os.path.exists(options.inname):
- instream = autoopen(options.inname, 'r')
- else:
- parser.error('%s was not found' % (options.inname))
- return 1
-
- # figure out name for output file
- if options.outname is None:
- # if outname wasn't defined, and we're reading from stdout
- if instream is sys.stdin:
- # write to stdout
- outstream = sys.stdout
- else:
- # if there's a name write to name.bed
- options.outname = os.path.splitext(options.inname)[0]+'.bed'
- print >>sys.stderr, "defaulting to outputname", options.outname
- elif options.outname == '-':
- outstream = sys.stdout
-
- if outstream is None:
- if os.path.exists(options.outname):
- parser.error("not overwriting %s" % (options.outname))
- return 1
- else:
- outstream = open(options.outname, 'w')
-
- if options.flowcell is not None and options.lane is not None:
- # get our name/description out of the database
- name, description = make_description(
- options.database, options.flowcell, options.lane
- )
- else:
- name = options.name
- description = options.description
-
- if options.multi:
- make_bed_from_multi_eland_stream(instream, outstream,
- name, description,
- options.prefix,
- options.reads)
-
- else:
- make_bed_from_eland_stream(instream, outstream,
- name, description,
- options.prefix)
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/env python
-
-from htsworkflow.util.hdquery import get_hd_serial_num
-from htsworkflow.frontend import settings
-
-from optparse import OptionParser
-import os
-import re
-import sys
-import urllib2
-import urlparse
-
-runfolder_pattern = re.compile(r'[0-9]{6}_[-A-Za-z\d]+_\d+_(?P<flowcell>[A-Z\d]+)\.tgz')
-
-def extract_flowcell(runfolder_name):
- path, basename = os.path.split(runfolder_name)
- match = runfolder_pattern.match(basename)
- if match is not None:
- return match.group('flowcell')
- else:
- return None
-
-def construct_parser():
- """
- """
- msg = "usage: %prog [-d </dev/sdX> | -s <serial_number] [-f <flowcell>] [archived dirs]"
- parser = OptionParser()
- parser.add_option('-u', '--url', default=None,
- help="Alternate url for marking archived flowcells")
- parser.add_option("-f", "--flowcell", type="string", help="flowcell being archived")
- parser.add_option("-d", "--device", type="string",
- help="device flowcell is being archived to")
- parser.add_option("-s", "--serial", type="string", help="serial num. of archive device")
- parser.add_option("-v", "--verbose", action="store_true", default=False)
-
- return parser
-
-
-def update_db(root_url, flowcells, serial, debug=False):
- """
- Creates link between flowcell and storage device over http
- """
- for fc in flowcells:
- url = urlparse.urljoin(root_url, '%s/%s/' % (fc, serial))
-
- req = urllib2.Request(url)
- try:
- response = urllib2.urlopen(req)
- except urllib2.URLError, e:
- print 'ERROR - HTTP OUTPUT (Return Code: %s); use -v/--verbose for more details.' % (e.code)
- if debug:
- print e.read()
- sys.exit(e.code)
-
- print "DB Update of %s & %s succeeded" % (fc, serial)
- print response.read()
-
-
-def process_args(parser):
- """
- returns flowcell and serial#
- """
- options, args = parser.parse_args()
-
- msg = []
-
- # Only provide device or serial
- if options.device is not None and options.serial is not None:
- parser.error("Please provide only --device or --serial.\n"\
- "The serial number is extracted automatically if the"\
- "device is provided.")
-
- # allow user to override the default destination URL
- if options.url is not None:
- root_url = options.url
- else:
- root_url = settings.LINK_FLOWCELL_STORAGE_DEVICE_URL
-
- # if device and serial missing:
- if options.device is None and options.serial is None:
- parser.error('One of --device or --serial is required')
-
- flowcells = []
-
- # sanitize args
- for runfolder in args:
- flowcell_id = extract_flowcell(runfolder)
- if flowcell_id is None:
- parser.error('archive names must look like YYMMDD_MACHINE_RUN_FLOWCELLID.tgz\n'\
- '(got %s)' % (runfolder,))
- else:
- flowcells.append(flowcell_id)
-
- if options.flowcell is not None:
- flowcells.append(options.flowcell)
-
- if len(flowcells) == 0:
- parser.error('please specify a --flowcell or list of runfolder archives\n'\
- 'for archival. I need something to do.')
-
- # Update db records
- if options.device is not None:
- serial = get_hd_serial_num(options.device)
- update_db(root_url, flowcells, serial=serial, debug=options.verbose)
- elif options.serial is not None:
- update_db(root_url, flowcells, serial=options.serial, debug=options.verbose)
- else:
- msg ="FATAL should not happen error occured; i.e. the best kind!"
- raise ValueError, msg
-
-
-
-def main():
- """
- """
- parser = construct_parser()
- process_args(parser)
-
- #print "Database Updated."
- sys.exit(0)
-
-if __name__ == '__main__':
- main()
+++ /dev/null
-#!/usr/bin/python
-import sys
-from htsworkflow.pipelines.qseq2fastq import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import logging
-import sys
-from htsworkflow.pipelines.retrieve_config import *
-from htsworkflow.pipelines import retrieve_config
-
-#Turn on built-in command-line parsing.
-retrieve_config.DISABLE_CMDLINE = False
-
-def main(argv=None):
- if argv is None:
- argv = sys.argv
-
- #Display help if no args are presented
- options = getCombinedOptions(argv)
-
- if options.verbose:
- logging.basicConfig(level=logging.DEBUG)
- else:
- logging.basicConfig(level=logging.INFO)
-
- msg_list = ['ERROR MESSAGES:']
- if options.flowcell is None:
- msg_list.append(" Flow cell argument required. -f <flowcell> or --flowcell=<flowcell>")
-
- if options.url is None:
- msg_list.append(" URL argument required (-u <url> or --url=<url>), or entry\n" \
- " in /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
- if options.genome_dir is None:
- msg_list.append(" genome_dir argument required (-g <genome_dir> or \n" \
- " --genome_dir=<genome_dir>, or entry in \n" \
- " /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
-
- if len(msg_list) > 1:
- print '\n'.join(msg_list)
- return 1
-
- saveConfigFile(options)
-
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-"""
-Runfolder.py can generate a xml file capturing all the 'interesting' parameters from a finished pipeline run. (using the -a option). The information currently being captured includes:
-
- * Flowcell ID
- * run dates
- * start/stop cycle numbers
- * Firecrest, bustard, gerald version numbers
- * Eland analysis types, and everything in the eland configuration file.
- * cluster numbers and other values from the Summary.htm
- LaneSpecificParameters table.
- * How many reads mapped to a genome from an eland file
-
-The ELAND "mapped reads" counter will also check for eland squashed file
-that were symlinked from another directory. This is so I can track how
-many reads landed on the genome of interest and on the spike ins.
-
-Basically my subdirectories something like:
-
-genomes/hg18
-genomes/hg18/chr*.2bpb <- files for hg18 genome
-genomes/hg18/chr*.vld
-genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins
-genomes/spikein
-
-runfolder.py can also spit out a simple summary report (-s option)
-that contains the per lane post filter cluster numbers and the mapped
-read counts. (The report isn't currently very pretty)
-"""
-from glob import glob
-import logging
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines import runfolder
-from htsworkflow.pipelines.runfolder import ElementTree
-
-def make_parser():
- usage = 'usage: %prog [options] runfolder_root_dir'
- parser = optparse.OptionParser(usage)
-
- parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
- default=False,
- help='turn on verbose mode')
- parser.add_option('--dry-run', action='store_true', default=False,
- help="Don't delete anything (in clean mode)")
-
- commands = optparse.OptionGroup(parser, 'Commands')
-
- commands.add_option('-s', '--summary', dest='summary', action='store_true',
- default=False,
- help='produce summary report')
- commands.add_option('-a', '--archive', dest='archive', action='store_true',
- default=False,
- help='generate run configuration archive')
- commands.add_option('--extract-results', action='store_true',
- default=False,
- help='create run-xml summary, compress the eland result files, build srf files and '
- 'copy all that and the Summary.htm file into an archival directory.')
- commands.add_option('-c', '--clean', action='store_true', default=False,
- help='Clean runfolder, preparing it for long-term storage')
- parser.add_option_group(commands)
-
- parser.add_option('-j', '--max-jobs', default=1,
- help='sepcify the maximum number of processes to run '
- '(used in extract-results)')
- parser.add_option('-o', '--output-dir', default=None,
- help="specify the default output directory for extract results")
- parser.add_option('--run-xml', dest='run_xml',
- default=None,
- help='specify a run_<FlowCell>.xml file for summary reports')
- parser.add_option('--site', default=None,
- help='create srf files tagged with the provided site name')
- parser.add_option('-u', '--use-run', dest='use_run', default=None,
- help='Specify which run to use instead of autoscanning '
- 'the runfolder. You do this by providing the final '
- ' GERALD directory, and it assumes the parent '
- 'directories are the bustard and image processing '
- 'directories.')
-
- return parser
-
-def main(cmdlist=None):
- parser = make_parser()
- opt, args = parser.parse_args(cmdlist)
-
- logging.basicConfig()
- if opt.verbose:
- root_log = logging.getLogger()
- root_log.setLevel(logging.INFO)
-
- logging.info('Starting htsworkflow illumina runfolder processing tool.')
- runs = []
- if opt.run_xml:
- # handle ~ shortcut
- opt.run_xml = os.path.expanduser(opt.run_xml)
- tree = ElementTree.parse(opt.run_xml).getroot()
- runs.append(runfolder.PipelineRun(xml=tree))
-
- # look for manually specified run
- if opt.use_run is not None:
- specific_run = runfolder.get_specific_run(opt.use_run)
- if specific_run is not None:
- runs.append(specific_run)
- else:
- logging.warn("Couldn't find a run in %s" % (opt.use_run,))
-
- # scan runfolders for runs
- for run_pattern in args:
- # expand args on our own if needed
- for run_dir in glob(run_pattern):
- runs.extend(runfolder.get_runs(run_dir))
-
- if len(runs) > 0:
- command_run = False
- if opt.summary:
- print runfolder.summary_report(runs)
- command_run = True
- if opt.archive:
- runfolder.extract_run_parameters(runs)
- command_run = True
- if opt.extract_results:
- if opt.dry_run:
- parser.error("Dry-run is not supported for extract-results")
- runfolder.extract_results(runs,
- opt.output_dir,
- opt.site,
- opt.max_jobs)
- command_run = True
- if opt.clean:
- runfolder.clean_runs(runs, opt.dry_run)
- command_run = True
-
- if command_run == False:
- print "You need to specify a command."+os.linesep
- parser.print_help()
- else:
- print "You need to specify some run folders to process..."+os.linesep
- parser.print_help()
-
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.runner import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.spoolwatcher import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/python
-
-import logging
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines import runfolder
-from htsworkflow.pipelines.srf import make_srf_commands, make_qseq_commands, \
- run_commands, pathname_to_run_name
-from htsworkflow.pipelines.srf import ILLUMINA2SRF10, ILLUMINA2SRF11, SOLEXA2SRF
-
-def make_parser():
- usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
-
- parser = optparse.OptionParser(usage)
- parser.add_option('--dry-run', action='store_true',
- help='print what would be done',
- default=False)
-
- parser.add_option('-d', '--dest-dir', dest='dest_dir',
- help='location to write srf files to',
- default='.')
- parser.add_option('-s', '--site',
- help='site name',
- default='Individual')
- parser.add_option('-l', '--lanes', dest='lanes', action="append",
- default=[],
- help='comma seperated list of lanes to add to srf'
- )
- parser.add_option('-j', '--jobs', default=1, type='int',
- help='how many jobs to run simultaneously')
- parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
- help='Which class of srf file should we attempt to create\n'
- '0 = Solexa pipeline 0.2.6 - 0.3\n'
- '1 = illumina pipeline 1.0\n'
- '2 = illumina pipeline 1.1rc1 and later \n')
-
- parser.add_option('-v', '--verbose', dest='verbose',
- default=False, action='store_true',
- help='report more about internals (INFO)')
- parser.add_option('--debug', dest='debug',
- default=False, action='store_true',
- help='report even more about internals (DEBUG)')
-
- return parser
-
-def parse_lane_arg(lane_arg):
- """
- Convert comma sperated list of lane ids to a list of integers
- """
- lanes = []
- for lane in lane_arg.split(','):
- try:
- lane = int(lane)
- if lane < 1 or lane > 8:
- parser.error('Lanes must be in range [1..8]')
- lanes.append(lane)
- except ValueError:
- parser.error('Lane selections must be integers')
- return lanes
-
-def main(cmdline=None):
- parser = make_parser()
- opts, args = parser.parse_args(cmdline)
-
- if opts.debug:
- logging.basicConfig(level=logging.DEBUG)
- elif opts.verbose:
- logging.basicConfig(level=logging.INFO)
- else:
- logging.basicConfig(level=logging.WARNING)
-
- if len(args) == 0:
- parser.error('need runfolder arguments')
-
- # parse lane arguemnts
- lanes_list = []
- if len(opts.lanes) == 0:
- lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
- elif len(opts.lanes) == len(args):
- for lane_arg in opts.lanes:
- lanes_list.append(parse_lane_arg(lane_arg))
- else:
- parser.error(
- "Number of lane arguments must match number of runfolders"
- )
-
- make_commands = make_qseq_commands
- # build list of commands
- cmds = {}
- for runfolder_path, lanes in zip(args, lanes_list):
- # normalize paths, either relative to home dirs or current dir
- runfolder_path = os.path.abspath(runfolder_path)
- run_name = pathname_to_run_name(runfolder_path)
- # so any bustard directories?
- runs = runfolder.get_runs(runfolder_path)
- # give up if there are anything other than 1 run
- if len(runs) > 1:
- print 'ERROR: Too many run directories in %s' %(runfolder_path,)
- return 1
- elif len(runs) == 1:
- bustard_dir = runs[0].bustard.pathname
- cmds[bustard_dir] = make_commands(run_name,
- bustard_dir,
- lanes,
- opts.site,
- opts.dest_dir,
- opts.runfolder_version)
- else:
- print "ERROR: Couldn't find a bustard directory in", runfolder_path
- return 1
-
- if not opts.dry_run:
- for cwd, cmd_list in cmds.items():
- run_commands(cwd, cmd_list, opts.jobs)
- else:
- for cwd, cmd_list in cmds.items():
- print cwd
- print cmd_list
- print 'jobs: ', opts.jobs
-
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/python
-import sys
-from htsworkflow.pipelines.srf2fastq import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
"htsworkflow.frontend.samples",
],
scripts=[
- 'scripts/copier',
- 'scripts/library.py',
- 'scripts/makebed',
- 'scripts/make-library-tree',
- 'scripts/mark_archived_data',
- 'scripts/qseq2fastq',
- 'scripts/retrieve_config',
- 'scripts/runfolder',
- 'scripts/runner',
- 'scripts/spoolwatcher',
- 'scripts/srf',
- 'scripts/srf2fastq'
+ "scripts/htsw-copier",
+ "scripts/htsw-eland2bed",
+ "scripts/htsw-elandseq",
+ "scripts/htsw-gerald2bed",
+ "scripts/htsw-get-config",
+ "scripts/htsw-qseq2fastq",
+ "scripts/htsw-record-runfolder",
+ "scripts/htsw-runfolder",
+ "scripts/htsw-runner",
+ "scripts/htsw-spoolwatcher",
+ "scripts/htsw-srf",
+ "scripts/htsw-srf2fastq",
+ "scripts/htsw-update-archive",
],
)