Rename all the scripts to start with htsw-
author    Diane Trout <diane@caltech.edu>
          Tue, 4 Jan 2011 21:29:27 +0000 (13:29 -0800)
committer Diane Trout <diane@caltech.edu>
          Tue, 4 Jan 2011 21:29:27 +0000 (13:29 -0800)
In preparation for making a Debian package I thought
I should try to limit my pollution of the command namespace.

So now everything that is actually an installed script starts
with htsw-. There is one leftover script that might still be
useful in the future but isn't currently; it was left with its
old name.

29 files changed:
scripts/configure_pipeline [deleted file]
scripts/copier [deleted file]
scripts/elandseq [deleted file]
scripts/gerald2bed.py [deleted file]
scripts/htsw-copier [new file with mode: 0755]
scripts/htsw-eland2bed [new file with mode: 0755]
scripts/htsw-elandseq [new file with mode: 0755]
scripts/htsw-gerald2bed [new file with mode: 0755]
scripts/htsw-get-config [new file with mode: 0755]
scripts/htsw-qseq2fastq [new file with mode: 0755]
scripts/htsw-record-runfolder [new file with mode: 0755]
scripts/htsw-runfolder [new file with mode: 0755]
scripts/htsw-runner [new file with mode: 0755]
scripts/htsw-spoolwatcher [new file with mode: 0755]
scripts/htsw-srf [new file with mode: 0755]
scripts/htsw-srf2fastq [new file with mode: 0755]
scripts/htsw-update-archive [new file with mode: 0755]
scripts/library.py [deleted file]
scripts/make-library-tree [deleted file]
scripts/makebed [deleted file]
scripts/mark_archived_data [deleted file]
scripts/qseq2fastq [deleted file]
scripts/retrieve_config [deleted file]
scripts/runfolder [deleted file]
scripts/runner [deleted file]
scripts/spoolwatcher [deleted file]
scripts/srf [deleted file]
scripts/srf2fastq [deleted file]
setup.py
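
The setup.py diff isn't reproduced below; the gist of that change is that
the installed-scripts list now only names htsw- commands. A minimal sketch
(hypothetical; the real setup() call carries more metadata, and the name
and version here are placeholders):

    # hypothetical sketch of the relevant part of setup.py
    from glob import glob
    from distutils.core import setup

    setup(
        name='htsworkflow',              # assumed package name
        version='0.0',                   # placeholder
        scripts=glob('scripts/htsw-*'),  # exactly the renamed scripts
    )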

diff --git a/scripts/configure_pipeline b/scripts/configure_pipeline
deleted file mode 100644 (file)
index 0251337..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-import os
-import sys
-import re
-from htsworkflow.pipelines.configure_run import *
-from htsworkflow.pipelines import retrieve_config as _rc
-from htsworkflow.pipelines.run_status import startCmdLineStatusMonitor
-
-s_fc = re.compile('FC[0-9]+')
-
-#Turn on built-in command-line parsing.
-_rc.DISABLE_CMDLINE = False
-
-GENOME_DIR = '/data-store01/compbio/genomes/'
-
-
-
-def main(args=None):
-  ci = ConfigInfo()
-  ci.analysis_dir = os.getcwd()
-  ci.base_analysis_dir, junk = os.path.split(ci.analysis_dir)
-
-  #FIXME: make a better command line tool
-  skip_retrieve_config = False
-  if len(args) == 1:
-    arg = args[0]
-
-    #If FC##### found
-    if s_fc.search(arg):
-      cfg_filepath = os.path.abspath('config32auto.txt')
-      flowcell = arg
-    #else, config file provide
-    else:
-      cfg_filepath = os.path.abspath(args[0])
-      skip_retrieve_config = True
-  else:
-    print "usage:\n" \
-          "       configure_pipeline FC#####\n" \
-          " or:\n" \
-          "       configure_pipeline <conf_filepath>\n"
-    return 3
-
-  genome_dir = GENOME_DIR
-
-  if not skip_retrieve_config:
-    status_retrieve_cfg = retrieve_config(ci, flowcell, cfg_filepath, genome_dir)
-    if status_retrieve_cfg:
-      print "Retrieve config file successful"
-    else:
-      print "Failed to retrieve config file"
-  else:
-    print "Config file %s provided from command-line" % (cfg_filepath)
-    ci.config_filepath = cfg_filepath
-    status_retrieve_cfg = True
-  
-  if status_retrieve_cfg:
-    status = configure(ci)
-    if status:
-      print "Configure success"
-    else:
-      print "Configure failed"
-    
-    print 'Run Dir:', ci.run_path
-    print 'Bustard Dir:', ci.bustard_path
-    
-    if status:
-      # Setup status cmdline status monitor
-      startCmdLineStatusMonitor(ci)
-      
-      print 'Running pipeline now!'
-      run_status = run_pipeline(ci)
-      if run_status is True:
-        print 'Pipeline ran successfully.'
-        return 0
-      else:
-        print 'Pipeline run failed.'
-        return 1
-
-    return 2
-
-if __name__ == "__main__":
-  logging.basicConfig(level=logging.DEBUG,
-                    format='%(asctime)s %(levelname)-8s %(message)s',
-                    datefmt='%a, %d %b %Y %H:%M:%S',
-                    #filename='pipeline_main.log',
-                    filemode='w')
-
-  sys.exit(main(sys.argv[1:]))
diff --git a/scripts/copier b/scripts/copier
deleted file mode 100644 (file)
index 9338b07..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.copier import main
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
diff --git a/scripts/elandseq b/scripts/elandseq
deleted file mode 100755 (executable)
index 6a5178c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines.eland import extract_eland_sequence
-
-def make_parser():
-  usage = "usage: %prog [options] infile [outfile]"
-
-  parser = optparse.OptionParser(usage)
-  parser.add_option("-e", "--extract", dest="slice",
-    default=":",
-    help="provide a python slice operator to select a portion of an eland file")
-  return parser
-
-def main(argv):
-  parser = make_parser()
-
-  (opt, args) = parser.parse_args(argv)
-
-  if len(args) not in (0, 1, 2):
-    parser.error('incorrect number of arguments')
-
-  # get our slice coordinates
-  start, end = opt.slice.split(':')
-  if len(start) > 0:
-    start = int(start)
-  else:
-    start = None
-  if len(end) > 0:
-    end = int(end)
-  else:
-    end = None
-
-  # open infile
-  if len(args) > 0:
-    instream = open(args[0],'r')
-  else:
-    instream = sys.stdin
-
-  if len(args) > 1:
-    outstream = open(args[1],'w')
-  else:
-    outstream = sys.stdout
-
-  extract_eland_sequence(instream, outstream, start, end)
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-
diff --git a/scripts/gerald2bed.py b/scripts/gerald2bed.py
deleted file mode 100644 (file)
index 7a726e7..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/python
-"""
-Convert a group of eland_result files from a sequencer run to bed files.
-"""
-from glob import glob
-import logging
-import optparse
-import sys
-import os
-
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
-
-def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
-    """
-    convert s_[1-8]_eland_result.txt to corresponding bed files
-    """
-    eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
-    out_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.bed'))
-    if len(out_files) > 0:
-        raise RuntimeError("please move old bedfiles")
-
-    logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell))
-    for pathname in eland_files:
-        path, name = os.path.split(pathname)
-        lane = int(name[2])
-        outname = 's_%d_eland_result.bed' %(lane,)
-        logging.info('Converting lane %d to %s' % (lane, outname))
-
-        outpathname = os.path.join(eland_dir, outname)
-        # look up descriptions
-        bed_name, description = make_description(database, flowcell, lane)
-
-        # open files
-        instream = open(pathname,'r')
-        outstream = open(outpathname,'w')
-
-        make_bed_from_eland_stream(
-          instream, outstream, name, description, prefix
-        )
-
-def make_parser():
-  usage = """%prog: --flowcell <flowcell id> directory_name
-
-directory should contain a set of 8 eland result files named like
-s_[12345678]_eland_result.txt"""
-
-
-  parser = optparse.OptionParser(usage)
-
-  parser.add_option('-o', '--output', dest='output',
-                    help="destination directory for our bed files" \
-                         "defaults to eland directory",
-                    default=None)
-  parser.add_option('--chromosome', dest='prefix',
-                    help='Set the chromosome prefix name. defaults to "chr"',
-                    default='chr')
-  parser.add_option("--database", dest='database',
-                    help="specify location of fctracker database",
-                    default=None)
-  parser.add_option("--flowcell", dest='flowcell',
-                    help="specify the flowcell id for this run",
-                    default=None)
-  parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
-                    help='increase verbosity',
-                    default=False)
-  return parser
-
-def main(command_line=None):
-    logging.basicConfig(level=logging.WARNING)
-    if command_line is None:
-        command_line = sys.argv[1:]
-
-    parser = make_parser()
-    (opts, args) = parser.parse_args(command_line)
-
-    if len(args) != 1:
-        parser.error('Directory name required')
-
-    eland_dir = args[0]
-    if not os.path.isdir(eland_dir):
-        parser.error('%s must be a directory' % (eland_dir,))
-
-    if opts.flowcell is None:
-        parser.error('Flowcell ID required')
-
-    if opts.verbose:
-        logger = logging.getLogger()
-        logger.setLevel(logging.INFO)
-
-    make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell)
-
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-
diff --git a/scripts/htsw-copier b/scripts/htsw-copier
new file mode 100755 (executable)
index 0000000..9338b07
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+import sys
+from htsworkflow.automation.copier import main
+
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
diff --git a/scripts/htsw-eland2bed b/scripts/htsw-eland2bed
new file mode 100755 (executable)
index 0000000..577b868
--- /dev/null
@@ -0,0 +1,113 @@
+#!/usr/bin/python
+import optparse
+import sys
+import os
+
+from htsworkflow.util.opener import autoopen
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
+
+def make_parser():
+  parser = optparse.OptionParser()
+  parser.add_option('-e', '--eland', dest='inname',
+                    help='specify input eland filename')
+  parser.add_option('-b', '--bed', dest='outname',
+                    help='specify output bed filename')
+  parser.add_option('-n', '--name', dest='name',
+                    help='specify the track (short) name.',
+                    default=None)
+  parser.add_option('-d', '--description', dest='description',
+                    help='specify the track description',
+                    default=None)
+  parser.add_option('--chromosome', dest='prefix',
+                    help='Set the chromosome prefix name. defaults to "chr"',
+                    default='chr')
+  parser.add_option("--database", dest='database',
+                    help="specify location of fctracker database",
+                    default=None)
+  parser.add_option("--flowcell", dest='flowcell',
+                    help="compute name and description from database using flowcell id",
+                    default=None)
+  parser.add_option("--lane", dest='lane',
+                    help='specify which lane to use when retrieving description from database',
+                    default=None)
+
+  multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')
+
+  multi.add_option('-m', '--multi', action='store_true',
+                    help='Enable parsing multi-read eland files',
+                    default=False)
+  multi.add_option('--reads', type='int',
+                   help='limit reporting multi reads to this many reads '
+                        '(most usefully --reads=1 will turn a multi-read '
+                        'file into a single read file)',
+                   default=255)
+  parser.add_option_group(multi)
+
+  return parser
+
+def main(command_line=None):
+  instream = None
+  outstream = None
+
+  if command_line is None:
+    command_line = sys.argv[1:]
+
+  parser = make_parser()
+  (options, args) = parser.parse_args(command_line)
+
+  if options.inname is None:
+    parser.error("Need eland input file name")
+    return 1
+
+  if options.inname == '-':
+    instream = sys.stdin
+  elif os.path.exists(options.inname):
+    instream = autoopen(options.inname, 'r')
+  else:
+    parser.error('%s was not found' % (options.inname))
+    return 1
+
+  # figure out name for output file
+  if options.outname is None:
+      # if outname wasn't defined, and we're reading from stdin
+      if instream is sys.stdin:
+          # write to stdout
+          outstream = sys.stdout
+      else:
+          # if there's a name write to name.bed
+          options.outname = os.path.splitext(options.inname)[0]+'.bed'
+          print >>sys.stderr, "defaulting to output name", options.outname
+  elif options.outname == '-':
+      outstream = sys.stdout
+
+  if outstream is None:
+      if os.path.exists(options.outname):
+          parser.error("not overwriting %s" % (options.outname))
+          return 1
+      else:
+          outstream = open(options.outname, 'w')
+
+  if options.flowcell is not None and options.lane is not None:
+    # get our name/description out of the database
+    name, description = make_description(
+                           options.database, options.flowcell, options.lane
+                        )
+  else:
+    name = options.name
+    description = options.description
+
+  if options.multi:
+    make_bed_from_multi_eland_stream(instream, outstream, 
+                                     name, description, 
+                                     options.prefix,
+                                     options.reads)
+
+  else:
+    make_bed_from_eland_stream(instream, outstream, 
+                               name, description, 
+                               options.prefix)
+  return 0
+
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
+
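Going by the option parser above, a typical htsw-eland2bed invocation might
look like this (file names, database path, and flowcell id are invented for
illustration):

    # pull the track name/description from fctracker and write
    # s_1_eland_result.bed next to the input
    htsw-eland2bed -e s_1_eland_result.txt --database fctracker.db \
                   --flowcell FC12345 --lane 1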
diff --git a/scripts/htsw-elandseq b/scripts/htsw-elandseq
new file mode 100755 (executable)
index 0000000..6a5178c
--- /dev/null
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+import optparse
+import os
+import sys
+
+from htsworkflow.pipelines.eland import extract_eland_sequence
+
+def make_parser():
+  usage = "usage: %prog [options] infile [outfile]"
+
+  parser = optparse.OptionParser(usage)
+  parser.add_option("-e", "--extract", dest="slice",
+    default=":",
+    help="provide a python slice operator to select a portion of an eland file")
+  return parser
+
+def main(argv):
+  parser = make_parser()
+
+  (opt, args) = parser.parse_args(argv)
+
+  if len(args) not in (0, 1, 2):
+    parser.error('incorrect number of arguments')
+
+  # get our slice coordinates
+  start, end = opt.slice.split(':')
+  if len(start) > 0:
+    start = int(start)
+  else:
+    start = None
+  if len(end) > 0:
+    end = int(end)
+  else:
+    end = None
+
+  # open infile
+  if len(args) > 0:
+    instream = open(args[0],'r')
+  else:
+    instream = sys.stdin
+
+  if len(args) > 1:
+    outstream = open(args[1],'w')
+  else:
+    outstream = sys.stdout
+
+  extract_eland_sequence(instream, outstream, start, end)
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
+
diff --git a/scripts/htsw-gerald2bed b/scripts/htsw-gerald2bed
new file mode 100755 (executable)
index 0000000..7a726e7
--- /dev/null
@@ -0,0 +1,96 @@
+#!/usr/bin/python
+"""
+Convert a group of eland_result files from a sequencer run to bed files.
+"""
+from glob import glob
+import logging
+import optparse
+import sys
+import os
+
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
+
+def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
+    """
+    convert s_[1-8]_eland_result.txt to corresponding bed files
+    """
+    eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
+    out_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.bed'))
+    if len(out_files) > 0:
+        raise RuntimeError("please move old bedfiles")
+
+    logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell))
+    for pathname in eland_files:
+        path, name = os.path.split(pathname)
+        lane = int(name[2])
+        outname = 's_%d_eland_result.bed' %(lane,)
+        logging.info('Converting lane %d to %s' % (lane, outname))
+
+        outpathname = os.path.join(eland_dir, outname)
+        # look up descriptions
+        bed_name, description = make_description(database, flowcell, lane)
+
+        # open files
+        instream = open(pathname,'r')
+        outstream = open(outpathname,'w')
+
+        make_bed_from_eland_stream(
+          instream, outstream, name, description, prefix
+        )
+
+def make_parser():
+  usage = """%prog: --flowcell <flowcell id> directory_name
+
+directory should contain a set of 8 eland result files named like
+s_[12345678]_eland_result.txt"""
+
+
+  parser = optparse.OptionParser(usage)
+
+  parser.add_option('-o', '--output', dest='output',
+                    help="destination directory for our bed files, " \
+                         "defaults to eland directory",
+                    default=None)
+  parser.add_option('--chromosome', dest='prefix',
+                    help='Set the chromosome prefix name. defaults to "chr"',
+                    default='chr')
+  parser.add_option("--database", dest='database',
+                    help="specify location of fctracker database",
+                    default=None)
+  parser.add_option("--flowcell", dest='flowcell',
+                    help="specify the flowcell id for this run",
+                    default=None)
+  parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
+                    help='increase verbosity',
+                    default=False)
+  return parser
+
+def main(command_line=None):
+    logging.basicConfig(level=logging.WARNING)
+    if command_line is None:
+        command_line = sys.argv[1:]
+
+    parser = make_parser()
+    (opts, args) = parser.parse_args(command_line)
+
+    if len(args) != 1:
+        parser.error('Directory name required')
+
+    eland_dir = args[0]
+    if not os.path.isdir(eland_dir):
+        parser.error('%s must be a directory' % (eland_dir,))
+
+    if opts.flowcell is None:
+        parser.error('Flowcell ID required')
+
+    if opts.verbose:
+        logger = logging.getLogger()
+        logger.setLevel(logging.INFO)
+
+    make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell)
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
+
diff --git a/scripts/htsw-get-config b/scripts/htsw-get-config
new file mode 100755 (executable)
index 0000000..e4fdff1
--- /dev/null
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+import logging
+import sys
+from htsworkflow.pipelines.retrieve_config import *
+from htsworkflow.pipelines import retrieve_config
+
+#Turn on built-in command-line parsing.
+retrieve_config.DISABLE_CMDLINE = False
+
+def main(argv=None):
+  if argv is None:
+    argv = sys.argv
+    
+  #Display help if no args are presented
+  options = getCombinedOptions(argv)
+
+  if options.verbose:
+    logging.basicConfig(level=logging.DEBUG)
+  else:
+    logging.basicConfig(level=logging.INFO)
+  
+  msg_list = ['ERROR MESSAGES:']
+  if options.flowcell is None:
+    msg_list.append("  Flow cell argument required. -f <flowcell> or --flowcell=<flowcell>")
+    
+  if options.url is None:
+    msg_list.append("  URL argument required (-u <url> or --url=<url>), or entry\n" \
+                    "    in /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
+  if options.genome_dir is None:
+    msg_list.append("  genome_dir argument required (-g <genome_dir> or \n" \
+                    "    --genome_dir=<genome_dir>, or entry in \n" \
+                    "    /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
+    
+  if len(msg_list) > 1:
+    print '\n'.join(msg_list)
+    return 1
+  
+  saveConfigFile(options)
+  
+  return 0
+  
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
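Per the error messages above, htsw-get-config needs a flowcell plus a url
and genome_dir, from the command line or a ga_frontend.conf file. A
hypothetical invocation (the genome path reuses the GENOME_DIR default from
the old configure_pipeline):

    htsw-get-config -f FC12345 -u http://jumpgate.example.edu \
                    -g /data-store01/compbio/genomes/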
diff --git a/scripts/htsw-qseq2fastq b/scripts/htsw-qseq2fastq
new file mode 100755 (executable)
index 0000000..555e668
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/python
+import sys
+from htsworkflow.pipelines.qseq2fastq import main
+
+if __name__ == "__main__":    
+    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/htsw-record-runfolder b/scripts/htsw-record-runfolder
new file mode 100755 (executable)
index 0000000..288ec1a
--- /dev/null
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+
+from htsworkflow.util.hdquery import get_hd_serial_num
+from htsworkflow.frontend import settings
+
+from optparse import OptionParser
+import os
+import re
+import sys
+import urllib2
+import urlparse
+
+runfolder_pattern = re.compile(r'[0-9]{6}_[-A-Za-z\d]+_\d+_(?P<flowcell>[A-Z\d]+)\.tgz')
+
+def extract_flowcell(runfolder_name):
+    path, basename = os.path.split(runfolder_name)
+    match = runfolder_pattern.match(basename)
+    if match is not None:
+        return match.group('flowcell')
+    else:
+        return None
+    
+def construct_parser():
+    """
+    """
+    msg = "usage: %prog [-d </dev/sdX> | -s <serial_number>] [-f <flowcell>] [archived dirs]"
+    parser = OptionParser(msg)
+    parser.add_option('-u', '--url', default=None,
+                      help="Alternate url for marking archived flowcells")
+    parser.add_option("-f", "--flowcell",  type="string", help="flowcell being archived")
+    parser.add_option("-d", "--device", type="string",
+                      help="device flowcell is being archived to")
+    parser.add_option("-s", "--serial", type="string", help="serial num. of archive device")
+    parser.add_option("-v", "--verbose", action="store_true", default=False)
+    
+    return parser
+
+
+def update_db(root_url, flowcells, serial, debug=False):
+    """
+    Creates link between flowcell and storage device over http
+    """
+    for fc in flowcells:
+        url = urlparse.urljoin(root_url, '%s/%s/' % (fc, serial))
+        
+        req = urllib2.Request(url)
+        try:
+            response = urllib2.urlopen(req)
+        except urllib2.URLError, e:
+            print 'ERROR - HTTP OUTPUT (Return Code: %s); use -v/--verbose for more details.' % (e.code)
+            if debug:
+                print e.read()
+            sys.exit(e.code)
+        
+        print "DB Update of %s & %s succeeded" % (fc, serial)
+        print response.read()
+    
+
+def process_args(parser):
+    """
+    returns flowcell and serial#
+    """
+    options, args = parser.parse_args()
+    
+    msg = []
+    
+    # Only provide device or serial
+    if options.device is not None and options.serial is not None:
+        parser.error("Please provide only --device or --serial.\n"\
+                     "The serial number is extracted automatically if the "\
+                     "device is provided.")
+
+    # allow user to override the default destination URL
+    if options.url is not None:
+        root_url = options.url
+    else:
+        root_url = settings.LINK_FLOWCELL_STORAGE_DEVICE_URL
+
+    # if device and serial missing:
+    if options.device is None and options.serial is None:
+        parser.error('One of --device or --serial is required')
+
+    flowcells = []
+    
+    # sanitize args    
+    for runfolder in args:
+        flowcell_id = extract_flowcell(runfolder)
+        if flowcell_id is None:
+            parser.error('archive names must look like YYMMDD_MACHINE_RUN_FLOWCELLID.tgz\n'\
+                         '(got %s)' % (runfolder,))
+        else:
+            flowcells.append(flowcell_id)
+            
+    if options.flowcell is not None:
+        flowcells.append(options.flowcell)
+        
+    if len(flowcells) == 0:
+        parser.error('please specify a --flowcell or list of runfolder archives\n'\
+                     'for archival. I need something to do.')
+
+    # Update db records
+    if options.device is not None:
+        serial = get_hd_serial_num(options.device)
+        update_db(root_url, flowcells, serial=serial, debug=options.verbose)
+    elif options.serial is not None:
+        update_db(root_url, flowcells, serial=options.serial, debug=options.verbose)
+    else:
+        msg = "FATAL should-not-happen error occurred; i.e. the best kind!"
+        raise ValueError, msg
+    
+    
+
+def main():
+    """
+    """
+    parser = construct_parser()
+    process_args(parser)
+    
+    #print "Database Updated."
+    sys.exit(0)
+
+if __name__ == '__main__':
+    main()
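A hypothetical htsw-record-runfolder run, with an archive name shaped the
way the runfolder_pattern regex expects:

    # read the serial number off /dev/sdb and link flowcell FC12345 to it
    htsw-record-runfolder -d /dev/sdb 110104_HWI-EAS229_0001_FC12345.tgz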
diff --git a/scripts/htsw-runfolder b/scripts/htsw-runfolder
new file mode 100755 (executable)
index 0000000..145fd7a
--- /dev/null
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+"""
+Runfolder.py can generate an xml file capturing all the 'interesting' parameters from a finished pipeline run (using the -a option). The information currently being captured includes:
+
+  * Flowcell ID
+  * run dates
+  * start/stop cycle numbers
+  * Firecrest, bustard, gerald version numbers
+  * Eland analysis types, and everything in the eland configuration file.
+  * cluster numbers and other values from the Summary.htm 
+    LaneSpecificParameters table. 
+  * How many reads mapped to a genome from an eland file
+
+The ELAND "mapped reads" counter will also check for eland squashed files
+that were symlinked from another directory. This is so I can track how 
+many reads landed on the genome of interest and on the spike ins. 
+
+Basically my subdirectories look something like:
+
+genomes/hg18
+genomes/hg18/chr*.2bpb <- files for hg18 genome
+genomes/hg18/chr*.vld  
+genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins
+genomes/spikein 
+
+runfolder.py can also spit out a simple summary report (-s option) 
+that contains the per lane post filter cluster numbers and the mapped 
+read counts. (The report isn't currently very pretty)
+"""
+from glob import glob
+import logging
+import optparse
+import os
+import sys
+
+from htsworkflow.pipelines import runfolder
+from htsworkflow.pipelines.runfolder import ElementTree
+        
+def make_parser():
+    usage = 'usage: %prog [options] runfolder_root_dir'
+    parser = optparse.OptionParser(usage)
+
+    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
+                      default=False,
+                      help='turn on verbose mode')
+    parser.add_option('--dry-run', action='store_true', default=False,
+                      help="Don't delete anything (in clean mode)")
+
+    commands = optparse.OptionGroup(parser, 'Commands')
+
+    commands.add_option('-s', '--summary', dest='summary', action='store_true',
+                        default=False,
+                        help='produce summary report')
+    commands.add_option('-a', '--archive', dest='archive', action='store_true',
+                        default=False,
+                        help='generate run configuration archive')
+    commands.add_option('--extract-results', action='store_true',
+           default=False,
+           help='create run-xml summary, compress the eland result files, build srf files and '
+                'copy all that and the Summary.htm file into an archival directory.')
+    commands.add_option('-c', '--clean', action='store_true', default=False,
+                        help='Clean runfolder, preparing it for long-term storage')
+    parser.add_option_group(commands)
+
+    parser.add_option('-j', '--max-jobs', default=1,
+                      help='specify the maximum number of processes to run '
+                           '(used in extract-results)')
+    parser.add_option('-o', '--output-dir', default=None,
+           help="specify the default output directory for extract results")
+    parser.add_option('--run-xml', dest='run_xml',
+           default=None,
+           help='specify a run_<FlowCell>.xml file for summary reports')
+    parser.add_option('--site', default=None,
+                      help='create srf files tagged with the provided site name')
+    parser.add_option('-u', '--use-run', dest='use_run', default=None,
+                      help='Specify which run to use instead of autoscanning '
+                           'the runfolder. You do this by providing the final '
+                           'GERALD directory, and it assumes the parent '
+                           'directories are the bustard and image processing '
+                           'directories.')
+
+    return parser
+
+def main(cmdlist=None):
+    parser = make_parser()
+    opt, args = parser.parse_args(cmdlist)
+
+    logging.basicConfig()
+    if opt.verbose:
+        root_log = logging.getLogger()
+        root_log.setLevel(logging.INFO)
+
+    logging.info('Starting htsworkflow illumina runfolder processing tool.')
+    runs = []
+    if opt.run_xml:
+        # handle ~ shortcut
+        opt.run_xml = os.path.expanduser(opt.run_xml)
+        tree = ElementTree.parse(opt.run_xml).getroot()
+        runs.append(runfolder.PipelineRun(xml=tree))
+
+    # look for manually specified run
+    if opt.use_run is not None:
+        specific_run = runfolder.get_specific_run(opt.use_run)
+        if specific_run is not None:
+            runs.append(specific_run)
+        else:
+            logging.warn("Couldn't find a run in %s" % (opt.use_run,))
+
+    # scan runfolders for runs
+    for run_pattern in args:
+        # expand args on our own if needed
+        for run_dir in glob(run_pattern):
+            runs.extend(runfolder.get_runs(run_dir))
+
+    if len(runs) > 0:
+        command_run = False
+        if opt.summary:
+            print runfolder.summary_report(runs)
+            command_run = True
+        if opt.archive:
+            runfolder.extract_run_parameters(runs)
+            command_run = True
+        if opt.extract_results:
+            if opt.dry_run:
+                parser.error("Dry-run is not supported for extract-results")
+            runfolder.extract_results(runs, 
+                                      opt.output_dir, 
+                                      opt.site, 
+                                      opt.max_jobs)
+            command_run = True
+        if opt.clean:
+            runfolder.clean_runs(runs, opt.dry_run)
+            command_run = True
+
+        if not command_run:
+            print "You need to specify a command."+os.linesep
+            parser.print_help()
+    else:
+        print "You need to specify some run folders to process..."+os.linesep
+        parser.print_help()
+
+    return 0
+
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
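Matching the commands defined in the parser above, a couple of hypothetical
htsw-runfolder invocations (runfolder paths invented):

    # per-lane cluster and mapped-read summary report
    htsw-runfolder --summary /data/110104_HWI-EAS229_0001_FC12345
    # compress eland results, build srf files, stage everything for archival
    htsw-runfolder --extract-results -o /archive -j 4 '/data/110104_*'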
diff --git a/scripts/htsw-runner b/scripts/htsw-runner
new file mode 100755 (executable)
index 0000000..560299f
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+import sys
+from htsworkflow.automation.runner import main
+
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
diff --git a/scripts/htsw-spoolwatcher b/scripts/htsw-spoolwatcher
new file mode 100755 (executable)
index 0000000..b2f833e
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+import sys
+from htsworkflow.automation.spoolwatcher import main
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/htsw-srf b/scripts/htsw-srf
new file mode 100755 (executable)
index 0000000..bcf835d
--- /dev/null
@@ -0,0 +1,126 @@
+#!/usr/bin/python
+
+import logging
+import optparse
+import os
+import sys
+
+from htsworkflow.pipelines import runfolder
+from htsworkflow.pipelines.srf import make_srf_commands, make_qseq_commands, \
+                                      run_commands, pathname_to_run_name
+from htsworkflow.pipelines.srf import ILLUMINA2SRF10, ILLUMINA2SRF11, SOLEXA2SRF
+
+def make_parser():
+  usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
+
+  parser = optparse.OptionParser(usage)
+  parser.add_option('--dry-run', action='store_true',
+                    help='print what would be done',
+                    default=False)
+
+  parser.add_option('-d', '--dest-dir', dest='dest_dir',
+                    help='location to write srf files to',
+                    default='.')
+  parser.add_option('-s', '--site',
+                    help='site name',
+                    default='Individual')
+  parser.add_option('-l', '--lanes', dest='lanes', action="append",
+         default=[],
+         help='comma separated list of lanes to add to srf'
+  )
+  parser.add_option('-j', '--jobs', default=1, type='int',
+                    help='how many jobs to run simultaneously')
+  parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
+                    help='Which class of srf file should we attempt to create\n'
+                         '0 = Solexa pipeline 0.2.6 - 0.3\n'
+                         '1 = illumina pipeline 1.0\n'
+                         '2 = illumina pipeline 1.1rc1 and later \n')
+                     
+  parser.add_option('-v', '--verbose', dest='verbose',
+                    default=False, action='store_true',
+                    help='report more about internals (INFO)')
+  parser.add_option('--debug', dest='debug',
+                    default=False, action='store_true',
+                    help='report even more about internals (DEBUG)')
+  return parser
+
+def parse_lane_arg(parser, lane_arg):
+    """
+    Convert comma separated list of lane ids to a list of integers
+    """
+    lanes = []
+    for lane in lane_arg.split(','):
+        try:
+            lane = int(lane)
+            if lane < 1 or lane > 8:
+                parser.error('Lanes must be in range [1..8]')
+            lanes.append(lane)
+        except ValueError:
+            parser.error('Lane selections must be integers')
+    return lanes
+
+def main(cmdline=None):
+    parser = make_parser()
+    opts, args = parser.parse_args(cmdline)
+   
+    if opts.debug: 
+        logging.basicConfig(level=logging.DEBUG)
+    elif opts.verbose:
+        logging.basicConfig(level=logging.INFO)
+    else:
+        logging.basicConfig(level=logging.WARNING)
+
+    if len(args) == 0:
+        parser.error('need runfolder arguments')
+
+    # parse lane arguments
+    lanes_list = []
+    if len(opts.lanes) == 0:
+        lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
+    elif len(opts.lanes) == len(args):
+        for lane_arg in opts.lanes:
+            lanes_list.append(parse_lane_arg(parser, lane_arg))
+    else:
+        parser.error(
+          "Number of lane arguments must match number of runfolders"
+        )
+
+    make_commands = make_qseq_commands
+    # build list of commands
+    cmds = {}
+    for runfolder_path, lanes in zip(args, lanes_list):
+        # normalize paths, either relative to home dirs or current dir
+        runfolder_path = os.path.abspath(runfolder_path)
+        run_name = pathname_to_run_name(runfolder_path)
+        # are there any bustard directories?
+        runs = runfolder.get_runs(runfolder_path)
+        # give up if there is anything other than one run
+        if len(runs) > 1:
+          print 'ERROR: Too many run directories in %s' %(runfolder_path,)
+          return 1
+        elif len(runs) == 1:
+          bustard_dir = runs[0].bustard.pathname
+          cmds[bustard_dir] = make_commands(run_name,
+                                            bustard_dir,
+                                            lanes,
+                                            opts.site,
+                                            opts.dest_dir,
+                                            opts.runfolder_version)
+        else:
+          print "ERROR: Couldn't find a bustard directory in", runfolder_path
+          return 1
+
+    if not opts.dry_run:
+      for cwd, cmd_list in cmds.items():
+        run_commands(cwd, cmd_list, opts.jobs)
+    else:
+      for cwd, cmd_list in cmds.items():
+        print cwd
+        print cmd_list
+        print 'jobs: ', opts.jobs
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/htsw-srf2fastq b/scripts/htsw-srf2fastq
new file mode 100755 (executable)
index 0000000..0361e43
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/python
+import sys
+from htsworkflow.pipelines.srf2fastq import main
+
+if __name__ == "__main__":    
+    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/htsw-update-archive b/scripts/htsw-update-archive
new file mode 100755 (executable)
index 0000000..2ccbec6
--- /dev/null
@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+
+from ConfigParser import SafeConfigParser
+
+import logging
+import os
+from optparse import OptionParser
+import stat
+import shelve
+
+from htsworkflow.util import api
+from htsworkflow.pipelines.sequences import scan_for_sequences
+
+def build_flowcell_db(fcdb_filename, sequences, baseurl, apiid, apikey):
+    """
+    compare our flowcell database with our list of sequences and return
+    a fully populated database
+    """
+    fcdb = shelve.open(fcdb_filename)
+    libdb = {}
+    apidata = {'apiid': apiid, 'apikey': apikey}
+    for seq in sequences:
+        flowcell = seq.flowcell
+        flowcell_info = None
+
+        # get info about flowcell from server or shelf
+        if not fcdb.has_key(flowcell):
+            url = api.flowcell_url(baseurl, flowcell)
+            flowcell_info = api.retrieve_info(url, apidata)
+            if flowcell_info is not None:
+                fcdb[flowcell] = flowcell_info
+        else:
+            flowcell_info = fcdb[flowcell]
+
+        # make library id db
+        if flowcell_info is not None:
+            seq_library_id = flowcell_info['lane_set'][unicode(seq.lane)]['library_id']
+            libdb.setdefault(seq_library_id, []).append(seq)
+           
+    fcdb.sync()
+    return fcdb, libdb
+
+def carefully_make_hardlink(source, destination, dry_run=False):
+    """
+    Make a hard link, failing if a different link already exists
+
+    Checking to see if the link already exists and is
+    the same as the link we want to make.
+    If the link already exists and is different, throw an error.
+
+    If we didn't update anything return 0, if we did update
+    return 1.
+    """
+    logging.debug("CHECKING: %s -> %s", source, destination)
+
+    if not os.path.exists(source):
+        logging.warning("%s doesn't exist", source)
+        return 0
+
+    if os.path.exists(destination):
+        if os.path.samefile(source, destination):
+            logging.debug('SAME: %s -> %s' % (source, destination))
+            return 0
+        else:
+            logging.error('%s and %s are different files, skipping' % \
+                          (source, destination)) 
+            return 0
+    logging.debug('Linking: %s -> %s' % (source, destination))
+
+    # past this point we would modify the filesystem; in a dry run just report it
+    if dry_run: return 1
+
+    os.link(source, destination)
+    os.chmod(destination,
+             stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
+    return 1
+    
+def make_library_links(root, library_db, dry_run=False):
+    """
+    Make a tree of sequencer roots organized by library id
+
+    Root is the root of the library tree
+    library_db is a dictionary of SequenceFiles organized by library id
+    """
+    count = 0
+    root = os.path.abspath(root)
+    for lib_id, sequences in library_db.items():
+        target_dir = os.path.join(root, lib_id)
+        if not os.path.exists(target_dir):
+            logging.info("mkdir %s" % (target_dir,))
+            if not dry_run:
+                os.mkdir(target_dir)
+            
+        for s in sequences:
+            count += carefully_make_hardlink(s.path,
+                                             s.make_target_name(target_dir),
+                                             dry_run=dry_run)
+    return count
+
+def configure_logging(opts):
+    # setup logging
+    level = logging.WARN
+    if opts.verbose:
+        level = logging.INFO
+    if opts.debug:
+        level = logging.DEBUG
+    logging.basicConfig(level=level)
+    
+
+def configure_opts(opts):
+    """
+    Load in options from config file
+    """
+    SECTION_NAME = 'sequence_archive'
+    ARCHIVE_OPT = 'sequence_archive'
+    CACHE_OPT = 'cache'
+    HOST_OPT = 'host'
+    APIID_OPT = 'apiid'
+    APIKEY_OPT = 'apikey'
+
+    # figure out what config file to read
+    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
+                   '/etc/htsworkflow.ini']
+    if opts.config is not None:
+        config_path = [opts.config]
+    # parse options from config file
+    config_file = SafeConfigParser()
+    config_file.read(config_path)
+
+    # load defaults from config file if not overriden by the command line
+    if opts.cache is None:
+        if config_file.has_option(SECTION_NAME, CACHE_OPT):
+            opts.cache = config_file.get(SECTION_NAME, CACHE_OPT)
+        else:
+            opts.cache = os.path.expanduser('~/.flowcelldb.shelve')
+
+    if opts.sequence_archive is None and \
+       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
+        opts.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
+        opts.sequence_archive = os.path.expanduser(opts.sequence_archive)
+
+    opts.sequence_archive = os.path.abspath(opts.sequence_archive)
+    opts.library_tree = os.path.join(opts.sequence_archive, 'libraries')
+    opts.flowcells = os.path.join(opts.sequence_archive, 'flowcells')
+    opts.srfs = os.path.join(opts.sequence_archive, 'srfs')
+
+    if opts.host is None and config_file.has_option(SECTION_NAME, HOST_OPT):
+        opts.host = config_file.get(SECTION_NAME, HOST_OPT)
+
+    if opts.apiid is None and config_file.has_option(SECTION_NAME, APIID_OPT):
+        opts.apiid = config_file.get(SECTION_NAME, APIID_OPT)
+
+    if opts.apikey is None and config_file.has_option(SECTION_NAME, APIKEY_OPT):
+        opts.apikey = config_file.get(SECTION_NAME, APIKEY_OPT)
+      
+    return opts
+
+def make_parser():
+    """
+    Make parser
+    """
+    parser = OptionParser()
+    parser.add_option('-c', '--config', default=None,
+                      help='path to a configuration file containing a '
+                           'sequence archive section')
+    parser.add_option('--cache', default=None,
+                      help="default flowcell cache")
+    
+    parser.add_option('--host', default=None,
+                      help="specify http://host for querying flowcell information")
+    parser.add_option('--apiid', default=None,
+                      help="API ID to use when retrieving information")
+    parser.add_option("--apikey", default=None,
+                      help="API Key for when retrieving information")
+    
+    parser.add_option('-a', '--sequence-archive', default=None,
+                      help='path to where the sequence archive lives')
+
+    parser.add_option('-v', '--verbose', action='store_true', default=False,
+                      help='be more verbose')
+    parser.add_option('-d', '--debug', action='store_true', default=False,
+                      help='report everything')
+             
+    parser.add_option("--dry-run", dest="dry_run", action="store_true",
+                      default=False,
+                      help="Don't modify the filesystem")
+    return parser
+
+def main(cmdline=None):
+    parser = make_parser()
+    opts, args = parser.parse_args(cmdline)
+
+    configure_logging(opts)
+    opts = configure_opts(opts)
+  
+    # complain if critical things are missing
+    if opts.cache is None:
+       parser.error('Need location of htsworkflow frontend database')
+
+    if opts.sequence_archive is None:
+       parser.error('Need the root path for the sequence archive')
+
+    seq_dirs = [ opts.flowcells, opts.srfs ]
+    if len(args) > 0:
+        seq_dirs = [os.path.abspath(f) for f in args]
+    
+    seqs = scan_for_sequences(seq_dirs)
+    fcdb, libdb = build_flowcell_db(opts.cache, seqs, opts.host, opts.apiid, opts.apikey)
+    updates = make_library_links(opts.library_tree, libdb, dry_run=opts.dry_run)
+    
+    logging.warn("%s flowcells in database" % (len(fcdb),))
+    logging.warn("found %s sequence files" % (len(seqs),))
+    logging.warn("%s libraries being checked" % (len(libdb),))
+    logging.warn("%s sequence files were linked" % (updates,))
+    
+    return 0
+    
+if __name__ == "__main__":
+    main()
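configure_opts() above pulls defaults from ~/.htsworkflow.ini or
/etc/htsworkflow.ini. A hypothetical config matching the section and option
names the code looks up (all values invented):

    [sequence_archive]
    ; root of the archive; libraries/, flowcells/ and srfs/ live under it
    sequence_archive = /archive/sequences
    cache = /home/user/.flowcelldb.shelve
    host = http://jumpgate.example.edu/
    apiid = 0
    apikey = s3cr3t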
diff --git a/scripts/library.py b/scripts/library.py
deleted file mode 100644 (file)
index 35532f4..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-from optparse import OptionParser
-import sys
-
-from htsworkflow.util import fctracker
-
-def make_parser():
-    """
-    Make parser
-    """
-    parser = OptionParser()
-    parser.add_option("-d", "--database", dest="database",
-                      help="path to the fctracker.db",
-                      default=None)
-    parser.add_option("-w", "--where", dest="where",
-                      help="add a where clause",
-                      default=None)
-    return parser
-
-def main(argv=None):
-    if argv is None:
-        argv = []
-    parser = make_parser()
-
-    opt, args = parser.parse_args(argv)
-    
-    fc = fctracker.fctracker(opt.database)
-    cells = fc._get_flowcells(opt.where)
-
-    print fctracker.recoverable_drive_report(cells)
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/make-library-tree b/scripts/make-library-tree
deleted file mode 100644 (file)
index 2ccbec6..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-#!/usr/bin/env python
-
-from ConfigParser import SafeConfigParser
-
-import logging
-import os
-from optparse import OptionParser
-import stat
-import shelve
-
-from htsworkflow.util import api
-from htsworkflow.pipelines.sequences import scan_for_sequences
-
-def build_flowcell_db(fcdb_filename, sequences, baseurl, apiid, apikey):
-    """
-    compare our flowcell database with our list of sequences and return
-    a fully populated database
-    """
-    fcdb = shelve.open(fcdb_filename)
-    libdb = {}
-    apidata = {'apiid': apiid, 'apikey': apikey}
-    for seq in sequences:
-        flowcell = seq.flowcell
-        flowcell_info = None
-
-        # get info about flowcell from server or shelf
-        if not fcdb.has_key(flowcell):
-            url = api.flowcell_url(baseurl, flowcell)
-            flowcell_info = api.retrieve_info(url, apidata)
-            if flowcell_info is not None:
-                fcdb[flowcell] = flowcell_info
-        else:
-            flowcell_info = fcdb[flowcell]
-
-        # make library id db
-        if flowcell_info is not None:
-            seq_library_id = flowcell_info['lane_set'][unicode(seq.lane)]['library_id']
-            libdb.setdefault(seq_library_id, []).append(seq)
-           
-    fcdb.sync()
-    return fcdb, libdb
-
-def carefully_make_hardlink(source, destination, dry_run=False):
-    """
-    Make a hard link, failing if a different link already exists
-
-    Checking to see if the link already exists and is
-    the same as the link we want to make.
-    If the link already exists and is different, throw an error.
-
-    If we didn't update anything return 0, if we did update
-    return 1.
-    """
-    logging.debug("CHECKING: %s -> %s", source, destination)
-
-    if not os.path.exists(source):
-        logging.warning("%s doesn't exist", source)
-        return 0
-
-    if os.path.exists(destination):
-        if os.path.samefile(source, destination):
-            logging.debug('SAME: %s -> %s' % (source, destination))
-            return 0
-        else:
-            logging.error('%s and %s are different files, skipping' % \
-                          (source, destination)) 
-            return 0
-    logging.debug('Linking: %s -> %s' % (source, destination))
-
-    # we would do something by this part
-    if dry_run: return 1
-
-    os.link(source, destination)
-    os.chmod(destination,
-             stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
-    return 1
-    
-def make_library_links(root, library_db, dry_run=False):
-    """
-    Make a tree of sequencer roots organized by library id
-
-    Root is the root of the library tree
-    library_db is a dictionary of SequenceFiles organized by library id
-    """
-    count = 0
-    root = os.path.abspath(root)
-    for lib_id, sequences in library_db.items():
-        target_dir = os.path.join(root, lib_id)
-        if not os.path.exists(target_dir):
-            logging.info("mkdir %s" % (target_dir,))
-            if not dry_run:
-                os.mkdir(target_dir)
-            
-        for s in sequences:
-            count += carefully_make_hardlink(s.path,
-                                             s.make_target_name(target_dir),
-                                             dry_run=dry_run)
-    return count
-
-def configure_logging(opts):
-    # setup logging
-    level = logging.WARN
-    if opts.verbose:
-        level = logging.INFO
-    if opts.debug:
-        level = logging.DEBUG
-    logging.basicConfig(level=level)
-    
-
-def configure_opts(opts):
-    """
-    Load in options from config file
-    """
-    SECTION_NAME = 'sequence_archive'
-    ARCHIVE_OPT = 'sequence_archive'
-    CACHE_OPT = 'cache'
-    HOST_OPT = 'host'
-    APIID_OPT = 'apiid'
-    APIKEY_OPT = 'apikey'
-
-    # figure out what config file to read
-    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
-                   '/etc/htsworkflow.ini']
-    if opts.config is not None:
-        config_path = [opts.config]
-    # parse options from config file
-    config_file = SafeConfigParser()
-    config_file.read(config_path)
-
-    # load defaults from config file if not overriden by the command line
-    if opts.cache is None:
-        if config_file.has_option(SECTION_NAME, CACHE_OPT):
-            opts.cache = config_file.get(FRONTEND_NAME, CACHE_OPT)
-        else:
-            opts.cache = os.path.expanduser('~/.flowcelldb.shelve')
-
-    if opts.sequence_archive is None and \
-       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
-        opts.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
-        opts.sequence_archive = os.path.expanduser(opts.sequence_archive)
-
-    opts.sequence_archive = os.path.abspath(opts.sequence_archive)
-    opts.library_tree = os.path.join(opts.sequence_archive, 'libraries')
-    opts.flowcells = os.path.join(opts.sequence_archive, 'flowcells')
-    opts.srfs = os.path.join(opts.sequence_archive, 'srfs')
-
-    if opts.host is None and config_file.has_option(SECTION_NAME, HOST_OPT):
-        opts.host = config_file.get(SECTION_NAME, HOST_OPT)
-
-    if opts.apiid is None and config_file.has_option(SECTION_NAME, APIID_OPT):
-        opts.apiid = config_file.get(SECTION_NAME, APIID_OPT)
-
-    if opts.apikey is None and config_file.has_option(SECTION_NAME, APIKEY_OPT):
-        opts.apikey = config_file.get(SECTION_NAME, APIKEY_OPT)
-      
-    return opts
-
-def make_parser():
-    """
-    Make parser
-    """
-    parser = OptionParser()
-    parser.add_option('-c', '--config', default=None,
-                      help='path to a configuration file containing a '
-                           'sequence archive section')
-    parser.add_option('--cache', default=None,
-                      help="default flowcell cache")
-    
-    parser.add_option('--host', default=None,
-                      help="specify http://host for quering flowcell information")
-    parser.add_option('--apiid', default=None,
-                      help="API ID to use when retriving information")
-    parser.add_option("--apikey", default=None,
-                      help="API Key for when retriving information")
-    
-    parser.add_option('-a', '--sequence-archive', default=None,
-                      help='path to where the sequence archive lives')
-
-    parser.add_option('-v', '--verbose', action='store_true', default=False,
-                      help='be more verbose')
-    parser.add_option('-d', '--debug', action='store_true', default=False,
-                      help='report everything')
-             
-    parser.add_option("--dry-run", dest="dry_run", action="store_true",
-                      default=False,
-                      help="Don't modify the filesystem")
-    return parser
-
-def main(cmdline=None):
-    parser = make_parser()
-    opts, args = parser.parse_args(cmdline)
-
-    configure_logging(opts)
-    opts = configure_opts(opts)
-  
-    # complain if critical things are missing
-    if opts.cache is None:
-       parser.error('Need location of htsworkflow frontend database')
-
-    if opts.sequence_archive is None:
-       parser.error('Need the root path for the sequence archive')
-
-    seq_dirs = [ opts.flowcells, opts.srfs ]
-    if len(args) > 0:
-        seq_dirs = [os.path.abspath(f) for f in args]
-    
-    seqs = scan_for_sequences(seq_dirs)
-    fcdb, libdb = build_flowcell_db(opts.cache, seqs, opts.host, opts.apiid, opts.apikey)
-    updates = make_library_links(opts.library_tree, libdb, dry_run=opts.dry_run)
-    
-    logging.warn("%s flowcells in database" % (len(fcdb),))
-    logging.warn("found %s sequence files" % (len(seqs),))
-    logging.warn("%s libraries being checked" % (len(libdb),))
-    logging.warn("%s sequence files were linked" % (updates,))
-    
-    return 0
-    
-if __name__ == "__main__":
-    main()
diff --git a/scripts/makebed b/scripts/makebed
deleted file mode 100755 (executable)
index 577b868..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/python
-import optparse
-import sys
-import os
-
-from htsworkflow.util.opener import autoopen
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
-
-def make_parser():
-  parser = optparse.OptionParser()
-  parser.add_option('-e', '--eland', dest='inname',
-                    help='specify input eland filename')
-  parser.add_option('-b', '--bed', dest='outname',
-                    help='specify output befilename')
-  parser.add_option('-n', '--name', dest='name',
-                    help='specify the track (short) name.',
-                    default=None)
-  parser.add_option('-d', '--description', dest='description',
-                    help='specify the track description',
-                    default=None)
-  parser.add_option('--chromosome', dest='prefix',
-                    help='Set the chromosome prefix name. defaults to "chr"',
-                    default='chr')
-  parser.add_option("--database", dest='database',
-                    help="specify location of fctracker database",
-                    default=None)
-  parser.add_option("--flowcell", dest='flowcell',
-                    help="compute name and description from database using flowcell id",
-                    default=None)
-  parser.add_option("--lane", dest='lane',
-                    help='specify which lane to use when retrieving description from database',
-                    default=None)
-
-  multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')
-
-  multi.add_option('-m', '--multi', action='store_true',
-                    help='Enable parsing multi-read eland files',
-                    default=False)
-  multi.add_option('--reads', type='int',
-                   help='limit reporting multi reads to this many reads'
-                        '(most usefully --reads=1 will turn a multi-read '
-                        'file into a single read file)',
-                   default=255)
-  parser.add_option_group(multi)
-
-  return parser
-
-def main(command_line=None):
-  instream = None
-  outstream = None
-
-  if command_line is None:
-    command_line = sys.argv[1:]
-
-  parser = make_parser()
-  (options, args) = parser.parse_args(command_line)
-
-  if options.inname is None:
-    parser.error("Need eland input file name")
-    return 1
-
-  if options.inname == '-':
-    instream = sys.stdin
-  elif os.path.exists(options.inname):
-    instream = autoopen(options.inname, 'r')
-  else:
-    parser.error('%s was not found' % (options.inname))
-    return 1
-
-  # figure out name for output file
-  if options.outname is None:
-      # if outname wasn't defined, and we're reading from stdout
-      if instream is sys.stdin:
-          # write to stdout
-          outstream = sys.stdout
-      else:
-          # if there's a name write to name.bed
-          options.outname = os.path.splitext(options.inname)[0]+'.bed'
-          print >>sys.stderr, "defaulting to outputname", options.outname
-  elif options.outname == '-':
-      outstream = sys.stdout
-
-  if outstream is None:
-      if os.path.exists(options.outname):
-          parser.error("not overwriting %s" % (options.outname))
-          return 1
-      else:
-          outstream = open(options.outname, 'w')
-
-  if options.flowcell is not None and options.lane is not None:
-    # get our name/description out of the database
-    name, description = make_description(
-                           options.database, options.flowcell, options.lane
-                        )
-  else:
-    name = options.name
-    description = options.description
-
-  if options.multi:
-    make_bed_from_multi_eland_stream(instream, outstream, 
-                                     name, description, 
-                                     options.prefix,
-                                     options.reads)
-
-  else:
-    make_bed_from_eland_stream(instream, outstream, 
-                               name, description, 
-                               options.prefix)
-  return 0
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
-
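
For reference, the conversion makebed drove can also be reproduced from Python
with the functions it imported; a minimal sketch, where the input and output
filenames are hypothetical examples:

    # Minimal sketch using only functions makebed imported.
    from htsworkflow.util.opener import autoopen
    from htsworkflow.util.makebed import make_bed_from_eland_stream

    instream = autoopen('s_1_eland_result.txt.bz2', 'r')  # hypothetical input
    outstream = open('s_1_eland_result.bed', 'w')         # hypothetical output
    make_bed_from_eland_stream(instream, outstream,
                               'lane1', 'lane 1 reads',   # track name, description
                               'chr')                     # chromosome prefix
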
diff --git a/scripts/mark_archived_data b/scripts/mark_archived_data
deleted file mode 100755 (executable)
index 288ec1a..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python
-
-from htsworkflow.util.hdquery import get_hd_serial_num
-from htsworkflow.frontend import settings
-
-from optparse import OptionParser
-import os
-import re
-import sys
-import urllib2
-import urlparse
-
-runfolder_pattern = re.compile(r'[0-9]{6}_[-A-Za-z\d]+_\d+_(?P<flowcell>[A-Z\d]+)\.tgz')
-
-def extract_flowcell(runfolder_name):
-    path, basename = os.path.split(runfolder_name)
-    match = runfolder_pattern.match(basename)
-    if match is not None:
-        return match.group('flowcell')
-    else:
-        return None
-    
-def construct_parser():
-    """
-    """
-    msg = "usage: %prog [-d </dev/sdX> | -s <serial_number] [-f <flowcell>] [archived dirs]"
-    parser = OptionParser()
-    parser.add_option('-u', '--url', default=None,
-                      help="Alternate url for marking archived flowcells")
-    parser.add_option("-f", "--flowcell",  type="string", help="flowcell being archived")
-    parser.add_option("-d", "--device", type="string",
-                      help="device flowcell is being archived to")
-    parser.add_option("-s", "--serial", type="string", help="serial num. of archive device")
-    parser.add_option("-v", "--verbose", action="store_true", default=False)
-    
-    return parser
-
-
-def update_db(root_url, flowcells, serial, debug=False):
-    """
-    Creates link between flowcell and storage device over http
-    """
-    for fc in flowcells:
-        url = urlparse.urljoin(root_url, '%s/%s/' % (fc, serial))
-        
-        req = urllib2.Request(url)
-        try:
-            response = urllib2.urlopen(req)
-        except urllib2.HTTPError, e:  # .code and .read() only exist on HTTPError
-            print 'ERROR - HTTP OUTPUT (Return Code: %s); use -v/--verbose for more details.' % (e.code)
-            if debug:
-                print e.read()
-            sys.exit(e.code)
-        
-        print "DB Update of %s & %s succeeded" % (fc, serial)
-        print response.read()
-    
-
-def process_args(parser):
-    """
-    returns flowcell and serial#
-    """
-    options, args = parser.parse_args()
-    
-    # Only provide device or serial
-    if options.device is not None and options.serial is not None:
-        parser.error("Please provide only --device or --serial.\n"\
-                     "The serial number is extracted automatically if the"\
-                     "device is provided.")
-
-    # allow user to override the default destination URL
-    if options.url is not None:
-        root_url = options.url
-    else:
-        root_url = settings.LINK_FLOWCELL_STORAGE_DEVICE_URL
-
-    # if device and serial missing:
-    if options.device is None and options.serial is None:
-        parser.error('One of --device or --serial is required')
-
-    flowcells = []
-    
-    # sanitize args    
-    for runfolder in args:
-        flowcell_id = extract_flowcell(runfolder)
-        if flowcell_id is None:
-            parser.error('archive names must look like YYMMDD_MACHINE_RUN_FLOWCELLID.tgz\n'\
-                         '(got %s)' % (runfolder,))
-        else:
-            flowcells.append(flowcell_id)
-            
-    if options.flowcell is not None:
-        flowcells.append(options.flowcell)
-        
-    if len(flowcells) == 0:
-        parser.error('please specify a --flowcell or a list of runfolder archives\n'\
-                     'for archival. I need something to do.')
-
-    # Update db records
-    if options.device is not None:
-        serial = get_hd_serial_num(options.device)
-        update_db(root_url, flowcells, serial=serial, debug=options.verbose)
-    elif options.serial is not None:
-        update_db(root_url, flowcells, serial=options.serial, debug=options.verbose)
-    else:
-        msg ="FATAL should not happen error occured; i.e. the best kind!"
-        raise ValueError, msg
-    
-    
-
-def main():
-    """
-    """
-    parser = construct_parser()
-    process_args(parser)
-    
-    #print "Database Updated."
-    sys.exit(0)
-
-if __name__ == '__main__':
-    main()
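
For reference, runfolder_pattern encodes the YYMMDD_MACHINE_RUN_FLOWCELLID.tgz
naming that extract_flowcell expects; a small sketch, using a hypothetical
archive name:

    import re

    # Same pattern as in mark_archived_data above.
    runfolder_pattern = re.compile(r'[0-9]{6}_[-A-Za-z\d]+_\d+_(?P<flowcell>[A-Z\d]+)\.tgz')

    # '090104_HWI-EAS229_0099_30DY0AAXX.tgz' is a hypothetical archive name.
    match = runfolder_pattern.match('090104_HWI-EAS229_0099_30DY0AAXX.tgz')
    if match is not None:
        print match.group('flowcell')  # prints: 30DY0AAXX
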
diff --git a/scripts/qseq2fastq b/scripts/qseq2fastq
deleted file mode 100755 (executable)
index 555e668..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/python
-import sys
-from htsworkflow.pipelines.qseq2fastq import main
-
-if __name__ == "__main__":    
-    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/retrieve_config b/scripts/retrieve_config
deleted file mode 100755 (executable)
index e4fdff1..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-import logging
-import sys
-from htsworkflow.pipelines.retrieve_config import *
-from htsworkflow.pipelines import retrieve_config
-
-#Turn on built-in command-line parsing.
-retrieve_config.DISABLE_CMDLINE = False
-
-def main(argv=None):
-  if argv is None:
-    argv = sys.argv
-    
-  #Display help if no args are presented
-  options = getCombinedOptions(argv)
-
-  if options.verbose:
-    logging.basicConfig(level=logging.DEBUG)
-  else:
-    logging.basicConfig(level=logging.INFO)
-  
-  msg_list = ['ERROR MESSAGES:']
-  if options.flowcell is None:
-    msg_list.append("  Flow cell argument required. -f <flowcell> or --flowcell=<flowcell>")
-    
-  if options.url is None:
-    msg_list.append("  URL argument required (-u <url> or --url=<url>), or entry\n" \
-                    "    in /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
-  if options.genome_dir is None:
-    msg_list.append("  genome_dir argument required (-g <genome_dir> or \n" \
-                    "    --genome_dir=<genome_dir>, or entry in \n" \
-                    "    /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf")
-    
-  if len(msg_list) > 1:
-    print '\n'.join(msg_list)
-    return 1
-  
-  saveConfigFile(options)
-  
-  return 0
-  
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
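
For reference, the option checks above define the minimal invocation; a sketch,
where the flowcell id, URL, and genome directory are hypothetical:

    # Hypothetical values; any of them can instead come from
    # /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf.
    main(['-f', 'FC12345',
          '-u', 'http://frontend.example.org/',
          '-g', '/genomes'])
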
diff --git a/scripts/runfolder b/scripts/runfolder
deleted file mode 100644 (file)
index 145fd7a..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/env python
-"""
-runfolder.py can generate an XML file capturing all the 'interesting'
-parameters from a finished pipeline run (using the -a option). The
-information currently being captured includes:
-
-  * Flowcell ID
-  * run dates
-  * start/stop cycle numbers
-  * Firecrest, Bustard, and GERALD version numbers
-  * ELAND analysis types, and everything in the ELAND configuration file.
-  * cluster numbers and other values from the Summary.htm 
-    LaneSpecificParameters table. 
-  * How many reads mapped to a genome from an eland file
-
-The ELAND "mapped reads" counter will also check for eland squashed file
-that were symlinked from another directory. This is so I can track how 
-many reads landed on the genome of interest and on the spike ins. 
-
-Basically my subdirectories look something like:
-
-genomes/hg18
-genomes/hg18/chr*.2bpb <- files for hg18 genome
-genomes/hg18/chr*.vld  
-genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins
-genomes/spikein 
-
-runfolder.py can also spit out a simple summary report (-s option)
-that contains the per-lane post-filter cluster numbers and the mapped
-read counts. (The report isn't currently very pretty.)
-"""
-from glob import glob
-import logging
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines import runfolder
-from htsworkflow.pipelines.runfolder import ElementTree
-        
-def make_parser():
-    usage = 'usage: %prog [options] runfolder_root_dir'
-    parser = optparse.OptionParser(usage)
-
-    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
-                      default=False,
-                      help='turn on verbose mode')
-    parser.add_option('--dry-run', action='store_true', default=False,
-                      help="Don't delete anything (in clean mode)")
-
-    commands = optparse.OptionGroup(parser, 'Commands')
-
-    commands.add_option('-s', '--summary', dest='summary', action='store_true',
-                        default=False,
-                        help='produce summary report')
-    commands.add_option('-a', '--archive', dest='archive', action='store_true',
-                        default=False,
-                        help='generate run configuration archive')
-    commands.add_option('--extract-results', action='store_true',
-           default=False,
-           help='create run-xml summary, compress the eland result files, build srf files and '
-                'copy all that and the Summary.htm file into an archival directory.')
-    commands.add_option('-c', '--clean', action='store_true', default=False,
-                        help='Clean runfolder, preparing it for long-term storage')
-    parser.add_option_group(commands)
-
-    parser.add_option('-j', '--max-jobs', default=1,
-                      help='specify the maximum number of processes to run '
-                           '(used in extract-results)')
-    parser.add_option('-o', '--output-dir', default=None,
-           help="specify the default output directory for extract results")
-    parser.add_option('--run-xml', dest='run_xml',
-           default=None,
-           help='specify a run_<FlowCell>.xml file for summary reports')
-    parser.add_option('--site', default=None,
-                      help='create srf files tagged with the provided site name')
-    parser.add_option('-u', '--use-run', dest='use_run', default=None,
-                      help='Specify which run to use instead of autoscanning '
-                           'the runfolder. You do this by providing the final '
-                           'GERALD directory, and it assumes the parent '
-                           'directories are the bustard and image processing '
-                           'directories.')
-
-    return parser
-
-def main(cmdlist=None):
-    parser = make_parser()
-    opt, args = parser.parse_args(cmdlist)
-
-    logging.basicConfig()
-    if opt.verbose:
-        root_log = logging.getLogger()
-        root_log.setLevel(logging.INFO)
-
-    logging.info('Starting htsworkflow illumina runfolder processing tool.')
-    runs = []
-    if opt.run_xml:
-        # handle ~ shortcut
-        opt.run_xml = os.path.expanduser(opt.run_xml)
-        tree = ElementTree.parse(opt.run_xml).getroot()
-        runs.append(runfolder.PipelineRun(xml=tree))
-
-    # look for manually specified run
-    if opt.use_run is not None:
-        specific_run = runfolder.get_specific_run(opt.use_run)
-        if specific_run is not None:
-            runs.append(specific_run)
-        else:
-            logging.warn("Couldn't find a run in %s" % (opt.use_run,))
-
-    # scan runfolders for runs
-    for run_pattern in args:
-        # expand args on our own if needed
-        for run_dir in glob(run_pattern):
-            runs.extend(runfolder.get_runs(run_dir))
-
-    if len(runs) > 0:
-        command_run = False
-        if opt.summary:
-            print runfolder.summary_report(runs)
-            command_run = True
-        if opt.archive:
-            runfolder.extract_run_parameters(runs)
-            command_run = True
-        if opt.extract_results:
-            if opt.dry_run:
-                parser.error("Dry-run is not supported for extract-results")
-            runfolder.extract_results(runs, 
-                                      opt.output_dir, 
-                                      opt.site, 
-                                      opt.max_jobs)
-            command_run = True
-        if opt.clean:
-            runfolder.clean_runs(runs, opt.dry_run)
-            command_run = True
-
-        if not command_run:
-            print "You need to specify a command."+os.linesep
-            parser.print_help()
-    else:
-        print "You need to specify some run folders to process..."+os.linesep
-        parser.print_help()
-
-    return 0
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
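
For reference, the scan-and-summarize path of this script can be sketched with
the two functions it already uses; the runfolder path below is hypothetical:

    from htsworkflow.pipelines import runfolder

    # hypothetical runfolder location
    runs = runfolder.get_runs('/tmp/090104_HWI-EAS229_0099_30DY0AAXX')
    if len(runs) > 0:
        print runfolder.summary_report(runs)  # what the -s option prints
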
diff --git a/scripts/runner b/scripts/runner
deleted file mode 100644 (file)
index 560299f..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.runner import main
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
diff --git a/scripts/spoolwatcher b/scripts/spoolwatcher
deleted file mode 100644 (file)
index b2f833e..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.spoolwatcher import main
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/srf b/scripts/srf
deleted file mode 100755 (executable)
index bcf835d..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-#!/usr/bin/python
-
-import logging
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines import runfolder
-from htsworkflow.pipelines.srf import make_srf_commands, make_qseq_commands, \
-                                      run_commands, pathname_to_run_name
-from htsworkflow.pipelines.srf import ILLUMINA2SRF10, ILLUMINA2SRF11, SOLEXA2SRF
-
-def make_parser():
-  usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
-
-  parser = optparse.OptionParser(usage)
-  parser.add_option('--dry-run', action='store_true',
-                    help='print what would be done',
-                    default=False)
-
-  parser.add_option('-d', '--dest-dir', dest='dest_dir',
-                    help='location to write srf files to',
-                    default='.')
-  parser.add_option('-s', '--site',
-                    help='site name',
-                    default='Individual')
-  parser.add_option('-l', '--lanes', dest='lanes', action="append",
-         default=[],
-         help='comma separated list of lanes to add to srf'
-  )
-  parser.add_option('-j', '--jobs', default=1, type='int',
-                    help='how many jobs to run simultaneously')
-  parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
-                    help='Which class of srf file should we attempt to create\n'
-                         '0 = Solexa pipeline 0.2.6 - 0.3\n'
-                         '1 = illumina pipeline 1.0\n'
-                         '2 = illumina pipeline 1.1rc1 and later \n')
-                     
-  parser.add_option('-v', '--verbose', dest='verbose',
-                    default=False, action='store_true',
-                    help='report more about internals (INFO)')
-  parser.add_option('--debug', dest='debug',
-                    default=False, action='store_true',
-                    help='report even more about internals (DEBUG)')
-  return parser
-
-def parse_lane_arg(parser, lane_arg):
-    """
-    Convert a comma separated list of lane ids to a list of integers
-    """
-    lanes = []
-    for lane in lane_arg.split(','):
-        try:
-            lane = int(lane)
-            if lane < 1 or lane > 8:
-                parser.error('Lanes must be in range [1..8]')
-            lanes.append(lane)
-        except ValueError:
-            parser.error('Lane selections must be integers')
-    return lanes
-
-def main(cmdline=None):
-    parser = make_parser()
-    opts, args = parser.parse_args(cmdline)
-   
-    if opts.debug: 
-        logging.basicConfig(level=logging.DEBUG)
-    elif opts.verbose:
-        logging.basicConfig(level=logging.INFO)
-    else:
-        logging.basicConfig(level=logging.WARNING)
-
-    if len(args) == 0:
-        parser.error('need runfolder arguments')
-
-    # parse lane arguments
-    lanes_list = []
-    if len(opts.lanes) == 0:
-        lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
-    elif len(opts.lanes) == len(args):
-        for lane_arg in opts.lanes:
-            lanes_list.append(parse_lane_arg(parser, lane_arg))
-    else:
-        parser.error(
-          "Number of lane arguments must match number of runfolders"
-        )
-
-    make_commands = make_qseq_commands
-    # build list of commands
-    cmds = {}
-    for runfolder_path, lanes in zip(args, lanes_list):
-        # normalize paths, either relative to home dirs or current dir
-        runfolder_path = os.path.abspath(runfolder_path)
-        run_name = pathname_to_run_name(runfolder_path)
-        # are there any bustard directories?
-        runs = runfolder.get_runs(runfolder_path)
-        # give up if there is anything other than exactly one run
-        if len(runs) > 1:
-          print 'ERROR: Too many run directories in %s' %(runfolder_path,)
-          return 1
-        elif len(runs) == 1:
-          bustard_dir = runs[0].bustard.pathname
-          cmds[bustard_dir] = make_commands(run_name,
-                                            bustard_dir,
-                                            lanes,
-                                            opts.site,
-                                            opts.dest_dir,
-                                            opts.runfolder_version)
-        else:
-          print "ERROR: Couldn't find a bustard directory in", runfolder_path
-          return 1
-
-    if not opts.dry_run:
-      for cwd, cmd_list in cmds.items():
-        run_commands(cwd, cmd_list, opts.jobs)
-    else:
-      for cwd, cmd_list in cmds.items():
-        print cwd
-        print cmd_list
-        print 'jobs: ', opts.jobs
-
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
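
For reference, parse_lane_arg turns each -l selection into a list of lane
numbers; a quick sketch, where the '1,3,8' selection is a hypothetical example:

    parser = make_parser()
    print parse_lane_arg(parser, '1,3,8')  # -> [1, 3, 8]
    # Non-integer values or lanes outside [1..8] abort via parser.error().
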
diff --git a/scripts/srf2fastq b/scripts/srf2fastq
deleted file mode 100755 (executable)
index 0361e43..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/python
-import sys
-from htsworkflow.pipelines.srf2fastq import main
-
-if __name__ == "__main__":    
-    sys.exit(main(sys.argv[1:]))
index a226a4fb886904251880b9364243d3450c9cb270..f074100418f83647cc60481d8f1af4493763b608 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -21,17 +21,18 @@ setup(
               "htsworkflow.frontend.samples",
               ],
     scripts=[
-        'scripts/copier',
-        'scripts/library.py',
-        'scripts/makebed',
-        'scripts/make-library-tree',
-        'scripts/mark_archived_data',
-        'scripts/qseq2fastq',
-        'scripts/retrieve_config',
-        'scripts/runfolder',
-        'scripts/runner',
-        'scripts/spoolwatcher', 
-        'scripts/srf',
-        'scripts/srf2fastq'
+        "scripts/htsw-copier",
+        "scripts/htsw-eland2bed",
+        "scripts/htsw-elandseq",
+        "scripts/htsw-gerald2bed",
+        "scripts/htsw-get-config",
+        "scripts/htsw-qseq2fastq",
+        "scripts/htsw-record-runfolder",
+        "scripts/htsw-runfolder",
+        "scripts/htsw-runner",
+        "scripts/htsw-spoolwatcher",
+        "scripts/htsw-srf",
+        "scripts/htsw-srf2fastq",
+        "scripts/htsw-update-archive",
         ],
     )
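
For reference, an alternative to the scripts= list that keeps the same htsw-
command names is a setuptools entry_points block; a hedged sketch, assuming
setuptools is in use and that each command's main() lives where the deleted
wrapper scripts imported it from:

    # Hypothetical sketch only; module paths mirror the imports in the
    # deleted wrapper scripts, and each main() must tolerate being called
    # without arguments (falling back to sys.argv).
    entry_points={
        'console_scripts': [
            'htsw-qseq2fastq = htsworkflow.pipelines.qseq2fastq:main',
            'htsw-srf2fastq = htsworkflow.pipelines.srf2fastq:main',
            'htsw-runner = htsworkflow.automation.runner:main',
            'htsw-spoolwatcher = htsworkflow.automation.spoolwatcher:main',
        ],
    },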