Remove all Caltech pipeline-specific code
author    Diane Trout <diane@caltech.edu>
          Sat, 14 Feb 2009 00:33:23 +0000 (00:33 +0000)
committer Diane Trout <diane@caltech.edu>
          Sat, 14 Feb 2009 00:33:23 +0000 (00:33 +0000)
17 files changed:
htsworkflow/automation/__init__.py [deleted file]
htsworkflow/automation/copier.py [deleted file]
htsworkflow/automation/runner.py [deleted file]
htsworkflow/automation/spoolwatcher.py [deleted file]
htsworkflow/automation/test/test_runner.py [deleted file]
htsworkflow/util/fctracker.py [deleted file]
scripts/copier [deleted file]
scripts/elandseq [deleted file]
scripts/gerald2bed.py [deleted file]
scripts/library.py [deleted file]
scripts/make-library-tree [deleted file]
scripts/makebed [deleted file]
scripts/rerun_eland.py [deleted file]
scripts/runner [deleted file]
scripts/spoolwatcher [deleted file]
test/test_copier.py [deleted file]
test/tree.py [deleted file]

diff --git a/htsworkflow/automation/__init__.py b/htsworkflow/automation/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/htsworkflow/automation/copier.py b/htsworkflow/automation/copier.py
deleted file mode 100644 (file)
index 572cfb3..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-import ConfigParser
-import copy
-import logging
-import logging.handlers
-import os
-import re
-import subprocess
-import sys
-import time
-import traceback
-
-from benderjab import bot, rpc
-
-def runfolder_validate(fname):
-    """
-    Return True if fname looks like a runfolder name
-    """
-    if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname):
-        return True
-    else:
-        return False
-    
-class rsync(object):
-  def __init__(self, source, dest, pwfile):
-    self.pwfile = os.path.expanduser(pwfile)
-    self.cmd = ['/usr/bin/rsync', ]
-    self.cmd.append('--password-file=%s' % (self.pwfile))
-    self.source_base = source
-    self.dest_base = dest
-    self.processes = {}
-    self.exit_code = None
-
-  def list(self):
-    """Get a directory listing"""
-    args = copy.copy(self.cmd)
-    args.append(self.source_base)
-
-    logging.debug("Rsync cmd:" + " ".join(args))
-    short_process = subprocess.Popen(args, stdout=subprocess.PIPE)
-    return self.list_filter(short_process.stdout)
-
-  def list_filter(self, lines):
-    """
-    parse rsync directory listing
-    """
-    dirs_to_copy = []
-    direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ]
-    for permissions, size, filedate, filetime, filename in direntries:
-      if permissions[0] == 'd':
-        # hey it's a directory, the first step to being something we want to 
-        # copy
-        if re.match("[0-9]{6}", filename):
-          # it starts with something that looks like a 6 digit date
-          # aka good enough for me
-          dirs_to_copy.append(filename)
-    return dirs_to_copy
-
-  def create_copy_process(self, dirname):
-    args = copy.copy(self.cmd)
-    # we want to copy everything
-    args.append('-rlt') 
-    # from here
-    args.append(os.path.join(self.source_base, dirname))
-    # to here
-    args.append(self.dest_base)
-    logging.debug("Rsync cmd:" + " ".join(args))
-    return subprocess.Popen(args)
-  def copy(self):
-    """
-    copy any interesting looking directories over
-    return list of items that we started copying.
-    """
-    # clean up any lingering non-running processes
-    self.poll()
-    
-    # what's available to copy?
-    dirs_to_copy = self.list()
-    
-    # lets start copying
-    started = []
-    for d in dirs_to_copy:
-      process = self.processes.get(d, None)
-      
-      if process is None:
-        # we don't have a process, so make one
-        logging.info("rsyncing %s" % (d))
-        self.processes[d] = self.create_copy_process(d)
-        started.append(d)           
-    return started
-      
-  def poll(self):
-      """
-      check currently running processes to see if they're done
-      
-      return path roots that have finished.
-      """
-      for dir_key, proc_value in self.processes.items():
-          retcode = proc_value.poll()
-          if retcode is None:
-              # process hasn't finished yet
-              pass
-          elif retcode == 0:
-              logging.info("finished rsyncing %s, exitcode %d" %( dir_key, retcode))
-              del self.processes[dir_key]
-          else:
-              logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode))
-              
-  def __len__(self):
-      """
-      Return how many active rsync processes we currently have
-      
-      Call poll first to close finished processes.
-      """
-      return len(self.processes)
-  
-  def keys(self):
-      """
-      Return list of current run folder names
-      """
-      return self.processes.keys()
-
-class CopierBot(rpc.XmlRpcBot):
-    def __init__(self, section=None, configfile=None):
-        #if configfile is None:
-        #    configfile = '~/.htsworkflow'
-            
-        super(CopierBot, self).__init__(section, configfile)
-        
-        # options for rsync command
-        self.cfg['rsync_password_file'] = None
-        self.cfg['rsync_source'] = None
-        self.cfg['rsync_destination'] = None 
-        
-        # options for reporting we're done 
-        self.cfg['notify_users'] = None
-        self.cfg['notify_runner'] = None
-                            
-        self.pending = []
-        self.rsync = None
-        self.notify_users = None
-        self.notify_runner = None
-        
-        self.register_function(self.startCopy)
-        self.register_function(self.sequencingFinished)
-        self.eventTasks.append(self.update)
-        
-    def read_config(self, section=None, configfile=None):
-        """
-        read the config file
-        """
-        super(CopierBot, self).read_config(section, configfile)
-        
-        password = self._check_required_option('rsync_password_file')
-        source = self._check_required_option('rsync_source')
-        destination = self._check_required_option('rsync_destination')
-        self.rsync = rsync(source, destination, password)
-        
-        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
-        try:
-          self.notify_runner = \
-             self._parse_user_list(self.cfg['notify_runner'],
-                                   require_resource=True)
-        except bot.JIDMissingResource:
-            msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.critical(msg)
-            raise bot.JIDMissingResource(msg)
-
-    def startCopy(self, *args):
-        """
-        start our copy
-        """
-        logging.info("starting copy scan")
-        started = self.rsync.copy()
-        logging.info("copying:" + " ".join(started)+".")
-        return started
-        
-    def sequencingFinished(self, runDir, *args):
-        """
-        The run was finished, if we're done copying, pass the message on        
-        """
-        # close any open processes
-        self.rsync.poll()
-        
-        # see if we're still copying
-        if runfolder_validate(runDir):
-            logging.info("recevied sequencing finshed for %s" % (runDir))
-            self.pending.append(runDir)
-            self.startCopy()
-            return "PENDING"
-        else:
-            errmsg = "received bad runfolder name (%s)" % (runDir)
-            logging.warning(errmsg)
-            # maybe I should use a different error message
-            raise RuntimeError(errmsg)
-    
-    def reportSequencingFinished(self, runDir):
-        """
-        Send the sequencingFinished message to the interested parties
-        """
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, 'Sequencing run %s finished' % (runDir))
-        if self.notify_runner is not None:
-            for r in self.notify_runner:
-                self.rpc_send(r, (runDir,), 'sequencingFinished')
-        logging.info("forwarding sequencingFinshed message for %s" % (runDir))
-        
-    def update(self, *args):
-        """
-        Update our current status.
-        Report if we've finished copying files.
-        """
-        self.rsync.poll()
-        # iterate over a copy since we remove finished items mid-loop
-        for p in list(self.pending):
-            if p not in self.rsync.keys():
-                self.reportSequencingFinished(p)
-                self.pending.remove(p)
-        
-    def _parser(self, msg, who):
-        """
-        Parse xmpp chat messages
-        """
-        help = u"I can [copy], or report current [status]"
-        if re.match(u"help", msg):
-            reply = help
-        elif re.match("copy", msg):            
-            started = self.startCopy()
-            reply = u"started copying " + ", ".join(started)
-        elif re.match(u"status", msg):
-            msg = [u"Currently %d rsync processes are running." % (len(self.rsync))]
-            for d in self.rsync.keys():
-              msg.append(u"  " + d)
-            reply = os.linesep.join(msg)
-        else:
-            reply = u"I didn't understand '%s'" % (unicode(msg))
-        return reply
-
-def main(args=None):
-    bot = CopierBot()
-    bot.main(args)
-    
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
-
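
The run folder naming rule the copier enforced is compact enough to exercise on
its own. A minimal sketch using the same regular expression (the sample folder
name matches the one used in the tests below):

    import re

    def runfolder_validate(fname):
        # a run folder is a six digit date, an underscore, then
        # machine/run identifiers
        return re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname) is not None

    print(runfolder_validate("080909_HWI-EAS229_0052_1234ABCD"))  # True
    print(runfolder_validate("junk"))                              # False
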
diff --git a/htsworkflow/automation/runner.py b/htsworkflow/automation/runner.py
deleted file mode 100644 (file)
index 45d4ffc..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-#!/usr/bin/env python
-from glob import glob
-import logging
-import os
-import re
-import sys
-import time
-import threading
-
-from benderjab import rpc
-
-from htsworkflow.pipelines.configure_run import *
-
-#s_fc = re.compile('FC[0-9]+')
-s_fc = re.compile('_[0-9a-zA-Z]*$')
-
-
-def _get_flowcell_from_rundir(run_dir):
-    """
-    Returns flowcell string based on run_dir.
-    Returns None and logs error if flowcell can't be found.
-    """
-    junk, dirname = os.path.split(run_dir)
-    mo = s_fc.search(dirname)
-    if not mo:
-        logging.error('RunDir 2 FlowCell error: %s' % (run_dir))
-        return None
-
-    return dirname[mo.start()+1:]
-    
-
-
-class Runner(rpc.XmlRpcBot):
-    """
-    Manage running pipeline jobs.
-    """    
-    def __init__(self, section=None, configfile=None):
-        #if configfile is None:
-        #    self.configfile = "~/.htsworkflow"
-        super(Runner, self).__init__(section, configfile)
-        
-        self.cfg['notify_users'] = None
-        self.cfg['genome_dir'] = None
-        self.cfg['base_analysis_dir'] = None
-
-        self.cfg['notify_postanalysis'] = None
-
-        self.conf_info_dict = {}
-        
-        self.register_function(self.sequencingFinished)
-        #self.eventTasks.append(self.update)
-
-    
-    def read_config(self, section=None, configfile=None):
-        super(Runner, self).read_config(section, configfile)
-
-        self.genome_dir = self._check_required_option('genome_dir')
-        self.base_analysis_dir = self._check_required_option('base_analysis_dir')
-
-        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
-        #FIXME: process notify_postpipeline cfg
-        
-    
-    def _parser(self, msg, who):
-        """
-        Parse xmpp chat messages
-        """
-        help = u"I can send [start] a run, or report [status]"
-        if re.match(u"help", msg):
-            reply = help
-        elif re.match("status", msg):
-            words = msg.split()
-            if len(words) == 2:
-                reply = self.getStatusReport(words[1])
-            else:
-                reply = u"Status available for: %s" \
-                        % (', '.join([k for k in self.conf_info_dict.keys()]))
-        elif re.match(u"start", msg):
-            words = msg.split()
-            if len(words) == 2:
-                self.sequencingFinished(words[1])
-                reply = u"starting run for %s" % (words[1])
-            else:
-                reply = u"need runfolder name"
-        elif re.match(u"path", msg):
-           reply = u"My path is: " + unicode(os.environ['PATH'])
-        else:
-            reply = u"I didn't understand '%s'" %(msg)
-
-        logging.debug("reply: " + str(reply))
-        return reply
-
-
-    def getStatusReport(self, fc_num):
-        """
-        Returns text status report for flow cell number 
-        """
-        if fc_num not in self.conf_info_dict:
-            return "No record of a %s run." % (fc_num)
-
-        status = self.conf_info_dict[fc_num].status
-
-        if status is None:
-            return "No status information for %s yet." \
-                   " Probably still in configure step. Try again later." % (fc_num)
-
-        output = status.statusReport()
-
-        return '\n'.join(output)
-    
-            
-    def sequencingFinished(self, run_dir):
-        """
-        Sequencing (and copying) is finished, time to start pipeline
-        """
-        logging.debug("received sequencing finished message")
-
-        # Setup config info object
-        ci = ConfigInfo()
-        ci.base_analysis_dir = self.base_analysis_dir
-        ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir)        
-
-        # get flowcell from run_dir name
-        flowcell = _get_flowcell_from_rundir(run_dir)
-
-        # Store ci object in dictionary
-        self.conf_info_dict[flowcell] = ci
-
-
-        # Launch the job in its own thread and return.
-        self.launchJob(run_dir, flowcell, ci)
-        return "started"
-        
-        
-    def pipelineFinished(self, run_dir):
-        # need to strip off self.watch_dir from rundir I suspect.
-        logging.info("pipeline finished in" + str(run_dir))
-        #pattern = self.watch_dir
-        #if pattern[-1] != os.path.sep:
-        #    pattern += os.path.sep
-        #stripped_run_dir = re.sub(pattern, "", run_dir)
-        #logging.debug("stripped to " + stripped_run_dir)
-
-        # Notify each user that the run has finished.
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, 'Pipeline run %s finished' % (run_dir))
-                
-        #if self.notify_runner is not None:
-        #    for r in self.notify_runner:
-        #        self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-
-    def reportMsg(self, msg):
-
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, msg)
-
-
-    def _runner(self, run_dir, flowcell, conf_info):
-
-        # retrieve config step
-        cfg_filepath = os.path.join(conf_info.analysis_dir,
-                                    'config-auto.txt')
-        status_retrieve_cfg = retrieve_config(conf_info,
-                                          flowcell,
-                                          cfg_filepath,
-                                          self.genome_dir)
-        if status_retrieve_cfg:
-            logging.info("Runner: Retrieve config: success")
-            self.reportMsg("Retrieve config (%s): success" % (run_dir))
-        else:
-            logging.error("Runner: Retrieve config: failed")
-            self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
-
-        
-        # configure step
-        if status_retrieve_cfg:
-            status = configure(conf_info)
-            if status:
-                logging.info("Runner: Configure: success")
-                self.reportMsg("Configure (%s): success" % (run_dir))
-                self.reportMsg(
-                    os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
-                )
-            else:
-                logging.error("Runner: Configure: failed")
-                self.reportMsg("Configure (%s): FAILED" % (run_dir))
-
-            #if successful, continue
-            if status:
-                # Setup status cmdline status monitor
-                #startCmdLineStatusMonitor(ci)
-                
-                # running step
-                print 'Running pipeline now!'
-                run_status = run_pipeline(conf_info)
-                if run_status is True:
-                    logging.info('Runner: Pipeline: success')
-                    self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
-                else:
-                    logging.info('Runner: Pipeline: failed')
-                    self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))
-
-
-    def launchJob(self, run_dir, flowcell, conf_info):
-        """
-        Starts up a thread for running the pipeline
-        """
-        t = threading.Thread(target=self._runner,
-                        args=[run_dir, flowcell, conf_info])
-        t.setDaemon(True)
-        t.start()
-        
-
-        
-def main(args=None):
-    bot = Runner()
-    return bot.main(args)
-    
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-    
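
The flowcell id that runner.py keys its status dictionary on is simply the
token after the last underscore of the run folder name. A standalone sketch of
that extraction, using the same regular expression:

    import os
    import re

    s_fc = re.compile('_[0-9a-zA-Z]*$')

    def get_flowcell_from_rundir(run_dir):
        # the flowcell id is the trailing "_<alnum>" chunk of the folder name
        dirname = os.path.split(run_dir)[1]
        mo = s_fc.search(dirname)
        if not mo:
            return None
        return dirname[mo.start() + 1:]

    print(get_flowcell_from_rundir('/tmp/080909_HWI-EAS229_0052_1234ABCD'))
    # 1234ABCD
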
diff --git a/htsworkflow/automation/spoolwatcher.py b/htsworkflow/automation/spoolwatcher.py
deleted file mode 100644 (file)
index 2a57535..0000000
+++ /dev/null
@@ -1,229 +0,0 @@
-#!/usr/bin/env python
-import logging
-import os
-import re
-import sys
-import time
-#import glob
-
-from htsworkflow.util import mount
-
-# this uses pyinotify
-import pyinotify
-from pyinotify import EventsCodes
-
-from benderjab import bot, rpc
-
-
-class WatcherEvents(object):
-    # two events need to be tracked
-    # one to send startCopy
-    # one to send OMG its broken
-    # OMG its broken needs to stop when we've seen enough
-    #  cycles
-    # this should be per runfolder. 
-    # read the xml files 
-    def __init__(self):
-        pass
-        
-
-class Handler(pyinotify.ProcessEvent):
-    def __init__(self, watchmanager, bot):
-        self.last_event_time = None
-        self.watchmanager = watchmanager
-        self.bot = bot
-
-    def process_IN_CREATE(self, event):
-        self.last_event_time = time.time()
-        msg = "Create: %s" %  os.path.join(event.path, event.name)
-        if event.name.lower() == "run.completed":
-            try:
-                self.bot.sequencingFinished(event.path)
-            except IOError, e:
-                logging.error("Couldn't send sequencingFinished")
-        logging.debug(msg)
-
-    def process_IN_DELETE(self, event):
-        logging.debug("Remove: %s" %  os.path.join(event.path, event.name))
-
-    def process_IN_UNMOUNT(self, event):
-        pathname = os.path.join(event.path, event.name)
-        logging.debug("IN_UNMOUNT: %s" % (pathname,))
-        self.bot.unmount_watch()
-
-class SpoolWatcher(rpc.XmlRpcBot):
-    """
-    Watch a directory and send a message when another process is done writing.
-    
-    This monitors a directory tree using inotify (linux specific) and
-    after some files having been written will send a message after <timeout>
-    seconds of no file writing.
-    
-    (Basically, when the Solexa machine finishes dumping a round of data,
-    this will hopefully send a message saying "hey, there's data available.")
-    
-    """
-    # these params need to be in the config file
-    # I wonder where I should put the documentation
-    #:Parameters:
-    #    `watchdir` - which directory tree to monitor for modifications
-    #    `profile` - specify which .htsworkflow profile to use
-    #    `write_timeout` - how many seconds to wait for writes to finish to
-    #                      the spool
-    #    `notify_timeout` - how often to timeout from notify
-    
-    def __init__(self, section=None, configfile=None):
-        #if configfile is None:
-        #    self.configfile = "~/.htsworkflow"
-        super(SpoolWatcher, self).__init__(section, configfile)
-        
-        self.cfg['watchdir'] = None
-        self.cfg['write_timeout'] = 10
-        self.cfg['notify_users'] = None
-        self.cfg['notify_runner'] = None
-        
-        self.notify_timeout = 0.001
-        self.wm = pyinotify.WatchManager()
-        self.handler = Handler(self.wm, self)
-        self.notifier = pyinotify.Notifier(self.wm, self.handler)
-        self.wdd = None
-        self.mount_point = None
-        self.mounted = True
-        
-        self.notify_users = None
-        self.notify_runner = None
-        
-        self.eventTasks.append(self.process_notify)
-
-    def read_config(self, section=None, configfile=None):
-        super(SpoolWatcher, self).read_config(section, configfile)
-        
-        self.watch_dir = self._check_required_option('watchdir')
-        self.write_timeout = int(self.cfg['write_timeout'])
-        
-        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
-        try:
-          self.notify_runner = \
-             self._parse_user_list(self.cfg['notify_runner'],
-                                   require_resource=True)
-        except bot.JIDMissingResource:
-            msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.critical(msg)
-            raise bot.JIDMissingResource(msg)
-
-    def add_watch(self, watchdir=None):
-        """
-        start watching watchdir or self.watch_dir
-        we're currently limited to watching one directory tree.
-        """
-        # the one tree limit is mostly because self.wdd is a single item
-        # but managing it as a list might be a bit more annoying
-        if watchdir is None:
-            watchdir = self.watch_dir
-        logging.info("Watching:"+str(watchdir))
-
-        self.mount_point = mount.find_mount_point_for(watchdir)
-
-        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
-        # rec traverses the tree and adds all the directories that are there
-        # at the start.
-        # auto_add will add in new directories as they are created
-        self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
-
-    def unmount_watch(self):
-        if self.wdd is not None:
-            self.wm.rm_watch(self.wdd.values())
-            self.wdd = None
-            self.mounted = False
-            
-    def process_notify(self, *args):
-        # process the queue of events as explained above
-        self.notifier.process_events()
-        # check_events waits up to notify_timeout seconds
-        if self.notifier.check_events(self.notify_timeout):
-            # read notified events and enqueue them
-            self.notifier.read_events()
-            # should we do something?
-        # has something happened?
-        last_event_time = self.handler.last_event_time
-        if last_event_time is not None:
-            time_delta = time.time() - last_event_time
-            if time_delta > self.write_timeout:
-                self.startCopy()
-                self.handler.last_event_time = None
-        # handle unmounted filesystems
-        if not self.mounted:
-            if mount.is_mounted(self.mount_point):
-                # we've been remounted. Huzzah!
-                # restart the watch
-                self.add_watch()
-                self.mounted = True
-                logging.info(
-                    "%s was remounted, restarting watch" % \
-                        (self.mount_point)
-                )
-
-    def _parser(self, msg, who):
-        """
-        Parse xmpp chat messages
-        """
-        help = u"I can send [copy] message, or squencer [finished]"
-        if re.match(u"help", msg):
-            reply = help
-        elif re.match("copy", msg):            
-            self.startCopy()
-            reply = u"sent copy message"
-        elif re.match(u"finished", msg):
-            words = msg.split()
-            if len(words) == 2:
-                self.sequencingFinished(words[1])
-                reply = u"sending sequencing finished for %s" % (words[1])
-            else:
-                reply = u"need runfolder name"
-        else:
-            reply = u"I didn't understand '%s'" %(msg)            
-        return reply
-        
-    def start(self, daemonize):
-        """
-        Start application
-        """
-        self.add_watch()
-        super(SpoolWatcher, self).start(daemonize)
-        
-    def stop(self):
-        """
-        shutdown application
-        """
-        # destroy the inotify's instance on this interrupt (stop monitoring)
-        self.notifier.stop()
-        super(SpoolWatcher, self).stop()
-    
-    def startCopy(self):
-        logging.debug("writes seem to have stopped")
-        if self.notify_runner is not None:
-            for r in self.notify_runner:
-                self.rpc_send(r, tuple(), 'startCopy')
-        
-    def sequencingFinished(self, run_dir):
-        # need to strip off self.watch_dir from rundir I suspect.
-        logging.info("run.completed in " + str(run_dir))
-        pattern = self.watch_dir
-        if pattern[-1] != os.path.sep:
-            pattern += os.path.sep
-        stripped_run_dir = re.sub(pattern, "", run_dir)
-        logging.debug("stripped to " + stripped_run_dir)
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
-        if self.notify_runner is not None:
-            for r in self.notify_runner:
-                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-        
-def main(args=None):
-    bot = SpoolWatcher()
-    return bot.main(args)
-    
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-
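
At its core the spool watcher is an idle-timeout: record when the last
filesystem event arrived and fire once nothing has been written for
write_timeout seconds. A minimal sketch of that pattern decoupled from
pyinotify; IdleTrigger and on_idle are hypothetical names:

    import time

    class IdleTrigger(object):
        def __init__(self, timeout, on_idle):
            self.timeout = timeout        # seconds of quiet before firing
            self.on_idle = on_idle        # hypothetical callback
            self.last_event_time = None

        def saw_event(self):
            # call this from the filesystem event handler
            self.last_event_time = time.time()

        def poll(self):
            # call this periodically from the main loop
            if self.last_event_time is None:
                return
            if time.time() - self.last_event_time > self.timeout:
                self.on_idle()
                self.last_event_time = None   # fire once per quiet period

    trigger = IdleTrigger(10, lambda: None)   # 10s quiet period, no-op callback
    trigger.saw_event()
    trigger.poll()   # too soon after the event; nothing fires yet
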
diff --git a/htsworkflow/automation/test/test_runner.py b/htsworkflow/automation/test/test_runner.py
deleted file mode 100644 (file)
index 6c3b9df..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-import unittest
-
-
-import os
-from htsworkflow.automation.copier import runfolder_validate
-
-def extract_runfolder_path(watchdir, event):
-  runfolder_path = watchdir
-  path = event.path
-  if not path.startswith(watchdir):
-    return None
-
-  fragments = path[len(watchdir):].split(os.path.sep)
-  for f in fragments:
-    runfolder_path = os.path.join(runfolder_path, f)
-    if runfolder_validate(f):
-      return runfolder_path
-  return None
-
-class Event(object):
-  def __init__(self, path=None, name=None):
-    self.path = path
-    self.name = name
-
-class testRunner(unittest.TestCase):
-
-    def test_extract_runfolder(self):
-        watchdir = os.path.join('root', 'server', 'mount')
-        runfolder = os.path.join(watchdir, '080909_HWI-EAS229_0052_1234ABCD')
-        ipar = os.path.join(runfolder, 'Data', 'IPAR_1.01')
-        other = os.path.join(watchdir, 'other')
-
-        event = Event( path=runfolder )
-        self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder)
-        
-        event = Event( path=ipar )
-        self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder)
-
-        event = Event( path=other)
-        self.failUnlessEqual(extract_runfolder_path(watchdir, event), None )
-        
-def suite():
-    return unittest.makeSuite(testRunner,'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest="suite")
diff --git a/htsworkflow/util/fctracker.py b/htsworkflow/util/fctracker.py
deleted file mode 100644 (file)
index 57b5dcf..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-import datetime
-import os
-import re
-import sys
-import time
-
-if sys.version_info >= (2, 5):
-  # python 2.5 and later ship sqlite3 in the standard library
-  import sqlite3
-else:
-  import pysqlite2.dbapi2 as sqlite3
-
-
-class fctracker:
-    """
-    provide a simple way to interact with the flowcell data in fctracker.db
-    """
-    def __init__(self, database):
-        # default to the current directory
-        if database is None: 
-            self.database = self._guess_fctracker_path()
-        else:
-            self.database = database
-        self.conn = sqlite3.connect(self.database)
-        self._get_library()
-        self._get_species()
-
-    def _guess_fctracker_path(self):
-        """
-        Guess a few obvious places for the database
-        """
-        fctracker = 'fctracker.db'
-        name = fctracker
-        # is it in the current dir?
-        if os.path.exists(name): 
-            return name
-        name = os.path.expanduser(os.path.join('~', fctracker))
-        if os.path.exists(name):
-            return name
-        raise RuntimeError("Can't find fctracker")
-
-    def _make_dict_from_table(self, table_name, pkey_name):
-        """
-        Convert a django table into a dictionary indexed by the primary key.
-        Yes, it really does just load everything into memory, hopefully
-        we stay under a few tens of thousands of runs for a while.
-        """
-        table = {}
-        c = self.conn.cursor()
-        c.execute('select * from %s;' % (table_name))
-        # extract just the field name
-        description = [ f[0] for f in c.description]
-        for row in c:
-            row_dict = dict(zip(description, row))
-            table[row_dict[pkey_name]] = row_dict
-        c.close()
-        return table
-
-    def _add_lanes_to_libraries(self):
-        """
-        add flowcell/lane ids to new attribute 'lanes' in the library dictionary
-        """
-        library_id_re = re.compile(r'lane_\d_library_id')
-
-        for fc_id, fc in self.flowcells.items():
-            lane_library = [ (x[0][5], x[1]) for x in fc.items() 
-                                             if library_id_re.match(x[0]) ]
-            for lane, library_id in lane_library:
-                if not self.library[library_id].has_key('lanes'):
-                    self.library[library_id]['lanes'] = []
-                self.library[library_id]['lanes'].append((fc_id, lane))
-
-    def _get_library(self):
-        """
-        attach the library dictionary to the instance
-        """
-        self.library = self._make_dict_from_table(
-                         'fctracker_library', 
-                         'library_id')
-                                                  
-        
-    def _get_species(self):
-        """
-        attach the species dictionary to the instance
-        """
-        self.species = self._make_dict_from_table(
-                         'fctracker_species',
-                         'id'
-                       )
-        
-    def _get_flowcells(self, where=None):
-        """
-        attach the flowcell dictionary to the instance
-
-        where is a sql where clause. (eg "where run_date > '2008-1-1'")
-        that can be used to limit what flowcells we select
-        FIXME: please add sanitization code
-        """
-        if where is None:
-            where = ""
-        self.flowcells = {}
-        c = self.conn.cursor()
-        c.execute('select * from fctracker_flowcell %s;' % (where))
-        # extract just the field name
-        description = [ f[0] for f in c.description ]
-        for row in c:
-            row_dict = dict(zip(description, row))
-            fcid, status = self._parse_flowcell_id(row_dict)
-            row_dict['flowcell_id'] = fcid
-            row_dict['flowcell_status'] = status
-
-            for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
-                lane_library = self.library[row_dict[lane+"_id"]]
-                species_id = lane_library['library_species_id']
-                lane_library['library_species'] = self.species[species_id]
-                row_dict[lane] = lane_library
-            # some useful parsing
-            run_date = time.strptime(row_dict['run_date'],  '%Y-%m-%d %H:%M:%S')
-            run_date = datetime.datetime(*run_date[:6])
-            row_dict['run_date'] = run_date
-            self.flowcells[row_dict['flowcell_id']] = row_dict
-
-        self._add_lanes_to_libraries()
-        return self.flowcells
-
-    def _parse_flowcell_id(self, flowcell_row):
-      """
-      Return flowcell id and status
-      
-      We stored the status information in the flowcell id name.
-      this was dumb, but database schemas are hard to update.
-      """
-      fields = flowcell_row['flowcell_id'].split()
-      fcid = None
-      status = None
-      if len(fields) > 0:
-        fcid = fields[0]
-      if len(fields) > 1:
-        status = fields[1]
-      return fcid, status
-      
-
-def flowcell_gone(cell):
-    """
-    Use a variety of heuristics to determine if the flowcell drive
-    has been deleted.
-    """
-    status = cell['flowcell_status']
-    if status is None:
-        return False
-    failures = ['failed', 'deleted', 'not run']
-    for f in failures:
-      if re.search(f, status):
-        return True
-    # no failure status matched the flowcell
-    return False
-
-def recoverable_drive_report(flowcells):
-    """
-    Attempt to report what flowcells are still on a hard drive
-    """
-    def format_status(status):
-      if status is None:
-        return ""
-      else:
-        return status+" "
-
-    # sort flowcells by run date
-    flowcell_list = []
-    for key, cell in flowcells.items():
-        flowcell_list.append( (cell['run_date'], key) )
-    flowcell_list.sort()
-
-    report = []
-    line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) "
-    line += "%(species)s"
-    for run_date, flowcell_id in flowcell_list:
-        cell = flowcells[flowcell_id]
-        if flowcell_gone(cell):
-            continue
-        for l in range(1,9):
-            lane = 'lane_%d' % (l)
-            cell_library = cell['%s_library'%(lane)]
-            fields = {
-              'date': cell['run_date'].strftime('%y-%b-%d'),
-              'id': cell['flowcell_id'],
-              'lane': l,
-              'library_name': cell_library['library_name'],
-              'library_id': cell['%s_library_id'%(lane)],
-              'species': cell_library['library_species']['scientific_name'],
-              'status': format_status(cell['flowcell_status']),
-            }
-            report.append(line % (fields))
-    return os.linesep.join(report)
-
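
The trick fctracker uses to live without django is generic: read the column
names from cursor.description and zip each row into a dictionary keyed by the
primary key. A self-contained sketch against an in-memory database (the table
and values here are illustrative):

    import sqlite3

    def make_dict_from_table(conn, table_name, pkey_name):
        # table_name and pkey_name are trusted strings, not user input
        c = conn.cursor()
        c.execute('select * from %s;' % (table_name,))
        names = [f[0] for f in c.description]
        table = {}
        for row in c:
            row_dict = dict(zip(names, row))
            table[row_dict[pkey_name]] = row_dict
        c.close()
        return table

    conn = sqlite3.connect(':memory:')
    conn.execute('create table library (library_id text, library_name text)')
    conn.execute("insert into library values ('SL029', 'example input')")
    print(make_dict_from_table(conn, 'library', 'library_id'))
    # {'SL029': {'library_id': 'SL029', 'library_name': 'example input'}}
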
diff --git a/scripts/copier b/scripts/copier
deleted file mode 100644 (file)
index 9338b07..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.copier import main
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
diff --git a/scripts/elandseq b/scripts/elandseq
deleted file mode 100755 (executable)
index 6a5178c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines.eland import extract_eland_sequence
-
-def make_parser():
-  usage = "usage: %prog [options] infile [outfile]"
-
-  parser = optparse.OptionParser(usage)
-  parser.add_option("-e", "--extract", dest="slice",
-    default=":",
-    help="provide a python slice operator to select a portion of an eland file")
-  return parser
-
-def main(argv):
-  parser = make_parser()
-
-  (opt, args) = parser.parse_args(argv)
-
-  if len(args) not in (0, 1, 2):
-    parser.error('incorrect number of arguments')
-
-  # get our slice coordinates
-  start, end = opt.slice.split(':')
-  if len(start) > 0:
-    start = int(start)
-  else:
-    start = None
-  if len(end) > 0:
-    end = int(end)
-  else:
-    end = None
-
-  # open infile
-  if len(args) > 0:
-    instream = open(args[0],'r')
-  else:
-    instream = sys.stdin
-
-  if len(args) > 1:
-    outstream = open(args[1],'w')
-  else:
-    outstream = sys.stdout
-
-  extract_eland_sequence(instream, outstream, start, end)
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-
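
The --extract option takes a python-style slice string such as "2:10" or ":";
the parsing reduces to a split on the colon. A tiny standalone sketch:

    def parse_slice(text):
        # ":" -> (None, None); "5:" -> (5, None); "2:10" -> (2, 10)
        start, end = text.split(':')
        return (int(start) if start else None,
                int(end) if end else None)

    print(parse_slice(':'))     # (None, None)
    print(parse_slice('2:10'))  # (2, 10)
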
diff --git a/scripts/gerald2bed.py b/scripts/gerald2bed.py
deleted file mode 100644 (file)
index 7a726e7..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/python
-"""
-Convert a group of eland_result files from a sequencer run to bed files.
-"""
-from glob import glob
-import logging
-import optparse
-import sys
-import os
-
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
-
-def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
-    """
-    convert s_[1-8]_eland_result.txt to corresponding bed files
-    """
-    eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
-    out_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.bed'))
-    if len(out_files) > 0:
-        raise RuntimeError("please move old bedfiles")
-
-    logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell))
-    for pathname in eland_files:
-        path, name = os.path.split(pathname)
-        lane = int(name[2])
-        outname = 's_%d_eland_result.bed' %(lane,)
-        logging.info('Converting lane %d to %s' % (lane, outname))
-
-        outpathname = os.path.join(output_dir or eland_dir, outname)
-        # look up descriptions
-        bed_name, description = make_description(database, flowcell, lane)
-
-        # open files
-        instream = open(pathname,'r')
-        outstream = open(outpathname,'w')
-
-        make_bed_from_eland_stream(
-          instream, outstream, bed_name, description, prefix
-        )
-
-def make_parser():
-  usage = """%prog: --flowcell <flowcell id> directory_name
-
-directory should contain a set of 8 eland result files named like
-s_[12345678]_eland_result.txt"""
-
-
-  parser = optparse.OptionParser(usage)
-
-  parser.add_option('-o', '--output', dest='output',
-                    help="destination directory for our bed files" \
-                         "defaults to eland directory",
-                    default=None)
-  parser.add_option('--chromosome', dest='prefix',
-                    help='Set the chromosome prefix name. defaults to "chr"',
-                    default='chr')
-  parser.add_option("--database", dest='database',
-                    help="specify location of fctracker database",
-                    default=None)
-  parser.add_option("--flowcell", dest='flowcell',
-                    help="specify the flowcell id for this run",
-                    default=None)
-  parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
-                    help='increase verbosity',
-                    default=False)
-  return parser
-
-def main(command_line=None):
-    logging.basicConfig(level=logging.WARNING)
-    if command_line is None:
-        command_line = sys.argv[1:]
-
-    parser = make_parser()
-    (opts, args) = parser.parse_args(command_line)
-
-    if len(args) != 1:
-        parser.error('Directory name required')
-
-    eland_dir = args[0]
-    if not os.path.isdir(eland_dir):
-        parser.error('%s must be a directory' % (eland_dir,))
-
-    if opts.flowcell is None:
-        parser.error('Flowcell ID required')
-
-    if opts.verbose:
-        logger = logging.getLogger()
-        logger.setLevel(logging.INFO)
-
-    make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell)
-
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-
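
gerald2bed leans on GERALD's fixed naming scheme: the lane number is the single
digit at index 2 of s_<lane>_eland_result.txt. A sketch of that discovery step;
the directory path is a placeholder:

    from glob import glob
    import os

    eland_dir = '/tmp/eland_results'   # placeholder
    for pathname in sorted(glob(os.path.join(eland_dir,
                                             's_[1-8]_eland_result.txt'))):
        name = os.path.basename(pathname)
        lane = int(name[2])            # 's_3_eland_result.txt' -> 3
        print('lane %d: %s' % (lane, pathname))
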
diff --git a/scripts/library.py b/scripts/library.py
deleted file mode 100644 (file)
index 35532f4..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-from optparse import OptionParser
-import sys
-
-from htsworkflow.util import fctracker
-
-def make_parser():
-    """
-    Make parser
-    """
-    parser = OptionParser()
-    parser.add_option("-d", "--database", dest="database",
-                      help="path to the fctracker.db",
-                      default=None)
-    parser.add_option("-w", "--where", dest="where",
-                      help="add a where clause",
-                      default=None)
-    return parser
-
-def main(argv=None):
-    if argv is None:
-        argv = []
-    parser = make_parser()
-
-    opt, args = parser.parse_args(argv)
-    
-    fc = fctracker.fctracker(opt.database)
-    cells = fc._get_flowcells(opt.where)
-
-    print fctracker.recoverable_drive_report(cells)
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/make-library-tree b/scripts/make-library-tree
deleted file mode 100644 (file)
index 67a9282..0000000
+++ /dev/null
@@ -1,225 +0,0 @@
-"""
-Make a tree of symlinks organized by library id.
-"""
-from ConfigParser import SafeConfigParser
-from glob import glob
-import logging
-from optparse import OptionParser
-import os
-import stat
-import sys
-
-from htsworkflow.util import fctracker
-
-
-def find_lanes(flowcell_dir, flowcell_id, lane):
-    lane_name = "s_%s_eland_result*" %(lane)
-    pattern = os.path.join(flowcell_dir, flowcell_id, "*", lane_name)
-    lanes = glob(pattern)
-    return lanes
-
-def make_long_lane_name(flowcell_dir, lane_pathname):
-    """
-    make a name from the eland result file name
-    """
-    if flowcell_dir == lane_pathname[0:len(flowcell_dir)]:
-        subpath = lane_pathname[len(flowcell_dir):]
-        long_name = subpath.replace(os.path.sep, "_")
-        return long_name
-    else:
-        return None
-    
-def parse_srf_directory(srf_dir):
-    """
-    search srf_dir for *.srf files
-
-    builds a dictionary indexed by flowcell name.
-    """
-    flowcells = {}
-    srfs = glob(os.path.join(srf_dir,'*.srf'))
-    for pathname in srfs:
-        path, filename = os.path.split(pathname)
-        basename, ext = os.path.splitext(filename)
-        record = basename.split('_')
-        assert len(record) == 6
-
-        site = record[0]
-        date = record[1]
-        machine = record[2]
-        runid = record[3]
-        flowcellid = record[4]
-        laneid = record[5]
-
-        desc = "_".join([site,date,machine,runid,flowcellid])
-        flowcells[flowcellid] = desc
-    return flowcells
-
-
-def carefully_make_hardlink(source, destination, dry_run=False):
-    """
-    Make a hard link, failing if a different link already exists
-
-    Checking to see if the link already exists and is
-    the same as the link we want to make.
-    If the link already exists and is different, throw an error.
-    """
-    logging.debug("%s -> %s", source, destination)
-
-    if not os.path.exists(source):
-        logging.warning("%s doesn't exist", source)
-        return
-
-    if os.path.exists(destination):
-        if os.path.samefile(source, destination):
-            return
-        else:
-            raise IOError('%s and %s are different files' % \
-                           (source, destination))
-
-    if dry_run: return 
-
-    os.link(source, destination)
-    os.chmod(destination,
-             stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
-
-def link_all_eland_lanes(library_path, flowcell_dir, flowcell_id, lane, dry_run):
-    """
-    find eland files at different alignment lengths
-    and put each of those in the file 
-    """
-    lanes = find_lanes(flowcell_dir, flowcell_id, lane)
-    for lane_pathname in lanes:
-        long_name = make_long_lane_name(flowcell_dir, 
-                                        lane_pathname)
-        long_pathname = os.path.join(library_path, long_name)
-        carefully_make_hardlink(lane_pathname,
-                                long_pathname,
-                                dry_run)
-
-def link_srf_lanes(srf_names, library_path, srf_dir, flowcell_id, lane, dry_run):
-    """
-    Link srf files into our library directories.
-
-    the srf files must be named:
-    <site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf
-    """
-    srf_basename = srf_names.get(flowcell_id, None)
-    if srf_basename is None:
-        logging.info("srf file for %s was not found", flowcell_id)
-    else:
-        srf_filename = "%s_%s.srf" % (srf_basename, lane)
-        source = os.path.join(srf_dir, srf_filename)
-        destination = os.path.join(library_path, srf_filename)
-        carefully_make_hardlink(source, destination, dry_run)
-    
-
-def make_library_tree(fcdb, library_dir, flowcell_dir, srfs_dir,
-                      dry_run=False):
-    """
-    Iterate over the library 
-    """
-    library_dir = os.path.normpath(library_dir) + os.path.sep
-    flowcell_dir = os.path.normpath(flowcell_dir) + os.path.sep
-    srfs_dir = os.path.normpath(srfs_dir) + os.path.sep
-
-    srf_names = parse_srf_directory(srfs_dir)
-
-    for lib_id, lib in fcdb.library.items():
-        library_path = os.path.join(library_dir, str(lib_id))
-        if not os.path.exists(library_path):
-            os.mkdir(library_path)
-
-        for flowcell_id, lane in lib.get('lanes', []):
-            link_all_eland_lanes(library_path, 
-                                 flowcell_dir, 
-                                 flowcell_id, 
-                                 lane, 
-                                 dry_run)
-
-            link_srf_lanes(srf_names, 
-                           library_path, 
-                           srfs_dir,
-                           flowcell_id,
-                           lane,
-                           dry_run)
-
-def make_parser():
-    """
-    Make parser
-    """
-    parser = OptionParser()
-    parser.add_option('-c', '--config', default=None,
-                      help='path to a configuration file containing a '
-                           'sequence archive section')
-                      
-    parser.add_option("-d", "--database", dest="database",
-                      help="path to the fctracker.db",
-                      default=None)
-    parser.add_option('-a', '--sequence-archive', default=None,
-                      help='path to where the sequence archive lives')
-    parser.add_option("-w", "--where", dest="where",
-                      help="add a where clause",
-                      default=None)
-
-    parser.add_option("--dry-run", dest="dry_run", action="store_true",
-                      default=False,
-                      help="Don't modify the filesystem")
-    return parser
-
-def main(argv=None):
-    logging.basicConfig(level=logging.INFO)
-
-    FRONTEND_NAME = 'frontend'
-    SECTION_NAME = 'sequence_archive'
-    DATABASE_OPT = 'database_name'
-    ARCHIVE_OPT = 'archive_path'
-
-    if argv is None:
-        argv = []
-    parser = make_parser()
-
-    # parse command line arguments
-    opt, args = parser.parse_args(argv)
-
-    # figure out what config file to read
-    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
-                   '/etc/htsworkflow.ini']
-    if opt.config is not None:
-        config_path = [opt.config]
-    
-    # parse options from config file
-    config_file = SafeConfigParser()
-    config_file.read(config_path)
-
-    # load defaults from config file if not overridden by the command line
-    if opt.database is None and \
-       config_file.has_option(FRONTEND_NAME, DATABASE_OPT):
-        opt.database = config_file.get(FRONTEND_NAME, DATABASE_OPT)
-
-    if opt.sequence_archive is None and \
-       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
-        opt.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
-  
-    # complain if critical things are missing
-    if opt.database is None:
-        parser.error('Need location of htsworkflow frontend database')
-
-    if opt.sequence_archive is None:
-        parser.error('Need the root path for the sequence archive')
-
-    fcdb = fctracker.fctracker(opt.database)
-    cells = fcdb._get_flowcells(opt.where)
-
-    library_dir = os.path.join(opt.sequence_archive, 'libraries')
-    flowcell_dir = os.path.join(opt.sequence_archive, 'flowcells')
-    srfs_dir = os.path.join(opt.sequence_archive, 'srfs')
-    make_library_tree(fcdb, 
-                      library_dir, flowcell_dir, srfs_dir, 
-                      opt.dry_run)
-
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
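
parse_srf_directory depends on srf files being named
<site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf, so the parsing is a
single split on underscores. A sketch with an illustrative filename:

    import os

    filename = 'woldlab_090214_HWI-EAS229_0052_1234ABCD_4.srf'  # illustrative
    basename = os.path.splitext(filename)[0]
    site, date, machine, runid, flowcellid, laneid = basename.split('_')
    print('%s %s' % (flowcellid, laneid))  # 1234ABCD 4
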
diff --git a/scripts/makebed b/scripts/makebed
deleted file mode 100755 (executable)
index 577b868..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/python
-import optparse
-import sys
-import os
-
-from htsworkflow.util.opener import autoopen
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
-
-def make_parser():
-  parser = optparse.OptionParser()
-  parser.add_option('-e', '--eland', dest='inname',
-                    help='specify input eland filename')
-  parser.add_option('-b', '--bed', dest='outname',
-                    help='specify output bed filename')
-  parser.add_option('-n', '--name', dest='name',
-                    help='specify the track (short) name.',
-                    default=None)
-  parser.add_option('-d', '--description', dest='description',
-                    help='specify the track description',
-                    default=None)
-  parser.add_option('--chromosome', dest='prefix',
-                    help='Set the chromosome prefix name. defaults to "chr"',
-                    default='chr')
-  parser.add_option("--database", dest='database',
-                    help="specify location of fctracker database",
-                    default=None)
-  parser.add_option("--flowcell", dest='flowcell',
-                    help="compute name and description from database using flowcell id",
-                    default=None)
-  parser.add_option("--lane", dest='lane',
-                    help='specify which lane to use when retrieving description from database',
-                    default=None)
-
-  multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')
-
-  multi.add_option('-m', '--multi', action='store_true',
-                    help='Enable parsing multi-read eland files',
-                    default=False)
-  multi.add_option('--reads', type='int',
-                   help='limit reporting multi reads to this many reads '
-                        '(most usefully --reads=1 will turn a multi-read '
-                        'file into a single read file)',
-                   default=255)
-  parser.add_option_group(multi)
-
-  return parser
-
-def main(command_line=None):
-  instream = None
-  outstream = None
-
-  if command_line is None:
-    command_line = sys.argv[1:]
-
-  parser = make_parser()
-  (options, args) = parser.parse_args(command_line)
-
-  if options.inname is None:
-    parser.error("Need eland input file name")
-    return 1
-
-  if options.inname == '-':
-    instream = sys.stdin
-  elif os.path.exists(options.inname):
-    instream = autoopen(options.inname, 'r')
-  else:
-    parser.error('%s was not found' % (options.inname))
-    return 1
-
-  # figure out name for output file
-  if options.outname is None:
-      # if outname wasn't defined, and we're reading from stdin
-      if instream is sys.stdin:
-          # write to stdout
-          outstream = sys.stdout
-      else:
-          # if there's a name write to name.bed
-          options.outname = os.path.splitext(options.inname)[0]+'.bed'
-          print >>sys.stderr, "defaulting to outputname", options.outname
-  elif options.outname == '-':
-      outstream = sys.stdout
-
-  if outstream is None:
-      if os.path.exists(options.outname):
-          parser.error("not overwriting %s" % (options.outname))
-          return 1
-      else:
-          outstream = open(options.outname, 'w')
-
-  if options.flowcell is not None and options.lane is not None:
-    # get our name/description out of the database
-    name, description = make_description(
-                           options.database, options.flowcell, options.lane
-                        )
-  else:
-    name = options.name
-    description = options.description
-
-  if options.multi:
-    make_bed_from_multi_eland_stream(instream, outstream, 
-                                     name, description, 
-                                     options.prefix,
-                                     options.reads)
-
-  else:
-    make_bed_from_eland_stream(instream, outstream, 
-                               name, description, 
-                               options.prefix)
-  return 0
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
-
diff --git a/scripts/rerun_eland.py b/scripts/rerun_eland.py
deleted file mode 100644 (file)
index af06cdd..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/env python
-
-import logging
-from optparse import OptionParser
-import os
-import subprocess
-import sys
-
-from htsworkflow.pipelines import gerald
-from htsworkflow.pipelines.eland import extract_eland_sequence
-from htsworkflow.pipelines import runfolder
-
-def make_query_filename(eland_obj, output_dir):
-    query_name = '%s_%s_eland_query.txt' 
-    query_name %= (eland_obj.sample_name, eland_obj.lane_id)
-
-    query_pathname = os.path.join(output_dir, query_name)
-    
-    if os.path.exists(query_pathname):
-        logging.warn("overwriting %s" % (query_pathname,))
-
-    return query_pathname
-
-def make_result_filename(eland_obj, output_dir):
-    result_name = '%s_%s_eland_result.txt' 
-    result_name %= (eland_obj.sample_name, eland_obj.lane_id)
-
-    result_pathname = os.path.join(output_dir, result_name)
-    
-    if os.path.exists(result_pathname):
-        logging.warn("overwriting %s" % (result_pathname,))
-
-    return result_pathname
-
-def extract_sequence(inpathname, query_pathname, length, dry_run=False):
-    logging.info('extracting %d bases' %(length,))
-    logging.info('extracting from %s' %(inpathname,))
-    logging.info('extracting to %s' %(query_pathname,))
-    
-    if not dry_run: 
-        instream = open(inpathname, 'r')
-        outstream = open(query_pathname, 'w')
-        try:
-            extract_eland_sequence(instream, outstream, 0, length)
-        finally:
-            outstream.close()
-            instream.close()
-    
-def run_eland(length, query_name, genome, result_name, multi=False, dry_run=False):
-    cmdline = ['eland_%d' % (length,), query_name, genome, result_name]
-    if multi:
-        cmdline += ['--multi']
-
-    logging.info('running eland: ' + " ".join(cmdline))
-    if not dry_run:
-        return subprocess.Popen(cmdline)
-    else:
-        return None
-
-
-def rerun(gerald_dir, output_dir, length=25, dry_run=False):
-    """
-    look for eland files in gerald_dir and write a subset to output_dir
-    """
-    logging.info("Extracting %d bp from files in %s" % (length, gerald_dir))
-    g = gerald.gerald(gerald_dir)
-
-    # this will only work if we're only missing the last dir in output_dir
-    if not os.path.exists(output_dir):
-        logging.info("Making %s" %(output_dir,))
-        if not dry_run: os.mkdir(output_dir)
-
-    processes = []
-    for lane_id, lane_param in g.lanes.items():
-        eland = g.eland_results[lane_id]
-
-        inpathname = eland.pathname
-        query_pathname = make_query_filename(eland, output_dir)
-        result_pathname = make_result_filename(eland, output_dir)
-
-        extract_sequence(inpathname, query_pathname, length, dry_run=dry_run)
-
-        p = run_eland(length, 
-                      query_pathname, 
-                      lane_param.eland_genome, 
-                      result_pathname, 
-                      dry_run=dry_run)
-        if p is not None:
-            processes.append(p)
-
-    for p in processes:
-        p.wait()
-        
-def make_parser():
-    usage = '%prog: [options] runfolder'
-
-    parser = OptionParser(usage)
-    
-    parser.add_option('--gerald', 
-                      help='specify location of GERALD directory',
-                      default=None)
-    parser.add_option('-o', '--output',
-                      help='specify output location of files',
-                      default=None)
-    parser.add_option('-l', '--read-length', type='int',
-                      help='specify new eland length',
-                      dest='length',
-                      default=25)
-    parser.add_option('--dry-run', action='store_true',
-                      help='only pretend to run',
-                      default=False)
-    parser.add_option('-v', '--verbose', action='store_true',
-                      help='increase verbosity',
-                      default=False)
-
-    return parser
-
-
-def main(cmdline=None):
-    logging.basicConfig(level=logging.WARNING)
-
-    parser = make_parser()
-    opts, args = parser.parse_args(cmdline)
-
-    if opts.length < 16 or opts.length > 32:
-        parser.error("eland can only process reads in the range 16-32")
-
-    if len(args) > 1:
-        parser.error("Can only process one runfolder directory")
-    elif len(args) == 1:
-        runs = runfolder.get_runs(args[0])
-        if len(runs) != 1:
-            parser.error("Not a runfolder")
-        opts.gerald = runs[0].gerald.pathname
-        if opts.output is None:
-            opts.output = os.path.join(
-                runs[0].pathname, 
-                'Data', 
-                # pythons 0..n ==> elands 1..n+1
-                'C1-%d' % (opts.length+1,) 
-            )
-
-    elif opts.gerald is None:
-        parser.error("need gerald directory")
-    
-    if opts.output is None:
-        parser.error("specify location for the new eland files")
-
-    if opts.verbose:
-        root_logger = logging.getLogger()
-        root_logger.setLevel(logging.INFO)
-
-    rerun(opts.gerald, opts.output, opts.length, dry_run=opts.dry_run)
-
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
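
rerun_eland fans out one eland process per lane and then waits for all of them;
the bookkeeping is plain subprocess usage. A sketch with stand-in command lines
(the real script built eland_<length> invocations):

    import subprocess
    import sys

    # stand-in children that just print their index
    cmdlines = [[sys.executable, '-c', 'print(%d)' % i] for i in range(3)]

    processes = [subprocess.Popen(c) for c in cmdlines]
    for p in processes:
        p.wait()   # block until every child has finished
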
diff --git a/scripts/runner b/scripts/runner
deleted file mode 100644 (file)
index 560299f..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.runner import main
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
diff --git a/scripts/spoolwatcher b/scripts/spoolwatcher
deleted file mode 100644 (file)
index b2f833e..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.spoolwatcher import main
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
diff --git a/test/test_copier.py b/test/test_copier.py
deleted file mode 100644 (file)
index 9a1b738..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-import unittest
-
-from StringIO import StringIO
-from htsworkflow.automation import copier
-
-class testCopier(unittest.TestCase):
-    def test_runfolder_validate(self):
-        self.failUnlessEqual(copier.runfolder_validate(""), False)
-        self.failUnlessEqual(copier.runfolder_validate("1345_23"), False)
-        self.failUnlessEqual(copier.runfolder_validate("123456_asdf-$23'"), False)
-        self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44"), True)
-        self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44 "), False)
-        
-    def test_empty_config(self):
-        cfg = StringIO("""[fake]
-something: unrelated
-""")
-        bot = copier.CopierBot('fake', configfile=cfg)
-        self.failUnlessRaises(RuntimeError, bot.read_config)
-        
-    def test_full_config(self):
-        cfg = StringIO("""[copier]        
-jid: copier@example.fake
-password: badpassword
-authorized_users: user1@example.fake user2@example.fake
-rsync_password_file: ~/.sequencer
-rsync_source: /tmp/sequencer_source
-rsync_destination: /tmp/sequencer_destination
-notify_users: user3@example.fake
-# who to run to
-#runner:
-""")
-        c = copier.CopierBot("copier", configfile=cfg)
-        c.read_config()
-        self.failUnlessEqual(c.jid, 'copier@example.fake')
-        self.failUnlessEqual(c.cfg['password'], 'badpassword')
-        self.failUnlessEqual(len(c.authorized_users), 2)
-        self.failUnlessEqual(c.authorized_users[0], 'user1@example.fake')
-        self.failUnlessEqual(c.authorized_users[1], 'user2@example.fake')
-        self.failUnlessEqual(c.rsync.source_base, '/tmp/sequencer_source')
-        self.failUnlessEqual(c.rsync.dest_base, '/tmp/sequencer_destination')
-        self.failUnlessEqual(len(c.notify_users), 1)
-        self.failUnlessEqual(c.notify_users[0], 'user3@example.fake')
-
-    def test_dirlist_filter(self):
-       """
-       test our dir listing parser
-       """
-       # everyone should have a root dir, and since we're not
-       # currently writing files... it should all be good
-       r = copier.rsync('/', '/', '/')
-
-       listing = [
-         'drwxrwxr-x           0 2007/12/29 12:34:56 071229_USI-EAS229_001_FC1234\n',
-         '-rwxrw-r--      123268 2007/12/29 17:39:31 2038EAAXX.rtf\n',
-         '-rwxrw-r--           6 2007/12/29 15:10:29 New Text Document.txt\n',
-       ]
-
-       result = r.list_filter(listing)
-       self.failUnlessEqual(len(result), 1)
-       self.failUnlessEqual(result[0][-1], '4')
-
-def suite():
-    return unittest.makeSuite(testCopier,'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest="suite")
diff --git a/test/tree.py b/test/tree.py
deleted file mode 100644 (file)
index 4f666cc..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Build a fake directory tree for testing rsync management code.
-"""
-
-import os
-import random
-
-def make_random_string(length=8):
-  """Make a random string, length characters long
-  """
-  symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
-  name = []
-  for i in xrange(length):
-    name.append(random.choice(symbols))
-  return "".join(name)
-
-def make_file(pathname):
-  """Make a file with some random stuff in it
-  """
-  stream = open(pathname,'w')
-  stream.write(make_random_string(16))
-  stream.close()
-
-def make_tree(root, depth=3, directories=5, files=10):
-  """
-  Make a tree of random directories and files
-
-  depth is how many levels of subdirectories
-  directories is how many directories each subdirectory should have
-  files is how many files to create in each directory
-  """
-  if not os.path.exists(root):
-    os.mkdir(root)
-
-  paths = []
-  # make files
-  for i in range(files):
-    name = make_random_string()
-    paths.append(name)
-    pathname = os.path.join(root, name)
-    make_file(pathname)
-
-  # make subdirectories if we still have some depth to go
-  if depth > 0:
-    for i in range(directories):
-      name = make_random_string()
-      # paths.append(name)
-      pathname = os.path.join(root, name)
-      subpaths = make_tree(pathname, depth-1, directories, files)
-      paths.extend([ os.path.join(name, x) for x in subpaths ])
-
-  return paths
-
-def generate_paths(root):
-  """Make a list of relative paths like generated by make_tree
-  """
-  paths = []
-  for curdir, subdirs, files in os.walk(root):
-    paths.extend([ os.path.join(curdir, f) for f in files ])
-
-  # an inefficient way of getting the correct common prefix
-  # (e.g. root might not have a trailing /)
-  common_root = os.path.commonprefix(paths)
-  common_len = len(common_root)
-  return [ p[common_len:] for p in paths ]
-    
-def compare_tree(root, paths, verbose=False):
-  """Make sure the tree matches our relative list of paths
-  """
-  # what we find when we look
-  experimental_set = set(generate_paths(root))
-  # what we expect
-  theoretical_set = set(paths)
-  # true if the difference of the two sets is the empty set
-  difference = experimental_set - theoretical_set
-  issame = (len(difference) == 0)
-  if verbose and not issame:
-    print difference
-  return issame
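
compare_tree works on sets of root-relative paths; the commonprefix trick above
is one way to strip the root, and joining the root with an empty component is a
tidier alternative. A sketch of the comparison using that variant:

    import os

    def relative_paths(root):
        paths = []
        for curdir, subdirs, files in os.walk(root):
            paths.extend(os.path.join(curdir, f) for f in files)
        prefix = os.path.join(root, '')   # guarantees one trailing separator
        return set(p[len(prefix):] for p in paths)

    def trees_match(original, copy):
        # true when the copy has exactly the files the original has
        return relative_paths(original) == relative_paths(copy)
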