Rename trunk from gaworkflow to htsworkflow (and update all of the imports)
author Diane Trout <diane@caltech.edu>
Wed, 15 Oct 2008 18:59:34 +0000 (18:59 +0000)
committer Diane Trout <diane@caltech.edu>
Wed, 15 Oct 2008 18:59:34 +0000 (18:59 +0000)
Fix the queuecommands test script to deal with the 1 sec delay hack

96 files changed:
gaworkflow/__init__.py [deleted file]
gaworkflow/automation/__init__.py [deleted file]
gaworkflow/automation/copier.py [deleted file]
gaworkflow/automation/runner.py [deleted file]
gaworkflow/automation/spoolwatcher.py [deleted file]
gaworkflow/frontend/__init__.py [deleted file]
gaworkflow/frontend/eland_config/__init__.py [deleted file]
gaworkflow/frontend/eland_config/forms.py [deleted file]
gaworkflow/frontend/eland_config/models.py [deleted file]
gaworkflow/frontend/eland_config/urls.py [deleted file]
gaworkflow/frontend/eland_config/views.py [deleted file]
gaworkflow/frontend/fctracker/__init__.py [deleted file]
gaworkflow/frontend/fctracker/models.py [deleted file]
gaworkflow/frontend/fctracker/views.py [deleted file]
gaworkflow/frontend/manage.py [deleted file]
gaworkflow/frontend/settings.py [deleted file]
gaworkflow/frontend/urls.py [deleted file]
gaworkflow/pipeline/__init__.py [deleted file]
gaworkflow/pipeline/bustard.py [deleted file]
gaworkflow/pipeline/configure_run.py [deleted file]
gaworkflow/pipeline/firecrest.py [deleted file]
gaworkflow/pipeline/genome_mapper.py [deleted file]
gaworkflow/pipeline/gerald.py [deleted file]
gaworkflow/pipeline/recipe_parser.py [deleted file]
gaworkflow/pipeline/retrieve_config.py [deleted file]
gaworkflow/pipeline/run_status.py [deleted file]
gaworkflow/pipeline/runfolder.py [deleted file]
gaworkflow/pipeline/test/test_genome_mapper.py [deleted file]
gaworkflow/pipeline/test/test_runfolder026.py [deleted file]
gaworkflow/pipeline/test/test_runfolder030.py [deleted file]
gaworkflow/util/__init__.py [deleted file]
gaworkflow/util/alphanum.py [deleted file]
gaworkflow/util/ethelp.py [deleted file]
gaworkflow/util/fctracker.py [deleted file]
gaworkflow/util/makebed.py [deleted file]
gaworkflow/util/mount.py [deleted file]
gaworkflow/util/opener.py [deleted file]
gaworkflow/util/queuecommands.py [deleted file]
gaworkflow/util/test/test_ethelp.py [deleted file]
gaworkflow/util/test/test_makebed.py [deleted file]
gaworkflow/util/test/test_queuecommands.py [deleted file]
htsworkflow/__init__.py [new file with mode: 0644]
htsworkflow/automation/__init__.py [new file with mode: 0644]
htsworkflow/automation/copier.py [new file with mode: 0644]
htsworkflow/automation/runner.py [new file with mode: 0644]
htsworkflow/automation/spoolwatcher.py [new file with mode: 0644]
htsworkflow/frontend/__init__.py [new file with mode: 0644]
htsworkflow/frontend/eland_config/__init__.py [new file with mode: 0644]
htsworkflow/frontend/eland_config/forms.py [new file with mode: 0644]
htsworkflow/frontend/eland_config/models.py [new file with mode: 0644]
htsworkflow/frontend/eland_config/urls.py [new file with mode: 0644]
htsworkflow/frontend/eland_config/views.py [new file with mode: 0644]
htsworkflow/frontend/fctracker/__init__.py [new file with mode: 0644]
htsworkflow/frontend/fctracker/models.py [new file with mode: 0644]
htsworkflow/frontend/fctracker/views.py [new file with mode: 0644]
htsworkflow/frontend/manage.py [new file with mode: 0644]
htsworkflow/frontend/settings.py [new file with mode: 0644]
htsworkflow/frontend/urls.py [new file with mode: 0644]
htsworkflow/pipeline/__init__.py [new file with mode: 0644]
htsworkflow/pipeline/bustard.py [new file with mode: 0644]
htsworkflow/pipeline/configure_run.py [new file with mode: 0644]
htsworkflow/pipeline/firecrest.py [new file with mode: 0644]
htsworkflow/pipeline/genome_mapper.py [new file with mode: 0644]
htsworkflow/pipeline/gerald.py [new file with mode: 0644]
htsworkflow/pipeline/recipe_parser.py [new file with mode: 0644]
htsworkflow/pipeline/retrieve_config.py [new file with mode: 0644]
htsworkflow/pipeline/run_status.py [new file with mode: 0644]
htsworkflow/pipeline/runfolder.py [new file with mode: 0644]
htsworkflow/pipeline/test/test_genome_mapper.py [new file with mode: 0644]
htsworkflow/pipeline/test/test_runfolder026.py [new file with mode: 0644]
htsworkflow/pipeline/test/test_runfolder030.py [new file with mode: 0644]
htsworkflow/util/__init__.py [new file with mode: 0644]
htsworkflow/util/alphanum.py [new file with mode: 0644]
htsworkflow/util/ethelp.py [new file with mode: 0644]
htsworkflow/util/fctracker.py [new file with mode: 0644]
htsworkflow/util/makebed.py [new file with mode: 0755]
htsworkflow/util/mount.py [new file with mode: 0644]
htsworkflow/util/opener.py [new file with mode: 0644]
htsworkflow/util/queuecommands.py [new file with mode: 0644]
htsworkflow/util/test/test_ethelp.py [new file with mode: 0644]
htsworkflow/util/test/test_makebed.py [new file with mode: 0644]
htsworkflow/util/test/test_queuecommands.py [new file with mode: 0644]
scripts/configure_pipeline
scripts/copier
scripts/elandseq
scripts/gerald2bed.py
scripts/library.py
scripts/makebed
scripts/rerun_eland.py
scripts/retrieve_config
scripts/runfolder
scripts/runner
scripts/spoolwatcher
scripts/srf
setup.py
test/test_copier.py

diff --git a/gaworkflow/__init__.py b/gaworkflow/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/gaworkflow/automation/__init__.py b/gaworkflow/automation/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/gaworkflow/automation/copier.py b/gaworkflow/automation/copier.py
deleted file mode 100644 (file)
index 2e0d3ae..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-import ConfigParser
-import copy
-import logging
-import logging.handlers
-import os
-import re
-import subprocess
-import sys
-import time
-import traceback
-
-from benderjab import rpc
-
-def runfolder_validate(fname):
-    """
-    Return True if fname looks like a runfolder name
-    """
-    if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname):
-        return True
-    else:
-        return False
-    
-class rsync(object):
-  def __init__(self, source, dest, pwfile):
-    self.pwfile = os.path.expanduser(pwfile)
-    self.cmd = ['/usr/bin/rsync', ]
-    self.cmd.append('--password-file=%s' % (self.pwfile))
-    self.source_base = source
-    self.dest_base = dest
-    self.processes = {}
-    self.exit_code = None
-
-  def list(self):
-    """Get a directory listing"""
-    args = copy.copy(self.cmd)
-    args.append(self.source_base)
-
-    logging.debug("Rsync cmd:" + " ".join(args))
-    short_process = subprocess.Popen(args, stdout=subprocess.PIPE)
-    return self.list_filter(short_process.stdout)
-
-  def list_filter(self, lines):
-    """
-    parse rsync directory listing
-    """
-    dirs_to_copy = []
-    direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ]
-    for permissions, size, filedate, filetime, filename in direntries:
-      if permissions[0] == 'd':
-        # hey, it's a directory, the first step to being something we want to
-        # copy
-        if re.match("[0-9]{6}", filename):
-          # it starts with something that looks like a 6 digit date
-          # aka good enough for me
-          dirs_to_copy.append(filename)
-    return dirs_to_copy
-
-  def create_copy_process(self, dirname):
-    args = copy.copy(self.cmd)
-    # we want to copy everything
-    args.append('-rlt') 
-    # from here
-    args.append(os.path.join(self.source_base, dirname))
-    # to here
-    args.append(self.dest_base)
-    logging.debug("Rsync cmd:" + " ".join(args))
-    return subprocess.Popen(args)
-  def copy(self):
-    """
-    copy any interesting looking directories over
-    return list of items that we started copying.
-    """
-    # clean up any lingering non-running processes
-    self.poll()
-    
-    # what's available to copy?
-    dirs_to_copy = self.list()
-    
-    # lets start copying
-    started = []
-    for d in dirs_to_copy:
-      process = self.processes.get(d, None)
-      
-      if process is None:
-        # we don't have a process, so make one
-        logging.info("rsyncing %s" % (d))
-        self.processes[d] = self.create_copy_process(d)
-        started.append(d)           
-    return started
-      
-  def poll(self):
-      """
-      check currently running processes to see if they're done
-      
-      return path roots that have finished.
-      """
-      for dir_key, proc_value in self.processes.items():
-          retcode = proc_value.poll()
-          if retcode is None:
-              # process hasn't finished yet
-              pass
-          elif retcode == 0:
-              logging.info("finished rsyncing %s, exitcode %d" %( dir_key, retcode))
-              del self.processes[dir_key]
-          else:
-              logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode))
-              
-  def __len__(self):
-      """
-      Return how many active rsync processes we currently have
-      
-      Call poll first to close finished processes.
-      """
-      return len(self.processes)
-  
-  def keys(self):
-      """
-      Return list of current run folder names
-      """
-      return self.processes.keys()
-
-class CopierBot(rpc.XmlRpcBot):
-    def __init__(self, section=None, configfile=None):
-        #if configfile is None:
-        #    configfile = '~/.gaworkflow'
-            
-        super(CopierBot, self).__init__(section, configfile)
-        
-        # options for rsync command
-        self.cfg['rsync_password_file'] = None
-        self.cfg['rsync_source'] = None
-        self.cfg['rsync_destination'] = None 
-        
-        # options for reporting we're done 
-        self.cfg['notify_users'] = None
-        self.cfg['notify_runner'] = None
-                            
-        self.pending = []
-        self.rsync = None
-        self.notify_users = None
-        self.notify_runner = None
-        
-        self.register_function(self.startCopy)
-        self.register_function(self.sequencingFinished)
-        self.eventTasks.append(self.update)
-        
-    def read_config(self, section=None, configfile=None):
-        """
-        read the config file
-        """
-        super(CopierBot, self).read_config(section, configfile)
-        
-        password = self._check_required_option('rsync_password_file')
-        source = self._check_required_option('rsync_source')
-        destination = self._check_required_option('rsync_destination')
-        self.rsync = rsync(source, destination, password)
-        
-        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
-        try:
-          self.notify_runner = \
-             self._parse_user_list(self.cfg['notify_runner'],
-                                   require_resource=True)
-        except bot.JIDMissingResource:
-            msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.FATAL(msg)
-            raise bot.JIDMissingResource(msg)
-
-    def startCopy(self, *args):
-        """
-        start our copy
-        """
-        logging.info("starting copy scan")
-        started = self.rsync.copy()
-        logging.info("copying:" + " ".join(started)+".")
-        return started
-        
-    def sequencingFinished(self, runDir, *args):
-        """
-        The run was finished, if we're done copying, pass the message on        
-        """
-        # close any open processes
-        self.rsync.poll()
-        
-        # see if we're still copying
-        if runfolder_validate(runDir):
-            logging.info("recevied sequencing finshed for %s" % (runDir))
-            self.pending.append(runDir)
-            self.startCopy()
-            return "PENDING"
-        else:
-            errmsg = "received bad runfolder name (%s)" % (runDir)
-            logging.warning(errmsg)
-            # maybe I should use a different error message
-            raise RuntimeError(errmsg)
-    
-    def reportSequencingFinished(self, runDir):
-        """
-        Send the sequencingFinished message to the interested parties
-        """
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, 'Sequencing run %s finished' % (runDir))
-        if self.notify_runner is not None:
-            for r in self.notify_runner:
-                self.rpc_send(r, (runDir,), 'sequencingFinished')
-        logging.info("forwarding sequencingFinshed message for %s" % (runDir))
-        
-    def update(self, *args):
-        """
-        Update our current status.
-        Report if we've finished copying files.
-        """
-        self.rsync.poll()
-        for p in self.pending:
-            if p not in self.rsync.keys():
-                self.reportSequencingFinished(p)
-                self.pending.remove(p)
-        
-    def _parser(self, msg, who):
-        """
-        Parse xmpp chat messages
-        """
-        help = u"I can [copy], or report current [status]"
-        if re.match(u"help", msg):
-            reply = help
-        elif re.match("copy", msg):            
-            started = self.startCopy()
-            reply = u"started copying " + ", ".join(started)
-        elif re.match(u"status", msg):
-            msg = [u"Currently %d rsync processes are running." % (len(self.rsync))]
-            for d in self.rsync.keys():
-              msg.append(u"  " + d)
-            reply = os.linesep.join(msg)
-        else:
-            reply = u"I didn't understand '%s'" % (unicode(msg))
-        return reply
-
-def main(args=None):
-    bot = CopierBot()
-    bot.main(args)
-    
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
-
diff --git a/gaworkflow/automation/runner.py b/gaworkflow/automation/runner.py
deleted file mode 100644 (file)
index a5f6843..0000000
+++ /dev/null
@@ -1,222 +0,0 @@
-#!/usr/bin/env python
-from glob import glob
-import logging
-import os
-import re
-import sys
-import time
-import threading
-
-from benderjab import rpc
-
-from gaworkflow.pipeline.configure_run import *
-
-#s_fc = re.compile('FC[0-9]+')
-s_fc = re.compile('_[0-9a-zA-Z]*$')
-
-
-def _get_flowcell_from_rundir(run_dir):
-    """
-    Returns flowcell string based on run_dir.
-    Returns None and logs error if flowcell can't be found.
-    """
-    junk, dirname = os.path.split(run_dir)
-    mo = s_fc.search(dirname)
-    if not mo:
-        logging.error('RunDir 2 FlowCell error: %s' % (run_dir))
-        return None
-
-    return dirname[mo.start()+1:]
-    
-
-
-class Runner(rpc.XmlRpcBot):
-    """
-    Manage running pipeline jobs.
-    """    
-    def __init__(self, section=None, configfile=None):
-        #if configfile is None:
-        #    self.configfile = "~/.gaworkflow"
-        super(Runner, self).__init__(section, configfile)
-        
-        self.cfg['notify_users'] = None
-        self.cfg['genome_dir'] = None
-        self.cfg['base_analysis_dir'] = None
-
-        self.cfg['notify_users'] = None
-        self.cfg['notify_postanalysis'] = None
-
-        self.conf_info_dict = {}
-        
-        self.register_function(self.sequencingFinished)
-        #self.eventTasks.append(self.update)
-
-    
-    def read_config(self, section=None, configfile=None):
-        super(Runner, self).read_config(section, configfile)
-
-        self.genome_dir = self._check_required_option('genome_dir')
-        self.base_analysis_dir = self._check_required_option('base_analysis_dir')
-
-        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
-        #FIXME: process notify_postpipeline cfg
-        
-    
-    def _parser(self, msg, who):
-        """
-        Parse xmpp chat messages
-        """
-        help = u"I can send [start] a run, or report [status]"
-        if re.match(u"help", msg):
-            reply = help
-        elif re.match("status", msg):
-            words = msg.split()
-            if len(words) == 2:
-                reply = self.getStatusReport(words[1])
-            else:
-                reply = u"Status available for: %s" \
-                        % (', '.join([k for k in self.conf_info_dict.keys()]))
-        elif re.match(u"start", msg):
-            words = msg.split()
-            if len(words) == 2:
-                self.sequencingFinished(words[1])
-                reply = u"starting run for %s" % (words[1])
-            else:
-                reply = u"need runfolder name"
-        else:
-            reply = u"I didn't understand '%s'" %(msg)
-
-        logging.debug("reply: " + str(reply))
-        return reply
-
-
-    def getStatusReport(self, fc_num):
-        """
-        Returns text status report for flow cell number 
-        """
-        if fc_num not in self.conf_info_dict:
-            return "No record of a %s run." % (fc_num)
-
-        status = self.conf_info_dict[fc_num].status
-
-        if status is None:
-            return "No status information for %s yet." \
-                   " Probably still in configure step. Try again later." % (fc_num)
-
-        output = status.statusReport()
-
-        return '\n'.join(output)
-    
-            
-    def sequencingFinished(self, run_dir):
-        """
-        Sequencing (and copying) is finished, time to start the pipeline
-        """
-        logging.debug("received sequencing finished message")
-
-        # Setup config info object
-        ci = ConfigInfo()
-        ci.base_analysis_dir = self.base_analysis_dir
-        ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir)        
-
-        # get flowcell from run_dir name
-        flowcell = _get_flowcell_from_rundir(run_dir)
-
-        # Store ci object in dictionary
-        self.conf_info_dict[flowcell] = ci
-
-
-        # Launch the job in its own thread and return.
-        self.launchJob(run_dir, flowcell, ci)
-        return "started"
-        
-        
-    def pipelineFinished(self, run_dir):
-        # need to strip off self.watch_dir from rundir I suspect.
-        logging.info("pipeline finished in" + str(run_dir))
-        #pattern = self.watch_dir
-        #if pattern[-1] != os.path.sep:
-        #    pattern += os.path.sep
-        #stripped_run_dir = re.sub(pattern, "", run_dir)
-        #logging.debug("stripped to " + stripped_run_dir)
-
-        # Notify each user that the run has finished.
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, 'Pipeline run %s finished' % (run_dir))
-                
-        #if self.notify_runner is not None:
-        #    for r in self.notify_runner:
-        #        self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-
-    def reportMsg(self, msg):
-
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, msg)
-
-
-    def _runner(self, run_dir, flowcell, conf_info):
-
-        # retrieve config step
-        cfg_filepath = os.path.join(conf_info.analysis_dir,
-                                    'config32auto.txt')
-        status_retrieve_cfg = retrieve_config(conf_info,
-                                          flowcell,
-                                          cfg_filepath,
-                                          self.genome_dir)
-        if status_retrieve_cfg:
-            logging.info("Runner: Retrieve config: success")
-            self.reportMsg("Retrieve config (%s): success" % (run_dir))
-        else:
-            logging.error("Runner: Retrieve config: failed")
-            self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
-
-        
-        # configure step
-        if status_retrieve_cfg:
-            status = configure(conf_info)
-            if status:
-                logging.info("Runner: Configure: success")
-                self.reportMsg("Configure (%s): success" % (run_dir))
-                self.reportMsg(
-                    os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
-                )
-            else:
-                logging.error("Runner: Configure: failed")
-                self.reportMsg("Configure (%s): FAILED" % (run_dir))
-
-            #if successful, continue
-            if status:
-                # Setup status cmdline status monitor
-                #startCmdLineStatusMonitor(ci)
-                
-                # running step
-                print 'Running pipeline now!'
-                run_status = run_pipeline(conf_info)
-                if run_status is True:
-                    logging.info('Runner: Pipeline: success')
-                    self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
-                else:
-                    logging.info('Runner: Pipeline: failed')
-                    self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))
-
-
-    def launchJob(self, run_dir, flowcell, conf_info):
-        """
-        Starts up a thread for running the pipeline
-        """
-        t = threading.Thread(target=self._runner,
-                        args=[run_dir, flowcell, conf_info])
-        t.setDaemon(True)
-        t.start()
-        
-
-        
-def main(args=None):
-    bot = Runner()
-    return bot.main(args)
-    
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-    
diff --git a/gaworkflow/automation/spoolwatcher.py b/gaworkflow/automation/spoolwatcher.py
deleted file mode 100644 (file)
index 56ad42f..0000000
+++ /dev/null
@@ -1,229 +0,0 @@
-#!/usr/bin/env python
-import logging
-import os
-import re
-import sys
-import time
-#import glob
-
-from gaworkflow.util import mount
-
-# this uses pyinotify
-import pyinotify
-from pyinotify import EventsCodes
-
-from benderjab import rpc
-
-
-class WatcherEvents(object):
-    # two events need to be tracked
-    # one to send startCopy
-    # one to send OMG its broken
-    # OMG its broken needs to stop when we've seen enough
-    #  cycles
-    # this should be per runfolder. 
-    # read the xml files 
-    def __init__(self):
-        pass
-        
-
-class Handler(pyinotify.ProcessEvent):
-    def __init__(self, watchmanager, bot):
-        self.last_event_time = None
-        self.watchmanager = watchmanager
-        self.bot = bot
-
-    def process_IN_CREATE(self, event):
-        self.last_event_time = time.time()
-        msg = "Create: %s" %  os.path.join(event.path, event.name)
-        if event.name.lower() == "run.completed":
-            try:
-                self.bot.sequencingFinished(event.path)
-            except IOError, e:
-                logging.error("Couldn't send sequencingFinished")
-        logging.debug(msg)
-
-    def process_IN_DELETE(self, event):
-        logging.debug("Remove: %s" %  os.path.join(event.path, event.name))
-
-    def process_IN_UNMOUNT(self, event):
-        pathname = os.path.join(event.path, event.name)
-        logging.debug("IN_UNMOUNT: %s" % (pathname,))
-        self.bot.unmount_watch()
-
-class SpoolWatcher(rpc.XmlRpcBot):
-    """
-    Watch a directory and send a message when another process is done writing.
-    
-    This monitors a directory tree using inotify (linux specific) and,
-    once some files have been written, sends a message after <timeout>
-    seconds of no further file writing.
-    
-    (Basically when the solexa machine finishes dumping a round of data
-    this'll hopefully send out a message saying hey, look, there's data available.)
-    
-    """
-    # these params need to be in the config file
-    # I wonder where I should put the documentation
-    #:Parameters:
-    #    `watchdir` - which directory tree to monitor for modifications
-    #    `profile` - specify which .gaworkflow profile to use
-    #    `write_timeout` - how many seconds to wait for writes to finish to
-    #                      the spool
-    #    `notify_timeout` - how often to timeout from notify
-    
-    def __init__(self, section=None, configfile=None):
-        #if configfile is None:
-        #    self.configfile = "~/.gaworkflow"
-        super(SpoolWatcher, self).__init__(section, configfile)
-        
-        self.cfg['watchdir'] = None
-        self.cfg['write_timeout'] = 10
-        self.cfg['notify_users'] = None
-        self.cfg['notify_runner'] = None
-        
-        self.notify_timeout = 0.001
-        self.wm = pyinotify.WatchManager()
-        self.handler = Handler(self.wm, self)
-        self.notifier = pyinotify.Notifier(self.wm, self.handler)
-        self.wdd = None
-        self.mount_point = None
-        self.mounted = True
-        
-        self.notify_users = None
-        self.notify_runner = None
-        
-        self.eventTasks.append(self.process_notify)
-
-    def read_config(self, section=None, configfile=None):
-        super(SpoolWatcher, self).read_config(section, configfile)
-        
-        self.watch_dir = self._check_required_option('watchdir')
-        self.write_timeout = int(self.cfg['write_timeout'])
-        
-        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
-        try:
-          self.notify_runner = \
-             self._parse_user_list(self.cfg['notify_runner'],
-                                   require_resource=True)
-        except bot.JIDMissingResource:
-            msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.FATAL(msg)
-            raise bot.JIDMissingResource(msg)
-
-    def add_watch(self, watchdir=None):
-        """
-        start watching watchdir or self.watch_dir
-        we're currently limited to watching one directory tree.
-        """
-        # the one tree limit is mostly because self.wdd is a single item
-        # but managing it as a list might be a bit more annoying
-        if watchdir is None:
-            watchdir = self.watch_dir
-        logging.info("Watching:"+str(watchdir))
-
-        self.mount_point = mount.find_mount_point_for(watchdir)
-
-        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
-        # rec traverses the tree and adds all the directories that are there
-        # at the start.
-        # auto_add will add in new directories as they are created
-        self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
-
-    def unmount_watch(self):
-        if self.wdd is not None:
-            self.wm.rm_watch(self.wdd.values())
-            self.wdd = None
-            self.mounted = False
-            
-    def process_notify(self, *args):
-        # process the queue of events as explained above
-        self.notifier.process_events()
-        # check_events waits up to notify_timeout for new events
-        if self.notifier.check_events(self.notify_timeout):
-            # read notified events and enqueue them
-            self.notifier.read_events()
-            # should we do something?
-        # has something happened?
-        last_event_time = self.handler.last_event_time
-        if last_event_time is not None:
-            time_delta = time.time() - last_event_time
-            if time_delta > self.write_timeout:
-                self.startCopy()
-                self.handler.last_event_time = None
-        # handle unmounted filesystems
-        if not self.mounted:
-            if mount.is_mounted(self.mount_point):
-                # we've been remounted. Huzzah!
-                # restart the watch
-                self.add_watch()
-                self.mounted = True
-                logging.info(
-                    "%s was remounted, restarting watch" % \
-                        (self.mount_point)
-                )
-
-    def _parser(self, msg, who):
-        """
-        Parse xmpp chat messages
-        """
-        help = u"I can send [copy] message, or squencer [finished]"
-        if re.match(u"help", msg):
-            reply = help
-        elif re.match("copy", msg):            
-            self.startCopy()
-            reply = u"sent copy message"
-        elif re.match(u"finished", msg):
-            words = msg.split()
-            if len(words) == 2:
-                self.sequencingFinished(words[1])
-                reply = u"sending sequencing finished for %s" % (words[1])
-            else:
-                reply = u"need runfolder name"
-        else:
-            reply = u"I didn't understand '%s'" %(msg)            
-        return reply
-        
-    def start(self, daemonize):
-        """
-        Start application
-        """
-        self.add_watch()
-        super(SpoolWatcher, self).start(daemonize)
-        
-    def stop(self):
-        """
-        shutdown application
-        """
-        # destroy the inotify's instance on this interrupt (stop monitoring)
-        self.notifier.stop()
-        super(SpoolWatcher, self).stop()
-    
-    def startCopy(self):
-        logging.debug("writes seem to have stopped")
-        if self.notify_runner is not None:
-            for r in self.notify_runner:
-                self.rpc_send(r, tuple(), 'startCopy')
-        
-    def sequencingFinished(self, run_dir):
-        # need to strip off self.watch_dir from rundir I suspect.
-        logging.info("run.completed in " + str(run_dir))
-        pattern = self.watch_dir
-        if pattern[-1] != os.path.sep:
-            pattern += os.path.sep
-        stripped_run_dir = re.sub(pattern, "", run_dir)
-        logging.debug("stripped to " + stripped_run_dir)
-        if self.notify_users is not None:
-            for u in self.notify_users:
-                self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
-        if self.notify_runner is not None:
-            for r in self.notify_runner:
-                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-        
-def main(args=None):
-    bot = SpoolWatcher()
-    return bot.main(args)
-    
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
-
diff --git a/gaworkflow/frontend/__init__.py b/gaworkflow/frontend/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/gaworkflow/frontend/eland_config/__init__.py b/gaworkflow/frontend/eland_config/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/gaworkflow/frontend/eland_config/forms.py b/gaworkflow/frontend/eland_config/forms.py
deleted file mode 100644 (file)
index 2488359..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-from django import newforms as forms
-from django.newforms.util import ErrorList
-
-
-SPECIES_LIST = [#('--choose--', '--Choose--'),
-                ('hg18', 'Homo sapiens (Hg18)'),
-                ('Mm8', 'Mus musculus (Mm8)'),
-                ('arabv6', 'Arabidopsis thaliana v6'),
-                ('other', 'Other species (Include in description)')]
-
-
-class DivErrorList(ErrorList):
-  def __unicode__(self):
-    return self.as_divs()
-  
-  def as_divs(self):
-    if not self: return u''
-    return u'<div class="errorlist">%s</div>' % (''.join([u'<div class="error">%s</div>' % e for e in self]))
-
-
-
-class ConfigForm(forms.Form):
-  
-  flow_cell_number = forms.CharField(min_length=2)
-  run_date = forms.DateTimeField()
-  advanced_run = forms.BooleanField(required=False)
-  read_length = forms.IntegerField(min_value=1, initial=32)
-  #eland_repeat = forms.BooleanField()
-  
-  #needs a for loop or something to allow for n configurations
-  #analysis_type = forms.ChoiceField(choices=[('eland','eland')])
-  lane1_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane1_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  lane2_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane2_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  lane3_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane3_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  lane4_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane4_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  lane5_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane5_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  lane6_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane6_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  lane7_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane7_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  lane8_species = forms.ChoiceField(choices=SPECIES_LIST)
-  lane8_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-  
-  notes = forms.CharField(widget=forms.Textarea(attrs={'cols':'70'}), required=False)
-  
-  #lane_specific_read_length = forms.IntegerField(min_value=1)
-  
-  #eland_genome_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
-  #                                              ('lane2','2'),
-  #                                              ('lane3','3'),
-  #                                              ('lane4','4'),
-  #                                              ('lane5','5'),
-  #                                              ('lane6','6'),
-  #                                              ('lane7','7'),
-  #                                              ('lane8','8') ])
-  
-  #eland_genome = forms.ChoiceField(choices=)
-  
-  #use_bases_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
-  #                                              ('lane2','2'),
-  #                                              ('lane3','3'),
-  #                                              ('lane4','4'),
-  #                                              ('lane5','5'),
-  #                                              ('lane6','6'),
-  #                                              ('lane7','7'),
-  #                                              ('lane8','8') ])
-  
-  #use_bases_mask = forms.CharField()
-  
-  #sequence_format = forms.ChoiceField(choices=[('scarf', 'scarf')])
-  
-  
-  
-  #subject = forms.CharField(max_length=100)
-  #message = forms.CharField()
-  #sender = forms.EmailField()
-  #cc_myself = forms.BooleanField()
-  
-  def as_custom(self):
-    """
-    Displays customized html output
-    """
-    html = []
-    
-    fcn = self['flow_cell_number']
-    
-    html.append(fcn.label_tag() + ': ' + str(fcn) + str(fcn.errors) + '<br />')
-    
-    run_date = self['run_date']
-    html.append(run_date.label_tag() + ': ' + str(run_date) + str(run_date.errors) + '<br />')
-    
-    arun = self['advanced_run']
-    html.append(arun.label_tag() + ': ' + str(arun) + str(arun.errors) + '<br />')
-    
-    rl = self['read_length']
-    html.append(rl.label_tag() + ': ' + str(rl) + str(rl.errors) + '<br /><br />')
-    
-    html.append('<table border="0">')
-    html.append(' <tr><td>%s</td><td>%s</td><td>%s</td></tr>' \
-                % ('Lane', 'Species', 'Description'))
-    
-    l1s = self['lane1_species']
-    l1d = self['lane1_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('1', str(l1s), str(l1s.errors), str(l1d), str(l1d.errors)))
-    
-    l2s = self['lane2_species']
-    l2d = self['lane2_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('2', str(l2s), str(l2s.errors), str(l2d), str(l2d.errors)))
-    
-    l3s = self['lane3_species']
-    l3d = self['lane3_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('3', str(l3s), str(l3s.errors), str(l3d), str(l3d.errors)))
-    
-    l4s = self['lane4_species']
-    l4d = self['lane4_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('4', str(l4s), str(l4s.errors), str(l4d), str(l4d.errors)))
-    
-    l5s = self['lane5_species']
-    l5d = self['lane5_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('5', str(l5s), str(l5s.errors), str(l5d), str(l5d.errors)))
-    
-    l6s = self['lane6_species']
-    l6d = self['lane6_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('6', str(l6s), str(l6s.errors), str(l6d), str(l6d.errors)))
-    
-    l7s = self['lane7_species']
-    l7d = self['lane7_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('7', str(l7s), str(l7s.errors), str(l7d), str(l7d.errors)))
-    
-    l8s = self['lane8_species']
-    l8d = self['lane8_description']
-    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
-                % ('8', str(l8s), str(l8s.errors), str(l8d), str(l8d.errors)))
-    
-    html.append('</table><br />')
-    
-    notes = self['notes']
-    html.append('<p>Notes:</p>')
-    html.append(' %s<br />' % (str(notes)))
-    
-    return '\n'.join(html)
-    
-    
-    
\ No newline at end of file
diff --git a/gaworkflow/frontend/eland_config/models.py b/gaworkflow/frontend/eland_config/models.py
deleted file mode 100644 (file)
index 71a8362..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.db import models
-
-# Create your models here.
diff --git a/gaworkflow/frontend/eland_config/urls.py b/gaworkflow/frontend/eland_config/urls.py
deleted file mode 100644 (file)
index 2eeee70..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-from django.conf.urls.defaults import *
-
-urlpatterns = patterns('',
-    # Example:
-    
-    (r'^(?P<flowcell>\w+)/$', 'gaworkflow.frontend.eland_config.views.config'),
-    (r'^$', 'gaworkflow.frontend.eland_config.views.config'),
-    #(r'^$', 'gaworkflow.frontend.eland_config.views.index')
-
-)
diff --git a/gaworkflow/frontend/eland_config/views.py b/gaworkflow/frontend/eland_config/views.py
deleted file mode 100644 (file)
index f23f7ae..0000000
+++ /dev/null
@@ -1,413 +0,0 @@
-from django.http import HttpResponse
-from django.shortcuts import render_to_response
-from django.core.exceptions import ObjectDoesNotExist
-
-from gaworkflow.frontend.eland_config import forms
-from gaworkflow.frontend import settings
-from gaworkflow.frontend.fctracker import models
-
-import os
-import glob
-# Create your views here.
-
-
-def _validate_input(data):
-  #if data.find('..') == -1 or data.find('/') == -1 or data.find('\\') == -1:
-  return data.replace('..', '').replace('/', '_').replace('\\', '_')
-
-#def contact(request):
-#    if request.method == 'POST':
-#        form = ContactForm(request.POST)
-#        if form.is_valid():
-#            # Do form processing here...
-#            return HttpResponseRedirect('/url/on_success/')
-#    else:
-#        form = ContactForm()
-#    return
-
-
-
-#def _saveConfigFile(form):
-#  """
-#  Given a valid form, save eland config to file based on flowcell number.
-#  """
-#  assert form.is_valid()
-#  
-#  clean_data = form.cleaned_data
-#  flowcell = clean_data['flow_cell_number'].replace('/','_').replace('..', '__')
-#  
-#  file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
-#  
-#  f = open(file_path, 'w')
-#  cfg = generateElandConfig(form)
-#  f.write(cfg)
-#  f.close()
-#  
-#
-#def _saveToDb(form):
-#  """
-#  Save info to the database.
-#  """
-#  clean_data = form.cleaned_data
-#  
-#  fc_id = clean_data['flow_cell_number']
-#  
-#  try:
-#    fc = models.FlowCell.objects.get(flowcell_id=fc_id)
-#  except models.FlowCell.DoesNotExist:
-#    fc = models.FlowCell()
-#    
-#  fc.flowcell_id = fc_id
-#  fc.run_date = clean_data['run_date']
-#  
-#  #LANE 1
-#  fc.lane1_sample = clean_data['lane1_description']
-#  species_name = clean_data['lane1_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane1_species = specie
-#  
-#  #LANE 2
-#  fc.lane2_sample = clean_data['lane2_description']
-#  species_name = clean_data['lane2_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane2_species = specie
-#  
-#  #LANE 3
-#  fc.lane3_sample = clean_data['lane3_description']
-#  species_name = clean_data['lane3_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane3_species = specie
-#  
-#  #LANE 4
-#  fc.lane4_sample = clean_data['lane4_description']
-#  species_name = clean_data['lane4_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane4_species = specie
-#  
-#  #LANE 5
-#  fc.lane5_sample = clean_data['lane5_description']
-#  species_name = clean_data['lane5_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane5_species = specie
-#  
-#  #LANE 6
-#  fc.lane6_sample = clean_data['lane6_description']
-#  species_name = clean_data['lane6_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane6_species = specie
-#  
-#  #LANE 7
-#  fc.lane7_sample = clean_data['lane7_description']
-#  species_name = clean_data['lane7_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane7_species = specie
-#  
-#  #LANE 8
-#  fc.lane8_sample = clean_data['lane8_description']
-#  species_name = clean_data['lane8_species']
-#  try:
-#    specie = models.Specie.objects.get(scientific_name=species_name)
-#  except models.Specie.DoesNotExist:
-#    specie = models.Specie(scientific_name=species_name)
-#    specie.save()
-#  fc.lane8_species = specie
-#  
-#  fc.notes = clean_data['notes']
-#  
-#  fc.save()
-#  
-#  return fc
-#  
-#
-#def generateElandConfig(form):
-#  data = []
-#  
-#  form = form.cleaned_data
-#  
-#  BASE_DIR = '/data-store01/compbio/genomes'
-#  
-#  data.append("# FLOWCELL: %s" % (form['flow_cell_number']))
-#  data.append("#")
-#  
-#  notes = form['notes'].replace('\r\n', '\n').replace('\r', '\n')
-#  notes = notes.replace('\n', '\n#  ')
-#  data.append("# NOTES:")
-#  data.append("#  %s\n#" % (notes))
-#  
-#  #Convert all newline conventions to unix style
-#  l1d = form['lane1_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  l2d = form['lane2_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  l3d = form['lane3_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  l4d = form['lane4_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  l5d = form['lane5_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  l6d = form['lane6_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  l7d = form['lane7_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  l8d = form['lane8_description'].replace('\r\n', '\n').replace('\r', '\n')
-#  
-#  # Turn new lines into indented commented newlines
-#  l1d = l1d.replace('\n', '\n#  ')
-#  l2d = l2d.replace('\n', '\n#  ')
-#  l3d = l3d.replace('\n', '\n#  ')
-#  l4d = l4d.replace('\n', '\n#  ')
-#  l5d = l5d.replace('\n', '\n#  ')
-#  l6d = l6d.replace('\n', '\n#  ')
-#  l7d = l7d.replace('\n', '\n#  ')
-#  l8d = l8d.replace('\n', '\n#  ')
-#  
-#  data.append("# Lane1: %s" % (l1d))
-#  data.append("# Lane2: %s" % (l2d))
-#  data.append("# Lane3: %s" % (l3d))
-#  data.append("# Lane4: %s" % (l4d))
-#  data.append("# Lane5: %s" % (l5d))
-#  data.append("# Lane6: %s" % (l6d))
-#  data.append("# Lane7: %s" % (l7d))
-#  data.append("# Lane8: %s" % (l8d))
-#  
-#  #data.append("GENOME_DIR %s" % (BASE_DIR))
-#  #data.append("CONTAM_DIR %s" % (BASE_DIR))
-#  read_length = form['read_length']
-#  data.append("READ_LENGTH %d" % (read_length))
-#  #data.append("ELAND_REPEAT")
-#  data.append("ELAND_MULTIPLE_INSTANCES 8")
-#  
-#  #Construct genome dictionary to figure out what lanes to put
-#  # in the config file.
-#  genome_dict = {}
-#  l1s = form['lane1_species']
-#  genome_dict.setdefault(l1s, []).append('1')
-#  l2s = form['lane2_species']
-#  genome_dict.setdefault(l2s, []).append('2')
-#  l3s = form['lane3_species']
-#  genome_dict.setdefault(l3s, []).append('3')
-#  l4s = form['lane4_species']
-#  genome_dict.setdefault(l4s, []).append('4')
-#  l5s = form['lane5_species']
-#  genome_dict.setdefault(l5s, []).append('5')
-#  l6s = form['lane6_species']
-#  genome_dict.setdefault(l6s, []).append('6')
-#  l7s = form['lane7_species']
-#  genome_dict.setdefault(l7s, []).append('7')
-#  l8s = form['lane8_species']
-#  genome_dict.setdefault(l8s, []).append('8')
-#  
-#  genome_list = genome_dict.keys()
-#  genome_list.sort()
-#  
-#  #Loop through and create entries for each species.
-#  for genome in genome_list:
-#    lanes = ''.join(genome_dict[genome])
-#    data.append('%s:ANALYSIS eland' % (lanes))
-#    data.append('%s:READ_LENGTH %s' % (lanes, read_length))
-#    data.append('%s:ELAND_GENOME %s' % (lanes, os.path.join(BASE_DIR, genome)))
-#    data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
-#    
-#  data.append('SEQUENCE_FORMAT --scarf')
-#  
-#  return '\n'.join(data)
-
-
-def getElandConfig(flowcell, regenerate=False):
-  
-  file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
-  
-  #If we are regenerating the config file, skip
-  # reading of existing file. If the file doesn't
-  # exist, try to generate it from the DB.
-  if not regenerate and os.path.isfile(file_path):
-    f = open(file_path, 'r')
-    data = f.read()
-    f.close()
-    return data
-  
-  try:
-    fcObj = models.FlowCell.objects.get(flowcell_id__iexact=flowcell)
-  except ObjectDoesNotExist:
-    return None
-  
-  data = []
-  
-  #form = form.cleaned_data
-  
-  BASE_DIR = '/data-store01/compbio/genomes'
-  
-  data.append("# FLOWCELL: %s" % (fcObj.flowcell_id))
-  data.append("#")
-  
-  notes = fcObj.notes.replace('\r\n', '\n').replace('\r', '\n')
-  notes = notes.replace('\n', '\n#  ')
-  data.append("# NOTES:")
-  data.append("#  %s\n#" % (notes))
-  
-  #Convert all newline conventions to unix style
-  l1d = str(fcObj.lane_1_library.library_id) + '|' \
-          + fcObj.lane_1_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  l2d = str(fcObj.lane_2_library.library_id) + '|' \
-          + fcObj.lane_2_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  l3d = str(fcObj.lane_3_library.library_id) + '|' \
-          + fcObj.lane_3_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  l4d = str(fcObj.lane_4_library.library_id) + '|' \
-          + fcObj.lane_4_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  
-  l5d = str(fcObj.lane_5_library.library_id) + '|' \
-          + fcObj.lane_5_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  l6d = str(fcObj.lane_6_library.library_id) + '|' \
-          + fcObj.lane_6_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  l7d = str(fcObj.lane_7_library.library_id) + '|' \
-          + fcObj.lane_7_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  l8d = str(fcObj.lane_8_library.library_id) + '|' \
-          + fcObj.lane_8_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-  
-  # Turn new lines into indented commented newlines
-  l1d = l1d.replace('\n', '\n#  ')
-  l2d = l2d.replace('\n', '\n#  ')
-  l3d = l3d.replace('\n', '\n#  ')
-  l4d = l4d.replace('\n', '\n#  ')
-  l5d = l5d.replace('\n', '\n#  ')
-  l6d = l6d.replace('\n', '\n#  ')
-  l7d = l7d.replace('\n', '\n#  ')
-  l8d = l8d.replace('\n', '\n#  ')
-  
-  data.append("# Lane1: %s" % (l1d))
-  data.append("# Lane2: %s" % (l2d))
-  data.append("# Lane3: %s" % (l3d))
-  data.append("# Lane4: %s" % (l4d))
-  data.append("# Lane5: %s" % (l5d))
-  data.append("# Lane6: %s" % (l6d))
-  data.append("# Lane7: %s" % (l7d))
-  data.append("# Lane8: %s" % (l8d))
-  
-  #data.append("GENOME_DIR %s" % (BASE_DIR))
-  #data.append("CONTAM_DIR %s" % (BASE_DIR))
-  read_length = fcObj.read_length
-  data.append("READ_LENGTH %d" % (read_length))
-  #data.append("ELAND_REPEAT")
-  data.append("ELAND_MULTIPLE_INSTANCES 8")
-  
-  #Construct genome dictionary to figure out what lanes to put
-  # in the config file.
-  genome_dict = {}
-  
-  #l1s = form['lane1_species']
-  l1s = fcObj.lane_1_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_1_library.library_species.use_genome_build
-  genome_dict.setdefault(l1s, []).append('1')
-  l2s = fcObj.lane_2_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_2_library.library_species.use_genome_build
-  genome_dict.setdefault(l2s, []).append('2')
-  l3s = fcObj.lane_3_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_3_library.library_species.use_genome_build
-  genome_dict.setdefault(l3s, []).append('3')
-  l4s = fcObj.lane_4_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_4_library.library_species.use_genome_build
-  genome_dict.setdefault(l4s, []).append('4')
-  l5s = fcObj.lane_5_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_5_library.library_species.use_genome_build
-  genome_dict.setdefault(l5s, []).append('5')
-  l6s = fcObj.lane_6_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_6_library.library_species.use_genome_build
-  genome_dict.setdefault(l6s, []).append('6')
-  l7s = fcObj.lane_7_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_7_library.library_species.use_genome_build
-  genome_dict.setdefault(l7s, []).append('7')
-  l8s = fcObj.lane_8_library.library_species.scientific_name #+ '|' + \
-        #fcObj.lane_8_library.library_species.use_genome_build
-  genome_dict.setdefault(l8s, []).append('8')
-  
-  genome_list = genome_dict.keys()
-  genome_list.sort()
-  
-  #Loop through and create entries for each species.
-  for genome in genome_list:
-    lanes = ''.join(genome_dict[genome])
-    data.append('%s:ANALYSIS eland' % (lanes))
-    data.append('%s:READ_LENGTH %s' % (lanes, read_length))
-    data.append('%s:ELAND_GENOME %s' % (lanes, '%%(%s)s' % (genome)))
-    data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
-    
-  data.append('SEQUENCE_FORMAT --scarf')
-  
-  data = '\n'.join(data)
-  
-  f = open(file_path, 'w')
-  f.write(data)
-  f.close()
-  
-  return data
-
-
-
-def config(request, flowcell=None):
-  """
-  Returns eland config file for a given flowcell number,
-  or returns a list of available flowcell numbers.
-  """
-  
-  # Provide INDEX of available Flowcell config files.
-  if flowcell is None:
-    #Find all FC* config files and report an index html file
-    #fc_list = [ os.path.split(file_path)[1] for file_path in glob.glob(os.path.join(settings.UPLOADTO_CONFIG_FILE, 'FC*')) ]
-    fc_list = [ fc.flowcell_id for fc in models.FlowCell.objects.all() ]
-    
-    #Convert FC* list to html links
-    fc_html = [ '<a href="/eland_config/%s/">%s</a>' % (fc_name, fc_name) for fc_name in fc_list ]
-      
-    return HttpResponse('<br />'.join(fc_html))
-  
-  #FIXME: Should validate flowcell input before using.
-  flowcell = _validate_input(flowcell)
-  cfg = getElandConfig(flowcell, regenerate=True)
-  
-  if not cfg:
-    return HttpResponse("Hmm, config file for %s does not seem to exist." % (flowcell))
-  
-  
-  return HttpResponse(cfg, mimetype="text/plain")
-
-
-
-
-#def index(request):
-#  """
-#  Return a form for filling out information about the flowcell
-#  """
-#  if request.method == 'POST':
-#    form = forms.ConfigForm(request.POST, error_class=forms.DivErrorList)
-#    if form.is_valid():
-#      #cfg = generateElandConfig(form)
-#      _saveConfigFile(form)
-#      _saveToDb(form)
-#      return HttpResponse("Eland Config Saved!", mimetype="text/plain")
-#    else:
-#      return render_to_response('config_form.html', {'form': form })
-#  
-#  else:   
-#    fm = forms.ConfigForm(error_class=forms.DivErrorList)
-#    return render_to_response('config_form.html', {'form': fm })
diff --git a/gaworkflow/frontend/fctracker/__init__.py b/gaworkflow/frontend/fctracker/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/gaworkflow/frontend/fctracker/models.py b/gaworkflow/frontend/fctracker/models.py
deleted file mode 100644 (file)
index 231afec..0000000
+++ /dev/null
@@ -1,283 +0,0 @@
-from django.db import models
-from django.contrib.auth.models import User
-from gaworkflow.frontend import settings
-
-# Create your models here.
-
-class Antibody(models.Model):
-  antigene = models.CharField(max_length=500, db_index=True)
-  catalog = models.CharField(max_length=50, unique=True, db_index=True)
-  antibodies = models.CharField(max_length=500, db_index=True)
-  source = models.CharField(max_length=500, blank=True, db_index=True)
-  biology = models.TextField(blank=True)
-  notes = models.TextField(blank=True)
-  def __str__(self):
-    return '%s - %s (%s)' % (self.antigene, self.antibodies, self.catalog)
-  class Meta:
-    verbose_name_plural = "antibodies"
-    ordering = ["antigene"]
-  class Admin:
-      list_display = ('antigene','antibodies','catalog','source','biology','notes')
-      list_filter = ('antibodies','source')
-      fields = (
-        (None, {
-            'fields': (('antigene','antibodies'),('catalog','source'),('biology'),('notes'))
-        }),
-       )
-
-class Cellline(models.Model):
-  cellline_name = models.CharField(max_length=100, unique=True, db_index=True)
-  notes = models.TextField(blank=True)
-  def __str__(self):
-    return '%s' % (self.cellline_name)
-
-  class Meta:
-    ordering = ["cellline_name"]
-
-  class Admin:
-      fields = (
-        (None, {
-            'fields': (('cellline_name'),('notes'),)
-        }),
-       )
-
-class Condition(models.Model):
-  condition_name = models.CharField(max_length=2000, unique=True, db_index=True)
-  notes = models.TextField(blank=True)
-  def __str__(self):
-    return '%s' % (self.condition_name)
-
-  class Meta:
-    ordering = ["condition_name"]
-
-  class Admin:
-      fields = (
-        (None, {
-            'fields': (('condition_name'),('notes'),)
-        }),
-       )
-
-class Species(models.Model):
-  
-  scientific_name = models.CharField(max_length=256, unique=False, db_index=True, core=True)
-  common_name = models.CharField(max_length=256, blank=True)
-  use_genome_build = models.CharField(max_length=100, blank=False, null=False)
-
-  def __str__(self):
-    return '%s (%s)|%s' % (self.scientific_name, self.common_name, self.use_genome_build)
-  
-  class Meta:
-    verbose_name_plural = "species"
-    ordering = ["scientific_name"]
-  
-  class Admin:
-      fields = (
-        (None, {
-            'fields': (('scientific_name', 'common_name'), ('use_genome_build'))
-        }),
-      )
-
-class Lab(models.Model):
-  
-  name = models.CharField(max_length=100, blank=False, unique=True)
-  
-  def __str__(self):
-    return self.name
-  
-  class Admin:
-    pass
-
-class UserProfile(models.Model):
-  
-  # This allows you to use user.get_profile() to get this object
-  user = models.ForeignKey(User, unique=True)
-
-  lab = models.ForeignKey(Lab)
-  #email = models.CharField(max_length=50, blank=True, null=True)
-  
-  def __str__(self):
-    return '%s (%s lab)' % (self.user, self.lab)
-  
-  class Meta:
-    #verbose_name_plural = "people"
-    #ordering = ["lab"]
-    pass
-    
-  class Admin:
-    #fields = (
-    #  (None, {
-    #      'fields': (('email', 'lab'), ('email'))
-    #  }),
-    #)
-    pass
-
-
-class Library(models.Model):
-  
-  library_id = models.CharField(max_length=30, primary_key=True, db_index=True, core=True)
-  library_name = models.CharField(max_length=100, unique=True, core=True)
-  library_species = models.ForeignKey(Species, core=True)
-  cell_line = models.ForeignKey(Cellline,core=True)
-  condition = models.ForeignKey(Condition,core=True)
-  antibody = models.ForeignKey(Antibody,blank=True,null=True,core=True)
-  
-  EXPERIMENT_TYPES = (
-      ('INPUT_RXLCh','INPUT_RXLCh'),
-      ('ChIP-seq', 'ChIP-seq'),
-      ('Sheared', 'Sheared'),
-      ('RNA-seq', 'RNA-seq'),
-      ('Methyl-seq', 'Methyl-seq'),
-      ('DIP-seq', 'DIP-seq'),
-    ) 
-  experiment_type = models.CharField(max_length=50, choices=EXPERIMENT_TYPES,
-                                     default='RNA-seq')
-  
-  creation_date = models.DateField(blank=True, null=True)
-  made_for = models.ForeignKey(User)
-  made_by = models.CharField(max_length=50, blank=True, default="Lorian")
-  
-  PROTOCOL_END_POINTS = (
-      ('?', 'Unknown'),
-      ('Sample', 'Raw sample'),
-      ('Progress', 'In progress'),
-      ('1A', 'Ligation, then gel'),
-      ('PCR', 'Ligation, then PCR'),
-      ('1Ab', 'Ligation, PCR, then gel'),
-      ('1Aa', 'Ligation, gel, then PCR'),
-      ('2A', 'Ligation, PCR, gel, PCR'),
-      ('Done', 'Completed'),
-    )
-  stopping_point = models.CharField(max_length=25, choices=PROTOCOL_END_POINTS, default='Done')
-  amplified_from_sample = models.ForeignKey('self', blank=True, null=True)  
-  
-  undiluted_concentration = models.DecimalField("Undiluted concentration (ng/ul)", max_digits=5, decimal_places=2, default=0, blank=True, null=True)
-  successful_pM = models.DecimalField(max_digits=5, decimal_places=2, blank=True, null=True)
-  ten_nM_dilution = models.BooleanField()
-  avg_lib_size = models.IntegerField(default=225, blank=True, null=True)
-  notes = models.TextField(blank=True)
-  
-  def __str__(self):
-    return '#%s: %s' % (self.library_id, self.library_name)
-  
-  class Meta:
-    verbose_name_plural = "libraries"
-    ordering = ["-library_id"]
-  
-  class Admin:
-    date_hierarchy = "creation_date"
-    save_as = True
-    save_on_top = True
-    search_fields = ['library_name', 'library_id']
-    list_display = ('library_id', 'library_name', 'made_for', 'creation_date', 'stopping_point')
-    list_display_links = ('library_id', 'library_name')
-    list_filter = ('stopping_point', 'library_species', 'made_for', 'made_by', 'experiment_type')
-    fields = (
-        (None, {
-            'fields': (('library_id', 'library_name'), ('library_species', 'experiment_type'),)
-        }),
-        ('Creation Information:', {
-            'fields' : (('made_for', 'made_by', 'creation_date'), ('stopping_point', 'amplified_from_sample'), ('undiluted_concentration', 'avg_lib_size'), 'notes',)
-        }),
-       ('Run Information:', {
-           'fields' : (('ten_nM_dilution','successful_pM'),)
-       }),
-    )
-
-class FlowCell(models.Model):
-  
-  flowcell_id = models.CharField(max_length=20, unique=True, db_index=True, core=True)
-  run_date = models.DateTimeField(core=True)
-  advanced_run = models.BooleanField(default=False)
-  read_length = models.IntegerField(default=32)
-  
-  
-  FLOWCELL_STATUSES = (
-      ('No', 'Not run'),
-      ('F', 'Failed'),
-      ('Del', 'Data deleted'),
-      ('A', 'Data available'),
-      ('In', 'In progress'),
-    )
-  flowcell_status = models.CharField(max_length=10, choices=FLOWCELL_STATUSES)
-  
-  lane_1_library = models.ForeignKey(Library, related_name="lane_1_library")
-  lane_2_library = models.ForeignKey(Library, related_name="lane_2_library")
-  lane_3_library = models.ForeignKey(Library, related_name="lane_3_library")
-  lane_4_library = models.ForeignKey(Library, related_name="lane_4_library")
-  lane_5_library = models.ForeignKey(Library, related_name="lane_5_library")
-  lane_6_library = models.ForeignKey(Library, related_name="lane_6_library")
-  lane_7_library = models.ForeignKey(Library, related_name="lane_7_library")
-  lane_8_library = models.ForeignKey(Library, related_name="lane_8_library")
-
-  lane_1_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  lane_2_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  lane_3_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  lane_4_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  lane_5_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  lane_6_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  lane_7_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  lane_8_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-  
-  lane_1_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  lane_2_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  lane_3_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  lane_4_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  lane_5_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  lane_6_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  lane_7_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  lane_8_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-  
-  kit_1000148 = models.IntegerField(blank=True, null=True)
-  kit_1000147 = models.IntegerField(blank=True, null=True)
-  kit_1000183 = models.IntegerField(blank=True, null=True)
-  kit_1001625 = models.IntegerField(blank=True, null=True)
-  
-  cluster_station_id = models.CharField(max_length=50, blank=True, null=True)
-  sequencer_id = models.CharField(max_length=50, blank=True, null=True)
-  
-  notes = models.TextField(blank=True)
-
-  def __str__(self):
-    return '%s (%s)' % (self.flowcell_id, self.run_date) 
-  
-  class Meta:
-    ordering = ["-run_date"]
-  
-  class Admin:
-    date_hierarchy = "run_date"
-    save_as = True
-    save_on_top = True
-    search_fields = ['flowcell_id', 'lane_1_library__library_id', 'lane_1_library__library_name', 'lane_2_library__library_id', 'lane_2_library__library_name', 'lane_3_library__library_id', 'lane_3_library__library_name', 'lane_4_library__library_id', 'lane_4_library__library_name', 'lane_5_library__library_id', 'lane_5_library__library_name', 'lane_6_library__library_id', 'lane_6_library__library_name', 'lane_7_library__library_id', 'lane_7_library__library_name', 'lane_8_library__library_id', 'lane_8_library__library_name']
-    list_display = ('run_date', 'flowcell_status', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
-    list_display_links = ('run_date', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
-    fields = (
-        (None, {
-            'fields': ('run_date', ('flowcell_id', 'flowcell_status'), ('read_length', 'advanced_run'),)
-        }),
-        ('Lanes:', {
-            'fields' : (('lane_1_library', 'lane_1_pM'), ('lane_2_library', 'lane_2_pM'), ('lane_3_library', 'lane_3_pM'), ('lane_4_library', 'lane_4_pM'), ('lane_5_library', 'lane_5_pM'), ('lane_6_library', 'lane_6_pM'), ('lane_7_library', 'lane_7_pM'), ('lane_8_library', 'lane_8_pM'),)
-        }),
-       (None, {
-           'fields' : ('notes',)
-       }),
-       ('Kits & Machines:', {
-           'classes': 'collapse',
-           'fields' : (('kit_1000148', 'kit_1000147', 'kit_1000183', 'kit_1001625'), ('cluster_station_id', 'sequencer_id'),)
-       }),
-       ('Cluster Estimates:', {
-           'classes': 'collapse',
-           'fields' : (('lane_1_cluster_estimate', 'lane_2_cluster_estimate'), ('lane_3_cluster_estimate', 'lane_4_cluster_estimate'), ('lane_5_cluster_estimate', 'lane_6_cluster_estimate'), ('lane_7_cluster_estimate', 'lane_8_cluster_estimate',),)
-       }),
-    )
-
-# Did not finish implementing, removing to avoid further confusion.
-#class ElandResult(models.Model):
-#  
-#  class Admin: pass
-#  
-#  flow_cell = models.ForeignKey(FlowCell)
-#  config_file = models.FileField(upload_to=settings.UPLOADTO_CONFIG_FILE)
-#  eland_result_pack = models.FileField(upload_to=settings.UPLOADTO_ELAND_RESULT_PACKS)
-#  bed_file_pack = models.FileField(upload_to=settings.UPLOADTO_BED_PACKS)
-#  
-#  notes = models.TextField(blank=True)
diff --git a/gaworkflow/frontend/fctracker/views.py b/gaworkflow/frontend/fctracker/views.py
deleted file mode 100644 (file)
index 2299e4f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-# Create your views here.
\ No newline at end of file
diff --git a/gaworkflow/frontend/manage.py b/gaworkflow/frontend/manage.py
deleted file mode 100644 (file)
index 5e78ea9..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env python
-from django.core.management import execute_manager
-try:
-    import settings # Assumed to be in the same directory.
-except ImportError:
-    import sys
-    sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
-    sys.exit(1)
-
-if __name__ == "__main__":
-    execute_manager(settings)
diff --git a/gaworkflow/frontend/settings.py b/gaworkflow/frontend/settings.py
deleted file mode 100644 (file)
index 853cc1e..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-import os
-
-# Django settings for elandifier project.
-
-DEBUG = True
-TEMPLATE_DEBUG = DEBUG
-
-ADMINS = (
-    # ('Your Name', 'your_email@domain.com'),
-)
-
-MANAGERS = ADMINS
-
-DATABASE_ENGINE = 'sqlite3'           # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'.
-DATABASE_NAME = os.path.abspath('../../fctracker.db')             # Or path to database file if using sqlite3.
-DATABASE_USER = ''             # Not used with sqlite3.
-DATABASE_PASSWORD = ''         # Not used with sqlite3.
-DATABASE_HOST = ''             # Set to empty string for localhost. Not used with sqlite3.
-DATABASE_PORT = ''             # Set to empty string for default. Not used with sqlite3.
-
-# Local time zone for this installation. Choices can be found here:
-# http://www.postgresql.org/docs/8.1/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE
-# although not all variations may be possible on all operating systems.
-# If running in a Windows environment this must be set to the same as your
-# system time zone.
-TIME_ZONE = 'America/Los_Angeles'
-
-# Language code for this installation. All choices can be found here:
-# http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
-# http://blogs.law.harvard.edu/tech/stories/storyReader$15
-LANGUAGE_CODE = 'en-us'
-
-SITE_ID = 1
-
-# If you set this to False, Django will make some optimizations so as not
-# to load the internationalization machinery.
-USE_I18N = True
-
-# Absolute path to the directory that holds media.
-# Example: "/home/media/media.lawrence.com/"
-MEDIA_ROOT = ''
-
-# URL that handles the media served from MEDIA_ROOT.
-# Example: "http://media.lawrence.com"
-MEDIA_URL = ''
-
-# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
-# trailing slash.
-# Examples: "http://foo.com/media/", "/media/".
-ADMIN_MEDIA_PREFIX = '/media/'
-
-# Make this unique, and don't share it with anybody.
-SECRET_KEY = '(ekv^=gf(j9f(x25@a7r+8)hqlz%&_1!tw^75l%^041#vi=@4n'
-
-# List of callables that know how to import templates from various sources.
-TEMPLATE_LOADERS = (
-    'django.template.loaders.filesystem.load_template_source',
-    'django.template.loaders.app_directories.load_template_source',
-#     'django.template.loaders.eggs.load_template_source',
-)
-
-MIDDLEWARE_CLASSES = (
-    'django.middleware.common.CommonMiddleware',
-    'django.contrib.sessions.middleware.SessionMiddleware',
-    'django.contrib.auth.middleware.AuthenticationMiddleware',
-    'django.middleware.doc.XViewMiddleware',
-)
-
-ROOT_URLCONF = 'gaworkflow.frontend.urls'
-
-TEMPLATE_DIRS = (
-    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
-    # Always use forward slashes, even on Windows.
-    # Don't forget to use absolute paths, not relative paths.
-    os.path.abspath("../../templates"),
-)
-
-INSTALLED_APPS = (
-    'django.contrib.admin',
-    'django.contrib.auth',
-    'django.contrib.contenttypes',
-    'django.contrib.sessions',
-    'django.contrib.sites',
-    'gaworkflow.frontend.eland_config',
-    'gaworkflow.frontend.fctracker',
-    'django.contrib.databrowse',
-)
-
-# Project specific settings
-UPLOADTO_HOME = os.path.abspath('../../uploads')
-UPLOADTO_CONFIG_FILE = os.path.join(UPLOADTO_HOME, 'eland_config')
-UPLOADTO_ELAND_RESULT_PACKS = os.path.join(UPLOADTO_HOME, 'eland_results')
-UPLOADTO_BED_PACKS = os.path.join(UPLOADTO_HOME, 'bed_packs')
-
diff --git a/gaworkflow/frontend/urls.py b/gaworkflow/frontend/urls.py
deleted file mode 100644 (file)
index 0c67015..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-from django.conf.urls.defaults import *
-
-# Databrowser:
-from django.contrib import databrowse
-from fctracker.models import Library, FlowCell
-databrowse.site.register(Library)
-databrowse.site.register(FlowCell)
-
-urlpatterns = patterns('',
-    # Base:
-    (r'^eland_config/', include('gaworkflow.frontend.eland_config.urls')),
-    # Admin:
-    (r'^admin/', include('django.contrib.admin.urls')),
-    # Databrowser:
-    (r'^databrowse/(.*)', databrowse.site.root),
-)
diff --git a/gaworkflow/pipeline/__init__.py b/gaworkflow/pipeline/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/gaworkflow/pipeline/bustard.py b/gaworkflow/pipeline/bustard.py
deleted file mode 100644 (file)
index 5a7a1f3..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-
-from datetime import date
-from glob import glob
-import logging
-import os
-import time
-import re
-
-from gaworkflow.pipeline.runfolder import \
-   ElementTree, \
-   VERSION_RE, \
-   EUROPEAN_STRPTIME
-
-class Phasing(object):
-    PHASING = 'Phasing'
-    PREPHASING = 'Prephasing'
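-
-    # XML layout produced/consumed by get_elements/set_elements:
-    #   <Phasing lane="N"><Phasing>x.x</Phasing><Prephasing>y.y</Prephasing></Phasing>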
-
-    def __init__(self, fromfile=None, xml=None):
-        self.lane = None
-        self.phasing = None
-        self.prephasing = None
-
-        if fromfile is not None:
-            self._initialize_from_file(fromfile)
-        elif xml is not None:
-            self.set_elements(xml)
-
-    def _initialize_from_file(self, pathname):
-        path, name = os.path.split(pathname)
-        basename, ext = os.path.splitext(name)
-        # the last character of the param base filename should be the
-        # lane number
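-        # e.g. params4.xml -> lane 4 (matches the params?.xml glob used
-        # by the bustard() factory below)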
-        tree = ElementTree.parse(pathname).getroot()
-        self.set_elements(tree)
-        self.lane = int(basename[-1])
-
-    def get_elements(self):
-        root = ElementTree.Element(Phasing.PHASING, {'lane': str(self.lane)})
-        phasing = ElementTree.SubElement(root, Phasing.PHASING)
-        phasing.text = str(self.phasing)
-        prephasing = ElementTree.SubElement(root, Phasing.PREPHASING)
-        prephasing.text = str(self.prephasing)
-        return root
-
-    def set_elements(self, tree):
-        if tree.tag not in ('Phasing', 'Parameters'):
-            raise ValueError('expected Phasing or Parameters')
-        lane = tree.attrib.get('lane', None)
-        if lane is not None:
-            self.lane = int(lane)
-        for element in list(tree):
-            if element.tag == Phasing.PHASING:
-                self.phasing = float(element.text)
-            elif element.tag == Phasing.PREPHASING:
-                self.prephasing = float(element.text)
-
-class Bustard(object):
-    XML_VERSION = 1
-
-    # Xml Tags
-    BUSTARD = 'Bustard'
-    SOFTWARE_VERSION = 'version'
-    DATE = 'run_time'
-    USER = 'user'
-    PARAMETERS = 'Parameters'
-
-    def __init__(self, xml=None):
-        self.version = None
-        self.date = date.today()
-        self.user = None
-        self.phasing = {}
-
-        if xml is not None:
-            self.set_elements(xml)
-
-    def _get_time(self):
-        return time.mktime(self.date.timetuple())
-    time = property(_get_time, doc='return run time as seconds since epoch')
-
-    def dump(self):
-        print "Bustard version:", self.version
-        print "Run date", self.date
-        print "user:", self.user
-        for lane, tree in self.phasing.items():
-            print lane
-            print tree
-
-    def get_elements(self):
-        root = ElementTree.Element('Bustard', 
-                                   {'version': str(Bustard.XML_VERSION)})
-        version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION)
-        version.text = self.version
-        run_date = ElementTree.SubElement(root, Bustard.DATE)
-        run_date.text = str(self.time)
-        user = ElementTree.SubElement(root, Bustard.USER)
-        user.text = self.user
-        params = ElementTree.SubElement(root, Bustard.PARAMETERS)
-        for p in self.phasing.values():
-            params.append(p.get_elements())
-        return root
-
-    def set_elements(self, tree):
-        if tree.tag != Bustard.BUSTARD:
-            raise ValueError('Expected "Bustard" SubElements')
-        xml_version = int(tree.attrib.get('version', 0))
-        if xml_version > Bustard.XML_VERSION:
-            logging.warn('Bustard XML tree is a higher version than this class')
-        for element in list(tree):
-            if element.tag == Bustard.SOFTWARE_VERSION:
-                self.version = element.text
-            elif element.tag == Bustard.DATE:
-                self.date = date.fromtimestamp(float(element.text))
-            elif element.tag == Bustard.USER:
-                self.user = element.text
-            elif element.tag == Bustard.PARAMETERS:
-                for param in element:
-                    p = Phasing(xml=param)
-                    self.phasing[p.lane] = p
-            else:
-                raise ValueError("Unrecognized tag: %s" % (element.tag,))
-        
-
-
-def bustard(pathname):
-    """
-    Construct a Bustard object from pathname
-    """
-    b = Bustard()
-    path, name = os.path.split(pathname)
-    groups = name.split("_")
-    version = re.search(VERSION_RE, groups[0])
-    b.version = version.group(1)
-    t = time.strptime(groups[1], EUROPEAN_STRPTIME)
-    b.date = date(*t[0:3])
-    b.user = groups[2]
-    paramfiles = glob(os.path.join(pathname, "params?.xml"))
-    for paramfile in paramfiles:
-        phasing = Phasing(paramfile)
-        assert (phasing.lane >= 1 and phasing.lane <= 8)
-        b.phasing[phasing.lane] = phasing
-    return b
-
-def fromxml(tree):
-    b = Bustard()
-    b.set_elements(tree)
-    return b
diff --git a/gaworkflow/pipeline/configure_run.py b/gaworkflow/pipeline/configure_run.py
deleted file mode 100644 (file)
index e75d73a..0000000
+++ /dev/null
@@ -1,606 +0,0 @@
-#!/usr/bin/python
-import subprocess
-import logging
-import time
-import re
-import os
-
-from gaworkflow.pipeline.retrieve_config import getCombinedOptions, saveConfigFile
-from gaworkflow.pipeline.retrieve_config import FlowCellNotFound, WebError404
-from gaworkflow.pipeline.genome_mapper import DuplicateGenome, getAvailableGenomes, constructMapperDict
-from gaworkflow.pipeline.run_status import GARunStatus
-
-from pyinotify import WatchManager, ThreadedNotifier
-from pyinotify import EventsCodes, ProcessEvent
-
-class ConfigInfo:
-  
-  def __init__(self):
-    #run_path = firecrest analysis directory to run analysis from
-    self.run_path = None
-    self.bustard_path = None
-    self.config_filepath = None
-    self.status = None
-
-    #top level directory where all analyses are placed
-    self.base_analysis_dir = None
-    #analysis_dir, top level analysis dir...
-    # base_analysis_dir + '/070924_USI-EAS44_0022_FC12150'
-    self.analysis_dir = None
-
-
-  def createStatusObject(self):
-    """
-    Creates a status object which can be queried for
-    status of running the pipeline
-
-    returns True if object created
-    returns False if object cannot be created
-    """
-    if self.config_filepath is None:
-      return False
-
-    self.status = GARunStatus(self.config_filepath)
-    return True
-
-
-
-####################################
-# inotify event processor
-
-s_firecrest_finished = re.compile('Firecrest[0-9\._\-A-Za-z]+/finished.txt')
-s_bustard_finished = re.compile('Bustard[0-9\._\-A-Za-z]+/finished.txt')
-s_gerald_finished = re.compile('GERALD[0-9\._\-A-Za-z]+/finished.txt')
-
-s_gerald_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/GERALD[0-9\._\-A-Za-z]+/')
-s_bustard_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/')
-s_firecrest_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/')
-
-class RunEvent(ProcessEvent):
-
-  def __init__(self, conf_info):
-
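-    # each flag flips to True when that step's finished.txt appears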
-    self.run_status_dict = {'firecrest': False,
-                            'bustard': False,
-                            'gerald': False}
-
-    self._ci = conf_info
-
-    ProcessEvent.__init__(self)
-    
-
-  def process_IN_CREATE(self, event):
-    fullpath = os.path.join(event.path, event.name)
-    if s_finished.search(fullpath):
-      logging.info("File Found: %s" % (fullpath))
-
-      if s_firecrest_finished.search(fullpath):
-        self.run_status_dict['firecrest'] = True
-        self._ci.status.updateFirecrest(event.name)
-      elif s_bustard_finished.search(fullpath):
-        self.run_status_dict['bustard'] = True
-        self._ci.status.updateBustard(event.name)
-      elif s_gerald_finished.search(fullpath):
-        self.run_status_dict['gerald'] = True
-        self._ci.status.updateGerald(event.name)
-
-    #WARNING: The following order is important!!
-    # Firecrest regex will catch all gerald, bustard, and firecrest
-    # Bustard regex will catch all gerald and bustard
-    # Gerald regex will catch all gerald
-    # So, order needs to be Gerald, Bustard, Firecrest, or this
-    #  won't work properly.
-    elif s_gerald_all.search(fullpath):
-      self._ci.status.updateGerald(event.name)
-    elif s_bustard_all.search(fullpath):
-      self._ci.status.updateBustard(event.name)
-    elif s_firecrest_all.search(fullpath):
-      self._ci.status.updateFirecrest(event.name)
-      
-    #print "Create: %s" % (os.path.join(event.path, event.name))
-
-  def process_IN_DELETE(self, event):
-    #print "Remove %s" % (os.path.join(event.path, event.name))
-    pass
-
-
-
-
-#FLAGS
-# Config Step Error
-RUN_ABORT = 'abort'
-# Run Step Error
-RUN_FAILED = 'failed'
-
-
-#####################################
-# Configure Step (goat_pipeline.py)
-#Info
-s_start = re.compile('Starting Genome Analyzer Pipeline')
-s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
-s_generating = re.compile('^Generating journals, Makefiles')
-s_seq_folder = re.compile('^Sequence folder: ')
-s_seq_folder_sub = re.compile('want to make ')
-s_stderr_taskcomplete = re.compile('^Task complete, exiting')
-
-#Errors
-s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
-s_species_dir_err = re.compile('Error: Lane [1-8]:')
-s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
-s_missing_cycles = re.compile('^Error: Tile s_[1-8]_[0-9]+: Different number of cycles: [0-9]+ instead of [0-9]+')
-
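-# set True after the first missing-cycles error so it is only logged once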
-SUPPRESS_MISSING_CYCLES = False
-
-
-##Ignore - Example of out above each ignore regex.
-#NOTE: Commenting out an ignore will cause it to be
-# logged as DEBUG with the logging module.
-#CF_STDERR_IGNORE_LIST = []
-s_skip = re.compile('s_[0-8]_[0-9]+')
-
-
-##########################################
-# Pipeline Run Step (make -j8 recursive)
-
-##Info
-s_finished = re.compile('finished')
-
-##Errors
-s_make_error = re.compile('^make[\S\s]+Error')
-s_no_gnuplot = re.compile('gnuplot: command not found')
-s_no_convert = re.compile('^Can\'t exec "convert"')
-s_no_ghostscript = re.compile('gs: command not found')
-
-##Ignore - Example of out above each ignore regex.
-#NOTE: Commenting out an ignore will cause it to be
-# logged as DEBUG with the logging module.
-#
-PL_STDERR_IGNORE_LIST = []
-# Info: PF 11802
-PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') )
-# About to analyse intensity file s_4_0101_sig2.txt
-PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') )
-# Will send output to standard output
-PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') )
-# Found 31877 clusters
-PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') )
-# Will use quality criterion ((CHASTITY>=0.6)
-PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') )
-# Quality criterion translated to (($F[5]>=0.6))
-PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') )
-# opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt
-#  AND
-# opened s_4_0103_qhg.txt
-PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') )
-# 81129 sequences out of 157651 passed filter criteria
-PL_STDERR_IGNORE_LIST.append( re.compile('^[0-9]+ sequences out of [0-9]+ passed filter criteria') )
-
-
-def pl_stderr_ignore(line):
-  """
-  Searches lines for lines to ignore (i.e. not to log)
-
-  returns True if line should be ignored
-  returns False if line should NOT be ignored
-  """
-  for s in PL_STDERR_IGNORE_LIST:
-    if s.search(line):
-      return True
-  return False
-
-
-def config_stdout_handler(line, conf_info):
-  """
-  Processes each line of output from GOAT
-  and stores useful information using the logging module
-
-  Loads useful information into conf_info as well, for future
-  use outside the function.
-
-  returns True if found condition that signifies success.
-  """
-
-  # Skip irrelevant line (without logging)
-  if s_skip.search(line):
-    pass
-
-  # Detect invalid command-line arguments
-  elif s_invalid_cmdline.search(line):
-    logging.error("Invalid commandline options!")
-
-  # Detect starting of configuration
-  elif s_start.search(line):
-    logging.info('START: Configuring pipeline')
-
-  # Detect it made it past invalid arguments
-  elif s_gerald.search(line):
-    logging.info('Running make now')
-
-  # Detect that make files have been generated (based on output)
-  elif s_generating.search(line):
-    logging.info('Make files generated')
-    return True
-
-  # Capture run directory
-  elif s_seq_folder.search(line):
-    mo = s_seq_folder_sub.search(line)
-    #Output changed when using --tiles=<tiles>
-    # at least in pipeline v0.3.0b2
-    if mo:
-      firecrest_bustard_gerald_makefile = line[mo.end():]
-      firecrest_bustard_gerald, junk = \
-                                os.path.split(firecrest_bustard_gerald_makefile)
-      firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald)
-      firecrest, junk = os.path.split(firecrest_bustard)
-
-      conf_info.bustard_path = firecrest_bustard
-      conf_info.run_path = firecrest
-    
-    #Standard output handling
-    else:
-      print 'Sequence line:', line
-      mo = s_seq_folder.search(line)
-      conf_info.bustard_path = line[mo.end():]
-      conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
-
-  # Log all other output for debugging purposes
-  else:
-    logging.warning('CONF:?: %s' % (line))
-
-  return False
-
-
-
-def config_stderr_handler(line, conf_info):
-  """
-  Processes each line of output from GOAT
-  and stores useful information using the logging module
-
-  Loads useful information into conf_info as well, for future
-  use outside the function.
-
-  returns RUN_ABORT upon detecting failure;
-          True on success message;
-          False if neutral message
-            (i.e. doesn't signify failure or success)
-  """
-  global SUPPRESS_MISSING_CYCLES
-
-  # Detect invalid species directory error
-  if s_species_dir_err.search(line):
-    logging.error(line)
-    return RUN_ABORT
-  # Detect goat_pipeline.py traceback
-  elif s_goat_traceb.search(line):
-    logging.error("Goat config script died, traceback in debug output")
-    return RUN_ABORT
-  # Detect indication of successful configuration (from stderr; odd, but ok)
-  elif s_stderr_taskcomplete.search(line):
-    logging.info('Configure step successful (from: stderr)')
-    return True
-  # Detect missing cycles
-  elif s_missing_cycles.search(line):
-
-    # Only display error once
-    if not SUPPRESS_MISSING_CYCLES:
-      logging.error("Missing cycles detected; Not all cycles copied?")
-      logging.debug("CONF:STDERR:MISSING_CYCLES: %s" % (line))
-      SUPPRESS_MISSING_CYCLES = True
-    return RUN_ABORT
-  
-  # Log all other output as debug output
-  else:
-    logging.debug('CONF:STDERR:?: %s' % (line))
-
-  # Neutral (not failure; nor success)
-  return False
-
-
-#def pipeline_stdout_handler(line, conf_info):
-#  """
-#  Processes each line of output from running the pipeline
-#  and stores useful information using the logging module
-#
-#  Loads useful information into conf_info as well, for future
-#  use outside the function.
-#
-#  returns True if found condition that signifies success.
-#  """
-#
-#  #f.write(line + '\n')
-#
-#  return True
-
-
-
-def pipeline_stderr_handler(line, conf_info):
-  """
-  Processes each line of stderr from a pipeline run
-  and stores useful information using the logging module
-
-  ##FIXME: Future feature (doesn't actually do this yet)
-  #Loads useful information into conf_info as well, for future
-  #use outside the function.
-
-  returns RUN_FAILED upon detecting failure;
-          #True on success message; (no clear success state)
-          False if neutral message
-            (i.e. doesn't signify failure or success)
-  """
-
-  if pl_stderr_ignore(line):
-    pass
-  elif s_make_error.search(line):
-    logging.error("make error detected; run failed")
-    return RUN_FAILED
-  elif s_no_gnuplot.search(line):
-    logging.error("gnuplot not found")
-    return RUN_FAILED
-  elif s_no_convert.search(line):
-    logging.error("imagemagick's convert command not found")
-    return RUN_FAILED
-  elif s_no_ghostscript.search(line):
-    logging.error("ghostscript not found")
-    return RUN_FAILED
-  else:
-    logging.debug('PIPE:STDERR:?: %s' % (line))
-
-  return False
-
-
-def retrieve_config(conf_info, flowcell, cfg_filepath, genome_dir):
-  """
-  Gets the config file from server...
-  requires config file in:
-    /etc/ga_frontend/ga_frontend.conf
-   or
-    ~/.ga_frontend.conf
-
-  with:
-  [config_file_server]
-  base_host_url: http://host:port
-
-  returns True if successful, False on failure
-  """
-  options = getCombinedOptions()
-
-  if options.url is None:
-    logging.error("~/.ga_frontend.conf or /etc/ga_frontend/ga_frontend.conf" \
-                  " missing base_host_url option")
-    return False
-
-  try:
-    saveConfigFile(flowcell, options.url, cfg_filepath)
-    conf_info.config_filepath = cfg_filepath
-  except FlowCellNotFound, e:
-    logging.error(e)
-    return False
-  except WebError404, e:
-    logging.error(e)
-    return False
-  except IOError, e:
-    logging.error(e)
-    return False
-  except Exception, e:
-    logging.error(e)
-    return False
-
-  f = open(cfg_filepath, 'r')
-  data = f.read()
-  f.close()
-
-  genome_dict = getAvailableGenomes(genome_dir)
-  mapper_dict = constructMapperDict(genome_dict)
-
-  logging.debug(data)
-
-  f = open(cfg_filepath, 'w')
-  f.write(data % (mapper_dict))
-  f.close()
-  
-  return True
-  
-
-
-def configure(conf_info):
-  """
-  Attempts to configure the GA pipeline using goat.
-
-  Uses logging module to store information about status.
-
-  returns True if configuration successful, otherwise False.
-  """
-  #ERROR Test:
-  #pipe = subprocess.Popen(['goat_pipeline.py',
-  #                         '--GERALD=config32bk.txt',
-  #                         '--make .',],
-  #                         #'.'],
-  #                        stdout=subprocess.PIPE,
-  #                        stderr=subprocess.PIPE)
-
-  #ERROR Test (2), causes goat_pipeline.py traceback
-  #pipe = subprocess.Popen(['goat_pipeline.py',
-  #                  '--GERALD=%s' % (conf_info.config_filepath),
-  #                         '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
-  #                         '--make',
-  #                         '.'],
-  #                        stdout=subprocess.PIPE,
-  #                        stderr=subprocess.PIPE)
-
-  ##########################
-  # Run configuration step
-  #   Not a test; actual configure attempt.
-  #pipe = subprocess.Popen(['goat_pipeline.py',
-  #                  '--GERALD=%s' % (conf_info.config_filepath),
-  #                         '--make',
-  #                         '.'],
-  #                        stdout=subprocess.PIPE,
-  #                        stderr=subprocess.PIPE)
-
-
-  stdout_filepath = os.path.join(conf_info.analysis_dir,
-                                 "pipeline_configure_stdout.txt")
-  stderr_filepath = os.path.join(conf_info.analysis_dir,
-                                 "pipeline_configure_stderr.txt")
-
-  fout = open(stdout_filepath, 'w')
-  ferr = open(stderr_filepath, 'w')
-  
-  pipe = subprocess.Popen(['goat_pipeline.py',
-                    '--GERALD=%s' % (conf_info.config_filepath),
-                           #'--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_0103,s_4_0104',
-                           '--make',
-                           conf_info.analysis_dir],
-                          stdout=fout,
-                          stderr=ferr)
-
-  print "Configuring pipeline: %s" % (time.ctime())
-  error_code = pipe.wait()
-
-  # Clean up
-  fout.close()
-  ferr.close()
-  
-  
-  ##################
-  # Process stdout
-  fout = open(stdout_filepath, 'r')
-  
-  stdout_line = fout.readline()
-
-  complete = False
-  while stdout_line != '':
-    # Handle stdout
-    if config_stdout_handler(stdout_line, conf_info):
-      complete = True
-    stdout_line = fout.readline()
-
-  fout.close()
-
-
-  #error_code = pipe.wait()
-  if error_code:
-    logging.error('Received error_code: %s' % (error_code))
-  else:
-    logging.info('We are go for launch!')
-
-  #Process stderr
-  ferr = open(stderr_filepath, 'r')
-  stderr_line = ferr.readline()
-
-  abort = 'NO!'
-  stderr_success = False
-  while stderr_line != '':
-    stderr_status = config_stderr_handler(stderr_line, conf_info)
-    if stderr_status == RUN_ABORT:
-      abort = RUN_ABORT
-    elif stderr_status is True:
-      stderr_success = True
-    stderr_line = ferr.readline()
-
-  ferr.close()
-
-
-  #Success requirements:
-  # 1) stdout reported that configuration completed
-  # 2) The program exited with status 0
-  # 3) stderr did not request an abort
-  # 4) stderr reported success
-  print '#Expect: True, False, True, True'
-  print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
-  status = complete is True and \
-           bool(error_code) is False and \
-           abort != RUN_ABORT and \
-           stderr_success is True
-
-  # If everything was successful, but for some reason
-  #  we didn't retrieve the path info, log it.
-  if status is True:
-    if conf_info.bustard_path is None or conf_info.run_path is None:
-      logging.error("Failed to retrieve run_path")
-      return False
-  
-  return status
-
-
-def run_pipeline(conf_info):
-  """
-  Run the pipeline and monitor status.
-  """
-  # Fail if the run_path doesn't actually exist
-  if not os.path.exists(conf_info.run_path):
-    logging.error('Run path does not exist: %s' \
-              % (conf_info.run_path))
-    return False
-
-  # Log files for the pipeline run (make itself runs in run_path below)
-  stdout_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stdout.txt')
-  stderr_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stderr.txt')
-
-  # Create status object
-  conf_info.createStatusObject()
-
-  # Monitor file creation
-  wm = WatchManager()
-  mask = EventsCodes.IN_DELETE | EventsCodes.IN_CREATE
-  event = RunEvent(conf_info)
-  notifier = ThreadedNotifier(wm, event)
-  notifier.start()
-  wdd = wm.add_watch(conf_info.run_path, mask, rec=True)
-
-  # Log pipeline starting
-  logging.info('STARTING PIPELINE @ %s' % (time.ctime()))
-  
-  # Start the pipeline (and hide!)
-  #pipe = subprocess.Popen(['make',
-  #                         '-j8',
-  #                         'recursive'],
-  #                        stdout=subprocess.PIPE,
-  #                        stderr=subprocess.PIPE)
-
-  fout = open(stdout_filepath, 'w')
-  ferr = open(stderr_filepath, 'w')
-
-  pipe = subprocess.Popen(['make',
-                           '--directory=%s' % (conf_info.run_path),
-                           '-j8',
-                           'recursive'],
-                           stdout=fout,
-                           stderr=ferr)
-                           #shell=True)
-  # Wait for run to finish
-  retcode = pipe.wait()
-
-
-  # Clean up
-  notifier.stop()
-  fout.close()
-  ferr.close()
-
-  # Process stderr
-  ferr = open(stderr_filepath, 'r')
-
-  run_failed_stderr = False
-  for line in ferr:
-    err_status = pipeline_stderr_handler(line, conf_info)
-    if err_status == RUN_FAILED:
-      run_failed_stderr = True
-
-  ferr.close()
-
-  # Finished file check!
-  print 'RUN SUCCESS CHECK:'
-  for key, value in event.run_status_dict.items():
-    print '  %s: %s' % (key, value)
-
-  dstatus = event.run_status_dict
-
-  # Success or failure check
-  status = (retcode == 0) and \
-           run_failed_stderr is False and \
-           dstatus['firecrest'] is True and \
-           dstatus['bustard'] is True and \
-           dstatus['gerald'] is True
-
-  return status
-
-
diff --git a/gaworkflow/pipeline/firecrest.py b/gaworkflow/pipeline/firecrest.py
deleted file mode 100644 (file)
index dd06aec..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-"""
-Extract information about the Firecrest run
-
-Firecrest - class holding the properties we found
-firecrest - Firecrest factory function initialized from a directory name
-fromxml - Firecrest factory function initialized from an xml dump from
-          the Firecrest object.
-"""
-
-from datetime import date
-import logging
-import os
-import re
-import time
-
-from gaworkflow.pipeline.runfolder import \
-   ElementTree, \
-   VERSION_RE, \
-   EUROPEAN_STRPTIME
-
-class Firecrest(object):
-    XML_VERSION=1
-
-    # xml tag names
-    FIRECREST = 'Firecrest'
-    SOFTWARE_VERSION = 'version'
-    START = 'FirstCycle'
-    STOP = 'LastCycle'
-    DATE = 'run_time'
-    USER = 'user'
-    MATRIX = 'matrix'
-
-    def __init__(self, xml=None):
-        self.start = None
-        self.stop = None
-        self.version = None
-        self.date = date.today()
-        self.user = None
-        self.matrix = None
-
-        if xml is not None:
-            self.set_elements(xml)
-        
-    def _get_time(self):
-        return time.mktime(self.date.timetuple())
-    time = property(_get_time, doc='return run time as seconds since epoch')
-
-    def dump(self):
-        print "Starting cycle:", self.start
-        print "Ending cycle:", self.stop
-        print "Firecrest version:", self.version
-        print "Run date:", self.date
-        print "user:", self.user
-
-    def get_elements(self):
-        attribs = {'version': str(Firecrest.XML_VERSION) }
-        root = ElementTree.Element(Firecrest.FIRECREST, attrib=attribs)
-        version = ElementTree.SubElement(root, Firecrest.SOFTWARE_VERSION)
-        version.text = self.version
-        start_cycle = ElementTree.SubElement(root, Firecrest.START)
-        start_cycle.text = str(self.start)
-        stop_cycle = ElementTree.SubElement(root, Firecrest.STOP)
-        stop_cycle.text = str(self.stop)
-        run_date = ElementTree.SubElement(root, Firecrest.DATE)
-        run_date.text = str(self.time)
-        user = ElementTree.SubElement(root, Firecrest.USER)
-        user.text = self.user
-        matrix = ElementTree.SubElement(root, Firecrest.MATRIX)
-        matrix.text = self.matrix
-        return root
-
-    def set_elements(self, tree):
-        if tree.tag != Firecrest.FIRECREST:
-            raise ValueError('Expected "Firecrest" SubElements')
-        xml_version = int(tree.attrib.get('version', 0))
-        if xml_version > Firecrest.XML_VERSION:
-            logging.warn('Firecrest XML tree is a higher version than this class')
-        for element in list(tree):
-            if element.tag == Firecrest.SOFTWARE_VERSION:
-                self.version = element.text
-            elif element.tag == Firecrest.START:
-                self.start = int(element.text)
-            elif element.tag == Firecrest.STOP:
-                self.stop = int(element.text)
-            elif element.tag == Firecrest.DATE:
-                self.date = date.fromtimestamp(float(element.text))
-            elif element.tag == Firecrest.USER:
-                self.user = element.text
-            elif element.tag == Firecrest.MATRIX:
-                self.matrix = element.text
-            else:
-                raise ValueError("Unrecognized tag: %s" % (element.tag,))
-
-def firecrest(pathname):
-    """
-    Examine the directory at pathname and initialize a Firecrest object
-    """
-    f = Firecrest()
-
-    # parse firecrest directory name
-    path, name = os.path.split(pathname)
-    groups = name.split('_')
-    # grab the start/stop cycle information
-    cycle = re.match("C([0-9]+)-([0-9]+)", groups[0])
-    f.start = int(cycle.group(1))
-    f.stop = int(cycle.group(2))
-    # firecrest version
-    version = re.search(VERSION_RE, groups[1])
-    f.version = (version.group(1))
-    # datetime
-    t = time.strptime(groups[2], EUROPEAN_STRPTIME)
-    f.date = date(*t[0:3])
-    # username
-    f.user = groups[3]
-
-    # should I parse this deeper than just stashing the 
-    # contents of the matrix file?
-    matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
-    f.matrix = open(matrix_pathname, 'r').read()
-    return f
-
-def fromxml(tree):
-    """
-    Initialize a Firecrest object from an element tree node
-    """
-    f = Firecrest()
-    f.set_elements(tree)
-    return f
diff --git a/gaworkflow/pipeline/genome_mapper.py b/gaworkflow/pipeline/genome_mapper.py
deleted file mode 100644 (file)
index 90c619b..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/python
-import glob
-import sys
-import os
-import re
-
-import logging
-
-from gaworkflow.util.alphanum import alphanum
-
-class DuplicateGenome(Exception): pass
-
-
-def _has_metainfo(genome_dir):
-  metapath = os.path.join(genome_dir, '_metainfo_')
-  if os.path.isfile(metapath):
-    return True
-  else:
-    return False
-
-def getAvailableGenomes(genome_base_dir):
-  """
-  raises IOError (on genome_base_dir not found)
-  raises DuplicateGenome on duplicate genomes found.
-  
-  returns a double dictionary (i.e. d[species][build] = path)
-  """
-
-  # Need valid directory
-  if not os.path.exists(genome_base_dir):
-    msg = "Directory does not exist: %s" % (genome_base_dir)
-    raise IOError, msg
-
-  # Find all subdirectories
-  filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
-  potential_genome_dirs = \
-    [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
-
-  # Get list of metadata files
-  genome_dir_list = \
-    [ dirpath \
-      for dirpath in potential_genome_dirs \
-      if _has_metainfo(dirpath) ]
-
-  # Genome double dictionary
-  d = {}
-
-  for genome_dir in genome_dir_list:
-    metafile = os.path.join(genome_dir, '_metainfo_')
-    line = open(metafile, 'r').readline().strip()
-
-    # Get species, build... log and skip on failure
-    try:
-      species, build = line.split('|')
-    except ValueError:
-      logging.warning('Skipping: Invalid metafile (%s) line: %s' \
-                      % (metafile, line))
-      continue
-
-    build_dict = d.setdefault(species, {})
-    if build in build_dict:
-      msg = "Duplicate genome for %s|%s" % (species, build)
-      raise DuplicateGenome, msg
-
-    build_dict[build] = genome_dir
-
-  return d
-  
-
-class constructMapperDict(object):
-    """
-    Emulate a dictionary to map genome|build names to paths.
-    
-    It uses the dictionary generated by getAvailableGenomes.
-    """
-    def __init__(self, genome_dict):
-        self.genome_dict = genome_dict
-        
-    def __getitem__(self, key):
-        """
-        Return the best match for key
-        """
-        elements = re.split("\|", key)
-          
-        if len(elements) == 1:
-            # we only have the species name
-            # get the set of builds
-            builds = self.genome_dict[elements[0]]
-            
-            # sort build names the way humans would
-            keys = builds.keys()
-            keys.sort(cmp=alphanum)
-            
-            # return the path from the 'last' build name
-            return builds[keys[-1]]
-                        
-        elif len(elements) == 2:
-            # we have species, and build name
-            return self.genome_dict[elements[0]][elements[1]]
-        else:
-            raise KeyError("Unrecognized key")
-        
-    def keys(self):
-        keys = []
-        for species in self.genome_dict.keys():
-            for build in self.genome_dict[species]:
-                keys.append(species + '|' + build)
-        return keys
-            
-    def values(self):
-        values = []
-        for species in self.genome_dict.keys():
-            for build in self.genome_dict[species]:
-                values.append(self.genome_dict[species][build])
-        return values
-       
-    def items(self):
-        items = []
-        for species in self.genome_dict.keys():
-            for build in self.genome_dict[species]:
-                key = species + '|' + build
-                value = self.genome_dict[species][build]
-                items.append((key, value))
-        return items
-            
-if __name__ == '__main__':
-
-  if len(sys.argv) != 2:
-    print 'usage: %s <base_genome_dir>' % (sys.argv[0])
-    sys.exit(1)
-
-  d = getAvailableGenomes(sys.argv[1])
-  d2 = constructMapperDict(d)
-
-  for k,v in d2.items():
-    print '%s: %s' % (k,v)
-  
-  
diff --git a/gaworkflow/pipeline/gerald.py b/gaworkflow/pipeline/gerald.py
deleted file mode 100644 (file)
index f268767..0000000
+++ /dev/null
@@ -1,719 +0,0 @@
-"""
-Provide access to information stored in the GERALD directory.
-"""
-from datetime import datetime, date
-from glob import glob
-import logging
-import os
-import stat
-import time
-import types
-
-from gaworkflow.pipeline.runfolder import \
-   ElementTree, \
-   EUROPEAN_STRPTIME, \
-   LANES_PER_FLOWCELL, \
-   VERSION_RE
-from gaworkflow.util.ethelp import indent, flatten
-from gaworkflow.util.opener import autoopen
-
-class Gerald(object):
-    """
-    Capture meaning out of the GERALD directory
-    """
-    XML_VERSION = 1
-    GERALD='Gerald'
-    RUN_PARAMETERS='RunParameters'
-    SUMMARY='Summary'
-
-    class LaneParameters(object):
-        """
-        Make it easy to access elements of LaneSpecificRunParameters from python
-        """
-        def __init__(self, gerald, key):
-            self._gerald = gerald
-            self._key = key
-        
-        def __get_attribute(self, xml_tag):
-            subtree = self._gerald.tree.find('LaneSpecificRunParameters')
-            container = subtree.find(xml_tag)
-            if container is None:
-                return None
-            if len(container.getchildren()) > LANES_PER_FLOWCELL:
-                raise RuntimeError('GERALD config.xml file changed')
-            lanes = [x.tag.split('_')[1] for x in container.getchildren()]
-            index = lanes.index(self._key)
-            element = container[index]
-            return element.text
-        def _get_analysis(self):
-            return self.__get_attribute('ANALYSIS')
-        analysis = property(_get_analysis)
-
-        def _get_eland_genome(self):
-            genome = self.__get_attribute('ELAND_GENOME')
-            # default to the chipwide parameters if there isn't an
-            # entry in the lane specific parameters
-            if genome is None:
-                subtree = self._gerald.tree.find('ChipWideRunParameters')
-                container = subtree.find('ELAND_GENOME')
-                genome = container.text
-            return genome
-        eland_genome = property(_get_eland_genome)
-
-        def _get_read_length(self):
-            return self.__get_attribute('READ_LENGTH')
-        read_length = property(_get_read_length)
-
-        def _get_use_bases(self):
-            return self.__get_attribute('USE_BASES')
-        use_bases = property(_get_use_bases)
-
-    class LaneSpecificRunParameters(object):
-        """
-        Provide access to LaneSpecificRunParameters
-        """
-        def __init__(self, gerald):
-            self._gerald = gerald
-            self._keys = None
-        def __getitem__(self, key):
-            return Gerald.LaneParameters(self._gerald, key)
-        def keys(self):
-            if self._keys is None:
-                tree = self._gerald.tree
-                analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
-                # according to the pipeline specs I think their fields 
-                # are sampleName_laneID, with sampleName defaulting to s
-                # since laneIDs are constant lets just try using 
-                # those consistently.
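-                # e.g. tags <s_1>, <s_2>, ... yield keys ['1', '2', ...]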
-                self._keys = [ x.tag.split('_')[1] for x in analysis]
-            return self._keys
-        def values(self):
-            return [ self[x] for x in self.keys() ]
-        def items(self):
-            return zip(self.keys(), self.values())
-        def __len__(self):
-            return len(self.keys())
-
-    def __init__(self, xml=None):
-        self.pathname = None
-        self.tree = None
-
-        # parse lane parameters out of the config.xml file
-        self.lanes = Gerald.LaneSpecificRunParameters(self)
-
-        self.summary = None
-        self.eland_results = None
-
-        if xml is not None:
-            self.set_elements(xml)
-
-    def _get_date(self):
-        if self.tree is None:
-            return datetime.today()
-        timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
-        epochstamp = time.mktime(time.strptime(timestamp, '%c'))
-        return datetime.fromtimestamp(epochstamp)
-    date = property(_get_date)
-
-    def _get_time(self):
-        return time.mktime(self.date.timetuple())
-    time = property(_get_time, doc='return run time as seconds since epoch')
-
-    def _get_version(self):
-        if self.tree is None:
-            return None
-        return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION')
-    version = property(_get_version)
-
-    def dump(self):
-        """
-        Debugging function, report current object
-        """
-        print 'Gerald version:', self.version
-        print 'Gerald run date:', self.date
-        print 'Gerald config.xml:', self.tree
-        self.summary.dump()
-
-    def get_elements(self):
-        if self.tree is None or self.summary is None:
-            return None
-
-        gerald = ElementTree.Element(Gerald.GERALD, 
-                                     {'version': unicode(Gerald.XML_VERSION)})
-        gerald.append(self.tree)
-        gerald.append(self.summary.get_elements())
-        if self.eland_results:
-            gerald.append(self.eland_results.get_elements())
-        return gerald
-
-    def set_elements(self, tree):
-        if tree.tag !=  Gerald.GERALD:
-            raise ValueError('expected GERALD')
-        xml_version = int(tree.attrib.get('version', 0))
-        if xml_version > Gerald.XML_VERSION:
-            logging.warn('XML tree is a higher version than this class')
-        for element in list(tree):
-            tag = element.tag.lower()
-            if tag == Gerald.RUN_PARAMETERS.lower():
-                self.tree = element
-            elif tag == Gerald.SUMMARY.lower():
-                self.summary = Summary(xml=element)
-            elif tag == ELAND.ELAND.lower():
-                self.eland_results = ELAND(xml=element)
-            else:
-                logging.warn("Unrecognized tag %s" % (element.tag,))
-        
-
-def gerald(pathname):
-    g = Gerald()
-    g.pathname = pathname
-    path, name = os.path.split(pathname)
-    config_pathname = os.path.join(pathname, 'config.xml')
-    g.tree = ElementTree.parse(config_pathname).getroot()
-
-    # parse Summary.htm file
-    summary_pathname = os.path.join(pathname, 'Summary.htm')
-    g.summary = Summary(summary_pathname)
-    # parse eland files
-    g.eland_results = eland(g.pathname, g)
-    return g
-
-def tonumber(v):
-    """
-    Convert a value to an int if possible, otherwise to a float.
-    """
-    try:
-        v = int(v)
-    except ValueError, e:
-        v = float(v)
-    return v
-
-def parse_mean_range(value):
-    """
-    Parse values like 123 +/- 4.5
-    """
-    if value.strip() == 'unknown':
-        return 0, 0
-
-    average, pm, deviation = value.split()
-    if pm != '+/-':
-        raise RuntimeError("Summary.htm file format changed")
-    return tonumber(average), tonumber(deviation)
-
-def make_mean_range_element(parent, name, mean, deviation):
-    """
-    Make an ElementTree subelement <Name mean='mean', deviation='deviation'/>
-    """
-    element = ElementTree.SubElement(parent, name,
-                                     { 'mean': unicode(mean),
-                                       'deviation': unicode(deviation)})
-    return element
-
-def parse_mean_range_element(element):
-    """
-    Grab mean/deviation out of element
-    """
-    return (tonumber(element.attrib['mean']), 
-            tonumber(element.attrib['deviation']))
-
-def parse_summary_element(element):
-    """
-    Determine if we have a simple element or a mean/deviation element
-    """
-    if len(element.attrib) > 0:
-        return parse_mean_range_element(element)
-    else:
-        return element.text
-
-class Summary(object):
-    """
-    Extract some useful information from the Summary.htm file
-    """
-    XML_VERSION = 2
-    SUMMARY = 'Summary'
-
-    class LaneResultSummary(object):
-        """
-        Parse the LaneResultSummary table out of Summary.htm
-        Mostly for the cluster number
-        """
-        LANE_RESULT_SUMMARY = 'LaneResultSummary'
-        TAGS = { 
-          'LaneYield': 'lane_yield',
-          'Cluster': 'cluster', # Raw
-          'ClusterPF': 'cluster_pass_filter',
-          'AverageFirstCycleIntensity': 'average_first_cycle_intensity',
-          'PercentIntensityAfter20Cycles': 'percent_intensity_after_20_cycles',
-          'PercentPassFilterClusters': 'percent_pass_filter_clusters',
-          'PercentPassFilterAlign': 'percent_pass_filter_align',
-          'AverageAlignmentScore': 'average_alignment_score',
-          'PercentErrorRate': 'percent_error_rate'
-        }
-                 
-        def __init__(self, html=None, xml=None):
-            self.lane = None
-            self.lane_yield = None
-            self.cluster = None
-            self.cluster_pass_filter = None
-            self.average_first_cycle_intensity = None
-            self.percent_intensity_after_20_cycles = None
-            self.percent_pass_filter_clusters = None
-            self.percent_pass_filter_align = None
-            self.average_alignment_score = None
-            self.percent_error_rate = None
-
-            if html is not None:
-                self.set_elements_from_html(html)
-            if xml is not None:
-                self.set_elements(xml)
-
-        def set_elements_from_html(self, data):
-            if len(data) not in (8, 10):
-                raise RuntimeError("Summary.htm file format changed")
-
-            # same in pre-0.3.0 Summary file and 0.3 summary file
-            self.lane = data[0]
-
-            if len(data) == 8:
-                parsed_data = [ parse_mean_range(x) for x in data[1:] ]
-                # this is the < 0.3 Pipeline version
-                self.cluster = parsed_data[0]
-                self.average_first_cycle_intensity = parsed_data[1]
-                self.percent_intensity_after_20_cycles = parsed_data[2]
-                self.percent_pass_filter_clusters = parsed_data[3]
-                self.percent_pass_filter_align = parsed_data[4]
-                self.average_alignment_score = parsed_data[5]
-                self.percent_error_rate = parsed_data[6]
-            elif len(data) == 10:
-                parsed_data = [ parse_mean_range(x) for x in data[2:] ]
-                # this is the >= 0.3 summary file
-                self.lane_yield = data[1]
-                self.cluster = parsed_data[0]
-                self.cluster_pass_filter = parsed_data[1]
-                self.average_first_cycle_intensity = parsed_data[2]
-                self.percent_intensity_after_20_cycles = parsed_data[3]
-                self.percent_pass_filter_clusters = parsed_data[4]
-                self.percent_pass_filter_align = parsed_data[5]
-                self.average_alignment_score = parsed_data[6]
-                self.percent_error_rate = parsed_data[7]
-
-        def get_elements(self):
-            lane_result = ElementTree.Element(
-                            Summary.LaneResultSummary.LANE_RESULT_SUMMARY, 
-                            {'lane': self.lane})
-            for tag, variable_name in Summary.LaneResultSummary.TAGS.items():
-                value = getattr(self, variable_name)
-                if value is None:
-                    continue
-                # it looks like a sequence
-                elif type(value) in (types.TupleType, types.ListType):
-                    element = make_mean_range_element(
-                      lane_result,
-                      tag,
-                      *value
-                    )
-                else:
-                    element = ElementTree.SubElement(lane_result, tag)
-                    element.text = value
-            return lane_result
-
-        def set_elements(self, tree):
-            if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY:
-                raise ValueError('Expected %s' % (
-                        Summary.LaneResultSummary.LANE_RESULT_SUMMARY))
-            self.lane = tree.attrib['lane']
-            tags = Summary.LaneResultSummary.TAGS
-            for element in list(tree):
-                try:
-                    variable_name = tags[element.tag]
-                    setattr(self, variable_name, 
-                            parse_summary_element(element))
-                except KeyError, e:
-                    logging.warn('Unrecognized tag %s' % (element.tag,))
-
-    def __init__(self, filename=None, xml=None):
-        self.lane_results = {}
-
-        if filename is not None:
-            self._extract_lane_results(filename)
-        if xml is not None:
-            self.set_elements(xml)
-
-    def __getitem__(self, key):
-        return self.lane_results[key]
-
-    def __len__(self):
-        return len(self.lane_results)
-
-    def keys(self):
-        return self.lane_results.keys()
-
-    def values(self):
-        return self.lane_results.values()
-
-    def items(self):
-        return self.lane_results.items()
-
-    def _flattened_row(self, row):
-        """
-        flatten the children of a <tr>...</tr>
-        """
-        return [flatten(x) for x in row.getchildren() ]
-    
-    def _parse_table(self, table):
-        """
-        assumes the first line is the header of a table, 
-        and that the remaining rows are data
-        """
-        rows = table.getchildren()
-        data = []
-        for r in rows:
-            data.append(self._flattened_row(r))
-        return data
-    
-    def _extract_named_tables(self, pathname):
-        """
-        extract all the 'named' tables from a Summary.htm file
-        and return as a dictionary
-        
-        Named tables are <h2>...</h2><table>...</table> pairs
-        The contents of the h2 tag are considered to be the
-        name of the table.
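-
-        Illustrative example (not from the original file):
-          <h2>Lane Results Summary</h2><table>...</table>
-        would yield {'Lane Results Summary': <parsed table rows>}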
-        """
-        tree = ElementTree.parse(pathname).getroot()
-        body = tree.find('body')
-        tables = {}
-        # stop one early so body[i+1] is always a valid index
-        for i in range(len(body) - 1):
-            if body[i].tag == 'h2' and body[i+1].tag == 'table':
-                # we have an interesting table
-                name = flatten(body[i])
-                table = body[i+1]
-                data = self._parse_table(table)
-                tables[name] = data
-        return tables
-
-    def _extract_lane_results(self, pathname):
-        """
-        extract the Lane Results Summary table
-        """
-
-        tables = self._extract_named_tables(pathname)
-
-        # parse lane result summary
-        lane_summary = tables['Lane Results Summary']
-        # this is version 1 of the summary file
-        if len(lane_summary[-1]) == 8:
-            # strip header
-            headers = lane_summary[0]
-            # grab the lane by lane data
-            lane_summary = lane_summary[1:]
-
-        # this is version 2 of the summary file
-        if len(lane_summary[-1]) == 10:
-            # lane_summary[0] is a different less specific header row
-            headers = lane_summary[1]
-            lane_summary = lane_summary[2:10]
-            # after the last lane, there's a set of chip wide averages
-
-        for r in lane_summary:
-            lrs = Summary.LaneResultSummary(html=r)
-            self.lane_results[lrs.lane] = lrs
-
-    def get_elements(self):
-        summary = ElementTree.Element(Summary.SUMMARY, 
-                                      {'version': unicode(Summary.XML_VERSION)})
-        for lane in self.lane_results.values():
-            summary.append(lane.get_elements())
-        return summary
-
-    def set_elements(self, tree):
-        if tree.tag != Summary.SUMMARY:
-            raise ValueError("Expected %s" % (Summary.SUMMARY,))
-        xml_version = int(tree.attrib.get('version', 0))
-        if xml_version > Summary.XML_VERSION:
-            logging.warn('Summary XML tree is a higher version than this class')
-        for element in list(tree):
-            lrs = Summary.LaneResultSummary()
-            lrs.set_elements(element)
-            self.lane_results[lrs.lane] = lrs
-
-    def dump(self):
-        """
-        Debugging function, report current object
-        """
-        pass
-
-
-def build_genome_fasta_map(genome_dir):
-    # build fasta to fasta file map
-    genome = genome_dir.split(os.path.sep)[-1]
-    fasta_map = {}
-    for vld_file in glob(os.path.join(genome_dir, '*.vld')):
-        is_link = False
-        if os.path.islink(vld_file):
-            is_link = True
-        vld_file = os.path.realpath(vld_file)
-        path, vld_name = os.path.split(vld_file)
-        name, ext = os.path.splitext(vld_name)
-        if is_link:
-            fasta_map[name] = name
-        else:
-            fasta_map[name] = os.path.join(genome, name)
-    return fasta_map
-    
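-# Illustrative example (hypothetical layout, not from the original file):
-# given genome_dir '/genomes/mm9' containing a regular file chr1.fa.vld
-# and a symlinked spike-in lambda.fa.vld, the returned map would be
-# {'chr1.fa': 'mm9/chr1.fa', 'lambda.fa': 'lambda.fa'}.
-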
-class ElandLane(object):
-    """
-    Process an eland result file
-    """
-    XML_VERSION = 1
-    LANE = 'ElandLane'
-    SAMPLE_NAME = 'SampleName'
-    LANE_ID = 'LaneID'
-    GENOME_MAP = 'GenomeMap'
-    GENOME_ITEM = 'GenomeItem'
-    MAPPED_READS = 'MappedReads'
-    MAPPED_ITEM = 'MappedItem'
-    MATCH_CODES = 'MatchCodes'
-    MATCH_ITEM = 'Code'
-    READS = 'Reads'
-
-    def __init__(self, pathname=None, genome_map=None, xml=None):
-        self.pathname = pathname
-        self._sample_name = None
-        self._lane_id = None
-        self._reads = None
-        self._mapped_reads = None
-        self._match_codes = None
-        if genome_map is None:
-            genome_map = {}
-        self.genome_map = genome_map
-        
-        if xml is not None:
-            self.set_elements(xml)
-
-    def _update(self):
-        """
-        Actually read the file and actually count the reads
-        """
-        # can't do anything if we don't have a file to process
-        if self.pathname is None:
-            return
-
-        if os.stat(self.pathname)[stat.ST_SIZE] == 0:
-            raise RuntimeError("Eland isn't done, try again later.")
-
-        reads = 0
-        mapped_reads = {}
-
-        match_codes = {'NM':0, 'QC':0, 'RM':0, 
-                       'U0':0, 'U1':0, 'U2':0,
-                       'R0':0, 'R1':0, 'R2':0,
-                      }
-        for line in autoopen(self.pathname,'r'):
-            reads += 1
-            fields = line.split()
-            # code = fields[2]
-            # match_codes[code] = match_codes.setdefault(code, 0) + 1
-            # the QC/NM etc codes are in the 3rd field and always present
-            match_codes[fields[2]] += 1
-            # ignore lines that don't have a fasta filename
-            if len(fields) < 7:
-                continue
-            fasta = self.genome_map.get(fields[6], fields[6])
-            mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
-        self._match_codes = match_codes
-        self._mapped_reads = mapped_reads
-        self._reads = reads
-
-    def _update_name(self):
-        # extract the sample name
-        if self.pathname is None:
-            return
-
-        path, name = os.path.split(self.pathname)
-        split_name = name.split('_')
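-        # e.g. 's_4_eland_result.txt' splits to sample_name 's', lane_id '4'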
-        self._sample_name = split_name[0]
-        self._lane_id = split_name[1]
-
-    def _get_sample_name(self):
-        if self._sample_name is None:
-            self._update_name()
-        return self._sample_name
-    sample_name = property(_get_sample_name)
-
-    def _get_lane_id(self):
-        if self._lane_id is None:
-            self._update_name()
-        return self._lane_id
-    lane_id = property(_get_lane_id)
-
-    def _get_reads(self):
-        if self._reads is None:
-            self._update()
-        return self._reads
-    reads = property(_get_reads)
-
-    def _get_mapped_reads(self):
-        if self._mapped_reads is None:
-            self._update()
-        return self._mapped_reads
-    mapped_reads = property(_get_mapped_reads)
-
-    def _get_match_codes(self):
-        if self._match_codes is None:
-            self._update()
-        return self._match_codes
-    match_codes = property(_get_match_codes)
-
-    def get_elements(self):
-        lane = ElementTree.Element(ElandLane.LANE, 
-                                   {'version': 
-                                    unicode(ElandLane.XML_VERSION)})
-        sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME)
-        sample_tag.text = self.sample_name
-        lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
-        lane_tag.text = self.lane_id
-        genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
-        for k, v in self.genome_map.items():
-            item = ElementTree.SubElement(
-                genome_map, ElandLane.GENOME_ITEM, 
-                {'name':k, 'value':unicode(v)})
-        mapped_reads = ElementTree.SubElement(lane, ElandLane.MAPPED_READS)
-        for k, v in self.mapped_reads.items():
-            item = ElementTree.SubElement(
-                mapped_reads, ElandLane.MAPPED_ITEM, 
-                {'name':k, 'value':unicode(v)})
-        match_codes = ElementTree.SubElement(lane, ElandLane.MATCH_CODES)
-        for k, v in self.match_codes.items():
-            item = ElementTree.SubElement(
-                match_codes, ElandLane.MATCH_ITEM, 
-                {'name':k, 'value':unicode(v)})
-        reads = ElementTree.SubElement(lane, ElandLane.READS)
-        reads.text = unicode(self.reads)
-
-        return lane
-
-    def set_elements(self, tree):
-        if tree.tag != ElandLane.LANE:
-            raise ValueError('Expecting %s' % (ElandLane.LANE,))
-
-        # reset dictionaries
-        self._mapped_reads = {}
-        self._match_codes = {}
-        
-        for element in tree:
-            tag = element.tag.lower()
-            if tag == ElandLane.SAMPLE_NAME.lower():
-                self._sample_name = element.text
-            elif tag == ElandLane.LANE_ID.lower():
-                self._lane_id = element.text
-            elif tag == ElandLane.GENOME_MAP.lower():
-                for child in element:
-                    name = child.attrib['name']
-                    value = child.attrib['value']
-                    self.genome_map[name] = value
-            elif tag == ElandLane.MAPPED_READS.lower():
-                for child in element:
-                    name = child.attrib['name']
-                    value = child.attrib['value']
-                    self._mapped_reads[name] = int(value)
-            elif tag == ElandLane.MATCH_CODES.lower():
-                for child in element:
-                    name = child.attrib['name']
-                    value = int(child.attrib['value'])
-                    self._match_codes[name] = value
-            elif tag == ElandLane.READS.lower():
-                self._reads = int(element.text)
-            else:
-                logging.warn("ElandLane unrecognized tag %s" % (element.tag,))
-
-def extract_eland_sequence(instream, outstream, start, end):
-    """
-    Extract a chunk of sequence out of an eland file
-    """
-    for line in instream:
-        record = line.split()
-        if len(record) > 1:
-            result = [record[0], record[1][start:end]]
-        else:
-            result = [record[0][start:end]]
-        outstream.write("\t".join(result))
-        outstream.write(os.linesep)
-
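-# Minimal usage sketch (hypothetical file names, not from the original
-# file): keep only the first 25 bases of each read's sequence column.
-#
-#   instream = autoopen('s_1_eland_result.txt', 'r')
-#   outstream = open('s_1_eland_trimmed.txt', 'w')
-#   extract_eland_sequence(instream, outstream, 0, 25)
-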
-class ELAND(object):
-    """
-    Summarize information from eland files
-    """
-    XML_VERSION = 1
-
-    ELAND = 'ElandCollection'
-    LANE = 'Lane'
-    LANE_ID = 'id'
-
-    def __init__(self, xml=None):
-        # we need information from the gerald config.xml
-        self.results = {}
-        
-        if xml is not None:
-            self.set_elements(xml)
-
-    def __len__(self):
-        return len(self.results)
-
-    def keys(self):
-        return self.results.keys()
-    
-    def values(self):
-        return self.results.values()
-
-    def items(self):
-        return self.results.items()
-
-    def __getitem__(self, key):
-        return self.results[key]
-
-    def get_elements(self):
-        root = ElementTree.Element(ELAND.ELAND, 
-                                   {'version': unicode(ELAND.XML_VERSION)})
-        for lane_id, lane in self.results.items():
-            eland_lane = lane.get_elements()
-            eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
-            root.append(eland_lane)
-        return root
-
-    def set_elements(self, tree):
-        if tree.tag.lower() != ELAND.ELAND.lower():
-            raise ValueError('Expecting %s' % (ELAND.ELAND,))
-        for element in list(tree):
-            lane_id = element.attrib[ELAND.LANE_ID]
-            lane = ElandLane(xml=element)
-            self.results[lane_id] = lane
-
-def eland(basedir, gerald=None, genome_maps=None):
-    e = ELAND()
-
-    file_list = glob(os.path.join(basedir, "*_eland_result.txt"))
-    if len(file_list) == 0:
-        # lets handle compressed eland files too
-        file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
-
-    for pathname in file_list:
-        # yes the lane_id is also being computed in ElandLane._update_name
-        # I didn't want to clutter up my constructor
-        # but I needed to persist the sample_name/lane_id for
-        # runfolder summary_report
-        path, name = os.path.split(pathname)
-        split_name = name.split('_')
-        lane_id = split_name[1]
-
-        if genome_maps is not None:
-            genome_map = genome_maps[lane_id]
-        elif gerald is not None:
-            genome_dir = gerald.lanes[lane_id].eland_genome
-            genome_map = build_genome_fasta_map(genome_dir)
-        else:
-            genome_map = {}
-
-        eland_result = ElandLane(pathname, genome_map)
-        e.results[lane_id] = eland_result
-    return e
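-
-# Minimal usage sketch (hypothetical path, not from the original file):
-#
-#   e = eland('/runs/080102_HWI-EAS229_0010_207BTAAXX/.../GERALD_dir')
-#   for lane_id, lane in e.items():
-#       print lane.sample_name, lane.reads, lane.match_codes['U0']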
diff --git a/gaworkflow/pipeline/recipe_parser.py b/gaworkflow/pipeline/recipe_parser.py
deleted file mode 100644 (file)
index 7f5ced6..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-from xml import sax
-
-
-def get_cycles(recipe_xml_filepath):
-  """
-  returns the number of cycles found in Recipe*.xml
-  """
-  handler = CycleXmlHandler()
-  sax.parse(recipe_xml_filepath, handler)
-  return handler.cycle_count
-
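-# Usage sketch (hypothetical path):
-#   n_cycles = get_cycles('/runfolder/Recipe_GA2-26Cycle_v0.xml')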
-
-
-class CycleXmlHandler(sax.ContentHandler):
-
-  def __init__(self):
-    self.cycle_count = 0
-    self.in_protocol = False
-    sax.ContentHandler.__init__(self)
-
-
-  def startDocument(self):
-    self.cycle_count = 0
-    self.in_protocol = False
-
-
-  def startElement(self, name, attrs):
-
-    #Only count Incorporations as cycles if within
-    # the protocol section of the xml document.
-    if name == "Incorporation" and self.in_protocol:
-      #print 'Found a cycle!'
-      self.cycle_count += 1
-      return
-    
-    elif name == 'Protocol':
-      #print 'In protocol'
-      self.in_protocol = True
-      return
-
-    #print 'Skipping: %s' % (name)
-    
-
-  def endElement(self, name):
-    
-    if name == 'Protocol':
-      #print 'End protocol'
-      self.in_protocol = False
diff --git a/gaworkflow/pipeline/retrieve_config.py b/gaworkflow/pipeline/retrieve_config.py
deleted file mode 100644 (file)
index 72cff17..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python
-
-from optparse import OptionParser, IndentedHelpFormatter
-from ConfigParser import SafeConfigParser
-
-import logging
-import os
-import sys
-import urllib2
-
-CONFIG_SYSTEM = '/etc/ga_frontend/ga_frontend.conf'
-CONFIG_USER = os.path.expanduser('~/.ga_frontend.conf')
-
-#Disable or enable commandline arg parsing; disabled by default.
-DISABLE_CMDLINE = True
-
-class FlowCellNotFound(Exception): pass
-class WebError404(Exception): pass
-
-class DummyOptions:
-  """
-  Used when command line parsing is disabled (the default).
-  """
-  def __init__(self):
-    self.url = None
-    self.output_filepath = None
-    self.flowcell = None
-    self.genome_dir = None
-
-class PreformattedDescriptionFormatter(IndentedHelpFormatter):
-  
-  #def format_description(self, description):
-  #  
-  #  if description:
-  #      return description + "\n"
-  #  else:
-  #     return ""
-      
-  def format_epilog(self, epilog):
-    """
-    It was removing my preformatted epilog, so this should override
-    that behavior! Muhahaha!
-    """
-    if epilog:
-        return "\n" + epilog + "\n"
-    else:
-        return ""
-
-
-def constructOptionParser():
-  """
-  returns a pre-setup optparser
-  """
-  global DISABLE_CMDLINE
-  
-  if DISABLE_CMDLINE:
-    return None
-  
-  parser = OptionParser(formatter=PreformattedDescriptionFormatter())
-
-  parser.set_description('Retrieves eland config file from ga_frontend web frontend.')
-  
-  parser.epilog = """
-Config File:
-  * %s (System wide)
-  * %s (User specific; overrides system)
-  * command line overrides all config file options
-  
-  Example Config File:
-  
-    [config_file_server]
-    base_host_url=http://somewhere.domain:port
-""" % (CONFIG_SYSTEM, CONFIG_USER)
-  
-  #Special formatter for allowing preformatted description.
-  ##parser.format_epilog(PreformattedDescriptionFormatter())
-
-  parser.add_option("-u", "--url",
-                    action="store", type="string", dest="url")
-  
-  parser.add_option("-o", "--output",
-                    action="store", type="string", dest="output_filepath")
-  
-  parser.add_option("-f", "--flowcell",
-                    action="store", type="string", dest="flowcell")
-
-  parser.add_option("-g", "--genome_dir",
-                    action="store", type="string", dest="genome_dir")
-  
-  #parser.set_default("url", "default")
-  
-  return parser
-
-def constructConfigParser():
-  """
-  returns a pre-setup config parser
-  """
-  parser = SafeConfigParser()
-  parser.read([CONFIG_SYSTEM, CONFIG_USER])
-  if not parser.has_section('config_file_server'):
-    parser.add_section('config_file_server')
-  if not parser.has_section('local_setup'):
-    parser.add_section('local_setup')
-  
-  return parser
-
-
-def getCombinedOptions():
-  """
-  Returns optparse options after they have been updated with
-  ConfigParser config file values and merged with parsed
-  command line options.
-  """
-  cl_parser = constructOptionParser()
-  conf_parser = constructConfigParser()
-  
-  if cl_parser is None:
-    options = DummyOptions()
-  else:
-    options, args = cl_parser.parse_args()
-  
-  if options.url is None:
-    if conf_parser.has_option('config_file_server', 'base_host_url'):
-      options.url = conf_parser.get('config_file_server', 'base_host_url')
-
-  if options.genome_dir is None:
-    if conf_parser.has_option('local_setup', 'genome_dir'):
-      options.genome_dir = conf_parser.get('local_setup', 'genome_dir')
-  
-  print 'USING OPTIONS:'
-  print ' URL:', options.url
-  print ' OUT:', options.output_filepath
-  print '  FC:', options.flowcell
-  print 'GDIR:', options.genome_dir
-  print ''
-  
-  return options
-
-
-def saveConfigFile(flowcell, base_host_url, output_filepath):
-  """
-  retrieves the flowcell eland config file, given the base_host_url
-  (i.e. http://sub.domain.edu:port)
-  """
-  url = base_host_url + '/eland_config/%s/' % (flowcell)
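-  # e.g. http://somewhere.domain:port/eland_config/FC12345/ (illustrative)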
-  
-  f = open(output_filepath, 'w')
-  #try:
-  try:
-    web = urllib2.urlopen(url)
-  except urllib2.HTTPError, e:
-    # HTTPError carries a response code and body; plain URLError does not
-    errmsg = 'HTTPError: %d' % (e.code,)
-    logging.error(errmsg)
-    logging.error('opened %s' % (url,))
-    logging.error('%s' % (e.read(),))
-    raise IOError(errmsg)
-  except urllib2.URLError, e:
-    errmsg = 'URLError: %s' % (e.reason,)
-    logging.error(errmsg)
-    logging.error('opened %s' % (url,))
-    raise IOError(errmsg)
-
-  #except IOError, msg:
-  #  if str(msg).find("Connection refused") >= 0:
-  #    print 'Error: Connection refused for: %s' % (url)
-  #    f.close()
-  #    sys.exit(1)
-  #  elif str(msg).find("Name or service not known") >= 0:
-  #    print 'Error: Invalid domain or ip address for: %s' % (url)
-  #    f.close()
-  #    sys.exit(2)
-  #  else:
-  #    raise IOError, msg
-
-  data = web.read()
-
-  if data.find('Hmm, config file for') >= 0:
-    msg = "Flowcell (%s) not found in DB; full url(%s)" % (flowcell, url)
-    raise FlowCellNotFound, msg
-
-  if data.find('404 - Not Found') >= 0:
-    msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
-          "Did you get right port #?" % (flowcell, base_host_url, url)
-    raise FlowCellNotFound, msg
-  
-  f.write(data)
-  web.close()
-  f.close()
-  logging.info('Wrote config file to %s' % (output_filepath,))
-
-  
diff --git a/gaworkflow/pipeline/run_status.py b/gaworkflow/pipeline/run_status.py
deleted file mode 100644 (file)
index 39dc54c..0000000
+++ /dev/null
@@ -1,478 +0,0 @@
-import glob
-import re
-import os
-import sys
-import time
-import threading
-
-s_comment = re.compile('^#')
-s_general_read_len = re.compile('^READ_LENGTH ')
-s_read_len = re.compile('^[1-8]+:READ_LENGTH ')
-
-s_firecrest = None
-
-def _four_digit_num_in_string(num):
-  if num < 0:
-    pass
-  elif num < 10:
-    return '000' + str(num)
-  elif num < 100:
-    return '00' + str(num)
-  elif num < 1000:
-    return '0' + str(num)
-  elif num < 10000:
-    return str(num)
-
-  msg = 'Invalid number: %s' % (num)
-  raise ValueError, msg
-
-def _two_digit_num_in_string(num):
-  if num < 0:
-    pass
-  elif num < 10:
-    return '0' + str(num)
-  elif num < 100:
-    return str(num)
-
-  msg = 'Invalid number: %s' % (num)
-  raise ValueError, msg
-
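-# Note (added): for in-range inputs these helpers behave like
-# '%04d' % num and '%02d' % num; negative input falls through to the
-# ValueError at the end.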
-
-# FIRECREST PATTERNS
-# _p2f(<pattern>, lane, tile, cycle)
-PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'
-
-# _p2f(<pattern>, lane, tile)
-PATTERN_FIRECREST_INT = 's_%s_%s_02_int.txt'
-PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
-PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
-PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'
-PATTERN_FIRECREST_CLU1 = 's_%s_%s_01_1_clu.txt'
-PATTERN_FIRECREST_CLU2 = 's_%s_%s_01_2_clu.txt'
-PATTERN_FIRECREST_CLU3 = 's_%s_%s_01_3_clu.txt'
-PATTERN_FIRECREST_CLU4 = 's_%s_%s_01_4_clu.txt'
-
-
-# BUSTARD PATTERNS
-# _p2f(<pattern>, lane, tile)
-PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
-PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'
-
-
-
-# GERALD PATTERNS
-# _p2f(<pattern>, lane, tile)
-PATTERN_GERALD_ALLTMP = 's_%s_%s_all.txt.tmp'
-PATTERN_GERALD_QRAWTMP = 's_%s_%s_qraw.txt.tmp'
-PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
-PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
-PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
-PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
-PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
-PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
-PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
-PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
-PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
-PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
-PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'
-
-# _p2f(<pattern>, lane)
-PATTERN_GERALD_SEQPRETMP = 's_%s_seqpre.txt.tmp'
-PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
-PATTERN_GERALD_SIGMEANSTMP = 's_%s_Signal_Means.txt.tmp'
-PATTERN_GERALD_CALLPNG = 's_%s_call.png'
-PATTERN_GERALD_ALLPNG = 's_%s_all.png'
-PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
-PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
-PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
-PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
-PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
-PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
-PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
-PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
-PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
-PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'
-
-
-
-def _p2f(pattern, lane, tile=None, cycle=None):
-  """
-  Converts a pattern plus info into file names
-  """
-
-  # lane, and cycle provided (INVALID)
-  if tile is None and cycle is not None:
-    msg = "Handling of cycle without tile is not currently implemented."
-    raise ValueError, msg
-
-  # lane, tile, cycle provided
-  elif cycle:
-    return pattern % (lane,
-                      _four_digit_num_in_string(tile),
-                      _two_digit_num_in_string(cycle))
-  
-  # lane, tile provided
-  elif tile:
-    return pattern % (lane, _four_digit_num_in_string(tile))
-
-  # lane provided
-  else:
-    return pattern % (lane)
-    
-
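-# Worked examples (values chosen for illustration):
-#   _p2f(PATTERN_FIRECREST_QCM, 1, 2, 3)  -> 's_1_0002_03_qcm.xml'
-#   _p2f(PATTERN_FIRECREST_INT, 1, 2)     -> 's_1_0002_02_int.txt'
-#   _p2f(PATTERN_GERALD_CALLPNG, 1)       -> 's_1_call.png'
-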
-class GARunStatus(object):
-
-  def __init__(self, conf_filepath):
-    """
-    Given an eland config file in the top level directory
-    of a run, predicts the files that will be generated
-    during a run and provides methods for retrieving
-    (completed, total) for each step or for the entire run.
-    """
-    #print 'self._conf_filepath = %s' % (conf_filepath)
-    self._conf_filepath = conf_filepath
-    self._base_dir, junk = os.path.split(conf_filepath)
-    self._image_dir = os.path.join(self._base_dir, 'Images')
-    
-    self.lanes = []
-    self.lane_read_length = {}
-    self.tiles = None
-    self.cycles = None
-    
-    self.status = {}
-    self.status['firecrest'] = {}
-    self.status['bustard'] = {}
-    self.status['gerald'] = {}
-    
-    self._process_config()
-    self._count_tiles()
-    self._count_cycles()
-    self._generate_expected()
-
-
-  def _process_config(self):
-    """
-    Grabs info from self._conf_filepath
-    """
-    f = open(self._conf_filepath, 'r')
-
-    for line in f:
-
-      #Skip comment lines for now.
-      if s_comment.search(line):
-        continue
-
-      mo =  s_general_read_len.search(line)
-      if mo:
-        read_length = int(line[mo.end():])
-        #Handle general READ_LENGTH
-        for i in range(1,9):
-          self.lane_read_length[i] = read_length
-      
-      mo = s_read_len.search(line)
-      if mo:
-        read_length = int(line[mo.end():])
-        lanes, junk = line.split(':')
-
-        #Convert lanes from string of lanes to list of lane #s.
-        lanes = [ int(i) for i in lanes ]
-
-        
-        for lane in lanes:
-
-          #Keep track of which lanes are being run.
-          if lane not in self.lanes:
-            self.lanes.append(lane)
-
-          #Update with lane specific read lengths
-          self.lane_read_length[lane] = read_length
-
-        self.lanes.sort()
-
-
-  def _count_tiles(self):
-    """
-    Count the number of tiles being used
-    """
-    self.tiles = len(glob.glob(os.path.join(self._image_dir,
-                                            'L001',
-                                            'C1.1',
-                                            's_1_*_a.tif')))
-
-  def _count_cycles(self):
-    """
-    Figures out the number of cycles that are available
-    """
-    #print 'self._image_dir = %s' % (self._image_dir)
-    cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
-    #print 'cycle_dirs = %s' % (cycle_dirs)
-    cycle_list = []
-    for cycle_dir in cycle_dirs:
-      junk, c = os.path.split(cycle_dir)
-      cycle_list.append(int(c[1:c.find('.')]))
-
-    self.cycles = max(cycle_list)
-    
-
-
-
-  def _generate_expected(self):
-    """
-    generates a list of files we expect to find.
-    """
-
-    firecrest = self.status['firecrest']
-    bustard = self.status['bustard']
-    gerald = self.status['gerald']
-    
-    
-    for lane in self.lanes:
-      for tile in range(1,self.tiles+1):
-        for cycle in range(1, self.cycles+1):
-
-          ##########################
-          # LANE, TILE, CYCLE LAYER
-
-          # FIRECREST
-          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False
-
-
-        ###################
-        # LANE, TILE LAYER
-
-        # FIRECREST
-        firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile)] = False
-        firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False
-        firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False
-        firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False
-        firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile)] = False
-        firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile)] = False
-        firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile)] = False
-        firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile)] = False
-
-
-        # BUSTARD
-        bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False
-        bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False
-
-
-        # GERALD
-        #gerald[_p2f(PATTERN_GERALD_ALLTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_QRAWTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False
-        gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False
-        #gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False
-
-      ###################
-      # LANE LAYER
-
-      # GERALD
-      #gerald[_p2f(PATTERN_GERALD_SEQPRETMP, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_SIGMEANSTMP, lane)] = False
-      gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False
-      gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False
-      gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False
-      gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False
-      gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False
-      #gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False
-      gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False
-      
-      
-
-    #################
-    # LOOPS FINISHED
-
-    # FIRECREST
-    firecrest['offsets_finished.txt'] = False
-    firecrest['finished.txt'] = False
-
-    # BUSTARD
-    bustard['finished.txt'] = False
-
-    # GERALD
-    gerald['tiles.txt'] = False
-    gerald['FullAll.htm'] = False
-    #gerald['All.htm.tmp'] = False
-    #gerald['Signal_Means.txt.tmp'] = False
-    #gerald['plotIntensity_for_IVC'] = False
-    #gerald['IVC.htm.tmp'] = False
-    gerald['FullError.htm'] = False
-    gerald['FullPerfect.htm'] = False
-    #gerald['Error.htm.tmp'] = False
-    #gerald['Perfect.htm.tmp'] = False
-    #gerald['Summary.htm.tmp'] = False
-    #gerald['Tile.htm.tmp'] = False
-    gerald['finished.txt'] = False
-    
-  def statusFirecrest(self):
-    """
-    returns (<completed>, <total>)
-    """
-    firecrest = self.status['firecrest']
-    total = len(firecrest)
-    completed = firecrest.values().count(True)
-
-    return (completed, total)
-
-
-  def statusBustard(self):
-    """
-    returns (<completed>, <total>)
-    """
-    bustard = self.status['bustard']
-    total = len(bustard)
-    completed = bustard.values().count(True)
-
-    return (completed, total)
-
-
-  def statusGerald(self):
-    """
-    returns (<completed>, <total>)
-    """
-    gerald = self.status['gerald']
-    total = len(gerald)
-    completed = gerald.values().count(True)
-
-    return (completed, total)
-
-
-  def statusTotal(self):
-    """
-    returns (<completed>, <total>)
-    """
-    #f = firecrest  c = completed
-    #b = bustard    t = total
-    #g = gerald
-    fc, ft = self.statusFirecrest()
-    bc, bt = self.statusBustard()
-    gc, gt = self.statusGerald()
-
-    return (fc+bc+gc, ft+bt+gt)
-
-
-  def statusReport(self):
-    """
-    Generate the basic percent complete report
-    """
-    def _percentCompleted(completed, total):
-      """
-      Returns percent completed as a float
-      """
-      return (completed / float(total)) * 100
-
-    fc, ft = self.statusFirecrest()
-    bc, bt = self.statusBustard()
-    gc, gt = self.statusGerald()
-    tc, tt = self.statusTotal()
-    
-    fp = _percentCompleted(fc, ft)
-    bp = _percentCompleted(bc, bt)
-    gp = _percentCompleted(gc, gt)
-    tp = _percentCompleted(tc, tt)
-    
-    report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
-              '  Bustard: %s%% (%s/%s)' % (bp, bc, bt),
-              '   Gerald: %s%% (%s/%s)' % (gp, gc, gt),
-              '-----------------------',
-              '    Total: %s%% (%s/%s)' % (tp, tc, tt),
-             ]
-    return report
-
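-  # Example output (illustrative numbers):
-  #   Firecrest: 42.0% (420/1000)
-  #     Bustard: 20.0% (100/500)
-  #      Gerald: 16.0% (80/500)
-  #   -----------------------
-  #       Total: 30.0% (600/2000)
-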
-  def updateFirecrest(self, filename):
-    """
-    Marks firecrest filename as being completed.
-    """
-    self.status['firecrest'][filename] = True
-    
-
-  def updateBustard(self, filename):
-    """
-    Marks bustard filename as being completed.
-    """
-    self.status['bustard'][filename] = True
-
-
-  def updateGerald(self, filename):
-    """
-    Marks gerald filename as being completed.
-    """
-    self.status['gerald'][filename] = True
-
-
-
-##################################################
-# Functions to be called by Thread(target=<func>)
-def _cmdLineStatusMonitorFunc(conf_info):
-  """
-  Given a ConfigInfo object, provides status to stdout.
-
-  You should probably use startCmdLineStatusMonitor()
-  instead of this function.
-
-  Use with:
-    t = threading.Thread(target=_cmdLineStatusMonitorFunc,
-                         args=[conf_info])
-    t.setDaemon(True)
-    t.start()
-  """
-  SLEEP_AMOUNT = 30
-
-  while 1:
-    if conf_info.status is None:
-      print "No status object yet."
-      time.sleep(SLEEP_AMOUNT)
-      continue
-
-    report = conf_info.status.statusReport()
-    print os.linesep.join(report)
-    print
-
-    time.sleep(SLEEP_AMOUNT)
-
-
-#############################################
-# Start monitor thread convenience functions
-def startCmdLineStatusMonitor(conf_info):
-  """
-  Starts a command line status monitor given a conf_info object.
-  """
-  t = threading.Thread(target=_cmdLineStatusMonitorFunc, args=[conf_info])
-  t.setDaemon(True)
-  t.start()
-
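-# Usage sketch (assumes conf_info carries a GARunStatus in .status):
-#   startCmdLineStatusMonitor(conf_info)
-#   # ... do the long-running pipeline work on the main thread ...
-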
-from optparse import OptionParser
-def make_parser():
-  usage = "%prog: config file"
-
-  parser = OptionParser(usage)
-  return parser
-  
-def main(cmdline=None):
-  parser = make_parser()
-  opt, args = parser.parse_args(cmdline)
-
-  if len(args) != 1:
-    parser.error("need name of configuration file")
-    
-  status = GARunStatus(args[0])
-  print os.linesep.join(status.statusReport())
-  return 0
-
-if __name__ == "__main__":
-  sys.exit(main(sys.argv[1:]))
-                   
diff --git a/gaworkflow/pipeline/runfolder.py b/gaworkflow/pipeline/runfolder.py
deleted file mode 100644 (file)
index 65f6191..0000000
+++ /dev/null
@@ -1,313 +0,0 @@
-"""
-Core information needed to inspect a runfolder.
-"""
-from glob import glob
-import logging
-import os
-import re
-import shutil
-import stat
-import subprocess
-import sys
-import time
-
-try:
-  from xml.etree import ElementTree
-except ImportError, e:
-  from elementtree import ElementTree
-
-EUROPEAN_STRPTIME = "%d-%m-%Y"
-EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})"
-VERSION_RE = "([0-9\.]+)"
-USER_RE = "([a-zA-Z0-9]+)"
-LANES_PER_FLOWCELL = 8
-
-from gaworkflow.util.alphanum import alphanum
-from gaworkflow.util.ethelp import indent, flatten
-
-
-class PipelineRun(object):
-    """
-    Capture "interesting" information about a pipeline run
-    """
-    XML_VERSION = 1
-    PIPELINE_RUN = 'PipelineRun'
-    FLOWCELL_ID = 'FlowcellID'
-
-    def __init__(self, pathname=None, firecrest=None, bustard=None, gerald=None, xml=None):
-        if pathname is not None:
-          self.pathname = os.path.normpath(pathname)
-        else:
-          self.pathname = None
-        self._name = None
-        self._flowcell_id = None
-        self.firecrest = firecrest
-        self.bustard = bustard
-        self.gerald = gerald
-
-        if xml is not None:
-          self.set_elements(xml)
-    
-    def _get_flowcell_id(self):
-        # extract flowcell ID
-        if self._flowcell_id is None:
-          config_dir = os.path.join(self.pathname, 'Config')
-          flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
-          if os.path.exists(flowcell_id_path):
-            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
-            self._flowcell_id = flowcell_id_tree.findtext('Text')
-          else:
-            path_fields = self.pathname.split('_')
-            if len(path_fields) > 0:
-              # guessing last element of filename
-              flowcell_id = path_fields[-1]
-            else:
-              flowcell_id = 'unknown'
-
-            logging.warning(
-              "Flowcell id was not found, guessing %s" % (
-                flowcell_id))
-            self._flowcell_id = flowcell_id
-        return self._flowcell_id
-    flowcell_id = property(_get_flowcell_id)
-
-    def get_elements(self):
-        """
-        make one master xml file from all of our sub-components.
-        """
-        root = ElementTree.Element(PipelineRun.PIPELINE_RUN)
-        flowcell = ElementTree.SubElement(root, PipelineRun.FLOWCELL_ID)
-        flowcell.text = self.flowcell_id
-        root.append(self.firecrest.get_elements())
-        root.append(self.bustard.get_elements())
-        root.append(self.gerald.get_elements())
-        return root
-
-    def set_elements(self, tree):
-        # this file gets imported by all the others,
-        # so we need to hide the imports to avoid a cyclic imports
-        from gaworkflow.pipeline import firecrest
-        from gaworkflow.pipeline import bustard
-        from gaworkflow.pipeline import gerald
-
-        tag = tree.tag.lower()
-        if tag != PipelineRun.PIPELINE_RUN.lower():
-          raise ValueError('Pipeline Run Expecting %s got %s' % (
-              PipelineRun.PIPELINE_RUN, tag))
-        for element in tree:
-          tag = element.tag.lower()
-          if tag == PipelineRun.FLOWCELL_ID.lower():
-            self._flowcell_id = element.text
-          #ok the xword.Xword.XWORD pattern for module.class.constant is lame
-          elif tag == firecrest.Firecrest.FIRECREST.lower():
-            self.firecrest = firecrest.Firecrest(xml=element)
-          elif tag == bustard.Bustard.BUSTARD.lower():
-            self.bustard = bustard.Bustard(xml=element)
-          elif tag == gerald.Gerald.GERALD.lower():
-            self.gerald = gerald.Gerald(xml=element)
-          else:
-            logging.warn('PipelineRun unrecognized tag %s' % (tag,))
-
-    def _get_run_name(self):
-        """
-        Find the latest date among our component runs and use that as our name
-        """
-        if self._name is None:
-          tmax = max(self.firecrest.time, self.bustard.time, self.gerald.time)
-          timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax))
-          self._name = 'run_'+self.flowcell_id+"_"+timestamp+'.xml'
-        return self._name
-    name = property(_get_run_name)
-
-    def save(self, destdir=None):
-        if destdir is None:
-            destdir = ''
-        logging.info("Saving run report "+ self.name)
-        xml = self.get_elements()
-        indent(xml)
-        dest_pathname = os.path.join(destdir, self.name)
-        ElementTree.ElementTree(xml).write(dest_pathname)
-
-    def load(self, filename):
-        logging.info("Loading run report from " + filename)
-        tree = ElementTree.parse(filename).getroot()
-        self.set_elements(tree)
-
-def get_runs(runfolder):
-    """
-    Search through a run folder for all the various sub component runs
-    and then return a PipelineRun for each different combination.
-
-    For example, if there are two different GERALD runs, this will
-    generate two different PipelineRun objects that differ
-    in their gerald component.
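-
-    Illustrative layout searched (mirrors the test fixtures):
-      <runfolder>/Data/C1-33_Firecrest1.8.28_12-04-2008_diane/
-          Bustard1.8.28_12-04-2008_diane/
-              GERALD_12-04-2008_diane/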
-    """
-    from gaworkflow.pipeline import firecrest
-    from gaworkflow.pipeline import bustard
-    from gaworkflow.pipeline import gerald
-
-    datadir = os.path.join(runfolder, 'Data')
-
-    logging.info('Searching for runs in ' + datadir)
-    runs = []
-    for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
-        f = firecrest.firecrest(firecrest_pathname)
-        bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
-        for bustard_pathname in glob(bustard_glob):
-            b = bustard.bustard(bustard_pathname)
-            gerald_glob = os.path.join(bustard_pathname, 'GERALD*')
-            for gerald_pathname in glob(gerald_glob):
-                try:
-                    g = gerald.gerald(gerald_pathname)
-                    runs.append(PipelineRun(runfolder, f, b, g))
-                except IOError, e:
-                    print "Ignoring", str(e)
-    return runs
-                
-    
-def extract_run_parameters(runs):
-    """
-    Save the run parameters for each of the given runs
-    """
-    for run in runs:
-      run.save()
-
-def summarize_mapped_reads(mapped_reads):
-    """
-    Summarize per chromosome reads into a genome count
-    But handle spike-in/contamination symlinks separately.
-    """
-    summarized_reads = {}
-    genome_reads = 0
-    genome = 'unknown'
-    for k, v in mapped_reads.items():
-        path, k = os.path.split(k)
-        if len(path) > 0:
-            genome = path
-            genome_reads += v
-        else:
-            summarized_reads[k] = summarized_reads.setdefault(k, 0) + v
-    summarized_reads[genome] = genome_reads
-    return summarized_reads
-
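-# Worked example (illustrative counts): per-chromosome hits collapse into
-# a single genome total while bare spike-in names pass through:
-#   {'mm9/chr1.fa': 10, 'mm9/chr2.fa': 5, 'Lambda.fa': 2}
-#     -> {'Lambda.fa': 2, 'mm9': 15}
-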
-def summary_report(runs):
-    """
-    Summarize cluster numbers and mapped read counts for a runfolder
-    """
-    report = []
-    for run in runs:
-        # print a run name?
-        report.append('Summary for %s' % (run.name,))
-        # sort the report
-        eland_keys = run.gerald.eland_results.results.keys()
-        eland_keys.sort(alphanum)
-
-        lane_results = run.gerald.summary.lane_results
-        for lane_id in eland_keys:
-            result = run.gerald.eland_results.results[lane_id]
-            report.append("Sample name %s" % (result.sample_name,))
-            report.append("Lane id %s" % (result.lane_id,))
-            cluster = lane_results[result.lane_id].cluster
-            report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
-            report.append("Total Reads: %d" % (result.reads,))
-            mc = result.match_codes
-            nm = mc['NM']
-            nm_percent = float(nm) / result.reads * 100
-            qc = mc['QC']
-            qc_percent = float(qc) / result.reads * 100
-
-            report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
-            report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
-            report.append('Unique (0,1,2 mismatches) %d %d %d' % \
-                          (mc['U0'], mc['U1'], mc['U2']))
-            report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
-                          (mc['R0'], mc['R1'], mc['R2']))
-            report.append("Mapped Reads")
-            mapped_reads = summarize_mapped_reads(result.mapped_reads)
-            for name, counts in mapped_reads.items():
-                report.append("  %s: %d" % (name, counts))
-            report.append('---')
-            report.append('')
-    # join after processing every run, not just the first one
-    return os.linesep.join(report)
-
-def extract_results(runs, output_base_dir=None):
-    if output_base_dir is None:
-        output_base_dir = os.getcwd()
-
-    for r in runs:
-      result_dir = os.path.join(output_base_dir, r.flowcell_id)
-      logging.info("Using %s as result directory" % (result_dir,))
-      if not os.path.exists(result_dir):
-        os.mkdir(result_dir)
-      
-      # create cycle_dir
-      cycle = "C%d-%d" % (r.firecrest.start, r.firecrest.stop)
-      logging.info("Filling in %s" % (cycle,))
-      cycle_dir = os.path.join(result_dir, cycle)
-      if os.path.exists(cycle_dir):
-        logging.error("%s already exists, not overwriting" % (cycle_dir,))
-        continue
-      else:
-        os.mkdir(cycle_dir)
-
-      # copy stuff out of the main run
-      g = r.gerald
-
-      # save run file
-      r.save(cycle_dir)
-
-      # Copy Summary.htm
-      summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
-      if os.path.exists(summary_path):
-          logging.info('Copying %s to %s' % (summary_path, cycle_dir))
-          shutil.copy(summary_path, cycle_dir)
-      else:
-          logging.info('Summary file %s was not found' % (summary_path,))
-
-      # tar score files
-      score_files = []
-      for f in os.listdir(g.pathname):
-          if re.match('.*_score.txt', f):
-              score_files.append(f)
-
-      tar_cmd = ['/bin/tar', 'c'] + score_files
-      bzip_cmd = [ 'bzip2', '-9', '-c' ]
-      tar_dest_name = os.path.join(cycle_dir, 'scores.tar.bz2')
-      tar_dest = open(tar_dest_name, 'w')
-      logging.info("Compressing score files in %s" % (g.pathname,))
-      logging.info("Running tar: " + " ".join(tar_cmd[:10]))
-      logging.info("Running bzip2: " + " ".join(bzip_cmd))
-      logging.info("Writing to %s" %(tar_dest_name))
-      
-      tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, cwd=g.pathname)
-      bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
-      tar.wait()
-      bzip.wait()
-      tar_dest.close()
-
-      # copy & bzip eland files
-      for eland_lane in g.eland_results.values():
-          source_name = eland_lane.pathname
-          path, name = os.path.split(eland_lane.pathname)
-          dest_name = os.path.join(cycle_dir, name+'.bz2')
-
-          args = ['bzip2', '-9', '-c', source_name]
-          logging.info('Running: %s' % ( " ".join(args) ))
-          bzip_dest = open(dest_name, 'w')
-          bzip = subprocess.Popen(args, stdout=bzip_dest)
-          logging.info('Saving to %s' % (dest_name, ))
-          bzip.wait()
-          bzip_dest.close()
-
-def clean_runs(runs):
-    """
-    Clean up run folders to optimize for compression.
-    """
-    # TODO: implement this.
-    # rm RunLog*.xml
-    # rm pipeline_*.txt
-    # rm gclog.txt
-    # rm NetCopy.log
-    # rm nfn.log
-    # rm Images/L*
-    # cd Data/C1-*_Firecrest*
-    # make clean_intermediate
-
-    pass
diff --git a/gaworkflow/pipeline/test/test_genome_mapper.py b/gaworkflow/pipeline/test/test_genome_mapper.py
deleted file mode 100644 (file)
index c8366d1..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-import unittest
-
-from StringIO import StringIO
-from gaworkflow.pipeline import genome_mapper
-
-class testGenomeMapper(unittest.TestCase):
-    def test_construct_mapper(self):
-        genomes = {
-        'Arabidopsis thaliana': {'v01212004': '/arabidopsis'},
-        'Homo sapiens': {'hg18': '/hg18'},
-        'Mus musculus': {'mm8': '/mm8',
-                        'mm9': '/mm9',
-                        'mm10': '/mm10'},
-        'Phage': {'174': '/phi'},
-        }
-        genome_map = genome_mapper.constructMapperDict(genomes)
-        
-        self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
-        self.failUnlessEqual("%(Phage|174)s" % (genome_map), "/phi")
-        self.failUnlessEqual("%(Mus musculus)s" % (genome_map), "/mm10")
-        self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
-        self.failUnlessEqual("%(Mus musculus|mm10)s" % (genome_map), "/mm10")
-        
-        self.failUnlessEqual(len(genome_map.keys()), 6)
-        self.failUnlessEqual(len(genome_map.values()), 6)
-        self.failUnlessEqual(len(genome_map.items()), 6)
-        
-        
-def suite():
-    return unittest.makeSuite(testGenomeMapper,'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest="suite")
diff --git a/gaworkflow/pipeline/test/test_runfolder026.py b/gaworkflow/pipeline/test/test_runfolder026.py
deleted file mode 100644 (file)
index f539cfa..0000000
+++ /dev/null
@@ -1,601 +0,0 @@
-#!/usr/bin/env python
-
-from datetime import datetime, date
-import os
-import tempfile
-import shutil
-import unittest
-
-from gaworkflow.pipeline import firecrest
-from gaworkflow.pipeline import bustard
-from gaworkflow.pipeline import gerald
-from gaworkflow.pipeline import runfolder
-from gaworkflow.pipeline.runfolder import ElementTree
-
-
-def make_flowcell_id(runfolder_dir, flowcell_id=None):
-    if flowcell_id is None:
-        flowcell_id = '207BTAAXY'
-
-    config = """<?xml version="1.0"?>
-<FlowcellId>
-  <Text>%s</Text>
-</FlowcellId>""" % (flowcell_id,)
-    config_dir = os.path.join(runfolder_dir, 'Config')
-    
-    if not os.path.exists(config_dir):
-        os.mkdir(config_dir)
-    pathname = os.path.join(config_dir, 'FlowcellId.xml')
-    f = open(pathname,'w')
-    f.write(config)
-    f.close()
-
-def make_matrix(matrix_dir):
-    contents = """# Auto-generated frequency response matrix
-> A
-> C
-> G
-> T
-0.77 0.15 -0.04 -0.04 
-0.76 1.02 -0.05 -0.06 
--0.10 -0.10 1.17 -0.03 
--0.13 -0.12 0.80 1.27 
-"""
-    s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
-    f = open(s_matrix, 'w')
-    f.write(contents)
-    f.close()
-    
-def make_phasing_params(bustard_dir):
-    for lane in range(1,9):
-        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
-        f = open(pathname, 'w')
-        f.write("""<Parameters>
-  <Phasing>0.009900</Phasing>
-  <Prephasing>0.003500</Prephasing>
-</Parameters>
-""")
-        f.close()
-
-def make_gerald_config(gerald_dir):
-    config_xml = """<RunParameters>
-<ChipWideRunParameters>
-  <ANALYSIS>default</ANALYSIS>
-  <BAD_LANES></BAD_LANES>
-  <BAD_TILES></BAD_TILES>
-  <CONTAM_DIR></CONTAM_DIR>
-  <CONTAM_FILE></CONTAM_FILE>
-  <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
-  <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
-  <ELAND_REPEAT></ELAND_REPEAT>
-  <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
-  <EMAIL_LIST>diane</EMAIL_LIST>
-  <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
-  <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
-  <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
-  <FORCE>1</FORCE>
-  <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
-  <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
-  <HAMSTER_FLAG>genome</HAMSTER_FLAG>
-  <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
-  <POST_RUN_COMMAND></POST_RUN_COMMAND>
-  <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
-  <PURE_BASES>12</PURE_BASES>
-  <QF_PARAMS>'((CHASTITY&gt;=0.6))'</QF_PARAMS>
-  <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
-  <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
-  <READ_LENGTH>32</READ_LENGTH>
-  <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
-  <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
-  <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
-  <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
-  <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
-  <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
-  <TILE_ROOT>s</TILE_ROOT>
-  <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
-  <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
-  <USE_BASES>all</USE_BASES>
-  <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
-</ChipWideRunParameters>
-<LaneSpecificRunParameters>
-  <ANALYSIS>
-    <s_1>eland</s_1>
-    <s_2>eland</s_2>
-    <s_3>eland</s_3>
-    <s_4>eland</s_4>
-    <s_5>eland</s_5>
-    <s_6>eland</s_6>
-    <s_7>eland</s_7>
-    <s_8>eland</s_8>
-  </ANALYSIS>
-  <ELAND_GENOME>
-    <s_1>/g/dm3</s_1>
-    <s_2>/g/equcab1</s_2>
-    <s_3>/g/equcab1</s_3>
-    <s_4>/g/canfam2</s_4>
-    <s_5>/g/hg18</s_5>
-    <s_6>/g/hg18</s_6>
-    <s_7>/g/hg18</s_7>
-    <s_8>/g/hg18</s_8>
-  </ELAND_GENOME>
-  <READ_LENGTH>
-    <s_1>32</s_1>
-    <s_2>32</s_2>
-    <s_3>32</s_3>
-    <s_4>32</s_4>
-    <s_5>32</s_5>
-    <s_6>32</s_6>
-    <s_7>32</s_7>
-    <s_8>32</s_8>
-  </READ_LENGTH>
-  <USE_BASES>
-    <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
-    <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
-    <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
-    <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
-    <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
-    <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
-    <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
-    <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
-  </USE_BASES>
-</LaneSpecificRunParameters>
-</RunParameters>
-"""
-    pathname = os.path.join(gerald_dir, 'config.xml')
-    f = open(pathname,'w')
-    f.write(config_xml)
-    f.close()
-    
-
-def make_summary_htm(gerald_dir):
-    summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
-<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
-<html>
-<body>
-
-<a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
-<h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
-<h2><br></br>Chip Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr><td>Machine</td><td>HWI-EAS229</td></tr>
-<tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
-<tr><td>Chip ID</td><td>unknown</td></tr>
-</table>
-<h2><br></br>Lane Parameter Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane</td>
-<td>Sample ID</td>
-<td>Sample Target</td>
-<td>Sample Type</td>
-<td>Length</td>
-<td>Filter</td>
-<td>Tiles</td>
-</tr>
-<tr>
-<td>1</td>
-<td>unknown</td>
-<td>dm3</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane1">Lane 1</a></td>
-</tr>
-<tr>
-<td>2</td>
-<td>unknown</td>
-<td>equcab1</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane2">Lane 2</a></td>
-</tr>
-<tr>
-<td>3</td>
-<td>unknown</td>
-<td>equcab1</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane3">Lane 3</a></td>
-</tr>
-<tr>
-<td>4</td>
-<td>unknown</td>
-<td>canfam2</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane4">Lane 4</a></td>
-</tr>
-<tr>
-<td>5</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane5">Lane 5</a></td>
-</tr>
-<tr>
-<td>6</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane6">Lane 6</a></td>
-</tr>
-<tr>
-<td>7</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane7">Lane 7</a></td>
-</tr>
-<tr>
-<td>8</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane8">Lane 8</a></td>
-</tr>
-</table>
-<h2><br></br>Lane Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-
-<td>Lane </td>
-<td>Clusters </td>
-<td>Av 1st Cycle Int </td>
-<td>% intensity after 20 cycles </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td> % Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>17421 +/- 2139</td>
-<td>7230 +/- 801</td>
-<td>23.73 +/- 10.79</td>
-<td>13.00 +/- 22.91</td>
-<td>32.03 +/- 18.45</td>
-<td>6703.57 +/- 3753.85</td>
-<td>4.55 +/- 4.81</td>
-</tr>
-<tr>
-<td>2</td>
-<td>20311 +/- 2402</td>
-<td>7660 +/- 678</td>
-<td>17.03 +/- 4.40</td>
-<td>40.74 +/- 30.33</td>
-<td>29.54 +/- 9.03</td>
-<td>5184.02 +/- 1631.54</td>
-<td>3.27 +/- 3.94</td>
-</tr>
-<tr>
-<td>3</td>
-<td>20193 +/- 2399</td>
-<td>7700 +/- 797</td>
-<td>15.75 +/- 3.30</td>
-<td>56.56 +/- 17.16</td>
-<td>27.33 +/- 7.48</td>
-<td>4803.49 +/- 1313.31</td>
-<td>3.07 +/- 2.86</td>
-</tr>
-<tr>
-<td>4</td>
-<td>15537 +/- 2531</td>
-<td>7620 +/- 1392</td>
-<td>15.37 +/- 3.79</td>
-<td>63.05 +/- 18.30</td>
-<td>15.88 +/- 4.99</td>
-<td>3162.13 +/- 962.59</td>
-<td>3.11 +/- 2.22</td>
-</tr>
-<tr>
-<td>5</td>
-<td>32047 +/- 3356</td>
-<td>8093 +/- 831</td>
-<td>23.79 +/- 6.18</td>
-<td>53.36 +/- 18.06</td>
-<td>48.04 +/- 13.77</td>
-<td>9866.23 +/- 2877.30</td>
-<td>2.26 +/- 1.16</td>
-</tr>
-<tr>
-<td>6</td>
-<td>32946 +/- 4753</td>
-<td>8227 +/- 736</td>
-<td>24.07 +/- 4.69</td>
-<td>54.65 +/- 12.57</td>
-<td>50.98 +/- 10.54</td>
-<td>10468.86 +/- 2228.53</td>
-<td>2.21 +/- 2.33</td>
-</tr>
-<tr>
-<td>7</td>
-<td>39504 +/- 4171</td>
-<td>8401 +/- 785</td>
-<td>22.55 +/- 4.56</td>
-<td>45.22 +/- 10.34</td>
-<td>48.41 +/- 9.67</td>
-<td>9829.40 +/- 1993.20</td>
-<td>2.26 +/- 1.11</td>
-</tr>
-<tr>
-<td>8</td>
-<td>37998 +/- 3792</td>
-<td>8443 +/- 1211</td>
-<td>39.03 +/- 7.52</td>
-<td>42.16 +/- 12.35</td>
-<td>40.98 +/- 14.89</td>
-<td>8128.87 +/- 3055.34</td>
-<td>3.57 +/- 2.77</td>
-</tr>
-</table>
-</body>
-</html>
-"""
-    pathname = os.path.join(gerald_dir, 'Summary.htm')
-    f = open(pathname, 'w')
-    f.write(summary_htm)
-    f.close()
-
-def make_eland_results(gerald_dir):
-    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
->HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
->HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
->HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
-"""
-    for i in range(1,9):
-        pathname = os.path.join(gerald_dir, 
-                                's_%d_eland_result.txt' % (i,))
-        f = open(pathname, 'w')
-        f.write(eland_result)
-        f.close()
-                     
-class RunfolderTests(unittest.TestCase):
-    """
-    Test components of the runfolder processing code
-    which includes firecrest, bustard, and gerald
-    """
-    def setUp(self):
-        # make a fake runfolder directory
-        self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
-
-        self.runfolder_dir = os.path.join(self.temp_dir, 
-                                          '080102_HWI-EAS229_0010_207BTAAXX')
-        os.mkdir(self.runfolder_dir)
-
-        self.data_dir = os.path.join(self.runfolder_dir, 'Data')
-        os.mkdir(self.data_dir)
-
-        self.firecrest_dir = os.path.join(self.data_dir, 
-                               'C1-33_Firecrest1.8.28_12-04-2008_diane'
-                             )
-        os.mkdir(self.firecrest_dir)
-        self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
-        os.mkdir(self.matrix_dir)
-        make_matrix(self.matrix_dir)
-
-        self.bustard_dir = os.path.join(self.firecrest_dir, 
-                                        'Bustard1.8.28_12-04-2008_diane')
-        os.mkdir(self.bustard_dir)
-        make_phasing_params(self.bustard_dir)
-        
-        self.gerald_dir = os.path.join(self.bustard_dir,
-                                       'GERALD_12-04-2008_diane')
-        os.mkdir(self.gerald_dir)
-        make_gerald_config(self.gerald_dir)
-        make_summary_htm(self.gerald_dir)
-        make_eland_results(self.gerald_dir)
-
-    def tearDown(self):
-        shutil.rmtree(self.temp_dir)
-
-    def test_firecrest(self):
-        """
-        Construct a firecrest object
-        """
-        f = firecrest.firecrest(self.firecrest_dir)
-        self.failUnlessEqual(f.version, '1.8.28')
-        self.failUnlessEqual(f.start, 1)
-        self.failUnlessEqual(f.stop, 33)
-        self.failUnlessEqual(f.user, 'diane')
-        self.failUnlessEqual(f.date, date(2008,4,12))
-
-        xml = f.get_elements()
-        # just make sure that element tree can serialize the tree
-        xml_str = ElementTree.tostring(xml)
-
-        f2 = firecrest.Firecrest(xml=xml)
-        self.failUnlessEqual(f.version, f2.version)
-        self.failUnlessEqual(f.start,   f2.start)
-        self.failUnlessEqual(f.stop,    f2.stop)
-        self.failUnlessEqual(f.user,    f2.user)
-        self.failUnlessEqual(f.date,    f2.date)
-
-    def test_bustard(self):
-        """
-        Construct a bustard object
-        """
-        b = bustard.bustard(self.bustard_dir)
-        self.failUnlessEqual(b.version, '1.8.28')
-        self.failUnlessEqual(b.date,    date(2008,4,12))
-        self.failUnlessEqual(b.user,    'diane')
-        self.failUnlessEqual(len(b.phasing), 8)
-        self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
-        
-        xml = b.get_elements()
-        b2 = bustard.Bustard(xml=xml)
-        self.failUnlessEqual(b.version, b2.version)
-        self.failUnlessEqual(b.date,    b2.date )
-        self.failUnlessEqual(b.user,    b2.user)
-        self.failUnlessEqual(len(b.phasing), len(b2.phasing))
-        for key in b.phasing.keys():
-            self.failUnlessEqual(b.phasing[key].lane, 
-                                 b2.phasing[key].lane)
-            self.failUnlessEqual(b.phasing[key].phasing, 
-                                 b2.phasing[key].phasing)
-            self.failUnlessEqual(b.phasing[key].prephasing, 
-                                 b2.phasing[key].prephasing)
-
-    def test_gerald(self):
-        # need to update gerald and make tests for it
-        g = gerald.gerald(self.gerald_dir) 
-
-        self.failUnlessEqual(g.version, 
-            '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
-        self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
-        self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
-        self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
-
-        
-        # list of genomes, matches what was defined up in 
-        # make_gerald_config.
-        # the first None pads the genomes list so lane numbers 1..8 can
-        # index it directly, instead of python's default 0-based indexing
-        genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
-                         '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
-
-        # test lane specific parameters from gerald config file
-        for i in range(1,9):
-            cur_lane = g.lanes[str(i)]
-            self.failUnlessEqual(cur_lane.analysis, 'eland')
-            self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
-            self.failUnlessEqual(cur_lane.read_length, '32')
-            self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
-
-        # test data extracted from summary file
-        clusters = [None, 
-                    (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
-                    (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
-
-        for i in range(1,9):
-            summary_lane = g.summary[str(i)]
-            self.failUnlessEqual(summary_lane.cluster, clusters[i])
-            self.failUnlessEqual(summary_lane.lane, str(i))
-
-        xml = g.get_elements()
-        # just make sure that element tree can serialize the tree
-        xml_str = ElementTree.tostring(xml)
-        g2 = gerald.Gerald(xml=xml)
-
-        # do it all again after extracting from the xml file
-        self.failUnlessEqual(g.version, g2.version)
-        self.failUnlessEqual(g.date, g2.date)
-        self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
-        self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
-
-        # test lane specific parameters from gerald config file
-        for i in range(1,9):
-            g_lane = g.lanes[str(i)]
-            g2_lane = g2.lanes[str(i)]
-            self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
-            self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
-            self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
-            self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
-
-        # test (some) summary elements
-        for i in range(1,9):
-            g_summary = g.summary[str(i)]
-            g2_summary = g2.summary[str(i)]
-            self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
-            self.failUnlessEqual(g_summary.lane, g2_summary.lane)
-
-            g_eland = g.eland_results
-            g2_eland = g2.eland_results
-            for lane in g_eland.keys():
-                self.failUnlessEqual(g_eland[lane].reads, 
-                                     g2_eland[lane].reads)
-                self.failUnlessEqual(len(g_eland[lane].mapped_reads), 
-                                     len(g2_eland[lane].mapped_reads))
-                for k in g_eland[lane].mapped_reads.keys():
-                    self.failUnlessEqual(g_eland[lane].mapped_reads[k],
-                                         g2_eland[lane].mapped_reads[k])
-
-                self.failUnlessEqual(len(g_eland[lane].match_codes), 
-                                     len(g2_eland[lane].match_codes))
-                for k in g_eland[lane].match_codes.keys():
-                    self.failUnlessEqual(g_eland[lane].match_codes[k],
-                                         g2_eland[lane].match_codes[k])
-
-
-    def test_eland(self):
-        dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
-                    'chr2L.fa': 'dm3/chr2L.fa',
-                    'Lambda.fa': 'Lambda.fa'}
-        genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
-                        '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
-        eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
-        
-        for i in range(1,9):
-            lane = eland[str(i)]
-            self.failUnlessEqual(lane.reads, 4)
-            self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, unicode(i))
-            self.failUnlessEqual(len(lane.mapped_reads), 3)
-            self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
-            self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
-            self.failUnlessEqual(lane.match_codes['U1'], 2)
-            self.failUnlessEqual(lane.match_codes['NM'], 1)
-
-        xml = eland.get_elements()
-        # just make sure that element tree can serialize the tree
-        xml_str = ElementTree.tostring(xml)
-        e2 = gerald.ELAND(xml=xml)
-
-        for i in range(1,9):
-            l1 = eland[str(i)]
-            l2 = e2[str(i)]
-            self.failUnlessEqual(l1.reads, l2.reads)
-            self.failUnlessEqual(l1.sample_name, l2.sample_name)
-            self.failUnlessEqual(l1.lane_id, l2.lane_id)
-            self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
-            self.failUnlessEqual(len(l1.mapped_reads), 3)
-            for k in l1.mapped_reads.keys():
-                self.failUnlessEqual(l1.mapped_reads[k],
-                                     l2.mapped_reads[k])
-
-            self.failUnlessEqual(len(l1.match_codes), 9)
-            self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
-            for k in l1.match_codes.keys():
-                self.failUnlessEqual(l1.match_codes[k], 
-                                     l2.match_codes[k])
-
-    def test_runfolder(self):
-        runs = runfolder.get_runs(self.runfolder_dir)
-        
-        # do we get the flowcell id from the filename?
-        self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
-
-        # do we get the flowcell id from the FlowcellId.xml file?
-        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
-        runs = runfolder.get_runs(self.runfolder_dir)
-        self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
-        
-        r1 = runs[0]
-        xml = r1.get_elements()
-        xml_str = ElementTree.tostring(xml)
-
-        r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
-        self.failIfEqual(r2.firecrest, None)
-        self.failIfEqual(r2.bustard, None)
-        self.failIfEqual(r2.gerald, None)
-        
-
-def suite():
-    return unittest.makeSuite(RunfolderTests,'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest="suite")
-    
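Both runfolder test modules end with the suite()/unittest.main pattern above, so they can be run directly or driven from another test runner; a minimal sketch, assuming the pre-rename module path:

    import unittest
    from gaworkflow.pipeline.test import test_runfolder026

    # run the RunfolderTests with verbose output
    unittest.TextTestRunner(verbosity=2).run(test_runfolder026.suite())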
diff --git a/gaworkflow/pipeline/test/test_runfolder030.py b/gaworkflow/pipeline/test/test_runfolder030.py
deleted file mode 100644 (file)
index 5078412..0000000
+++ /dev/null
@@ -1,1024 +0,0 @@
-#!/usr/bin/env python
-
-from datetime import datetime, date
-import os
-import tempfile
-import shutil
-import unittest
-
-from gaworkflow.pipeline import firecrest
-from gaworkflow.pipeline import bustard
-from gaworkflow.pipeline import gerald
-from gaworkflow.pipeline import runfolder
-from gaworkflow.pipeline.runfolder import ElementTree
-
-
-def make_flowcell_id(runfolder_dir, flowcell_id=None):
-    if flowcell_id is None:
-        flowcell_id = '207BTAAXY'
-
-    config = """<?xml version="1.0"?>
-<FlowcellId>
-  <Text>%s</Text>
-</FlowcellId>""" % (flowcell_id,)
-    config_dir = os.path.join(runfolder_dir, 'Config')
-    
-    if not os.path.exists(config_dir):
-        os.mkdir(config_dir)
-    pathname = os.path.join(config_dir, 'FlowcellId.xml')
-    f = open(pathname,'w')
-    f.write(config)
-    f.close()
-
-def make_matrix(matrix_dir):
-    contents = """# Auto-generated frequency response matrix
-> A
-> C
-> G
-> T
-0.77 0.15 -0.04 -0.04 
-0.76 1.02 -0.05 -0.06 
--0.10 -0.10 1.17 -0.03 
--0.13 -0.12 0.80 1.27 
-"""
-    s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
-    f = open(s_matrix, 'w')
-    f.write(contents)
-    f.close()
-    
-def make_phasing_params(bustard_dir):
-    for lane in range(1,9):
-        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
-        f = open(pathname, 'w')
-        f.write("""<Parameters>
-  <Phasing>0.009900</Phasing>
-  <Prephasing>0.003500</Prephasing>
-</Parameters>
-""")
-        f.close()
-
-def make_gerald_config(gerald_dir):
-    config_xml = """<RunParameters>
-<ChipWideRunParameters>
-  <ANALYSIS>default</ANALYSIS>
-  <BAD_LANES></BAD_LANES>
-  <BAD_TILES></BAD_TILES>
-  <CONTAM_DIR></CONTAM_DIR>
-  <CONTAM_FILE></CONTAM_FILE>
-  <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
-  <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
-  <ELAND_REPEAT></ELAND_REPEAT>
-  <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
-  <EMAIL_LIST>diane</EMAIL_LIST>
-  <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
-  <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
-  <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
-  <FORCE>1</FORCE>
-  <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
-  <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
-  <HAMSTER_FLAG>genome</HAMSTER_FLAG>
-  <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
-  <POST_RUN_COMMAND></POST_RUN_COMMAND>
-  <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
-  <PURE_BASES>12</PURE_BASES>
-  <QF_PARAMS>'((CHASTITY&gt;=0.6))'</QF_PARAMS>
-  <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
-  <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
-  <READ_LENGTH>32</READ_LENGTH>
-  <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
-  <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
-  <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
-  <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
-  <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
-  <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
-  <TILE_ROOT>s</TILE_ROOT>
-  <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
-  <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
-  <USE_BASES>all</USE_BASES>
-  <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
-</ChipWideRunParameters>
-<LaneSpecificRunParameters>
-  <ANALYSIS>
-    <s_1>eland</s_1>
-    <s_2>eland</s_2>
-    <s_3>eland</s_3>
-    <s_4>eland</s_4>
-    <s_5>eland</s_5>
-    <s_6>eland</s_6>
-    <s_7>eland</s_7>
-    <s_8>eland</s_8>
-  </ANALYSIS>
-  <ELAND_GENOME>
-    <s_1>/g/dm3</s_1>
-    <s_2>/g/equcab1</s_2>
-    <s_3>/g/equcab1</s_3>
-    <s_4>/g/canfam2</s_4>
-    <s_5>/g/hg18</s_5>
-    <s_6>/g/hg18</s_6>
-    <s_7>/g/hg18</s_7>
-    <s_8>/g/hg18</s_8>
-  </ELAND_GENOME>
-  <READ_LENGTH>
-    <s_1>32</s_1>
-    <s_2>32</s_2>
-    <s_3>32</s_3>
-    <s_4>32</s_4>
-    <s_5>32</s_5>
-    <s_6>32</s_6>
-    <s_7>32</s_7>
-    <s_8>32</s_8>
-  </READ_LENGTH>
-  <USE_BASES>
-    <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
-    <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
-    <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
-    <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
-    <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
-    <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
-    <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
-    <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
-  </USE_BASES>
-</LaneSpecificRunParameters>
-</RunParameters>
-"""
-    pathname = os.path.join(gerald_dir, 'config.xml')
-    f = open(pathname,'w')
-    f.write(config_xml)
-    f.close()
-    
-def make_summary_htm(gerald_dir):
-    summary_htm="""<!--RUN_TIME Wed Jul  2 06:47:44 2008 -->
-<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
-<html>
-<body>
-
-<a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
-<h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
-<h2><br></br>Chip Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr><td>Machine</td><td>HWI-EAS229</td></tr>
-<tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
-<tr><td>Chip ID</td><td>unknown</td></tr>
-</table>
-<h2><br></br>Chip Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Clusters</td>
-<td>Clusters (PF)</td>
-<td>Yield (kbases)</td>
-</tr>
-<tr><td>80933224</td>
-<td>43577803</td>
-<td>1133022</td>
-</tr>
-</table>
-<h2><br></br>Lane Parameter Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane</td>
-<td>Sample ID</td>
-<td>Sample Target</td>
-<td>Sample Type</td>
-<td>Length</td>
-<td>Filter</td>
-<td>Num Tiles</td>
-<td>Tiles</td>
-</tr>
-<tr>
-<td>1</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane1">Lane 1</a></td>
-</tr>
-<tr>
-<td>2</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane2">Lane 2</a></td>
-</tr>
-<tr>
-<td>3</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane3">Lane 3</a></td>
-</tr>
-<tr>
-<td>4</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane4">Lane 4</a></td>
-</tr>
-<tr>
-<td>5</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane5">Lane 5</a></td>
-</tr>
-<tr>
-<td>6</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane6">Lane 6</a></td>
-</tr>
-<tr>
-<td>7</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane7">Lane 7</a></td>
-</tr>
-<tr>
-<td>8</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane8">Lane 8</a></td>
-</tr>
-</table>
-<h2><br></br>Lane Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td colspan="2">Lane Info</td>
-<td colspan="8">Tile Mean +/- SD for Lane</td>
-</tr>
-<tr>
-<td>Lane </td>
-<td>Lane Yield (kbases) </td>
-<td>Clusters (raw)</td>
-<td>Clusters (PF) </td>
-<td>1st Cycle Int (PF) </td>
-<td>% intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Alignment Score (PF) </td>
-<td> % Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>158046</td>
-<td>96483 +/- 9074</td>
-<td>60787 +/- 4240</td>
-<td>329 +/- 35</td>
-<td>101.88 +/- 6.03</td>
-<td>63.21 +/- 3.29</td>
-<td>70.33 +/- 0.24</td>
-<td>9054.08 +/- 59.16</td>
-<td>0.46 +/- 0.18</td>
-</tr>
-<tr>
-<td>2</td>
-<td>156564</td>
-<td>133738 +/- 7938</td>
-<td>60217 +/- 1926</td>
-<td>444 +/- 39</td>
-<td>92.62 +/- 7.58</td>
-<td>45.20 +/- 3.31</td>
-<td>51.98 +/- 0.74</td>
-<td>6692.04 +/- 92.49</td>
-<td>0.46 +/- 0.09</td>
-</tr>
-<tr>
-<td>3</td>
-<td>185818</td>
-<td>152142 +/- 10002</td>
-<td>71468 +/- 2827</td>
-<td>366 +/- 36</td>
-<td>91.53 +/- 8.66</td>
-<td>47.19 +/- 3.80</td>
-<td>82.24 +/- 0.44</td>
-<td>10598.68 +/- 64.13</td>
-<td>0.41 +/- 0.04</td>
-</tr>
-<tr>
-<td>4</td>
-<td>34953</td>
-<td>15784 +/- 2162</td>
-<td>13443 +/- 1728</td>
-<td>328 +/- 40</td>
-<td>97.53 +/- 9.87</td>
-<td>85.29 +/- 1.91</td>
-<td>80.02 +/- 0.53</td>
-<td>10368.82 +/- 71.08</td>
-<td>0.15 +/- 0.05</td>
-</tr>
-<tr>
-<td>5</td>
-<td>167936</td>
-<td>119735 +/- 8465</td>
-<td>64590 +/- 2529</td>
-<td>417 +/- 37</td>
-<td>88.69 +/- 14.79</td>
-<td>54.10 +/- 2.59</td>
-<td>76.95 +/- 0.32</td>
-<td>9936.47 +/- 65.75</td>
-<td>0.28 +/- 0.02</td>
-</tr>
-<tr>
-<td>6</td>
-<td>173463</td>
-<td>152177 +/- 8146</td>
-<td>66716 +/- 2493</td>
-<td>372 +/- 39</td>
-<td>87.06 +/- 9.86</td>
-<td>43.98 +/- 3.12</td>
-<td>78.80 +/- 0.43</td>
-<td>10162.28 +/- 49.65</td>
-<td>0.38 +/- 0.03</td>
-</tr>
-<tr>
-<td>7</td>
-<td>149287</td>
-<td>84649 +/- 7325</td>
-<td>57418 +/- 3617</td>
-<td>295 +/- 28</td>
-<td>89.40 +/- 8.23</td>
-<td>67.97 +/- 1.82</td>
-<td>33.38 +/- 0.25</td>
-<td>4247.92 +/- 32.37</td>
-<td>1.00 +/- 0.03</td>
-</tr>
-<tr>
-<td>8</td>
-<td>106953</td>
-<td>54622 +/- 4812</td>
-<td>41136 +/- 3309</td>
-<td>284 +/- 37</td>
-<td>90.21 +/- 9.10</td>
-<td>75.39 +/- 2.27</td>
-<td>48.33 +/- 0.29</td>
-<td>6169.21 +/- 169.50</td>
-<td>0.86 +/- 1.22</td>
-</tr>
-<tr><td colspan="13">Tile mean across chip</td></tr>
-<tr>
-<td>Av.</td>
-<td></td>
-<td>101166</td>
-<td>54472</td>
-<td>354</td>
-<td>92.36</td>
-<td>60.29</td>
-<td>65.25</td>
-<td>8403.69</td>
-<td>0.50</td>
-</tr>
-</table>
-<h2><br></br>Expanded Lane Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-
-<tr><td colspan="2">Lane Info</td>
-<td colspan="2">Phasing Info</td>
-<td colspan="2">Raw Data (tile mean)</td>
-<td colspan="7">Filtered Data (tile mean)</td></tr>
-<td>Lane </td>
-<td>Clusters (tile mean) (raw)</td>
-<td>% Phasing </td>
-<td>% Prephasing </td>
-<td>% Error Rate (raw) </td>
-<td> Equiv Perfect Clusters (raw) </td>
-<td>% retained </td>
-<td>Cycle 2-4 Av Int (PF) </td>
-<td>Cycle 2-10 Av % Loss (PF) </td>
-<td>Cycle 10-20 Av % Loss (PF) </td>
-<td>% Align (PF) </td>
-<td>% Error Rate (PF) </td>
-<td> Equiv Perfect Clusters (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>96483</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.00</td>
-<td>49676</td>
-<td>63.21</td>
-<td>317 +/- 32</td>
-<td>0.13 +/- 0.44</td>
-<td>-1.14 +/- 0.34</td>
-<td>70.33</td>
-<td>0.46</td>
-<td>41758</td>
-</tr>
-<tr>
-<td>2</td>
-<td>133738</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.22</td>
-<td>40467</td>
-<td>45.20</td>
-<td>415 +/- 33</td>
-<td>0.29 +/- 0.40</td>
-<td>-0.79 +/- 0.35</td>
-<td>51.98</td>
-<td>0.46</td>
-<td>30615</td>
-</tr>
-<tr>
-<td>3</td>
-<td>152142</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.30</td>
-<td>78588</td>
-<td>47.19</td>
-<td>344 +/- 26</td>
-<td>0.68 +/- 0.51</td>
-<td>-0.77 +/- 0.42</td>
-<td>82.24</td>
-<td>0.41</td>
-<td>57552</td>
-</tr>
-<tr>
-<td>4</td>
-<td>15784</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>0.29</td>
-<td>11095</td>
-<td>85.29</td>
-<td>306 +/- 34</td>
-<td>0.20 +/- 0.69</td>
-<td>-1.28 +/- 0.66</td>
-<td>80.02</td>
-<td>0.15</td>
-<td>10671</td>
-</tr>
-<tr>
-<td>5</td>
-<td>119735</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>0.85</td>
-<td>60335</td>
-<td>54.10</td>
-<td>380 +/- 32</td>
-<td>0.34 +/- 0.49</td>
-<td>-1.55 +/- 4.69</td>
-<td>76.95</td>
-<td>0.28</td>
-<td>49015</td>
-</tr>
-<tr>
-<td>6</td>
-<td>152177</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.21</td>
-<td>70905</td>
-<td>43.98</td>
-<td>333 +/- 27</td>
-<td>0.57 +/- 0.50</td>
-<td>-0.91 +/- 0.39</td>
-<td>78.80</td>
-<td>0.38</td>
-<td>51663</td>
-</tr>
-<tr>
-<td>7</td>
-<td>84649</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.38</td>
-<td>21069</td>
-<td>67.97</td>
-<td>272 +/- 20</td>
-<td>1.15 +/- 0.52</td>
-<td>-0.84 +/- 0.58</td>
-<td>33.38</td>
-<td>1.00</td>
-<td>18265</td>
-</tr>
-<tr>
-<td>8</td>
-<td>54622</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.17</td>
-<td>21335</td>
-<td>75.39</td>
-<td>262 +/- 31</td>
-<td>1.10 +/- 0.59</td>
-<td>-1.01 +/- 0.47</td>
-<td>48.33</td>
-<td>0.86</td>
-<td>19104</td>
-</tr>
-</table>
-<b><br></br>IVC Plots</b>
-<p> <a href='IVC.htm' target="_blank"> IVC.htm
- </a></p>
-<b><br></br>All Intensity Plots</b>
-<p> <a href='All.htm' target="_blank"> All.htm
- </a></p>
-<b><br></br>Error graphs: </b>
-<p> <a href='Error.htm' target="_blank"> Error.htm
- </a></p>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>0001</td>
-<td>114972</td>
-<td>326.48</td>
-<td>94.39</td>
-<td>57.44</td>
-<td>70.2</td>
-<td>9038.6</td>
-<td>0.44</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>2</td>
-<td>0001</td>
-<td>147793</td>
-<td>448.12</td>
-<td>83.68</td>
-<td>38.57</td>
-<td>53.7</td>
-<td>6905.4</td>
-<td>0.54</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>3</td>
-<td>0001</td>
-<td>167904</td>
-<td>374.05</td>
-<td>86.91</td>
-<td>40.36</td>
-<td>81.3</td>
-<td>10465.0</td>
-<td>0.47</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>4</td>
-<td>0001</td>
-<td>20308</td>
-<td>276.85</td>
-<td>92.87</td>
-<td>84.26</td>
-<td>80.4</td>
-<td>10413.8</td>
-<td>0.16</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>6</td>
-<td>0001</td>
-<td>166844</td>
-<td>348.12</td>
-<td>77.59</td>
-<td>38.13</td>
-<td>79.7</td>
-<td>10264.4</td>
-<td>0.44</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>7</td>
-<td>0001</td>
-<td>98913</td>
-<td>269.90</td>
-<td>86.66</td>
-<td>64.55</td>
-<td>33.2</td>
-<td>4217.5</td>
-<td>1.02</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>8</td>
-<td>0001</td>
-<td>64972</td>
-<td>243.60</td>
-<td>89.40</td>
-<td>73.17</td>
-<td>48.3</td>
-<td>6182.8</td>
-<td>0.71</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-</body>
-</html>
-"""
-    pathname = os.path.join(gerald_dir, 'Summary.htm')
-    f = open(pathname, 'w')
-    f.write(summary_htm)
-    f.close()
-
-def make_eland_results(gerald_dir):
-    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
->HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
->HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
->HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
-"""
-    for i in range(1,9):
-        pathname = os.path.join(gerald_dir, 
-                                's_%d_eland_result.txt' % (i,))
-        f = open(pathname, 'w')
-        f.write(eland_result)
-        f.close()
-
-def make_runfolder(obj=None):
-    """
-    Make a fake runfolder, attach all the directories to obj if defined
-    """
-    # make a fake runfolder directory
-    temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
-
-    runfolder_dir = os.path.join(temp_dir, 
-                                 '080102_HWI-EAS229_0010_207BTAAXX')
-    os.mkdir(runfolder_dir)
-
-    data_dir = os.path.join(runfolder_dir, 'Data')
-    os.mkdir(data_dir)
-
-    firecrest_dir = os.path.join(data_dir, 
-                                 'C1-33_Firecrest1.8.28_12-04-2008_diane'
-                                 )
-    os.mkdir(firecrest_dir)
-    matrix_dir = os.path.join(firecrest_dir, 'Matrix')
-    os.mkdir(matrix_dir)
-    make_matrix(matrix_dir)
-
-    bustard_dir = os.path.join(firecrest_dir, 
-                               'Bustard1.8.28_12-04-2008_diane')
-    os.mkdir(bustard_dir)
-    make_phasing_params(bustard_dir)
-
-    gerald_dir = os.path.join(bustard_dir,
-                              'GERALD_12-04-2008_diane')
-    os.mkdir(gerald_dir)
-    make_gerald_config(gerald_dir)
-    make_summary_htm(gerald_dir)
-    make_eland_results(gerald_dir)
-
-    if obj is not None:
-        obj.temp_dir = temp_dir
-        obj.runfolder_dir = runfolder_dir
-        obj.data_dir = data_dir
-        obj.firecrest_dir = firecrest_dir
-        obj.matrix_dir = matrix_dir
-        obj.bustard_dir = bustard_dir
-        obj.gerald_dir = gerald_dir
-        
-                     
-class RunfolderTests(unittest.TestCase):
-    """
-    Test components of the runfolder processing code
-    which includes firecrest, bustard, and gerald
-    """
-    def setUp(self):
-        # attaches all the directories to the object passed in
-        make_runfolder(self)
-
-    def tearDown(self):
-        shutil.rmtree(self.temp_dir)
-
-    def test_firecrest(self):
-        """
-        Construct a firecrest object
-        """
-        f = firecrest.firecrest(self.firecrest_dir)
-        self.failUnlessEqual(f.version, '1.8.28')
-        self.failUnlessEqual(f.start, 1)
-        self.failUnlessEqual(f.stop, 33)
-        self.failUnlessEqual(f.user, 'diane')
-        self.failUnlessEqual(f.date, date(2008,4,12))
-
-        xml = f.get_elements()
-        # just make sure that element tree can serialize the tree
-        xml_str = ElementTree.tostring(xml)
-
-        f2 = firecrest.Firecrest(xml=xml)
-        self.failUnlessEqual(f.version, f2.version)
-        self.failUnlessEqual(f.start,   f2.start)
-        self.failUnlessEqual(f.stop,    f2.stop)
-        self.failUnlessEqual(f.user,    f2.user)
-        self.failUnlessEqual(f.date,    f2.date)
-
-    def test_bustard(self):
-        """
-        Construct a bustard object
-        """
-        b = bustard.bustard(self.bustard_dir)
-        self.failUnlessEqual(b.version, '1.8.28')
-        self.failUnlessEqual(b.date,    date(2008,4,12))
-        self.failUnlessEqual(b.user,    'diane')
-        self.failUnlessEqual(len(b.phasing), 8)
-        self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
-        
-        xml = b.get_elements()
-        b2 = bustard.Bustard(xml=xml)
-        self.failUnlessEqual(b.version, b2.version)
-        self.failUnlessEqual(b.date,    b2.date )
-        self.failUnlessEqual(b.user,    b2.user)
-        self.failUnlessEqual(len(b.phasing), len(b2.phasing))
-        for key in b.phasing.keys():
-            self.failUnlessEqual(b.phasing[key].lane, 
-                                 b2.phasing[key].lane)
-            self.failUnlessEqual(b.phasing[key].phasing, 
-                                 b2.phasing[key].phasing)
-            self.failUnlessEqual(b.phasing[key].prephasing, 
-                                 b2.phasing[key].prephasing)
-
-    def test_gerald(self):
-        # need to update gerald and make tests for it
-        g = gerald.gerald(self.gerald_dir) 
-
-        self.failUnlessEqual(g.version, 
-            '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
-        self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
-        self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
-        self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
-
-        
-        # list of genomes, matches what was defined up in 
-        # make_gerald_config.
-        # the first None pads the genomes list so lane numbers 1..8 can
-        # index it directly, instead of python's default 0-based indexing
-        genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
-                         '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
-
-        # test lane specific parameters from gerald config file
-        for i in range(1,9):
-            cur_lane = g.lanes[str(i)]
-            self.failUnlessEqual(cur_lane.analysis, 'eland')
-            self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
-            self.failUnlessEqual(cur_lane.read_length, '32')
-            self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
-
-        # test data extracted from summary file
-        clusters = [None, 
-                    (96483, 9074), (133738, 7938), 
-                    (152142, 10002), (15784, 2162), 
-                    (119735, 8465), (152177, 8146),
-                    (84649, 7325), (54622, 4812),]
-
-        for i in range(1,9):
-            summary_lane = g.summary[str(i)]
-            self.failUnlessEqual(summary_lane.cluster, clusters[i])
-            self.failUnlessEqual(summary_lane.lane, str(i))
-
-        xml = g.get_elements()
-        # just make sure that element tree can serialize the tree
-        xml_str = ElementTree.tostring(xml)
-        g2 = gerald.Gerald(xml=xml)
-
-        # do it all again after extracting from the xml file
-        self.failUnlessEqual(g.version, g2.version)
-        self.failUnlessEqual(g.date, g2.date)
-        self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
-        self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
-
-        # test lane specific parameters from gerald config file
-        for i in range(1,9):
-            g_lane = g.lanes[str(i)]
-            g2_lane = g2.lanes[str(i)]
-            self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
-            self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
-            self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
-            self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
-
-        # test (some) summary elements
-        for i in range(1,9):
-            g_summary = g.summary[str(i)]
-            g2_summary = g2.summary[str(i)]
-            self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
-            self.failUnlessEqual(g_summary.lane, g2_summary.lane)
-
-            g_eland = g.eland_results
-            g2_eland = g2.eland_results
-            for lane in g_eland.keys():
-                self.failUnlessEqual(g_eland[lane].reads, 
-                                     g2_eland[lane].reads)
-                self.failUnlessEqual(len(g_eland[lane].mapped_reads), 
-                                     len(g2_eland[lane].mapped_reads))
-                for k in g_eland[lane].mapped_reads.keys():
-                    self.failUnlessEqual(g_eland[lane].mapped_reads[k],
-                                         g2_eland[lane].mapped_reads[k])
-
-                self.failUnlessEqual(len(g_eland[lane].match_codes), 
-                                     len(g2_eland[lane].match_codes))
-                for k in g_eland[lane].match_codes.keys():
-                    self.failUnlessEqual(g_eland[lane].match_codes[k],
-                                         g2_eland[lane].match_codes[k])
-
-
-    def test_eland(self):
-        dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
-                    'chr2L.fa': 'dm3/chr2L.fa',
-                    'Lambda.fa': 'Lambda.fa'}
-        genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
-                        '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
-        eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
-        
-        for i in range(1,9):
-            lane = eland[str(i)]
-            self.failUnlessEqual(lane.reads, 4)
-            self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, unicode(i))
-            self.failUnlessEqual(len(lane.mapped_reads), 3)
-            self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
-            self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
-            self.failUnlessEqual(lane.match_codes['U1'], 2)
-            self.failUnlessEqual(lane.match_codes['NM'], 1)
-
-        xml = eland.get_elements()
-        # just make sure that element tree can serialize the tree
-        xml_str = ElementTree.tostring(xml)
-        e2 = gerald.ELAND(xml=xml)
-
-        for i in range(1,9):
-            l1 = eland[str(i)]
-            l2 = e2[str(i)]
-            self.failUnlessEqual(l1.reads, l2.reads)
-            self.failUnlessEqual(l1.sample_name, l2.sample_name)
-            self.failUnlessEqual(l1.lane_id, l2.lane_id)
-            self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
-            self.failUnlessEqual(len(l1.mapped_reads), 3)
-            for k in l1.mapped_reads.keys():
-                self.failUnlessEqual(l1.mapped_reads[k],
-                                     l2.mapped_reads[k])
-
-            self.failUnlessEqual(len(l1.match_codes), 9)
-            self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
-            for k in l1.match_codes.keys():
-                self.failUnlessEqual(l1.match_codes[k], 
-                                     l2.match_codes[k])
-
-    def test_runfolder(self):
-        runs = runfolder.get_runs(self.runfolder_dir)
-        
-        # do we get the flowcell id from the filename?
-        self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
-
-        # do we get the flowcell id from the FlowcellId.xml file?
-        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
-        runs = runfolder.get_runs(self.runfolder_dir)
-        self.failUnlessEqual(len(runs), 1)
-        self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
-        
-        r1 = runs[0]
-        xml = r1.get_elements()
-        xml_str = ElementTree.tostring(xml)
-
-        r2 = runfolder.PipelineRun(xml=xml)
-        self.failUnlessEqual(r1.name, r2.name)
-        self.failIfEqual(r2.firecrest, None)
-        self.failIfEqual(r2.bustard, None)
-        self.failIfEqual(r2.gerald, None)
-        
-
-def suite():
-    return unittest.makeSuite(RunfolderTests,'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest="suite")
-    
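The tests above document the intended round trip: scan a runfolder, serialize the result to XML, then rebuild the run from that XML alone. A sketch of the same flow outside the test harness, with a hypothetical runfolder path:

    from gaworkflow.pipeline import runfolder
    from gaworkflow.pipeline.runfolder import ElementTree

    runs = runfolder.get_runs('/tmp/080102_HWI-EAS229_0010_207BTAAXX')
    xml = runs[0].get_elements()          # ElementTree description of the run
    xml_str = ElementTree.tostring(xml)   # serialize, e.g. to write runs[0].name

    # rebuild an equivalent PipelineRun from the XML alone
    r2 = runfolder.PipelineRun(xml=xml)
    print r2.name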
diff --git a/gaworkflow/util/__init__.py b/gaworkflow/util/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/gaworkflow/util/alphanum.py b/gaworkflow/util/alphanum.py
deleted file mode 100644 (file)
index 8893bdb..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#
-# The Alphanum Algorithm is an improved sorting algorithm for strings
-# containing numbers.  Instead of sorting numbers in ASCII order like
-# a standard sort, this algorithm sorts numbers in numeric order.
-#
-# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
-#
-#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com)
-#* Distributed under same license as original
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-#
-
-import re
-
-#
-# TODO: Make decimal points be considered in the same class as digits
-#
-
-def chunkify(str):
-        """return a list of numbers and non-numeric substrings of +str+
-
-        the numeric substrings are converted to integer, non-numeric are left as is
-        """
-        chunks = re.findall("(\d+|\D+)",str)
-        chunks = [re.match('\d',x) and int(x) or x for x in chunks] #convert numeric strings to numbers
-        return chunks
-
-def alphanum(a,b):
-        """breaks +a+ and +b+ into pieces and returns left-to-right comparison of the pieces
-
-        +a+ and +b+ are expected to be strings (for example file names) with numbers and non-numeric characters
-        Split the values into list of numbers and non numeric sub-strings and so comparison of numbers gives
-        Numeric sorting, comparison of non-numeric gives Lexicographic order
-        """
-        # split strings into chunks
-        aChunks = chunkify(a)
-        bChunks = chunkify(b)
-
-        return cmp(aChunks,bChunks) #built in comparison works once data is prepared
-
-
-
-if __name__ == "__main__":
-        unsorted = ["1000X Radonius Maximus","10X Radonius","200X Radonius","20X Radonius","20X Radonius Prime","30X Radonius","40X Radonius","Allegia 50 Clasteron","Allegia 500 Clasteron","Allegia 51 Clasteron","Allegia 51B Clasteron","Allegia 52 Clasteron","Allegia 60 Clasteron","Alpha 100","Alpha 2","Alpha 200","Alpha 2A","Alpha 2A-8000","Alpha 2A-900","Callisto Morphamax","Callisto Morphamax 500","Callisto Morphamax 5000","Callisto Morphamax 600","Callisto Morphamax 700","Callisto Morphamax 7000","Callisto Morphamax 7000 SE","Callisto Morphamax 7000 SE2","QRS-60 Intrinsia Machine","QRS-60F Intrinsia Machine","QRS-62 Intrinsia Machine","QRS-62F Intrinsia Machine","Xiph Xlater 10000","Xiph Xlater 2000","Xiph Xlater 300","Xiph Xlater 40","Xiph Xlater 5","Xiph Xlater 50","Xiph Xlater 500","Xiph Xlater 5000","Xiph Xlater 58"]
-        sorted = unsorted[:]
-        sorted.sort(alphanum)
-        print '+++++Sorted...++++'
-        print '\n'.join(sorted)
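alphanum is a cmp-style comparator, so in this Python 2 code base it plugs directly into list.sort(); a short sketch with hypothetical tile file names:

    from gaworkflow.util.alphanum import alphanum

    tiles = ['s_1_0010_seq.txt', 's_1_0002_seq.txt', 's_1_0001_seq.txt']
    tiles.sort(alphanum)  # numeric chunks compare as integers, not as text
    print tiles           # ['s_1_0001_seq.txt', 's_1_0002_seq.txt', 's_1_0010_seq.txt']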
diff --git a/gaworkflow/util/ethelp.py b/gaworkflow/util/ethelp.py
deleted file mode 100644 (file)
index 19f6c9f..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-ElementTree helper functions
-"""
-def indent(elem, level=0):
-    """
-    reformat an element tree to be 'pretty' (indented)
-    """
-    i = "\n" + level*"  "
-    if len(elem):
-        if not elem.text or not elem.text.strip():
-            elem.text = i + "  "
-        for child in elem:
-            indent(child, level+1)
-        # we don't want the closing tag indented too far
-        child.tail = i
-        if not elem.tail or not elem.tail.strip():
-            elem.tail = i
-    else:
-        if level and (not elem.tail or not elem.tail.strip()):
-            elem.tail = i
-
-def flatten(elem, include_tail=0):
-    """
-    Extract the text from an element tree 
-    (AKA extract the text that not part of XML tags)
-    """
-    text = elem.text or ""
-    for e in elem:
-        text += flatten(e, 1)
-    if include_tail and elem.tail: text += elem.tail
-    return text
-
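indent() mutates a tree in place so that ElementTree.tostring emits readable XML, while flatten() recovers just the text; a quick sketch:

    from xml.etree import ElementTree
    from gaworkflow.util.ethelp import indent, flatten

    root = ElementTree.fromstring('<run><lane>1</lane><lane>2</lane></run>')
    print flatten(root)               # '12' -- the text outside the tags
    indent(root)                      # add newlines and two-space indents
    print ElementTree.tostring(root)  # pretty-printed XML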
diff --git a/gaworkflow/util/fctracker.py b/gaworkflow/util/fctracker.py
deleted file mode 100644 (file)
index 57b5dcf..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-import datetime
-import os
-import re
-import sys
-import time
-
-if sys.version_info[:2] >= (2, 5):
-  # python 2.5 and newer ship sqlite3 in the standard library
-  import sqlite3
-else:
-  import pysqlite2.dbapi2 as sqlite3
-
-
-class fctracker:
-    """
-    provide a simple way to interact with the flowcell data in fctracker.db
-    """
-    def __init__(self, database):
-        # default to the current directory
-        if database is None: 
-            self.database = self._guess_fctracker_path()
-        else:
-            self.database = database
-        self.conn = sqlite3.connect(self.database)
-        self._get_library()
-        self._get_species()
-
-    def _guess_fctracker_path(self):
-        """
-        Guess a few obvious places for the database
-        """
-        fctracker = 'fctracker.db'
-        name = fctracker
-        # is it in the current dir?
-        if os.path.exists(name): 
-            return name
-        name = os.path.expanduser(os.path.join('~', fctracker))
-        if os.path.exists(name):
-            return name
-        raise RuntimeError("Can't find fctracker")
-
-    def _make_dict_from_table(self, table_name, pkey_name):
-        """
-        Convert a django table into a dictionary indexed by the primary key.
-        Yes, it really does just load everything into memory, hopefully
-        we stay under a few tens of thousands of runs for a while.
-        """
-        table = {}
-        c = self.conn.cursor()
-        c.execute('select * from %s;' % (table_name))
-        # extract just the field name
-        description = [ f[0] for f in c.description]
-        for row in c:
-            row_dict = dict(zip(description, row))
-            table[row_dict[pkey_name]] = row_dict
-        c.close()
-        return table
-
-    def _add_lanes_to_libraries(self):
-        """
-        add flowcell/lane ids to new attribute 'lanes' in the library dictionary
-        """
-        library_id_re = re.compile('lane_\d_library_id')
-
-        for fc_id, fc in self.flowcells.items():
-            lane_library = [ (x[0][5], x[1]) for x in fc.items() 
-                                             if library_id_re.match(x[0]) ]
-            for lane, library_id in lane_library:
-                if not self.library[library_id].has_key('lanes'):
-                    self.library[library_id]['lanes'] = []
-                self.library[library_id]['lanes'].append((fc_id, lane))
-
-    def _get_library(self):
-        """
-        attach the library dictionary to the instance
-        """
-        self.library = self._make_dict_from_table(
-                         'fctracker_library', 
-                         'library_id')
-                                                  
-        
-    def _get_species(self):
-        """
-        attach the species dictionary to the instance
-        """
-        self.species = self._make_dict_from_table(
-                         'fctracker_species',
-                         'id'
-                       )
-        
-    def _get_flowcells(self, where=None):
-        """
-        attach the flowcell dictionary to the instance
-
-        where is a sql where clause. (eg "where run_date > '2008-1-1'")
-        that can be used to limit what flowcells we select
-        FIXME: please add sanitization code
-        """
-        if where is None:
-            where = ""
-        self.flowcells = {}
-        c = self.conn.cursor()
-        c.execute('select * from fctracker_flowcell %s;' % (where))
-        # extract just the field name
-        description = [ f[0] for f in c.description ]
-        for row in c:
-            row_dict = dict(zip(description, row))
-            fcid, status = self._parse_flowcell_id(row_dict)
-            row_dict['flowcell_id'] = fcid
-            row_dict['flowcell_status'] = status
-
-            for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
-                lane_library = self.library[row_dict[lane+"_id"]]
-                species_id = lane_library['library_species_id']
-                lane_library['library_species'] = self.species[species_id]
-                row_dict[lane] = lane_library
-            # some useful parsing
-            run_date = time.strptime(row_dict['run_date'],  '%Y-%m-%d %H:%M:%S')
-            run_date = datetime.datetime(*run_date[:6])
-            row_dict['run_date'] = run_date
-            self.flowcells[row_dict['flowcell_id']] = row_dict
-
-        self._add_lanes_to_libraries()
-        return self.flowcells
-
-    def _parse_flowcell_id(self, flowcell_row):
-      """
-      Return flowcell id and status
-      
-      We stored the status information in the flowcell id name.
-      this was dumb, but database schemas are hard to update.
-      """
-      fields = flowcell_row['flowcell_id'].split()
-      fcid = None
-      status = None
-      if len(fields) > 0:
-        fcid = fields[0]
-      if len(fields) > 1:
-        status = fields[1]
-      return fcid, status
-      
-
-def flowcell_gone(cell):
-    """
-    Use a variety of heuristics to determine if the flowcell drive
-    has been deleted.
-    """
-    status = cell['flowcell_status']
-    if status is None:
-        return False
-    failures = ['failed', 'deleted', 'not run']
-    for f in failures:
-      if re.search(f, status):
-        return True
-    return False
-
-def recoverable_drive_report(flowcells):
-    """
-    Attempt to report what flowcells are still on a hard drive
-    """
-    def format_status(status):
-      if status is None:
-        return ""
-      else:
-        return status+" "
-
-    # sort flowcells by run date
-    flowcell_list = []
-    for key, cell in flowcells.items():
-        flowcell_list.append( (cell['run_date'], key) )
-    flowcell_list.sort()
-
-    report = []
-    line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) "
-    line += "%(species)s"
-    for run_date, flowcell_id in flowcell_list:
-        cell = flowcells[flowcell_id]
-        if flowcell_gone(cell):
-            continue
-        for l in range(1,9):
-            lane = 'lane_%d' % (l)
-            cell_library = cell['%s_library'%(lane)]
-            fields = {
-              'date': cell['run_date'].strftime('%y-%b-%d'),
-              'id': cell['flowcell_id'],
-              'lane': l,
-              'library_name': cell_library['library_name'],
-              'library_id': cell['%s_library_id'%(lane)],
-              'species': cell_library['library_species']['scientific_name'],
-              'status': format_status(cell['flowcell_status']),
-            }
-            report.append(line % (fields))
-    return os.linesep.join(report)
-
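fctracker exists precisely for this kind of ad hoc reporting away from the webserver; a sketch, assuming an fctracker.db in the current directory:

    from gaworkflow.util.fctracker import fctracker, recoverable_drive_report

    fc = fctracker('fctracker.db')  # pass None to search cwd, then ~
    # note the docstring's warning: the where clause is not sanitized
    fc._get_flowcells("where run_date > '2008-01-01'")
    print recoverable_drive_report(fc.flowcells)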
diff --git a/gaworkflow/util/makebed.py b/gaworkflow/util/makebed.py
deleted file mode 100755 (executable)
index 6f1511c..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-"""
-Utility functions to make bedfiles.
-"""
-import os
-import re
-
-# map eland_result.txt sense 
-sense_map = { 'F': '+', 'R': '-'}
-sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
-
-def write_bed_header(outstream, name, description):
-  """
-  Produce the headerline for a bedfile
-  """
-  # provide default track names
-  if name is None: name = "track"
-  if description is None: description = "eland result file"
-  bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
-  bed_header += os.linesep
-  outstream.write(bed_header % (name, description))
-
-def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'):
-  """
-  read an eland result file from instream and write a bedfile to outstream
-  """
-  # indexes into fields in eland_result.txt file
-  SEQ = 1
-  CHR = 6
-  START = 7
-  SENSE = 8
-
-  write_bed_header(outstream, name, description)
-
-  for line in instream:
-    fields = line.split()
-    # we need more than the CHR field, and it needs to match a chromosome
-    if len(fields) <= CHR or \
-          (chromosome_prefix is not None and \
-             not fields[CHR].startswith(chromosome_prefix)):
-      continue
-    start = fields[START]
-    stop = int(start) + len(fields[SEQ])
-    chromosome, extension = fields[CHR].split('.')
-    assert extension == "fa"
-    outstream.write('%s %s %d read 0 %s - - %s%s' % (
-      chromosome,
-      start,
-      stop,
-      sense_map[fields[SENSE]], 
-      sense_color[fields[SENSE]],
-      os.linesep  
-    ))
-
-
-def make_bed_from_multi_eland_stream(
-  instream, 
-  outstream, 
-  name, 
-  description, 
-  chr_prefix='chr', 
-  max_reads=255
-  ):
-  """
-  read a multi eland stream and write a bedfile
-  """
-  write_bed_header(outstream, name, description)
-  parse_multi_eland(instream, outstream, chr_prefix, max_reads)
-
-def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
-
-  loc_pattern = '(?P<fullloc>(?P<start>[0-9]+)(?P<dir>[FR])(?P<count>[0-9]+))'
-  other_pattern = '(?P<chr>[^:,]+)'
-  split_re = re.compile('(%s|%s)' % (loc_pattern, other_pattern))
-
-  for line in instream:
-    rec = line.split()
-    if len(rec) > 3:
-      # colony_id = rec[0]
-      seq = rec[1]
-      # number of matches for 0, 1, and 2 mismatches
-      # m0, m1, m2 = [int(x) for x in rec[2].split(':')]
-      compressed_reads = rec[3]
-      cur_chr = ""
-      reads = {0: [], 1: [], 2:[]}
-
-      for token in split_re.finditer(compressed_reads):
-        if token.group('chr') is not None:
-          cur_chr =  token.group('chr')[:-3] # strip off .fa
-        elif token.group('fullloc') is not None:
-          matches = int(token.group('count'))
-          # only emit a bed line if 
-          #  our current chromosome starts with chromosome pattern
-          if chr_prefix is None or cur_chr.startswith(chr_prefix):
-            start = int(token.group('start'))
-            stop = start + len(seq)
-            orientation = token.group('dir')
-            strand = sense_map[orientation]
-            color = sense_color[orientation]
-            # build up list of reads for this record
-            reads[matches].append((cur_chr, start, stop, strand, color))
-
-      # report up to our max_read threshold, reporting the fewer-mismatch
-      # matches first
-      reported_reads = 0
-      for mismatch in [0, 1, 2]:
-        read_list = reads[mismatch]
-        reported_reads += len(read_list)
-        if reported_reads <= max_reads:
-          for cur_chr, start, stop, strand, color in read_list:
-            # use the strand and color recorded for this read instead of
-            # whatever was left over from the parsing loop above
-            outstream.write('%s %d %d read 0 %s - - %s%s' % (
-                cur_chr,
-                start,
-                stop,
-                strand,
-                color,
-                os.linesep
-            ))
-
-def make_description(database, flowcell_id, lane):
-    """
-    compute a bedfile name and description from the fctracker database
-    """
-    from gaworkflow.util.fctracker import fctracker
-
-    fc = fctracker(database)
-    cells = fc._get_flowcells("where flowcell_id='%s'" % (flowcell_id))
-    if len(cells) != 1:
-      raise RuntimeError("couldn't find flowcell id %s" % (flowcell_id))
-    lane = int(lane)
-    if lane < 1 or lane > 8:
-      raise RuntimeError("flowcells only have lanes 1-8")
-
-    name = "%s-%s" % (flowcell_id, lane)
-
-    cell_id, cell = cells.items()[0]
-    assert cell_id == flowcell_id
-
-    cell_library_id = cell['lane_%d_library_id' %(lane,)]
-    cell_library = cell['lane_%d_library' %(lane,)]
-    description = "%s-%s" % (cell_library['library_name'], cell_library_id)
-    return name, description
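For reference, a minimal sketch of driving the single-match converter above (module path follows this commit's rename to htsworkflow; the read name, sequence, and coordinates are fabricated):

    from StringIO import StringIO
    from htsworkflow.util import makebed

    # one fabricated eland_result.txt line: id, sequence, match type,
    # per-mismatch match counts, chromosome file, position, sense
    line = '>read1 AAAACCCCGGGGTTTT U0 1 0 0 chr1.fa 100 F\n'
    out = StringIO()
    makebed.make_bed_from_eland_stream(StringIO(line), out, None, None)
    # prints the track header, then: chr1 100 116 read 0 + - - 0,0,255
    print out.getvalue()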
diff --git a/gaworkflow/util/mount.py b/gaworkflow/util/mount.py
deleted file mode 100644 (file)
index 75dbe0a..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-"""
-Utilities for working with unix-style mounts.
-"""
-import os
-import subprocess
-
-def list_mount_points():
-    """
-    Return list of current mount points
-
-    Note: unix-like OS specific
-    """
-    mount_points = []
-    likely_locations = ['/sbin/mount', '/bin/mount']
-    for mount in likely_locations:
-        if os.path.exists(mount):
-            p = subprocess.Popen(mount, stdout=subprocess.PIPE)
-            p.wait()
-            for l in p.stdout.readlines():
-                rec = l.split()
-                device = rec[0]            
-                mount_point = rec[2]
-                assert rec[1] == 'on'
-                # looking at the output of mount on linux, osx, and 
-                # sunos, the first 3 elements are always the same
-                # devicename on path
-                # everything after that displays the attributes
-                # of the mount points in wildly differing formats
-                mount_points.append(mount_point)
-            return mount_points
-    else:
-        raise RuntimeError("Couldn't find a mount executable")
-
-def is_mounted(point_to_check):
-    """
-    Return true if argument exactly matches a current mount point.
-    """
-    for mount_point in list_mount_points():
-        if point_to_check == mount_point:
-            return True
-    else:
-        return False
-
-def find_mount_point_for(pathname):
-    """
-    Find the deepest mount point pathname is located on
-    """
-    realpath = os.path.realpath(pathname)
-    mount_points = list_mount_points()
-
-    prefixes = set()
-    for current_mount in mount_points:
-        cp = os.path.commonprefix([current_mount, realpath])
-        prefixes.add((len(cp), cp))
-
-    prefixes = list(prefixes)
-    prefixes.sort()
-    if len(prefixes) == 0:
-        return None
-    else:
-        # prefixes is sorted, so the last entry carries the longest
-        # common prefix; return it
-        return prefixes[-1][1]
-
-
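A short usage sketch for the helpers above (unix-only, since they shell out to mount; the runfolder path is hypothetical):

    from htsworkflow.util import mount

    print mount.list_mount_points()    # e.g. ['/', '/home', ...]
    print mount.is_mounted('/')        # True on any unix box
    # deepest mount point containing the (hypothetical) path
    print mount.find_mount_point_for('/home/diane/runfolders')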
diff --git a/gaworkflow/util/opener.py b/gaworkflow/util/opener.py
deleted file mode 100644 (file)
index 035bb24..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-Helpful utilities for turning random names/objects into streams.
-"""
-import os
-import gzip
-import bz2
-import types
-import urllib2
-
-def isfilelike(file_ref, mode):
-    """Does file_ref have the core file operations?
-    """
-    # if mode is w/a, check that we have the writable ops,
-    # but always check to see if we can read
-    read_operations = ['read', 'readline', 'readlines']
-    write_operations = [ 'write', 'writelines' ]
-    #random_operations = [ 'seek', 'tell' ]
-    if mode[0] in ('w', 'a'):
-        for o in write_operations:
-            if not hasattr(file_ref, o):
-                return False
-    for o in read_operations:
-        if not hasattr(file_ref, o):
-            return False
-          
-    return True
-
-def isurllike(file_ref, mode):
-    """
-    does file_ref look like a url?
-    (AKA does it start with protocol:// ?)
-    """
-    #what if mode is 'w'?
-    parsed = urllib2.urlparse.urlparse(file_ref)
-    schema, netloc, path, params, query, fragment = parsed
-    
-    return len(schema) > 0
-
-def autoopen(file_ref, mode='r'):
-    """
-    Attempt to intelligently turn file_ref into a readable stream
-    """
-    # catch being passed a file
-    if type(file_ref) is types.FileType:
-        return file_ref
-    # does it look like a file?
-    elif isfilelike(file_ref, mode):
-        return file_ref
-    elif isurllike(file_ref, mode):
-        return urllib2.urlopen(file_ref)
-    elif os.path.splitext(file_ref)[1] == ".gz":
-        return gzip.open(file_ref, mode)
-    elif os.path.splitext(file_ref)[1] == '.bz2':
-        return bz2.BZ2File(file_ref, mode)
-    else:
-        return open(file_ref,mode)
-
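autoopen dispatches on what it is handed; a sketch with hypothetical file names:

    from htsworkflow.util.opener import autoopen

    stream = autoopen('report.txt')                  # plain open()
    gz = autoopen('eland_result.txt.gz')             # gzip.open()
    bz = autoopen('eland_result.txt.bz2')            # bz2.BZ2File()
    url = autoopen('http://example.com/status.xml')  # urllib2.urlopen()
    passthrough = autoopen(open('report.txt'))       # files pass through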
diff --git a/gaworkflow/util/queuecommands.py b/gaworkflow/util/queuecommands.py
deleted file mode 100644 (file)
index 78728ae..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-"""
-Run up to N simultaneous jobs from a provided list of commands
-"""
-
-import logging
-from subprocess import PIPE
-import subprocess
-import select
-import sys
-import time
-
-class QueueCommands(object):
-    """
-    Queue up N commands from cmd_list, launching more jobs as earlier
-    ones finish.
-    """
-
-    def __init__(self, cmd_list, N=0, cwd=None):
-        """
-        cmd_list is a list of elements suitable for subprocess
-        N is the number of simultaneous processes to run.
-        0 is all of them.
-        
-        WARNING: this will not work on windows
-        (It depends on being able to pass local file descriptors to the
-        select call, which isn't supported by the Win32 API)
-        """
-        self.to_run = cmd_list[:]
-        self.running = {}
-        self.N = N
-        self.cwd = cwd
-
-    def under_process_limit(self):
-        """
-        are we still under the total number of allowable jobs?
-        """
-        if self.N == 0:
-            return True
-
-        if len(self.running) < self.N:
-            return True
-
-        return False
-
-    def start_jobs(self):
-        """
-        Launch jobs until we have the maximum allowable running
-        (or have run out of jobs)
-        """
-        queue_log = logging.getLogger('queue')
-        queue_log.info('using %s as cwd' % (self.cwd,))
-
-        while (len(self.to_run) > 0) and self.under_process_limit():
-            queue_log.info('%d left to run', len(self.to_run))
-            cmd = self.to_run.pop(0)
-            p = subprocess.Popen(cmd, stdout=PIPE, cwd=self.cwd, shell=True)
-            self.running[p.stdout] = p
-            queue_log.info("Created process %d from %s" % (p.pid, str(cmd)))
-
-    def run(self):
-        """
-        run up to N jobs until we run out of jobs
-        """
-        queue_log = logging.getLogger('queue')
-
-        # to_run slowly gets consumed by start_jobs
-        while len(self.to_run) > 0 or len(self.running) > 0:
-            # fill any empty spots in our job queue
-            self.start_jobs()
-
-            # build a list of file descriptors
-            # fds = file descriptors
-            fds = [ x.stdout for x in self.running.values()]
-
-            # wait for something to finish
-            # wl = write list, xl = exception list (unused, hence the terse names)
-            read_list, wl, xl = select.select(fds, [], fds)
-        
-            # for everything that might have finished...
-            for pending_fd in read_list:
-                pending = self.running[pending_fd]
-                # if it really did finish, remove it from running jobs
-                if pending.poll() is not None:
-                    queue_log.info("Process %d finished [%d]",
-                                   pending.pid, pending.returncode)
-                    del self.running[pending_fd]
-            time.sleep(1)
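A usage sketch for QueueCommands; the shell commands are stand-ins for real pipeline steps (note the class runs each command string with shell=True):

    from htsworkflow.util.queuecommands import QueueCommands

    cmds = ['gzip -c lane1.txt > lane1.txt.gz',
            'gzip -c lane2.txt > lane2.txt.gz',
            'gzip -c lane3.txt > lane3.txt.gz']
    q = QueueCommands(cmds, N=2, cwd='/tmp')  # at most two at a time
    q.run()                                   # blocks until all finish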
diff --git a/gaworkflow/util/test/test_ethelp.py b/gaworkflow/util/test/test_ethelp.py
deleted file mode 100644 (file)
index 98df518..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-import os
-import unittest
-
-try:
-  from xml.etree import ElementTree
-except ImportError, e:
-  from elementtree import ElementTree
-
-from gaworkflow.util.ethelp import indent, flatten
-
-class testETHelper(unittest.TestCase):
-    def setUp(self):
-        self.foo = '<foo><bar>asdf</bar><br/></foo>'
-        self.foo_tree = ElementTree.fromstring(self.foo)
-
-    def test_indent(self):
-        flat_foo = ElementTree.tostring(self.foo_tree)
-        self.failUnlessEqual(len(flat_foo.split('\n')), 1)
-
-        indent(self.foo_tree)
-        pretty_foo = ElementTree.tostring(self.foo_tree)
-        self.failUnlessEqual(len(pretty_foo.split('\n')), 5)
-
-    def test_flatten(self):
-        self.failUnlessEqual(flatten(self.foo_tree), 'asdf')
-
-def suite():
-    return unittest.makeSuite(testETHelper, 'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest='suite')
-
-
-
-
diff --git a/gaworkflow/util/test/test_makebed.py b/gaworkflow/util/test/test_makebed.py
deleted file mode 100644 (file)
index e96f29b..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-from StringIO import StringIO
-import unittest
-
-from gaworkflow.util import makebed
-
-class testMakeBed(unittest.TestCase):
-    def test_multi_1_0_0_limit_1(self):
-      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
-      out = StringIO()
-
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
-
-    def test_multi_1_0_0_limit_255(self):
-      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
-      out = StringIO()
-
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
-      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
-
-
-    def test_multi_2_0_0_limit_1(self):
-      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
-      out = StringIO()
-
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      self.failUnlessEqual(out.len, 0)
-
-    def test_multi_2_0_0_limit_255(self):
-      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
-      out = StringIO()
-
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
-      self.failUnlessEqual(out.len, 98)
-
-    def test_multi_0_2_0_limit_1(self):
-      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:115:495    TCTCCCTGAAAAATANAAGTGNTGTTGGTGAG        0:2:1   mm9_chr14.fa:104434729F2,mm9_chr16.fa:63263818R1,mm9_chr2.fa:52265438R1')
-      out = StringIO()
-
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      self.failUnlessEqual(out.len, 0)
-
-def suite():
-    return unittest.makeSuite(testMakeBed, 'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest='suite')
-
-
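The multi-eland match field these tests feed in tokenizes like this (same regex as parse_multi_eland; the coordinates come from the test data above):

    import re

    loc = '(?P<fullloc>(?P<start>[0-9]+)(?P<dir>[FR])(?P<count>[0-9]+))'
    other = '(?P<chr>[^:,]+)'
    split_re = re.compile('(%s|%s)' % (loc, other))

    field = 'mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0'
    for token in split_re.finditer(field):
        if token.group('chr') is not None:
            print 'chromosome file:', token.group('chr')
        else:
            print 'hit at %s, strand %s, %s mismatches' % (
                token.group('start'), token.group('dir'), token.group('count'))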
diff --git a/gaworkflow/util/test/test_queuecommands.py b/gaworkflow/util/test/test_queuecommands.py
deleted file mode 100644 (file)
index 6c54c04..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-import os
-import logging
-import time
-import unittest
-
-
-from gaworkflow.util.queuecommands import QueueCommands
-
-class testQueueCommands(unittest.TestCase):
-    def setUp(self):
-        logging.basicConfig(level=logging.DEBUG,
-                            format='%(asctime)s %(name)-8s %(message)s')
-
-       
-
-    def test_unlimited_run(self):
-        """
-        Run everything at once
-        """
-        cmds = ['/bin/sleep 0',
-                '/bin/sleep 1',
-                '/bin/sleep 2',]
-
-        q = QueueCommands(cmds)
-        start = time.time()
-        q.run()
-        end = time.time()-start
-        # should take only about as long as the longest sleep (~2 seconds)
-        self.failUnless( end > 1.9 and end < 2.1,
-                         "took %s seconds, expected ~2" % (end,))
-
-    def test_limited_run(self):
-        """
-        Run a limited number of jobs
-        """
-        cmds = ['/bin/sleep 1',
-                '/bin/sleep 2',
-                '/bin/sleep 3',]
-
-        q = QueueCommands(cmds, 2)
-
-        start = time.time()
-        q.run()
-        end = time.time()-start
-        self.failUnless( end > 3.9 and end < 4.1,
-                         "took %s seconds, expected ~4" % (end,))
-
-def suite():
-    return unittest.makeSuite(testQueueCommands, 'test')
-
-if __name__ == "__main__":
-    unittest.main(defaultTest='suite')
-
-
-
-
diff --git a/htsworkflow/__init__.py b/htsworkflow/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/htsworkflow/automation/__init__.py b/htsworkflow/automation/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/htsworkflow/automation/copier.py b/htsworkflow/automation/copier.py
new file mode 100644 (file)
index 0000000..572cfb3
--- /dev/null
@@ -0,0 +1,245 @@
+import ConfigParser
+import copy
+import logging
+import logging.handlers
+import os
+import re
+import subprocess
+import sys
+import time
+import traceback
+
+from benderjab import bot, rpc
+
+def runfolder_validate(fname):
+    """
+    Return True if fname looks like a runfolder name
+    """
+    if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname):
+        return True
+    else:
+        return False
+    
+class rsync(object):
+  def __init__(self, source, dest, pwfile):
+    self.pwfile = os.path.expanduser(pwfile)
+    self.cmd = ['/usr/bin/rsync', ]
+    self.cmd.append('--password-file=%s' % (self.pwfile))
+    self.source_base = source
+    self.dest_base = dest
+    self.processes = {}
+    self.exit_code = None
+
+  def list(self):
+    """Get a directory listing"""
+    args = copy.copy(self.cmd)
+    args.append(self.source_base)
+
+    logging.debug("Rsync cmd:" + " ".join(args))
+    short_process = subprocess.Popen(args, stdout=subprocess.PIPE)
+    return self.list_filter(short_process.stdout)
+
+  def list_filter(self, lines):
+    """
+    parse rsync directory listing
+    """
+    dirs_to_copy = []
+    direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ]
+    for permissions, size, filedate, filetime, filename in direntries:
+      if permissions[0] == 'd':
+        # hey, it's a directory: the first step to being something we want to
+        # copy
+        if re.match("[0-9]{6}", filename):
+          # it starts with something that looks like a 6 digit date
+          # aka good enough for me
+          dirs_to_copy.append(filename)
+    return dirs_to_copy
+
+  def create_copy_process(self, dirname):
+    args = copy.copy(self.cmd)
+    # we want to copy everything
+    args.append('-rlt') 
+    # from here
+    args.append(os.path.join(self.source_base, dirname))
+    # to here
+    args.append(self.dest_base)
+    logging.debug("Rsync cmd:" + " ".join(args))
+    return subprocess.Popen(args)
+
+  def copy(self):
+    """
+    copy any interesting looking directories over
+    return list of items that we started copying.
+    """
+    # clean up any lingering non-running processes
+    self.poll()
+    
+    # what's available to copy?
+    dirs_to_copy = self.list()
+    
+    # lets start copying
+    started = []
+    for d in dirs_to_copy:
+      process = self.processes.get(d, None)
+      
+      if process is None:
+        # we don't have a process, so make one
+        logging.info("rsyncing %s" % (d))
+        self.processes[d] = self.create_copy_process(d)
+        started.append(d)           
+    return started
+      
+  def poll(self):
+      """
+      check currently running processes to see if they're done
+      
+      return path roots that have finished.
+      """
+      for dir_key, proc_value in self.processes.items():
+          retcode = proc_value.poll()
+          if retcode is None:
+              # process hasn't finished yet
+              pass
+          elif retcode == 0:
+              logging.info("finished rsyncing %s, exitcode %d" %( dir_key, retcode))
+              del self.processes[dir_key]
+          else:
+              logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode))
+              
+  def __len__(self):
+      """
+      Return how many active rsync processes we currently have
+      
+      Call poll first to close finished processes.
+      """
+      return len(self.processes)
+  
+  def keys(self):
+      """
+      Return list of current run folder names
+      """
+      return self.processes.keys()
+
+class CopierBot(rpc.XmlRpcBot):
+    def __init__(self, section=None, configfile=None):
+        #if configfile is None:
+        #    configfile = '~/.htsworkflow'
+            
+        super(CopierBot, self).__init__(section, configfile)
+        
+        # options for rsync command
+        self.cfg['rsync_password_file'] = None
+        self.cfg['rsync_source'] = None
+        self.cfg['rsync_destination'] = None 
+        
+        # options for reporting we're done 
+        self.cfg['notify_users'] = None
+        self.cfg['notify_runner'] = None
+                            
+        self.pending = []
+        self.rsync = None
+        self.notify_users = None
+        self.notify_runner = None
+        
+        self.register_function(self.startCopy)
+        self.register_function(self.sequencingFinished)
+        self.eventTasks.append(self.update)
+        
+    def read_config(self, section=None, configfile=None):
+        """
+        read the config file
+        """
+        super(CopierBot, self).read_config(section, configfile)
+        
+        password = self._check_required_option('rsync_password_file')
+        source = self._check_required_option('rsync_source')
+        destination = self._check_required_option('rsync_destination')
+        self.rsync = rsync(source, destination, password)
+        
+        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
+        try:
+          self.notify_runner = \
+             self._parse_user_list(self.cfg['notify_runner'],
+                                   require_resource=True)
+        except bot.JIDMissingResource:
+            msg = 'need a full jabber ID + resource for xml-rpc destinations'
+            logging.critical(msg)
+            raise bot.JIDMissingResource(msg)
+
+    def startCopy(self, *args):
+        """
+        start our copy
+        """
+        logging.info("starting copy scan")
+        started = self.rsync.copy()
+        logging.info("copying:" + " ".join(started)+".")
+        return started
+        
+    def sequencingFinished(self, runDir, *args):
+        """
+        The run was finished; if we're done copying, pass the message on
+        """
+        # close any open processes
+        self.rsync.poll()
+        
+        # see if we're still copying
+        if runfolder_validate(runDir):
+            logging.info("received sequencing finished for %s" % (runDir))
+            self.pending.append(runDir)
+            self.startCopy()
+            return "PENDING"
+        else:
+            errmsg = "received bad runfolder name (%s)" % (runDir)
+            logging.warning(errmsg)
+            # maybe I should use a different error message
+            raise RuntimeError(errmsg)
+    
+    def reportSequencingFinished(self, runDir):
+        """
+        Send the sequencingFinished message to the interested parties
+        """
+        if self.notify_users is not None:
+            for u in self.notify_users:
+                self.send(u, 'Sequencing run %s finished' % (runDir))
+        if self.notify_runner is not None:
+            for r in self.notify_runner:
+                self.rpc_send(r, (runDir,), 'sequencingFinished')
+        logging.info("forwarding sequencingFinished message for %s" % (runDir))
+        
+    def update(self, *args):
+        """
+        Update our current status.
+        Report if we've finished copying files.
+        """
+        self.rsync.poll()
+        # iterate over a copy, since we remove finished runs as we go
+        for p in self.pending[:]:
+            if p not in self.rsync.keys():
+                self.reportSequencingFinished(p)
+                self.pending.remove(p)
+        
+    def _parser(self, msg, who):
+        """
+        Parse xmpp chat messages
+        """
+        help = u"I can [copy], or report current [status]"
+        if re.match(u"help", msg):
+            reply = help
+        elif re.match("copy", msg):            
+            started = self.startCopy()
+            reply = u"started copying " + ", ".join(started)
+        elif re.match(u"status", msg):
+            msg = [u"Currently %d rsync processes are running." % (len(self.rsync))]
+            for d in self.rsync.keys():
+              msg.append(u"  " + d)
+            reply = os.linesep.join(msg)
+        else:
+            reply = u"I didn't understand '%s'" % (unicode(msg))
+        return reply
+
+def main(args=None):
+    bot = CopierBot()
+    bot.main(args)
+    
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
+
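runfolder_validate above accepts the Illumina-style YYMMDD_machine_run naming; a couple of fabricated examples:

    from htsworkflow.automation.copier import runfolder_validate

    runfolder_validate('080915_HWI-EAS229_0017')  # True: 6 digits, then _name
    runfolder_validate('notes.txt')               # False: no date prefix
    runfolder_validate('80915_run')               # False: only 5 leading digits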
diff --git a/htsworkflow/automation/runner.py b/htsworkflow/automation/runner.py
new file mode 100644 (file)
index 0000000..9c26940
--- /dev/null
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+from glob import glob
+import logging
+import os
+import re
+import sys
+import time
+import threading
+
+from benderjab import rpc
+
+from htsworkflow.pipeline.configure_run import *
+
+#s_fc = re.compile('FC[0-9]+')
+s_fc = re.compile('_[0-9a-zA-Z]*$')
+
+
+def _get_flowcell_from_rundir(run_dir):
+    """
+    Returns flowcell string based on run_dir.
+    Returns None and logs error if flowcell can't be found.
+    """
+    junk, dirname = os.path.split(run_dir)
+    mo = s_fc.search(dirname)
+    if not mo:
+        logging.error('RunDir 2 FlowCell error: %s' % (run_dir))
+        return None
+
+    return dirname[mo.start()+1:]
+    
+
+
+class Runner(rpc.XmlRpcBot):
+    """
+    Manage running pipeline jobs.
+    """    
+    def __init__(self, section=None, configfile=None):
+        #if configfile is None:
+        #    self.configfile = "~/.htsworkflow"
+        super(Runner, self).__init__(section, configfile)
+        
+        self.cfg['notify_users'] = None
+        self.cfg['genome_dir'] = None
+        self.cfg['base_analysis_dir'] = None
+
+        self.cfg['notify_postanalysis'] = None
+
+        self.conf_info_dict = {}
+        
+        self.register_function(self.sequencingFinished)
+        #self.eventTasks.append(self.update)
+
+    
+    def read_config(self, section=None, configfile=None):
+        super(Runner, self).read_config(section, configfile)
+
+        self.genome_dir = self._check_required_option('genome_dir')
+        self.base_analysis_dir = self._check_required_option('base_analysis_dir')
+
+        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
+        #FIXME: process notify_postpipeline cfg
+        
+    
+    def _parser(self, msg, who):
+        """
+        Parse xmpp chat messages
+        """
+        help = u"I can send [start] a run, or report [status]"
+        if re.match(u"help", msg):
+            reply = help
+        elif re.match("status", msg):
+            words = msg.split()
+            if len(words) == 2:
+                reply = self.getStatusReport(words[1])
+            else:
+                reply = u"Status available for: %s" \
+                        % (', '.join([k for k in self.conf_info_dict.keys()]))
+        elif re.match(u"start", msg):
+            words = msg.split()
+            if len(words) == 2:
+                self.sequencingFinished(words[1])
+                reply = u"starting run for %s" % (words[1])
+            else:
+                reply = u"need runfolder name"
+        else:
+            reply = u"I didn't understand '%s'" %(msg)
+
+        logging.debug("reply: " + str(reply))
+        return reply
+
+
+    def getStatusReport(self, fc_num):
+        """
+        Returns text status report for flow cell number 
+        """
+        if fc_num not in self.conf_info_dict:
+            return "No record of a %s run." % (fc_num)
+
+        status = self.conf_info_dict[fc_num].status
+
+        if status is None:
+            return "No status information for %s yet." \
+                   " Probably still in configure step. Try again later." % (fc_num)
+
+        output = status.statusReport()
+
+        return '\n'.join(output)
+    
+            
+    def sequencingFinished(self, run_dir):
+        """
+        Sequencing (and copying) is finished; time to start the pipeline
+        """
+        logging.debug("received sequencing finished message")
+
+        # Setup config info object
+        ci = ConfigInfo()
+        ci.base_analysis_dir = self.base_analysis_dir
+        ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir)        
+
+        # get flowcell from run_dir name
+        flowcell = _get_flowcell_from_rundir(run_dir)
+
+        # Store ci object in dictionary
+        self.conf_info_dict[flowcell] = ci
+
+
+        # Launch the job in its own thread and return.
+        self.launchJob(run_dir, flowcell, ci)
+        return "started"
+        
+        
+    def pipelineFinished(self, run_dir):
+        # need to strip off self.watch_dir from rundir I suspect.
+        logging.info("pipeline finished in " + str(run_dir))
+        #pattern = self.watch_dir
+        #if pattern[-1] != os.path.sep:
+        #    pattern += os.path.sep
+        #stripped_run_dir = re.sub(pattern, "", run_dir)
+        #logging.debug("stripped to " + stripped_run_dir)
+
+        # Notify each user that the run has finished.
+        if self.notify_users is not None:
+            for u in self.notify_users:
+                self.send(u, 'Pipeline run %s finished' % (run_dir))
+                
+        #if self.notify_runner is not None:
+        #    for r in self.notify_runner:
+        #        self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
+
+    def reportMsg(self, msg):
+
+        if self.notify_users is not None:
+            for u in self.notify_users:
+                self.send(u, msg)
+
+
+    def _runner(self, run_dir, flowcell, conf_info):
+
+        # retrieve config step
+        cfg_filepath = os.path.join(conf_info.analysis_dir,
+                                    'config32auto.txt')
+        status_retrieve_cfg = retrieve_config(conf_info,
+                                          flowcell,
+                                          cfg_filepath,
+                                          self.genome_dir)
+        if status_retrieve_cfg:
+            logging.info("Runner: Retrieve config: success")
+            self.reportMsg("Retrieve config (%s): success" % (run_dir))
+        else:
+            logging.error("Runner: Retrieve config: failed")
+            self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
+
+        
+        # configure step
+        if status_retrieve_cfg:
+            status = configure(conf_info)
+            if status:
+                logging.info("Runner: Configure: success")
+                self.reportMsg("Configure (%s): success" % (run_dir))
+                self.reportMsg(
+                    os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
+                )
+            else:
+                logging.error("Runner: Configure: failed")
+                self.reportMsg("Configure (%s): FAILED" % (run_dir))
+
+            #if successful, continue
+            if status:
+                # Setup status cmdline status monitor
+                #startCmdLineStatusMonitor(ci)
+                
+                # running step
+                logging.info('Running pipeline now!')
+                run_status = run_pipeline(conf_info)
+                if run_status is True:
+                    logging.info('Runner: Pipeline: success')
+                    self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
+                else:
+                    logging.info('Runner: Pipeline: failed')
+                    self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))
+
+
+    def launchJob(self, run_dir, flowcell, conf_info):
+        """
+        Starts up a thread for running the pipeline
+        """
+        t = threading.Thread(target=self._runner,
+                        args=[run_dir, flowcell, conf_info])
+        t.setDaemon(True)
+        t.start()
+        
+
+        
+def main(args=None):
+    bot = Runner()
+    return bot.main(args)
+    
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
+    
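_get_flowcell_from_rundir takes the trailing _token of the folder name as the flowcell id; a fabricated example:

    from htsworkflow.automation.runner import _get_flowcell_from_rundir

    print _get_flowcell_from_rundir('/runs/080915_HWI-EAS229_FC12345')
    # -> 'FC12345' (the regex anchors on the last _<alphanumeric run> at the end)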
diff --git a/htsworkflow/automation/spoolwatcher.py b/htsworkflow/automation/spoolwatcher.py
new file mode 100644 (file)
index 0000000..2a57535
--- /dev/null
@@ -0,0 +1,229 @@
+#!/usr/bin/env python
+import logging
+import os
+import re
+import sys
+import time
+#import glob
+
+from htsworkflow.util import mount
+
+# this uses pyinotify
+import pyinotify
+from pyinotify import EventsCodes
+
+from benderjab import bot, rpc
+
+
+class WatcherEvents(object):
+    # two events need to be tracked
+    # one to send startCopy
+    # one to send OMG its broken
+    # OMG its broken needs to stop when we've seen enough
+    #  cycles
+    # this should be per runfolder. 
+    # read the xml files 
+    def __init__(self):
+        pass
+        
+
+class Handler(pyinotify.ProcessEvent):
+    def __init__(self, watchmanager, bot):
+        self.last_event_time = None
+        self.watchmanager = watchmanager
+        self.bot = bot
+
+    def process_IN_CREATE(self, event):
+        self.last_event_time = time.time()
+        msg = "Create: %s" %  os.path.join(event.path, event.name)
+        if event.name.lower() == "run.completed":
+            try:
+                self.bot.sequencingFinished(event.path)
+            except IOError, e:
+                logging.error("Couldn't send sequencingFinished")
+        logging.debug(msg)
+
+    def process_IN_DELETE(self, event):
+        logging.debug("Remove: %s" %  os.path.join(event.path, event.name))
+
+    def process_IN_UNMOUNT(self, event):
+        pathname = os.path.join(event.path, event.name)
+        logging.debug("IN_UNMOUNT: %s" % (pathname,))
+        self.bot.unmount_watch()
+
+class SpoolWatcher(rpc.XmlRpcBot):
+    """
+    Watch a directory and send a message when another process is done writing.
+    
+    This monitors a directory tree using inotify (linux specific) and
+    after some files having been written will send a message after <timeout>
+    seconds of no file writing.
+    
+    (Basically, when the solexa machine finishes dumping a round of data,
+    this'll hopefully send out a message saying hey, look, there's data
+    available.)
+    """
+    # these params need to be in the config file
+    # I wonder where I should put the documentation
+    #:Parameters:
+    #    `watchdir` - which directory tree to monitor for modifications
+    #    `profile` - specify which .htsworkflow profile to use
+    #    `write_timeout` - how many seconds to wait for writes to finish to
+    #                      the spool
+    #    `notify_timeout` - how often to timeout from notify
+    
+    def __init__(self, section=None, configfile=None):
+        #if configfile is None:
+        #    self.configfile = "~/.htsworkflow"
+        super(SpoolWatcher, self).__init__(section, configfile)
+        
+        self.cfg['watchdir'] = None
+        self.cfg['write_timeout'] = 10
+        self.cfg['notify_users'] = None
+        self.cfg['notify_runner'] = None
+        
+        self.notify_timeout = 0.001
+        self.wm = pyinotify.WatchManager()
+        self.handler = Handler(self.wm, self)
+        self.notifier = pyinotify.Notifier(self.wm, self.handler)
+        self.wdd = None
+        self.mount_point = None
+        self.mounted = True
+        
+        self.notify_users = None
+        self.notify_runner = None
+        
+        self.eventTasks.append(self.process_notify)
+
+    def read_config(self, section=None, configfile=None):
+        super(SpoolWatcher, self).read_config(section, configfile)
+        
+        self.watch_dir = self._check_required_option('watchdir')
+        self.write_timeout = int(self.cfg['write_timeout'])
+        
+        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
+        try:
+          self.notify_runner = \
+             self._parse_user_list(self.cfg['notify_runner'],
+                                   require_resource=True)
+        except bot.JIDMissingResource:
+            msg = 'need a full jabber ID + resource for xml-rpc destinations'
+            logging.critical(msg)
+            raise bot.JIDMissingResource(msg)
+
+    def add_watch(self, watchdir=None):
+        """
+        start watching watchdir or self.watch_dir
+        we're currently limited to watching one directory tree.
+        """
+        # the one tree limit is mostly because self.wdd is a single item
+        # but managing it as a list might be a bit more annoying
+        if watchdir is None:
+            watchdir = self.watch_dir
+        logging.info("Watching: " + str(watchdir))
+
+        self.mount_point = mount.find_mount_point_for(watchdir)
+
+        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
+        # rec traverses the tree and adds all the directories that are there
+        # at the start.
+        # auto_add will add in new directories as they are created
+        self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
+
+    def unmount_watch(self):
+        if self.wdd is not None:
+            self.wm.rm_watch(self.wdd.values())
+            self.wdd = None
+            self.mounted = False
+            
+    def process_notify(self, *args):
+        # process the queue of events as explained above
+        self.notifier.process_events()
+        # check_events waits up to notify_timeout for something to happen
+        if self.notifier.check_events(self.notify_timeout):
+            # read notified events and enqueue them
+            self.notifier.read_events()
+            # should we do something?
+        # has something happened?
+        last_event_time = self.handler.last_event_time
+        if last_event_time is not None:
+            time_delta = time.time() - last_event_time
+            if time_delta > self.write_timeout:
+                self.startCopy()
+                self.handler.last_event_time = None
+        # handle unmounted filesystems
+        if not self.mounted:
+            if mount.is_mounted(self.mount_point):
+                # we've been remounted. Huzzah!
+                # restart the watch
+                self.add_watch()
+                self.mounted = True
+                logging.info(
+                    "%s was remounted, restarting watch" % \
+                        (self.mount_point)
+                )
+
+    def _parser(self, msg, who):
+        """
+        Parse xmpp chat messages
+        """
+        help = u"I can send a [copy] message, or report the sequencer [finished]"
+        if re.match(u"help", msg):
+            reply = help
+        elif re.match("copy", msg):            
+            self.startCopy()
+            reply = u"sent copy message"
+        elif re.match(u"finished", msg):
+            words = msg.split()
+            if len(words) == 2:
+                self.sequencingFinished(words[1])
+                reply = u"sending sequencing finished for %s" % (words[1])
+            else:
+                reply = u"need runfolder name"
+        else:
+            reply = u"I didn't understand '%s'" %(msg)            
+        return reply
+        
+    def start(self, daemonize):
+        """
+        Start application
+        """
+        self.add_watch()
+        super(SpoolWatcher, self).start(daemonize)
+        
+    def stop(self):
+        """
+        shutdown application
+        """
+        # destroy the inotify's instance on this interrupt (stop monitoring)
+        self.notifier.stop()
+        super(SpoolWatcher, self).stop()
+    
+    def startCopy(self):
+        logging.debug("writes seem to have stopped")
+        if self.notify_runner is not None:
+            for r in self.notify_runner:
+                self.rpc_send(r, tuple(), 'startCopy')
+        
+    def sequencingFinished(self, run_dir):
+        # need to strip off self.watch_dir from rundir I suspect.
+        logging.info("run.completed in " + str(run_dir))
+        pattern = self.watch_dir
+        if pattern[-1] != os.path.sep:
+            pattern += os.path.sep
+        stripped_run_dir = re.sub(pattern, "", run_dir)
+        logging.debug("stripped to " + stripped_run_dir)
+        if self.notify_users is not None:
+            for u in self.notify_users:
+                self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
+        if self.notify_runner is not None:
+            for r in self.notify_runner:
+                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
+        
+def main(args=None):
+    bot = SpoolWatcher()
+    return bot.main(args)
+    
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
+
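The bot's watch machinery boils down to the pyinotify pattern below, a sketch using the same old EventsCodes API the file uses; /tmp/spool is a hypothetical directory, and the check_events timeout mirrors the small value the bot passes:

    import pyinotify
    from pyinotify import EventsCodes

    class LogCreates(pyinotify.ProcessEvent):
        def process_IN_CREATE(self, event):
            print 'created:', event.path, event.name

    wm = pyinotify.WatchManager()
    notifier = pyinotify.Notifier(wm, LogCreates())
    # rec=True walks the existing tree; auto_add watches new subdirectories
    wm.add_watch('/tmp/spool', EventsCodes.IN_CREATE, rec=True, auto_add=True)
    while True:
        notifier.process_events()
        if notifier.check_events(0.001):  # same tiny timeout the bot uses
            notifier.read_events()
        # (the real bot lets benderjab's event loop drive this instead
        # of spinning in a while loop)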
diff --git a/htsworkflow/frontend/__init__.py b/htsworkflow/frontend/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/htsworkflow/frontend/eland_config/__init__.py b/htsworkflow/frontend/eland_config/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/htsworkflow/frontend/eland_config/forms.py b/htsworkflow/frontend/eland_config/forms.py
new file mode 100644 (file)
index 0000000..2488359
--- /dev/null
@@ -0,0 +1,163 @@
+from django import newforms as forms
+from django.newforms.util import ErrorList
+
+
+SPECIES_LIST = [#('--choose--', '--Choose--'),
+                ('hg18', 'Homo sapiens (Hg18)'),
+                ('Mm8', 'Mus musculus (Mm8)'),
+                ('arabv6', 'Arabidopsis thaliana v6'),
+                ('other', 'Other species (Include in description)')]
+
+
+class DivErrorList(ErrorList):
+  def __unicode__(self):
+    return self.as_divs()
+  
+  def as_divs(self):
+    if not self: return u''
+    return u'<div class="errorlist">%s</div>' % (''.join([u'<div class="error">%s</div>' % e for e in self]))
+
+
+
+class ConfigForm(forms.Form):
+  
+  flow_cell_number = forms.CharField(min_length=2)
+  run_date = forms.DateTimeField()
+  advanced_run = forms.BooleanField(required=False)
+  read_length = forms.IntegerField(min_value=1, initial=32)
+  #eland_repeat = forms.BooleanField()
+  
+  #needs a for loop or something to allow for n configurations
+  #analysis_type = forms.ChoiceField(choices=[('eland','eland')])
+  lane1_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane1_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  lane2_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane2_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  lane3_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane3_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  lane4_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane4_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  lane5_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane5_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  lane6_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane6_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  lane7_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane7_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  lane8_species = forms.ChoiceField(choices=SPECIES_LIST)
+  lane8_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+  
+  notes = forms.CharField(widget=forms.Textarea(attrs={'cols':'70'}), required=False)
+  
+  #lane_specific_read_length = forms.IntegerField(min_value=1)
+  
+  #eland_genome_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
+  #                                              ('lane2','2'),
+  #                                              ('lane3','3'),
+  #                                              ('lane4','4'),
+  #                                              ('lane5','5'),
+  #                                              ('lane6','6'),
+  #                                              ('lane7','7'),
+  #                                              ('lane8','8') ])
+  
+  #eland_genome = forms.ChoiceField(choices=)
+  
+  #use_bases_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
+  #                                              ('lane2','2'),
+  #                                              ('lane3','3'),
+  #                                              ('lane4','4'),
+  #                                              ('lane5','5'),
+  #                                              ('lane6','6'),
+  #                                              ('lane7','7'),
+  #                                              ('lane8','8') ])
+  
+  #use_bases_mask = forms.CharField()
+  
+  #sequence_format = forms.ChoiceField(choices=[('scarf', 'scarf')])
+  
+  
+  
+  #subject = forms.CharField(max_length=100)
+  #message = forms.CharField()
+  #sender = forms.EmailField()
+  #cc_myself = forms.BooleanField()
+  
+  def as_custom(self):
+    """
+    Displays customized html output
+    """
+    html = []
+    
+    fcn = self['flow_cell_number']
+    
+    html.append(fcn.label_tag() + ': ' + str(fcn) + str(fcn.errors) + '<br />')
+    
+    run_date = self['run_date']
+    html.append(run_date.label_tag() + ': ' + str(run_date) + str(run_date.errors) + '<br />')
+    
+    arun = self['advanced_run']
+    html.append(arun.label_tag() + ': ' + str(arun) + str(arun.errors) + '<br />')
+    
+    rl = self['read_length']
+    html.append(rl.label_tag() + ': ' + str(rl) + str(rl.errors) + '<br /><br />')
+    
+    html.append('<table border="0">')
+    html.append(' <tr><td>%s</td><td>%s</td><td>%s</td></tr>' \
+                % ('Lane', 'Species', 'Description'))
+    
+    l1s = self['lane1_species']
+    l1d = self['lane1_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('1', str(l1s), str(l1s.errors), str(l1d), str(l1d.errors)))
+    
+    l2s = self['lane2_species']
+    l2d = self['lane2_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('2', str(l2s), str(l2s.errors), str(l2d), str(l2d.errors)))
+    
+    l3s = self['lane3_species']
+    l3d = self['lane3_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('3', str(l3s), str(l3s.errors), str(l3d), str(l3d.errors)))
+    
+    l4s = self['lane4_species']
+    l4d = self['lane4_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('4', str(l4s), str(l4s.errors), str(l4d), str(l4d.errors)))
+    
+    l5s = self['lane5_species']
+    l5d = self['lane5_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('5', str(l5s), str(l5s.errors), str(l5d), str(l5d.errors)))
+    
+    l6s = self['lane6_species']
+    l6d = self['lane6_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('6', str(l6s), str(l6s.errors), str(l6d), str(l6d.errors)))
+    
+    l7s = self['lane7_species']
+    l7d = self['lane7_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('7', str(l7s), str(l7s.errors), str(l7d), str(l7d.errors)))
+    
+    l8s = self['lane8_species']
+    l8d = self['lane8_description']
+    html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+                % ('8', str(l8s), str(l8s.errors), str(l8d), str(l8d.errors)))
+    
+    html.append('</table><br />')
+    
+    notes = self['notes']
+    html.append('<p>Notes:</p>')
+    html.append(' %s<br />' % (str(notes)))
+    
+    return '\n'.join(html)
+    
+    
+    
\ No newline at end of file
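The eight lane blocks in as_custom are identical except for the lane number; a behavior-preserving sketch of the loop they could collapse into (not part of this commit):

    for lane in range(1, 9):
        species = self['lane%d_species' % lane]
        description = self['lane%d_description' % lane]
        html.append(' <tr><td>%d</td><td>%s %s</td><td>%s %s</td></tr>'
                    % (lane, species, species.errors,
                       description, description.errors))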
diff --git a/htsworkflow/frontend/eland_config/models.py b/htsworkflow/frontend/eland_config/models.py
new file mode 100644 (file)
index 0000000..71a8362
--- /dev/null
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
diff --git a/htsworkflow/frontend/eland_config/urls.py b/htsworkflow/frontend/eland_config/urls.py
new file mode 100644 (file)
index 0000000..129f57c
--- /dev/null
@@ -0,0 +1,10 @@
+from django.conf.urls.defaults import *
+
+urlpatterns = patterns('',
+    # Example:
+    
+    (r'^(?P<flowcell>\w+)/$', 'htsworkflow.frontend.eland_config.views.config'),
+    (r'^$', 'htsworkflow.frontend.eland_config.views.config'),
+    #(r'^$', 'htsworkflow.frontend.eland_config.views.index')
+
+)
diff --git a/htsworkflow/frontend/eland_config/views.py b/htsworkflow/frontend/eland_config/views.py
new file mode 100644 (file)
index 0000000..25edea9
--- /dev/null
@@ -0,0 +1,413 @@
+from django.http import HttpResponse
+from django.shortcuts import render_to_response
+from django.core.exceptions import ObjectDoesNotExist
+
+from htsworkflow.frontend.eland_config import forms
+from htsworkflow.frontend import settings
+from htsworkflow.frontend.fctracker import models
+
+import os
+import glob
+# Create your views here.
+
+
+def _validate_input(data):
+  #if data.find('..') == -1 or data.find('/') == -1 or data.find('\\') == -1:
+  return data.replace('..', '').replace('/', '_').replace('\\', '_')
+
+#def contact(request):
+#    if request.method == 'POST':
+#        form = ContactForm(request.POST)
+#        if form.is_valid():
+#            # Do form processing here...
+#            return HttpResponseRedirect('/url/on_success/')
+#    else:
+#        form = ContactForm()
+#    return
+
+
+
+#def _saveConfigFile(form):
+#  """
+#  Given a valid form, save eland config to file based on flowcell number.
+#  """
+#  assert form.is_valid()
+#  
+#  clean_data = form.cleaned_data
+#  flowcell = clean_data['flow_cell_number'].replace('/','_').replace('..', '__')
+#  
+#  file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
+#  
+#  f = open(file_path, 'w')
+#  cfg = generateElandConfig(form)
+#  f.write(cfg)
+#  f.close()
+#  
+#
+#def _saveToDb(form):
+#  """
+#  Save info to the database.
+#  """
+#  clean_data = form.cleaned_data
+#  
+#  fc_id = clean_data['flow_cell_number']
+#  
+#  try:
+#    fc = models.FlowCell.objects.get(flowcell_id=fc_id)
+#  except models.FlowCell.DoesNotExist:
+#    fc = models.FlowCell()
+#    
+#  fc.flowcell_id = fc_id
+#  fc.run_date = clean_data['run_date']
+#  
+#  #LANE 1
+#  fc.lane1_sample = clean_data['lane1_description']
+#  species_name = clean_data['lane1_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane1_species = specie
+#  
+#  #LANE 2
+#  fc.lane2_sample = clean_data['lane2_description']
+#  species_name = clean_data['lane2_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane2_species = specie
+#  
+#  #LANE 3
+#  fc.lane3_sample = clean_data['lane3_description']
+#  species_name = clean_data['lane3_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane3_species = specie
+#  
+#  #LANE 4
+#  fc.lane4_sample = clean_data['lane4_description']
+#  species_name = clean_data['lane4_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane4_species = specie
+#  
+#  #LANE 5
+#  fc.lane5_sample = clean_data['lane5_description']
+#  species_name = clean_data['lane5_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane5_species = specie
+#  
+#  #LANE 6
+#  fc.lane6_sample = clean_data['lane6_description']
+#  species_name = clean_data['lane6_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane6_species = specie
+#  
+#  #LANE 7
+#  fc.lane7_sample = clean_data['lane7_description']
+#  species_name = clean_data['lane7_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane7_species = specie
+#  
+#  #LANE 8
+#  fc.lane8_sample = clean_data['lane8_description']
+#  species_name = clean_data['lane8_species']
+#  try:
+#    specie = models.Specie.objects.get(scientific_name=species_name)
+#  except models.Specie.DoesNotExist:
+#    specie = models.Specie(scientific_name=species_name)
+#    specie.save()
+#  fc.lane8_species = specie
+#  
+#  fc.notes = clean_data['notes']
+#  
+#  fc.save()
+#  
+#  return fc
+#  
+#
+#def generateElandConfig(form):
+#  data = []
+#  
+#  form = form.cleaned_data
+#  
+#  BASE_DIR = '/data-store01/compbio/genomes'
+#  
+#  data.append("# FLOWCELL: %s" % (form['flow_cell_number']))
+#  data.append("#")
+#  
+#  notes = form['notes'].replace('\r\n', '\n').replace('\r', '\n')
+#  notes = notes.replace('\n', '\n#  ')
+#  data.append("# NOTES:")
+#  data.append("#  %s\n#" % (notes))
+#  
+#  #Convert all newline conventions to unix style
+#  l1d = form['lane1_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  l2d = form['lane2_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  l3d = form['lane3_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  l4d = form['lane4_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  l5d = form['lane5_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  l6d = form['lane6_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  l7d = form['lane7_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  l8d = form['lane8_description'].replace('\r\n', '\n').replace('\r', '\n')
+#  
+#  # Turn new lines into indented commented newlines
+#  l1d = l1d.replace('\n', '\n#  ')
+#  l2d = l2d.replace('\n', '\n#  ')
+#  l3d = l3d.replace('\n', '\n#  ')
+#  l4d = l4d.replace('\n', '\n#  ')
+#  l5d = l5d.replace('\n', '\n#  ')
+#  l6d = l6d.replace('\n', '\n#  ')
+#  l7d = l7d.replace('\n', '\n#  ')
+#  l8d = l8d.replace('\n', '\n#  ')
+#  
+#  data.append("# Lane1: %s" % (l1d))
+#  data.append("# Lane2: %s" % (l2d))
+#  data.append("# Lane3: %s" % (l3d))
+#  data.append("# Lane4: %s" % (l4d))
+#  data.append("# Lane5: %s" % (l5d))
+#  data.append("# Lane6: %s" % (l6d))
+#  data.append("# Lane7: %s" % (l7d))
+#  data.append("# Lane8: %s" % (l8d))
+#  
+#  #data.append("GENOME_DIR %s" % (BASE_DIR))
+#  #data.append("CONTAM_DIR %s" % (BASE_DIR))
+#  read_length = form['read_length']
+#  data.append("READ_LENGTH %d" % (read_length))
+#  #data.append("ELAND_REPEAT")
+#  data.append("ELAND_MULTIPLE_INSTANCES 8")
+#  
+#  #Construct genome dictionary to figure out what lanes to put
+#  # in the config file.
+#  genome_dict = {}
+#  l1s = form['lane1_species']
+#  genome_dict.setdefault(l1s, []).append('1')
+#  l2s = form['lane2_species']
+#  genome_dict.setdefault(l2s, []).append('2')
+#  l3s = form['lane3_species']
+#  genome_dict.setdefault(l3s, []).append('3')
+#  l4s = form['lane4_species']
+#  genome_dict.setdefault(l4s, []).append('4')
+#  l5s = form['lane5_species']
+#  genome_dict.setdefault(l5s, []).append('5')
+#  l6s = form['lane6_species']
+#  genome_dict.setdefault(l6s, []).append('6')
+#  l7s = form['lane7_species']
+#  genome_dict.setdefault(l7s, []).append('7')
+#  l8s = form['lane8_species']
+#  genome_dict.setdefault(l8s, []).append('8')
+#  
+#  genome_list = genome_dict.keys()
+#  genome_list.sort()
+#  
+#  #Loop through and create entries for each species.
+#  for genome in genome_list:
+#    lanes = ''.join(genome_dict[genome])
+#    data.append('%s:ANALYSIS eland' % (lanes))
+#    data.append('%s:READ_LENGTH %s' % (lanes, read_length))
+#    data.append('%s:ELAND_GENOME %s' % (lanes, os.path.join(BASE_DIR, genome)))
+#    data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
+#    
+#  data.append('SEQUENCE_FORMAT --scarf')
+#  
+#  return '\n'.join(data)
+
+
+def getElandConfig(flowcell, regenerate=False):
+  
+  file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
+  
+  # If we are regenerating the config file, skip
+  # reading the existing file. If the file doesn't
+  # exist, try to generate it from the DB.
+  if not regenerate and os.path.isfile(file_path):
+    f = open(file_path, 'r')
+    data = f.read()
+    f.close()
+    return data
+  
+  try:
+    fcObj = models.FlowCell.objects.get(flowcell_id__iexact=flowcell)
+  except ObjectDoesNotExist:
+    return None
+  
+  data = []
+  
+  #form = form.cleaned_data
+  
+  BASE_DIR = '/data-store01/compbio/genomes'
+  
+  data.append("# FLOWCELL: %s" % (fcObj.flowcell_id))
+  data.append("#")
+  
+  notes = fcObj.notes.replace('\r\n', '\n').replace('\r', '\n')
+  notes = notes.replace('\n', '\n#  ')
+  data.append("# NOTES:")
+  data.append("#  %s\n#" % (notes))
+  
+  #Convert all newline conventions to unix style
+  l1d = str(fcObj.lane_1_library.library_id) + '|' \
+          + fcObj.lane_1_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  l2d = str(fcObj.lane_2_library.library_id) + '|' \
+          + fcObj.lane_2_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  l3d = str(fcObj.lane_3_library.library_id) + '|' \
+          + fcObj.lane_3_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  l4d = str(fcObj.lane_4_library.library_id) + '|' \
+          + fcObj.lane_4_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  
+  l5d = str(fcObj.lane_5_library.library_id) + '|' \
+          + fcObj.lane_5_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  l6d = str(fcObj.lane_6_library.library_id) + '|' \
+          + fcObj.lane_6_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  l7d = str(fcObj.lane_7_library.library_id) + '|' \
+          + fcObj.lane_7_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  l8d = str(fcObj.lane_8_library.library_id) + '|' \
+          + fcObj.lane_8_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+  
+  # Turn new lines into indented commented newlines
+  l1d = l1d.replace('\n', '\n#  ')
+  l2d = l2d.replace('\n', '\n#  ')
+  l3d = l3d.replace('\n', '\n#  ')
+  l4d = l4d.replace('\n', '\n#  ')
+  l5d = l5d.replace('\n', '\n#  ')
+  l6d = l6d.replace('\n', '\n#  ')
+  l7d = l7d.replace('\n', '\n#  ')
+  l8d = l8d.replace('\n', '\n#  ')
+  
+  data.append("# Lane1: %s" % (l1d))
+  data.append("# Lane2: %s" % (l2d))
+  data.append("# Lane3: %s" % (l3d))
+  data.append("# Lane4: %s" % (l4d))
+  data.append("# Lane5: %s" % (l5d))
+  data.append("# Lane6: %s" % (l6d))
+  data.append("# Lane7: %s" % (l7d))
+  data.append("# Lane8: %s" % (l8d))
+  
+  #data.append("GENOME_DIR %s" % (BASE_DIR))
+  #data.append("CONTAM_DIR %s" % (BASE_DIR))
+  read_length = fcObj.read_length
+  data.append("READ_LENGTH %d" % (read_length))
+  #data.append("ELAND_REPEAT")
+  data.append("ELAND_MULTIPLE_INSTANCES 8")
+  
+  #Construct genome dictionary to figure out what lanes to put
+  # in the config file.
+  genome_dict = {}
+  
+  #l1s = form['lane1_species']
+  for lane in range(1, 9):
+    lib = getattr(fcObj, 'lane_%d_library' % (lane))
+    species = lib.library_species.scientific_name
+    #species = species + '|' + lib.library_species.use_genome_build
+    genome_dict.setdefault(species, []).append(str(lane))
+  
+  genome_list = genome_dict.keys()
+  genome_list.sort()
+  
+  #Loop through and create entries for each species.
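+  # For illustration, if all eight lanes hypothetically held the same
+  # species, this loop would emit entries like:
+  #   12345678:ANALYSIS eland
+  #   12345678:READ_LENGTH 32
+  #   12345678:ELAND_GENOME %(Homo sapiens)s
+  #   12345678:USE_BASES YY...Y  ('Y' * read_length)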
+  for genome in genome_list:
+    lanes = ''.join(genome_dict[genome])
+    data.append('%s:ANALYSIS eland' % (lanes))
+    data.append('%s:READ_LENGTH %s' % (lanes, read_length))
+    data.append('%s:ELAND_GENOME %s' % (lanes, '%%(%s)s' % (genome)))
+    data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
+    
+  data.append('SEQUENCE_FORMAT --scarf')
+  
+  data = '\n'.join(data)
+  
+  f = open(file_path, 'w')
+  f.write(data)
+  f.close()
+  
+  return data
+
+
+
+def config(request, flowcell=None):
+  """
+  Returns eland config file for a given flowcell number,
+  or returns a list of available flowcell numbers.
+  """
+  
+  # Provide INDEX of available Flowcell config files.
+  if flowcell is None:
+    #Find all FC* config files and report an index html file
+    #fc_list = [ os.path.split(file_path)[1] for file_path in glob.glob(os.path.join(settings.UPLOADTO_CONFIG_FILE, 'FC*')) ]
+    fc_list = [ fc.flowcell_id for fc in models.FlowCell.objects.all() ]
+    
+    #Convert FC* list to html links
+    fc_html = [ '<a href="/eland_config/%s/">%s</a>' % (fc_name, fc_name) for fc_name in fc_list ]
+      
+    return HttpResponse('<br />'.join(fc_html))
+  
+  #FIXME: Should validate flowcell input before using.
+  flowcell = _validate_input(flowcell)
+  cfg = getElandConfig(flowcell, regenerate=True)
+  
+  if not cfg:
+    return HttpResponse("Hmm, config file for %s does not seem to exist." % (flowcell))
+  
+  
+  return HttpResponse(cfg, mimetype="text/plain")
+
+
+
+
+#def index(request):
+#  """
+#  Return a form for filling out information about the flowcell
+#  """
+#  if request.method == 'POST':
+#    form = forms.ConfigForm(request.POST, error_class=forms.DivErrorList)
+#    if form.is_valid():
+#      #cfg = generateElandConfig(form)
+#      _saveConfigFile(form)
+#      _saveToDb(form)
+#      return HttpResponse("Eland Config Saved!", mimetype="text/plain")
+#    else:
+#      return render_to_response('config_form.html', {'form': form })
+#  
+#  else:   
+#    fm = forms.ConfigForm(error_class=forms.DivErrorList)
+#    return render_to_response('config_form.html', {'form': fm })
diff --git a/htsworkflow/frontend/fctracker/__init__.py b/htsworkflow/frontend/fctracker/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/htsworkflow/frontend/fctracker/models.py b/htsworkflow/frontend/fctracker/models.py
new file mode 100644 (file)
index 0000000..e12f0fe
--- /dev/null
@@ -0,0 +1,283 @@
+from django.db import models
+from django.contrib.auth.models import User
+from htsworkflow.frontend import settings
+
+# Create your models here.
+
+class Antibody(models.Model):
+  antigene = models.CharField(max_length=500, db_index=True)
+  catalog = models.CharField(max_length=50, unique=True, db_index=True)
+  antibodies = models.CharField(max_length=500, db_index=True)
+  source = models.CharField(max_length=500, blank=True, db_index=True)
+  biology = models.TextField(blank=True)
+  notes = models.TextField(blank=True)
+  def __str__(self):
+    return '%s - %s (%s)' % (self.antigene, self.antibodies, self.catalog)
+  class Meta:
+    verbose_name_plural = "antibodies"
+    ordering = ["antigene"]
+  class Admin:
+      list_display = ('antigene','antibodies','catalog','source','biology','notes')
+      list_filter = ('antibodies','source')
+      fields = (
+        (None, {
+            'fields': (('antigene','antibodies'),('catalog','source'),('biology'),('notes'))
+        }),
+       )
+
+class Cellline(models.Model):
+  cellline_name = models.CharField(max_length=100, unique=True, db_index=True)
+  notes = models.TextField(blank=True)
+  def __str__(self):
+    return '%s' % (self.cellline_name)
+
+  class Meta:
+    ordering = ["cellline_name"]
+
+  class Admin:
+      fields = (
+        (None, {
+            'fields': (('cellline_name'),('notes'),)
+        }),
+       )
+
+class Condition(models.Model):
+  condition_name = models.CharField(max_length=2000, unique=True, db_index=True)
+  notes = models.TextField(blank=True)
+  def __str__(self):
+    return '%s' % (self.condition_name)
+
+  class Meta:
+    ordering = ["condition_name"]
+
+  class Admin:
+      fields = (
+        (None, {
+            'fields': (('condition_name'),('notes'),)
+        }),
+       )
+
+class Species(models.Model):
+  
+  scientific_name = models.CharField(max_length=256, unique=False, db_index=True, core=True)
+  common_name = models.CharField(max_length=256, blank=True)
+  use_genome_build = models.CharField(max_length=100, blank=False, null=False)
+
+  def __str__(self):
+    return '%s (%s)|%s' % (self.scientific_name, self.common_name, self.use_genome_build)
+  
+  class Meta:
+    verbose_name_plural = "species"
+    ordering = ["scientific_name"]
+  
+  class Admin:
+      fields = (
+        (None, {
+            'fields': (('scientific_name', 'common_name'), ('use_genome_build'))
+        }),
+      )
+
+class Lab(models.Model):
+  
+  name = models.CharField(max_length=100, blank=False, unique=True)
+  
+  def __str__(self):
+    return self.name
+  
+  class Admin:
+    pass
+
+class UserProfile(models.Model):
+  
+  # This allows you to use user.get_profile() to get this object
+  user = models.ForeignKey(User, unique=True)
+
+  lab = models.ForeignKey(Lab)
+  #email = models.CharField(max_length=50, blank=True, null=True)
+  
+  def __str__(self):
+    return '%s (%s lab)' % (self.user, self.lab)
+  
+  class Meta:
+    #verbose_name_plural = "people"
+    #ordering = ["lab"]
+    pass
+    
+  class Admin:
+    #fields = (
+    #  (None, {
+    #      'fields': (('email', 'lab'), ('email'))
+    #  }),
+    #)
+    pass
+
+
+class Library(models.Model):
+  
+  library_id = models.CharField(max_length=30, primary_key=True, db_index=True, core=True)
+  library_name = models.CharField(max_length=100, unique=True, core=True)
+  library_species = models.ForeignKey(Species, core=True)
+  cell_line = models.ForeignKey(Cellline,core=True)
+  condition = models.ForeignKey(Condition,core=True)
+  antibody = models.ForeignKey(Antibody,blank=True,null=True,core=True)
+  
+  EXPERIMENT_TYPES = (
+      ('INPUT_RXLCh','INPUT_RXLCh'),
+      ('ChIP-seq', 'ChIP-seq'),
+      ('Sheared', 'Sheared'),
+      ('RNA-seq', 'RNA-seq'),
+      ('Methyl-seq', 'Methyl-seq'),
+      ('DIP-seq', 'DIP-seq'),
+    ) 
+  experiment_type = models.CharField(max_length=50, choices=EXPERIMENT_TYPES,
+                                     default='RNA-seq')
+  
+  creation_date = models.DateField(blank=True, null=True)
+  made_for = models.ForeignKey(User)
+  made_by = models.CharField(max_length=50, blank=True, default="Lorian")
+  
+  PROTOCOL_END_POINTS = (
+      ('?', 'Unknown'),
+      ('Sample', 'Raw sample'),
+      ('Progress', 'In progress'),
+      ('1A', 'Ligation, then gel'),
+      ('PCR', 'Ligation, then PCR'),
+      ('1Ab', 'Ligation, PCR, then gel'),
+      ('1Aa', 'Ligation, gel, then PCR'),
+      ('2A', 'Ligation, PCR, gel, PCR'),
+      ('Done', 'Completed'),
+    )
+  stopping_point = models.CharField(max_length=25, choices=PROTOCOL_END_POINTS, default='Done')
+  amplified_from_sample = models.ForeignKey('self', blank=True, null=True)  
+  
+  undiluted_concentration = models.DecimalField("Undiluted concentration (ng/ul)", max_digits=5, decimal_places=2, default=0, blank=True, null=True)
+  successful_pM = models.DecimalField(max_digits=5, decimal_places=2, blank=True, null=True)
+  ten_nM_dilution = models.BooleanField()
+  avg_lib_size = models.IntegerField(default=225, blank=True, null=True)
+  notes = models.TextField(blank=True)
+  
+  def __str__(self):
+    return '#%s: %s' % (self.library_id, self.library_name)
+  
+  class Meta:
+    verbose_name_plural = "libraries"
+    ordering = ["-library_id"]
+  
+  class Admin:
+    date_hierarchy = "creation_date"
+    save_as = True
+    save_on_top = True
+    search_fields = ['library_name', 'library_id']
+    list_display = ('library_id', 'library_name', 'made_for', 'creation_date', 'stopping_point')
+    list_display_links = ('library_id', 'library_name')
+    list_filter = ('stopping_point', 'library_species', 'made_for', 'made_by', 'experiment_type')
+    fields = (
+        (None, {
+            'fields': (('library_id', 'library_name'), ('library_species', 'experiment_type'),)
+        }),
+        ('Creation Information:', {
+            'fields' : (('made_for', 'made_by', 'creation_date'), ('stopping_point', 'amplified_from_sample'), ('undiluted_concentration', 'avg_lib_size'), 'notes',)
+        }),
+       ('Run Information:', {
+           'fields' : (('ten_nM_dilution','successful_pM'),)
+       }),
+    )
+
+class FlowCell(models.Model):
+  
+  flowcell_id = models.CharField(max_length=20, unique=True, db_index=True, core=True)
+  run_date = models.DateTimeField(core=True)
+  advanced_run = models.BooleanField(default=False)
+  read_length = models.IntegerField(default=32)
+  
+  
+  FLOWCELL_STATUSES = (
+      ('No', 'Not run'),
+      ('F', 'Failed'),
+      ('Del', 'Data deleted'),
+      ('A', 'Data available'),
+      ('In', 'In progress'),
+    )
+  flowcell_status = models.CharField(max_length=10, choices=FLOWCELL_STATUSES)
+  
+  lane_1_library = models.ForeignKey(Library, related_name="lane_1_library")
+  lane_2_library = models.ForeignKey(Library, related_name="lane_2_library")
+  lane_3_library = models.ForeignKey(Library, related_name="lane_3_library")
+  lane_4_library = models.ForeignKey(Library, related_name="lane_4_library")
+  lane_5_library = models.ForeignKey(Library, related_name="lane_5_library")
+  lane_6_library = models.ForeignKey(Library, related_name="lane_6_library")
+  lane_7_library = models.ForeignKey(Library, related_name="lane_7_library")
+  lane_8_library = models.ForeignKey(Library, related_name="lane_8_library")
+
+  lane_1_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  lane_2_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  lane_3_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  lane_4_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  lane_5_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  lane_6_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  lane_7_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  lane_8_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+  
+  lane_1_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  lane_2_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  lane_3_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  lane_4_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  lane_5_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  lane_6_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  lane_7_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  lane_8_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+  
+  kit_1000148 = models.IntegerField(blank=True, null=True)
+  kit_1000147 = models.IntegerField(blank=True, null=True)
+  kit_1000183 = models.IntegerField(blank=True, null=True)
+  kit_1001625 = models.IntegerField(blank=True, null=True)
+  
+  cluster_station_id = models.CharField(max_length=50, blank=True, null=True)
+  sequencer_id = models.CharField(max_length=50, blank=True, null=True)
+  
+  notes = models.TextField(blank=True)
+
+  def __str__(self):
+    return '%s (%s)' % (self.flowcell_id, self.run_date) 
+  
+  class Meta:
+    ordering = ["-run_date"]
+  
+  class Admin:
+    date_hierarchy = "run_date"
+    save_as = True
+    save_on_top = True
+    search_fields = ['flowcell_id', 'lane_1_library__library_id', 'lane_1_library__library_name', 'lane_2_library__library_id', 'lane_2_library__library_name', 'lane_3_library__library_id', 'lane_3_library__library_name', 'lane_4_library__library_id', 'lane_4_library__library_name', 'lane_5_library__library_id', 'lane_5_library__library_name', 'lane_6_library__library_id', 'lane_6_library__library_name', 'lane_7_library__library_id', 'lane_7_library__library_name', 'lane_8_library__library_id', 'lane_8_library__library_name']
+    list_display = ('run_date', 'flowcell_status', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
+    list_display_links = ('run_date', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
+    fields = (
+        (None, {
+            'fields': ('run_date', ('flowcell_id', 'flowcell_status'), ('read_length', 'advanced_run'),)
+        }),
+        ('Lanes:', {
+            'fields' : (('lane_1_library', 'lane_1_pM'), ('lane_2_library', 'lane_2_pM'), ('lane_3_library', 'lane_3_pM'), ('lane_4_library', 'lane_4_pM'), ('lane_5_library', 'lane_5_pM'), ('lane_6_library', 'lane_6_pM'), ('lane_7_library', 'lane_7_pM'), ('lane_8_library', 'lane_8_pM'),)
+        }),
+       (None, {
+           'fields' : ('notes',)
+       }),
+       ('Kits & Machines:', {
+           'classes': 'collapse',
+           'fields' : (('kit_1000148', 'kit_1000147', 'kit_1000183', 'kit_1001625'), ('cluster_station_id', 'sequencer_id'),)
+       }),
+       ('Cluster Estimates:', {
+           'classes': 'collapse',
+           'fields' : (('lane_1_cluster_estimate', 'lane_2_cluster_estimate'), ('lane_3_cluster_estimate', 'lane_4_cluster_estimate'), ('lane_5_cluster_estimate', 'lane_6_cluster_estimate'), ('lane_7_cluster_estimate', 'lane_8_cluster_estimate',),)
+       }),
+    )
+
+# Did not finish implementing, removing to avoid further confusion.
+#class ElandResult(models.Model):
+#  
+#  class Admin: pass
+#  
+#  flow_cell = models.ForeignKey(FlowCell)
+#  config_file = models.FileField(upload_to=settings.UPLOADTO_CONFIG_FILE)
+#  eland_result_pack = models.FileField(upload_to=settings.UPLOADTO_ELAND_RESULT_PACKS)
+#  bed_file_pack = models.FileField(upload_to=settings.UPLOADTO_BED_PACKS)
+#  
+#  notes = models.TextField(blank=True)
diff --git a/htsworkflow/frontend/fctracker/views.py b/htsworkflow/frontend/fctracker/views.py
new file mode 100644 (file)
index 0000000..2299e4f
--- /dev/null
@@ -0,0 +1 @@
+# Create your views here.
\ No newline at end of file
diff --git a/htsworkflow/frontend/manage.py b/htsworkflow/frontend/manage.py
new file mode 100644 (file)
index 0000000..5e78ea9
--- /dev/null
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+from django.core.management import execute_manager
+try:
+    import settings # Assumed to be in the same directory.
+except ImportError:
+    import sys
+    sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
+    sys.exit(1)
+
+if __name__ == "__main__":
+    execute_manager(settings)
diff --git a/htsworkflow/frontend/settings.py b/htsworkflow/frontend/settings.py
new file mode 100644 (file)
index 0000000..c9bee20
--- /dev/null
@@ -0,0 +1,94 @@
+import os
+
+# Django settings for elandifier project.
+
+DEBUG = True
+TEMPLATE_DEBUG = DEBUG
+
+ADMINS = (
+    # ('Your Name', 'your_email@domain.com'),
+)
+
+MANAGERS = ADMINS
+
+DATABASE_ENGINE = 'sqlite3'           # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'.
+DATABASE_NAME = os.path.abspath('../../fctracker.db')             # Or path to database file if using sqlite3.
+DATABASE_USER = ''             # Not used with sqlite3.
+DATABASE_PASSWORD = ''         # Not used with sqlite3.
+DATABASE_HOST = ''             # Set to empty string for localhost. Not used with sqlite3.
+DATABASE_PORT = ''             # Set to empty string for default. Not used with sqlite3.
+
+# Local time zone for this installation. Choices can be found here:
+# http://www.postgresql.org/docs/8.1/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE
+# although not all variations may be possible on all operating systems.
+# If running in a Windows environment this must be set to the same as your
+# system time zone.
+TIME_ZONE = 'America/Los_Angeles'
+
+# Language code for this installation. All choices can be found here:
+# http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
+# http://blogs.law.harvard.edu/tech/stories/storyReader$15
+LANGUAGE_CODE = 'en-us'
+
+SITE_ID = 1
+
+# If you set this to False, Django will make some optimizations so as not
+# to load the internationalization machinery.
+USE_I18N = True
+
+# Absolute path to the directory that holds media.
+# Example: "/home/media/media.lawrence.com/"
+MEDIA_ROOT = ''
+
+# URL that handles the media served from MEDIA_ROOT.
+# Example: "http://media.lawrence.com"
+MEDIA_URL = ''
+
+# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
+# trailing slash.
+# Examples: "http://foo.com/media/", "/media/".
+ADMIN_MEDIA_PREFIX = '/media/'
+
+# Make this unique, and don't share it with anybody.
+SECRET_KEY = '(ekv^=gf(j9f(x25@a7r+8)hqlz%&_1!tw^75l%^041#vi=@4n'
+
+# List of callables that know how to import templates from various sources.
+TEMPLATE_LOADERS = (
+    'django.template.loaders.filesystem.load_template_source',
+    'django.template.loaders.app_directories.load_template_source',
+#     'django.template.loaders.eggs.load_template_source',
+)
+
+MIDDLEWARE_CLASSES = (
+    'django.middleware.common.CommonMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.middleware.doc.XViewMiddleware',
+)
+
+ROOT_URLCONF = 'htsworkflow.frontend.urls'
+
+TEMPLATE_DIRS = (
+    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
+    # Always use forward slashes, even on Windows.
+    # Don't forget to use absolute paths, not relative paths.
+    os.path.abspath("../../templates"),
+)
+
+INSTALLED_APPS = (
+    'django.contrib.admin',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.sites',
+    'htsworkflow.frontend.eland_config',
+    'htsworkflow.frontend.fctracker',
+    'django.contrib.databrowse',
+)
+
+# Project specific settings
+UPLOADTO_HOME = os.path.abspath('../../uploads')
+UPLOADTO_CONFIG_FILE = os.path.join(UPLOADTO_HOME, 'eland_config')
+UPLOADTO_ELAND_RESULT_PACKS = os.path.join(UPLOADTO_HOME, 'eland_results')
+UPLOADTO_BED_PACKS = os.path.join(UPLOADTO_HOME, 'bed_packs')
+
diff --git a/htsworkflow/frontend/urls.py b/htsworkflow/frontend/urls.py
new file mode 100644 (file)
index 0000000..a791b85
--- /dev/null
@@ -0,0 +1,16 @@
+from django.conf.urls.defaults import *
+
+# Databrowser:
+from django.contrib import databrowse
+from htsworkflow.frontend.fctracker.models import Library, FlowCell
+databrowse.site.register(Library)
+databrowse.site.register(FlowCell)
+
+urlpatterns = patterns('',
+    # Base:
+    (r'^eland_config/', include('htsworkflow.frontend.eland_config.urls')),
+    # Admin:
+    (r'^admin/', include('django.contrib.admin.urls')),
+    # Databrowser:
+    (r'^databrowse/(.*)', databrowse.site.root),
+)
diff --git a/htsworkflow/pipeline/__init__.py b/htsworkflow/pipeline/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/htsworkflow/pipeline/bustard.py b/htsworkflow/pipeline/bustard.py
new file mode 100644 (file)
index 0000000..4e268f2
--- /dev/null
@@ -0,0 +1,146 @@
+
+from datetime import date
+from glob import glob
+import logging
+import os
+import time
+import re
+
+from htsworkflow.pipeline.runfolder import \
+   ElementTree, \
+   VERSION_RE, \
+   EUROPEAN_STRPTIME
+
+class Phasing(object):
+    PHASING = 'Phasing'
+    PREPHASING = 'Prephasing'
+
+    def __init__(self, fromfile=None, xml=None):
+        self.lane = None
+        self.phasing = None
+        self.prephasing = None
+
+        if fromfile is not None:
+            self._initialize_from_file(fromfile)
+        elif xml is not None:
+            self.set_elements(xml)
+
+    def _initialize_from_file(self, pathname):
+        path, name = os.path.split(pathname)
+        basename, ext = os.path.splitext(name)
+        # the last character of the param base filename should be the
+        # lane number
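+        # e.g. params4.xml -> basename 'params4' -> lane 4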
+        tree = ElementTree.parse(pathname).getroot()
+        self.set_elements(tree)
+        self.lane = int(basename[-1])
+
+    def get_elements(self):
+        root = ElementTree.Element(Phasing.PHASING, {'lane': str(self.lane)})
+        phasing = ElementTree.SubElement(root, Phasing.PHASING)
+        phasing.text = str(self.phasing)
+        prephasing = ElementTree.SubElement(root, Phasing.PREPHASING)
+        prephasing.text = str(self.prephasing)
+        return root
+
+    def set_elements(self, tree):
+        if tree.tag not in ('Phasing', 'Parameters'):
+            raise ValueError('expected Phasing or Parameters')
+        lane = tree.attrib.get('lane', None)
+        if lane is not None:
+            self.lane = int(lane)
+        for element in list(tree):
+            if element.tag == Phasing.PHASING:
+                self.phasing = float(element.text)
+            elif element.tag == Phasing.PREPHASING:
+                self.prephasing = float(element.text)
+
+class Bustard(object):
+    XML_VERSION = 1
+
+    # Xml Tags
+    BUSTARD = 'Bustard'
+    SOFTWARE_VERSION = 'version'
+    DATE = 'run_time'
+    USER = 'user'
+    PARAMETERS = 'Parameters'
+
+    def __init__(self, xml=None):
+        self.version = None
+        self.date = date.today()
+        self.user = None
+        self.phasing = {}
+
+        if xml is not None:
+            self.set_elements(xml)
+
+    def _get_time(self):
+        return time.mktime(self.date.timetuple())
+    time = property(_get_time, doc='return run time as seconds since epoch')
+
+    def dump(self):
+        print "Bustard version:", self.version
+        print "Run date", self.date
+        print "user:", self.user
+        for lane, tree in self.phasing.items():
+            print lane
+            print tree
+
+    def get_elements(self):
+        root = ElementTree.Element('Bustard', 
+                                   {'version': str(Bustard.XML_VERSION)})
+        version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION)
+        version.text = self.version
+        run_date = ElementTree.SubElement(root, Bustard.DATE)
+        run_date.text = str(self.time)
+        user = ElementTree.SubElement(root, Bustard.USER)
+        user.text = self.user
+        params = ElementTree.SubElement(root, Bustard.PARAMETERS)
+        for p in self.phasing.values():
+            params.append(p.get_elements())
+        return root
+
+    def set_elements(self, tree):
+        if tree.tag != Bustard.BUSTARD:
+            raise ValueError('Expected "Bustard" SubElements')
+        xml_version = int(tree.attrib.get('version', 0))
+        if xml_version > Bustard.XML_VERSION:
+            logging.warn('Bustard XML tree is a higher version than this class')
+        for element in list(tree):
+            if element.tag == Bustard.SOFTWARE_VERSION:
+                self.version = element.text
+            elif element.tag == Bustard.DATE:
+                self.date = date.fromtimestamp(float(element.text))
+            elif element.tag == Bustard.USER:
+                self.user = element.text
+            elif element.tag == Bustard.PARAMETERS:
+                for param in element:
+                    p = Phasing(xml=param)
+                    self.phasing[p.lane] = p
+            else:
+                raise ValueError("Unrecognized tag: %s" % (element.tag,))
+        
+
+
+def bustard(pathname):
+    """
+    Construct a Bustard object from pathname
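+
+    The directory name is assumed to look like
+    Bustard1.9.1_14-11-2007_king (version, date, then user).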
+    """
+    b = Bustard()
+    path, name = os.path.split(pathname)
+    groups = name.split("_")
+    version = re.search(VERSION_RE, groups[0])
+    b.version = version.group(1)
+    t = time.strptime(groups[1], EUROPEAN_STRPTIME)
+    b.date = date(*t[0:3])
+    b.user = groups[2]
+    paramfiles = glob(os.path.join(pathname, "params?.xml"))
+    for paramfile in paramfiles:
+        phasing = Phasing(paramfile)
+        assert (phasing.lane >= 1 and phasing.lane <= 8)
+        b.phasing[phasing.lane] = phasing
+    return b
+
+def fromxml(tree):
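+    """
+    Initialize a Bustard object from an element tree node
+    """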
+    b = Bustard()
+    b.set_elements(tree)
+    return b
diff --git a/htsworkflow/pipeline/configure_run.py b/htsworkflow/pipeline/configure_run.py
new file mode 100644 (file)
index 0000000..d541de3
--- /dev/null
@@ -0,0 +1,606 @@
+#!/usr/bin/python
+import subprocess
+import logging
+import time
+import re
+import os
+
+from htsworkflow.pipeline.retrieve_config import getCombinedOptions, saveConfigFile
+from htsworkflow.pipeline.retrieve_config import FlowCellNotFound, WebError404
+from htsworkflow.pipeline.genome_mapper import DuplicateGenome, getAvailableGenomes, constructMapperDict
+from htsworkflow.pipeline.run_status import GARunStatus
+
+from pyinotify import WatchManager, ThreadedNotifier
+from pyinotify import EventsCodes, ProcessEvent
+
+class ConfigInfo:
+  
+  def __init__(self):
+    #run_path = firecrest analysis directory to run analysis from
+    self.run_path = None
+    self.bustard_path = None
+    self.config_filepath = None
+    self.status = None
+
+    #top level directory where all analyses are placed
+    self.base_analysis_dir = None
+    #analysis_dir, top level analysis dir...
+    # base_analysis_dir + '/070924_USI-EAS44_0022_FC12150'
+    self.analysis_dir = None
+
+
+  def createStatusObject(self):
+    """
+    Creates a status object which can be queried for
+    status of running the pipeline
+
+    returns True if object created
+    returns False if object cannot be created
+    """
+    if self.config_filepath is None:
+      return False
+
+    self.status = GARunStatus(self.config_filepath)
+    return True
+
+
+
+####################################
+# inotify event processor
+
+s_firecrest_finished = re.compile('Firecrest[0-9\._\-A-Za-z]+/finished.txt')
+s_bustard_finished = re.compile('Bustard[0-9\._\-A-Za-z]+/finished.txt')
+s_gerald_finished = re.compile('GERALD[0-9\._\-A-Za-z]+/finished.txt')
+
+s_gerald_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/GERALD[0-9\._\-A-Za-z]+/')
+s_bustard_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/')
+s_firecrest_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/')
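+# e.g. a path ending in 'Bustard1.9.1_14-11-2007_king/finished.txt'
+# would match s_bustard_finished (illustrative directory name).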
+
+class RunEvent(ProcessEvent):
+
+  def __init__(self, conf_info):
+
+    self.run_status_dict = {'firecrest': False,
+                            'bustard': False,
+                            'gerald': False}
+
+    self._ci = conf_info
+
+    ProcessEvent.__init__(self)
+    
+
+  def process_IN_CREATE(self, event):
+    fullpath = os.path.join(event.path, event.name)
+    if s_finished.search(fullpath):
+      logging.info("File Found: %s" % (fullpath))
+
+      if s_firecrest_finished.search(fullpath):
+        self.run_status_dict['firecrest'] = True
+        self._ci.status.updateFirecrest(event.name)
+      elif s_bustard_finished.search(fullpath):
+        self.run_status_dict['bustard'] = True
+        self._ci.status.updateBustard(event.name)
+      elif s_gerald_finished.search(fullpath):
+        self.run_status_dict['gerald'] = True
+        self._ci.status.updateGerald(event.name)
+
+    #WARNING: The following order is important!!
+    # Firecrest regex will catch all gerald, bustard, and firecrest
+    # Bustard regex will catch all gerald and bustard
+    # Gerald regex will catch all gerald
+    # So, order needs to be Gerald, Bustard, Firecrest, or this
+    #  won't work properly.
+    elif s_gerald_all.search(fullpath):
+      self._ci.status.updateGerald(event.name)
+    elif s_bustard_all.search(fullpath):
+      self._ci.status.updateBustard(event.name)
+    elif s_firecrest_all.search(fullpath):
+      self._ci.status.updateFirecrest(event.name)
+      
+    #print "Create: %s" % (os.path.join(event.path, event.name))
+
+  def process_IN_DELETE(self, event):
+    #print "Remove %s" % (os.path.join(event.path, event.name))
+    pass
+
+
+
+
+#FLAGS
+# Config Step Error
+RUN_ABORT = 'abort'
+# Run Step Error
+RUN_FAILED = 'failed'
+
+
+#####################################
+# Configure Step (goat_pipeline.py)
+#Info
+s_start = re.compile('Starting Genome Analyzer Pipeline')
+s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
+s_generating = re.compile('^Generating journals, Makefiles')
+s_seq_folder = re.compile('^Sequence folder: ')
+s_seq_folder_sub = re.compile('want to make ')
+s_stderr_taskcomplete = re.compile('^Task complete, exiting')
+
+#Errors
+s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
+s_species_dir_err = re.compile('Error: Lane [1-8]:')
+s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
+s_missing_cycles = re.compile('^Error: Tile s_[1-8]_[0-9]+: Different number of cycles: [0-9]+ instead of [0-9]+')
+
+SUPPRESS_MISSING_CYCLES = False
+
+
+##Ignore - Example of out above each ignore regex.
+#NOTE: Commenting out an ignore will cause it to be
+# logged as DEBUG with the logging module.
+#CF_STDERR_IGNORE_LIST = []
+s_skip = re.compile('s_[0-8]_[0-9]+')
+
+
+##########################################
+# Pipeline Run Step (make -j8 recursive)
+
+##Info
+s_finished = re.compile('finished')
+
+##Errors
+s_make_error = re.compile('^make[\S\s]+Error')
+s_no_gnuplot = re.compile('gnuplot: command not found')
+s_no_convert = re.compile('^Can\'t exec "convert"')
+s_no_ghostscript = re.compile('gs: command not found')
+
+##Ignore - Example of out above each ignore regex.
+#NOTE: Commenting out an ignore will cause it to be
+# logged as DEBUG with the logging module.
+#
+PL_STDERR_IGNORE_LIST = []
+# Info: PF 11802
+PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') )
+# About to analyse intensity file s_4_0101_sig2.txt
+PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') )
+# Will send output to standard output
+PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') )
+# Found 31877 clusters
+PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') )
+# Will use quality criterion ((CHASTITY>=0.6)
+PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') )
+# Quality criterion translated to (($F[5]>=0.6))
+PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') )
+# opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt
+#  AND
+# opened s_4_0103_qhg.txt
+PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') )
+# 81129 sequences out of 157651 passed filter criteria
+PL_STDERR_IGNORE_LIST.append( re.compile('^[0-9]+ sequences out of [0-9]+ passed filter criteria') )
+
+
+def pl_stderr_ignore(line):
+  """
+  Searches lines for lines to ignore (i.e. not to log)
+
+  returns True if line should be ignored
+  returns False if line should NOT be ignored
+  """
+  for s in PL_STDERR_IGNORE_LIST:
+    if s.search(line):
+      return True
+  return False
+
+
+def config_stdout_handler(line, conf_info):
+  """
+  Processes each line of output from GOAT
+  and stores useful information using the logging module
+
+  Loads useful information into conf_info as well, for future
+  use outside the function.
+
+  returns True if found condition that signifies success.
+  """
+
+  # Skip irrelevant line (without logging)
+  if s_skip.search(line):
+    pass
+
+  # Detect invalid command-line arguments
+  elif s_invalid_cmdline.search(line):
+    logging.error("Invalid commandline options!")
+
+  # Detect starting of configuration
+  elif s_start.search(line):
+    logging.info('START: Configuring pipeline')
+
+  # Detect it made it past invalid arguments
+  elif s_gerald.search(line):
+    logging.info('Running make now')
+
+  # Detect that make files have been generated (based on output)
+  elif s_generating.search(line):
+    logging.info('Makefiles generated')
+    return True
+
+  # Capture run directory
+  elif s_seq_folder.search(line):
+    mo = s_seq_folder_sub.search(line)
+    #Output changed when using --tiles=<tiles>
+    # at least in pipeline v0.3.0b2
+    if mo:
+      firecrest_bustard_gerald_makefile = line[mo.end():]
+      firecrest_bustard_gerald, junk = \
+                                os.path.split(firecrest_bustard_gerald_makefile)
+      firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald)
+      firecrest, junk = os.path.split(firecrest_bustard)
+
+      conf_info.bustard_path = firecrest_bustard
+      conf_info.run_path = firecrest
+    
+    #Standard output handling
+    else:
+      print 'Sequence line:', line
+      mo = s_seq_folder.search(line)
+      conf_info.bustard_path = line[mo.end():]
+      conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
+
+  # Log all other output for debugging purposes
+  else:
+    logging.warning('CONF:?: %s' % (line))
+
+  return False
+
+
+
+def config_stderr_handler(line, conf_info):
+  """
+  Processes each line of stderr output from GOAT
+  and stores useful information using the logging module
+
+  Loads useful information into conf_info as well, for future
+  use outside the function.
+
+  returns RUN_ABORT upon detecting failure;
+          True on success message;
+          False if neutral message
+            (i.e. doesn't signify failure or success)
+  """
+  global SUPPRESS_MISSING_CYCLES
+
+  # Detect invalid species directory error
+  if s_species_dir_err.search(line):
+    logging.error(line)
+    return RUN_ABORT
+  # Detect goat_pipeline.py traceback
+  elif s_goat_traceb.search(line):
+    logging.error("Goat config script died, traceback in debug output")
+    return RUN_ABORT
+  # Detect indication of successful configuration (from stderr; odd, but ok)
+  elif s_stderr_taskcomplete.search(line):
+    logging.info('Configure step successful (from: stderr)')
+    return True
+  # Detect missing cycles
+  elif s_missing_cycles.search(line):
+
+    # Only display error once
+    if not SUPPRESS_MISSING_CYCLES:
+      logging.error("Missing cycles detected; Not all cycles copied?")
+      logging.debug("CONF:STDERR:MISSING_CYCLES: %s" % (line))
+      SUPPRESS_MISSING_CYCLES = True
+    return RUN_ABORT
+  
+  # Log all other output as debug output
+  else:
+    logging.debug('CONF:STDERR:?: %s' % (line))
+
+  # Neutral (not failure; nor success)
+  return False
+
+
+#def pipeline_stdout_handler(line, conf_info):
+#  """
+#  Processes each line of output from running the pipeline
+#  and stores useful information using the logging module
+#
+#  Loads useful information into conf_info as well, for future
+#  use outside the function.
+#
+#  returns True if found condition that signifies success.
+#  """
+#
+#  #f.write(line + '\n')
+#
+#  return True
+
+
+
+def pipeline_stderr_handler(line, conf_info):
+  """
+  Processes each line of stderr from the pipeline run
+  and stores useful information using the logging module
+
+  ##FIXME: Future feature (doesn't actually do this yet)
+  #Loads useful information into conf_info as well, for future
+  #use outside the function.
+
+  returns RUN_FAILED upon detecting failure;
+          #True on success message; (no clear success state)
+          False if neutral message
+            (i.e. doesn't signify failure or success)
+  """
+
+  if pl_stderr_ignore(line):
+    pass
+  elif s_make_error.search(line):
+    logging.error("make error detected; run failed")
+    return RUN_FAILED
+  elif s_no_gnuplot.search(line):
+    logging.error("gnuplot not found")
+    return RUN_FAILED
+  elif s_no_convert.search(line):
+    logging.error("imagemagick's convert command not found")
+    return RUN_FAILED
+  elif s_no_ghostscript.search(line):
+    logging.error("ghostscript not found")
+    return RUN_FAILED
+  else:
+    logging.debug('PIPE:STDERR:?: %s' % (line))
+
+  return False
+
+
+def retrieve_config(conf_info, flowcell, cfg_filepath, genome_dir):
+  """
+  Gets the config file from server...
+  requires config file in:
+    /etc/ga_frontend/ga_frontend.conf
+   or
+    ~/.ga_frontend.conf
+
+  with:
+  [config_file_server]
+  base_host_url: http://host:port
+
+  returns True if successful, False on failure
+  """
+  options = getCombinedOptions()
+
+  if options.url is None:
+    logging.error("~/.ga_frontend.conf or /etc/ga_frontend/ga_frontend.conf" \
+                  " missing base_host_url option")
+    return False
+
+  try:
+    saveConfigFile(flowcell, options.url, cfg_filepath)
+    conf_info.config_filepath = cfg_filepath
+  except FlowCellNotFound, e:
+    logging.error(e)
+    return False
+  except WebError404, e:
+    logging.error(e)
+    return False
+  except IOError, e:
+    logging.error(e)
+    return False
+  except Exception, e:
+    logging.error(e)
+    return False
+
+  f = open(cfg_filepath, 'r')
+  data = f.read()
+  f.close()
+
+  genome_dict = getAvailableGenomes(genome_dir)
+  mapper_dict = constructMapperDict(genome_dict)
+
+  logging.debug(data)
+
+  f = open(cfg_filepath, 'w')
+  f.write(data % (mapper_dict))
+  f.close()
+  
+  return True
+  
+
+
+def configure(conf_info):
+  """
+  Attempts to configure the GA pipeline using goat.
+
+  Uses logging module to store information about status.
+
+  returns True if configuration successful, otherwise False.
+  """
+  #ERROR Test:
+  #pipe = subprocess.Popen(['goat_pipeline.py',
+  #                         '--GERALD=config32bk.txt',
+  #                         '--make .',],
+  #                         #'.'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+  #ERROR Test (2), causes goat_pipeline.py traceback
+  #pipe = subprocess.Popen(['goat_pipeline.py',
+  #                  '--GERALD=%s' % (conf_info.config_filepath),
+  #                         '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
+  #                         '--make',
+  #                         '.'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+  ##########################
+  # Run configuration step
+  #   Not a test; actual configure attempt.
+  #pipe = subprocess.Popen(['goat_pipeline.py',
+  #                  '--GERALD=%s' % (conf_info.config_filepath),
+  #                         '--make',
+  #                         '.'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+
+  stdout_filepath = os.path.join(conf_info.analysis_dir,
+                                 "pipeline_configure_stdout.txt")
+  stderr_filepath = os.path.join(conf_info.analysis_dir,
+                                 "pipeline_configure_stderr.txt")
+
+  fout = open(stdout_filepath, 'w')
+  ferr = open(stderr_filepath, 'w')
+  
+  pipe = subprocess.Popen(['goat_pipeline.py',
+                    '--GERALD=%s' % (conf_info.config_filepath),
+                           #'--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_0103,s_4_0104',
+                           '--make',
+                           conf_info.analysis_dir],
+                          stdout=fout,
+                          stderr=ferr)
+
+  print "Configuring pipeline: %s" % (time.ctime())
+  error_code = pipe.wait()
+
+  # Clean up
+  fout.close()
+  ferr.close()
+  
+  
+  ##################
+  # Process stdout
+  fout = open(stdout_filepath, 'r')
+  
+  stdout_line = fout.readline()
+
+  complete = False
+  while stdout_line != '':
+    # Handle stdout
+    if config_stdout_handler(stdout_line, conf_info):
+      complete = True
+    stdout_line = fout.readline()
+
+  fout.close()
+
+
+  #error_code = pipe.wait()
+  if error_code:
+    logging.error('Received error_code: %s' % (error_code))
+  else:
+    logging.info('We are go for launch!')
+
+  #Process stderr
+  ferr = open(stderr_filepath, 'r')
+  stderr_line = ferr.readline()
+
+  abort = 'NO!'
+  stderr_success = False
+  while stderr_line != '':
+    stderr_status = config_stderr_handler(stderr_line, conf_info)
+    if stderr_status == RUN_ABORT:
+      abort = RUN_ABORT
+    elif stderr_status is True:
+      stderr_success = True
+    stderr_line = ferr.readline()
+
+  ferr.close()
+
+
+  #Success requirements:
+  # 1) The stdout handler reported completion
+  # 2) The program exited with status 0
+  # 3) No abort condition was found in stderr
+  # 4) The stderr handler reported success
+  print '#Expect: True, False, True, True'
+  print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
+  status = complete is True and \
+           bool(error_code) is False and \
+           abort != RUN_ABORT and \
+           stderr_success is True
+
+  # If everything was successful, but for some reason
+  #  we didn't retrieve the path info, log it.
+  if status is True:
+    if conf_info.bustard_path is None or conf_info.run_path is None:
+      logging.error("Failed to retrieve run_path")
+      return False
+  
+  return status
+
+
+def run_pipeline(conf_info):
+  """
+  Run the pipeline and monitor status.
+  """
+  # Fail if the run_path doesn't actually exist
+  if not os.path.exists(conf_info.run_path):
+    logging.error('Run path does not exist: %s' \
+              % (conf_info.run_path))
+    return False
+
+  # Change cwd to run_path
+  stdout_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stdout.txt')
+  stderr_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stderr.txt')
+
+  # Create status object
+  conf_info.createStatusObject()
+
+  # Monitor file creation
+  wm = WatchManager()
+  mask = EventsCodes.IN_DELETE | EventsCodes.IN_CREATE
+  event = RunEvent(conf_info)
+  notifier = ThreadedNotifier(wm, event)
+  notifier.start()
+  wdd = wm.add_watch(conf_info.run_path, mask, rec=True)
+
+  # Log pipeline starting
+  logging.info('STARTING PIPELINE @ %s' % (time.ctime()))
+  
+  # Start the pipeline (and hide!)
+  #pipe = subprocess.Popen(['make',
+  #                         '-j8',
+  #                         'recursive'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+  fout = open(stdout_filepath, 'w')
+  ferr = open(stderr_filepath, 'w')
+
+  pipe = subprocess.Popen(['make',
+                           '--directory=%s' % (conf_info.run_path),
+                           '-j8',
+                           'recursive'],
+                           stdout=fout,
+                           stderr=ferr)
+                           #shell=True)
+  # Wait for run to finish
+  retcode = pipe.wait()
+
+
+  # Clean up
+  notifier.stop()
+  fout.close()
+  ferr.close()
+
+  # Process stderr
+  ferr = open(stderr_filepath, 'r')
+
+  run_failed_stderr = False
+  for line in ferr:
+    err_status = pipeline_stderr_handler(line, conf_info)
+    if err_status == RUN_FAILED:
+      run_failed_stderr = True
+
+  ferr.close()
+
+  # Finished file check!
+  print 'RUN SUCCESS CHECK:'
+  for key, value in event.run_status_dict.items():
+    print '  %s: %s' % (key, value)
+
+  dstatus = event.run_status_dict
+
+  # Success or failure check
+  status = (retcode == 0) and \
+           run_failed_stderr is False and \
+           dstatus['firecrest'] is True and \
+           dstatus['bustard'] is True and \
+           dstatus['gerald'] is True
+
+  return status
+
+
diff --git a/htsworkflow/pipeline/firecrest.py b/htsworkflow/pipeline/firecrest.py
new file mode 100644 (file)
index 0000000..89ea598
--- /dev/null
@@ -0,0 +1,127 @@
+"""
+Extract information about the Firecrest run
+
+Firecrest - class holding the properties we found
+firecrest - Firecrest factory function initialized from a directory name
+fromxml - Firecrest factory function initialized from an xml dump from
+          the Firecrest object.
+"""
+
+from datetime import date
+import logging
+import os
+import re
+import time
+
+from htsworkflow.pipeline.runfolder import \
+   ElementTree, \
+   VERSION_RE, \
+   EUROPEAN_STRPTIME
+
+class Firecrest(object):
+    XML_VERSION=1
+
+    # xml tag names
+    FIRECREST = 'Firecrest'
+    SOFTWARE_VERSION = 'version'
+    START = 'FirstCycle'
+    STOP = 'LastCycle'
+    DATE = 'run_time'
+    USER = 'user'
+    MATRIX = 'matrix'
+
+    def __init__(self, xml=None):
+        self.start = None
+        self.stop = None
+        self.version = None
+        self.date = date.today()
+        self.user = None
+        self.matrix = None
+
+        if xml is not None:
+            self.set_elements(xml)
+        
+    def _get_time(self):
+        return time.mktime(self.date.timetuple())
+    time = property(_get_time, doc='return run time as seconds since epoch')
+
+    def dump(self):
+        print "Starting cycle:", self.start
+        print "Ending cycle:", self.stop
+        print "Firecrest version:", self.version
+        print "Run date:", self.date
+        print "user:", self.user
+
+    def get_elements(self):
+        attribs = {'version': str(Firecrest.XML_VERSION) }
+        root = ElementTree.Element(Firecrest.FIRECREST, attrib=attribs)
+        version = ElementTree.SubElement(root, Firecrest.SOFTWARE_VERSION)
+        version.text = self.version
+        start_cycle = ElementTree.SubElement(root, Firecrest.START)
+        start_cycle.text = str(self.start)
+        stop_cycle = ElementTree.SubElement(root, Firecrest.STOP)
+        stop_cycle.text = str(self.stop)
+        run_date = ElementTree.SubElement(root, Firecrest.DATE)
+        run_date.text = str(self.time)
+        user = ElementTree.SubElement(root, Firecrest.USER)
+        user.text = self.user
+        matrix = ElementTree.SubElement(root, Firecrest.MATRIX)
+        matrix.text = self.matrix
+        return root
+
+    def set_elements(self, tree):
+        if tree.tag != Firecrest.FIRECREST:
+            raise ValueError('Expected "Firecrest" SubElements')
+        xml_version = int(tree.attrib.get('version', 0))
+        if xml_version > Firecrest.XML_VERSION:
+            logging.warn('Firecrest XML tree is a higher version than this class')
+        for element in list(tree):
+            if element.tag == Firecrest.SOFTWARE_VERSION:
+                self.version = element.text
+            elif element.tag == Firecrest.START:
+                self.start = int(element.text)
+            elif element.tag == Firecrest.STOP:
+                self.stop = int(element.text)
+            elif element.tag == Firecrest.DATE:
+                self.date = date.fromtimestamp(float(element.text))
+            elif element.tag == Firecrest.USER:
+                self.user = element.text
+            elif element.tag == Firecrest.MATRIX:
+                self.matrix = element.text
+            else:
+                raise ValueError("Unrecognized tag: %s" % (element.tag,))
+
+def firecrest(pathname):
+    """
+    Examine the directory at pathname and initialize a Firecrest object
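+
+    The directory name is assumed to look like
+    C1-36_Firecrest1.9.1_14-11-2007_king (cycles, version, date, user).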
+    """
+    f = Firecrest()
+
+    # parse firecrest directory name
+    path, name = os.path.split(pathname)
+    groups = name.split('_')
+    # grab the start/stop cycle information
+    cycle = re.match("C([0-9]+)-([0-9]+)", groups[0])
+    f.start = int(cycle.group(1))
+    f.stop = int(cycle.group(2))
+    # firecrest version
+    version = re.search(VERSION_RE, groups[1])
+    f.version = (version.group(1))
+    # datetime
+    t = time.strptime(groups[2], EUROPEAN_STRPTIME)
+    f.date = date(*t[0:3])
+    # username
+    f.user = groups[3]
+
+    # should I parse this deeper than just stashing the 
+    # contents of the matrix file?
+    matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
+    f.matrix = open(matrix_pathname, 'r').read()
+    return f
+
+def fromxml(tree):
+    """
+    Initialize a Firecrest object from an element tree node
+    """
+    f = Firecrest()
+    f.set_elements(tree)
+    return f
diff --git a/htsworkflow/pipeline/genome_mapper.py b/htsworkflow/pipeline/genome_mapper.py
new file mode 100644 (file)
index 0000000..a8ea871
--- /dev/null
@@ -0,0 +1,137 @@
+#!/usr/bin/python
+import glob
+import sys
+import os
+import re
+
+import logging
+
+from htsworkflow.util.alphanum import alphanum
+
+class DuplicateGenome(Exception): pass
+
+
+def _has_metainfo(genome_dir):
+  metapath = os.path.join(genome_dir, '_metainfo_')
+  if os.path.isfile(metapath):
+    return True
+  else:
+    return False
+
+def getAvailableGenomes(genome_base_dir):
+  """
+  raises IOError (on genome_base_dir not found)
+  raises DuplicateGenome on duplicate genomes found.
+  
+  returns a double dictionary (i.e. d[species][build] = path)
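+
+  Genome directories are recognized by a '_metainfo_' file whose first
+  line is 'species|build' (e.g., hypothetically, 'Homo sapiens|hg18').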
+  """
+
+  # Need valid directory
+  if not os.path.exists(genome_base_dir):
+    msg = "Directory does not exist: %s" % (genome_base_dir)
+    raise IOError, msg
+
+  # Find all subdirectories
+  filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
+  potential_genome_dirs = \
+    [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
+
+  # Get list of metadata files
+  genome_dir_list = \
+    [ dirpath \
+      for dirpath in potential_genome_dirs \
+      if _has_metainfo(dirpath) ]
+
+  # Genome double dictionary
+  d = {}
+
+  for genome_dir in genome_dir_list:
+    line = open(os.path.join(genome_dir, '_metainfo_'), 'r').readline().strip()
+
+    # Get species, build... log and skip on failure
+    try:
+      species, build = line.split('|')
+    except ValueError:
+      logging.warning('Skipping: Invalid metainfo (%s) line: %s' \
+                      % (os.path.join(genome_dir, '_metainfo_'), line))
+      continue
+
+    build_dict = d.setdefault(species, {})
+    if build in build_dict:
+      msg = "Duplicate genome for %s|%s" % (species, build)
+      raise DuplicateGenome, msg
+
+    build_dict[build] = genome_dir
+
+  return d
+  
+
+class constructMapperDict(object):
+    """
+    Emulate a dictionary to map genome|build names to paths.
+    
+    It uses the dictionary generated by getAvailableGenomes.
+    """
+    def __init__(self, genome_dict):
+        self.genome_dict = genome_dict
+        
+    def __getitem__(self, key):
+        """
+        Return the best match for key
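+
+        key may be 'species' (picks the "latest" build by alphanum
+        ordering) or 'species|build' (exact lookup).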
+        """
+        elements = re.split("\|", key)
+          
+        if len(elements) == 1:
+            # we only have the species name
+            # get the set of builds
+            builds = self.genome_dict[elements[0]]
+            
+            # sort build names the way humans would
+            keys = builds.keys()
+            keys.sort(cmp=alphanum)
+            
+            # return the path from the 'last' build name
+            return builds[keys[-1]]
+                        
+        elif len(elements) == 2:
+            # we have species, and build name
+            return self.genome_dict[elements[0]][elements[1]]
+        else:
+            raise KeyError("Unrecognized key")
+        
+    def keys(self):
+        keys = []
+        for species in self.genome_dict.keys():
+            for build in self.genome_dict[species]:
+                keys.append(species+'|'+build)
+        return keys
+            
+    def values(self):
+        values = []
+        for species in self.genome_dict.keys():
+            for build in self.genome_dict[species]:
+                values.append(self.genome_dict[species][build])
+        return values
+       
+    def items(self):
+        items = []
+        for species in self.genome_dict.keys():
+            for build in self.genome_dict[species]:
+                key = species+'|'+build
+                value = self.genome_dict[species][build]
+                items.append((key, value))
+        return items
+            
+if __name__ == '__main__':
+
+  if len(sys.argv) != 2:
+    print 'usage: %s <base_genome_dir>' % (sys.argv[0])
+    sys.exit(1)
+
+  d = getAvailableGenomes(sys.argv[1])
+  d2 = constructMapperDict(d)
+
+  for k,v in d2.items():
+    print '%s: %s' % (k,v)
+  
+  
diff --git a/htsworkflow/pipeline/gerald.py b/htsworkflow/pipeline/gerald.py
new file mode 100644 (file)
index 0000000..07ca009
--- /dev/null
@@ -0,0 +1,719 @@
+"""
+Provide access to information stored in the GERALD directory.
+"""
+from datetime import datetime, date
+from glob import glob
+import logging
+import os
+import stat
+import time
+import types
+
+from htsworkflow.pipeline.runfolder import \
+   ElementTree, \
+   EUROPEAN_STRPTIME, \
+   LANES_PER_FLOWCELL, \
+   VERSION_RE
+from htsworkflow.util.ethelp import indent, flatten
+from htsworkflow.util.opener import autoopen
+
+class Gerald(object):
+    """
+    Capture meaning out of the GERALD directory
+    """
+    XML_VERSION = 1
+    GERALD='Gerald'
+    RUN_PARAMETERS='RunParameters'
+    SUMMARY='Summary'
+
+    class LaneParameters(object):
+        """
+        Make it easy to access elements of LaneSpecificRunParameters from python
+        """
+        def __init__(self, gerald, key):
+            self._gerald = gerald
+            self._key = key
+        
+        def __get_attribute(self, xml_tag):
+            subtree = self._gerald.tree.find('LaneSpecificRunParameters')
+            container = subtree.find(xml_tag)
+            if container is None:
+                return None
+            if len(container.getchildren()) > LANES_PER_FLOWCELL:
+                raise RuntimeError('GERALD config.xml file changed')
+            lanes = [x.tag.split('_')[1] for x in container.getchildren()]
+            index = lanes.index(self._key)
+            element = container[index]
+            return element.text
+        def _get_analysis(self):
+            return self.__get_attribute('ANALYSIS')
+        analysis = property(_get_analysis)
+
+        def _get_eland_genome(self):
+            genome = self.__get_attribute('ELAND_GENOME')
+            # default to the chipwide parameters if there isn't an
+            # entry in the lane specific parameters
+            if genome is None:
+                subtree = self._gerald.tree.find('ChipWideRunParameters')
+                container = subtree.find('ELAND_GENOME')
+                genome = container.text
+            return genome
+        eland_genome = property(_get_eland_genome)
+
+        def _get_read_length(self):
+            return self.__get_attribute('READ_LENGTH')
+        read_length = property(_get_read_length)
+
+        def _get_use_bases(self):
+            return self.__get_attribute('USE_BASES')
+        use_bases = property(_get_use_bases)
+
+    class LaneSpecificRunParameters(object):
+        """
+        Provide access to LaneSpecificRunParameters
+        """
+        def __init__(self, gerald):
+            self._gerald = gerald
+            self._keys = None
+        def __getitem__(self, key):
+            return Gerald.LaneParameters(self._gerald, key)
+        def keys(self):
+            if self._keys is None:
+                tree = self._gerald.tree
+                analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
+                # according to the pipeline specs I think their fields 
+                # are sampleName_laneID, with sampleName defaulting to s
+                # since laneIDs are constant lets just try using 
+                # those consistently.
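+                # e.g. an ANALYSIS child tagged 's_1' yields lane ID '1'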
+                self._keys = [ x.tag.split('_')[1] for x in analysis]
+            return self._keys
+        def values(self):
+            return [ self[x] for x in self.keys() ]
+        def items(self):
+            return zip(self.keys(), self.values())
+        def __len__(self):
+            return len(self.keys())
+
+    def __init__(self, xml=None):
+        self.pathname = None
+        self.tree = None
+
+        # parse lane parameters out of the config.xml file
+        self.lanes = Gerald.LaneSpecificRunParameters(self)
+
+        self.summary = None
+        self.eland_results = None
+
+        if xml is not None:
+            self.set_elements(xml)
+
+    def _get_date(self):
+        if self.tree is None:
+            return datetime.today()
+        timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
+        epochstamp = time.mktime(time.strptime(timestamp, '%c'))
+        return datetime.fromtimestamp(epochstamp)
+    date = property(_get_date)
+
+    def _get_time(self):
+        return time.mktime(self.date.timetuple())
+    time = property(_get_time, doc='return run time as seconds since epoch')
+
+    def _get_version(self):
+        if self.tree is None:
+            return None
+        return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION')
+    version = property(_get_version)
+
+    def dump(self):
+        """
+        Debugging function, report current object
+        """
+        print 'Gerald version:', self.version
+        print 'Gerald run date:', self.date
+        print 'Gerald config.xml:', self.tree
+        self.summary.dump()
+
+    def get_elements(self):
+        if self.tree is None or self.summary is None:
+            return None
+
+        gerald = ElementTree.Element(Gerald.GERALD, 
+                                     {'version': unicode(Gerald.XML_VERSION)})
+        gerald.append(self.tree)
+        gerald.append(self.summary.get_elements())
+        if self.eland_results:
+            gerald.append(self.eland_results.get_elements())
+        return gerald
+
+    def set_elements(self, tree):
+        if tree.tag != Gerald.GERALD:
+            raise ValueError('expected GERALD')
+        xml_version = int(tree.attrib.get('version', 0))
+        if xml_version > Gerald.XML_VERSION:
+            logging.warn('XML tree is a higher version than this class')
+        for element in list(tree):
+            tag = element.tag.lower()
+            if tag == Gerald.RUN_PARAMETERS.lower():
+                self.tree = element
+            elif tag == Gerald.SUMMARY.lower():
+                self.summary = Summary(xml=element)
+            elif tag == ELAND.ELAND.lower():
+                self.eland_results = ELAND(xml=element)
+            else:
+                logging.warn("Unrecognized tag %s" % (element.tag,))
+        
+
+def gerald(pathname):
+    g = Gerald()
+    g.pathname = pathname
+    path, name = os.path.split(pathname)
+    config_pathname = os.path.join(pathname, 'config.xml')
+    g.tree = ElementTree.parse(config_pathname).getroot()
+
+    # parse Summary.htm file
+    summary_pathname = os.path.join(pathname, 'Summary.htm')
+    g.summary = Summary(summary_pathname)
+    # parse eland files
+    g.eland_results = eland(g.pathname, g)
+    return g
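+
+# A minimal usage sketch (hypothetical path; assumes a finished GERALD
+# run with config.xml, Summary.htm and eland result files present):
+#   g = gerald('/example/Bustard1.8.28/GERALD_19-04-2008_diane')
+#   print g.version, g.date
+#   for lane_id, lane in g.eland_results.items():
+#       print lane_id, lane.sample_name, lane.reads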
+
+def tonumber(v):
+    """
+    Convert a value to an int if possible, otherwise to a float.
+    """
+    try:
+        v = int(v)
+    except ValueError, e:
+        v = float(v)
+    return v
+
+def parse_mean_range(value):
+    """
+    Parse values like 123 +/- 4.5
+    """
+    if value.strip() == 'unknown':
+        return 0, 0
+
+    average, pm, deviation = value.split()
+    if pm != '+/-':
+        raise RuntimeError("Summary.htm file format changed")
+    return tonumber(average), tonumber(deviation)
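+
+# For example (values invented for illustration):
+#   parse_mean_range('12345 +/- 67.8')  ->  (12345, 67.8)
+#   parse_mean_range('unknown')         ->  (0, 0)
+# tonumber keeps '12345' as an int while '67.8' becomes a float.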
+
+def make_mean_range_element(parent, name, mean, deviation):
+    """
+    Make an ElementTree subelement <Name mean='mean' deviation='deviation'/>
+    """
+    element = ElementTree.SubElement(parent, name,
+                                     { 'mean': unicode(mean),
+                                       'deviation': unicode(deviation)})
+    return element
+
+def parse_mean_range_element(element):
+    """
+    Grab mean/deviation out of element
+    """
+    return (tonumber(element.attrib['mean']), 
+            tonumber(element.attrib['deviation']))
+
+def parse_summary_element(element):
+    """
+    Determine if we have a simple element or a mean/deviation element
+    """
+    if len(element.attrib) > 0:
+        return parse_mean_range_element(element)
+    else:
+        return element.text
+
+class Summary(object):
+    """
+    Extract some useful information from the Summary.htm file
+    """
+    XML_VERSION = 2
+    SUMMARY = 'Summary'
+
+    class LaneResultSummary(object):
+        """
+        Parse the LaneResultSummary table out of Summary.htm
+        Mostly for the cluster number
+        """
+        LANE_RESULT_SUMMARY = 'LaneResultSummary'
+        TAGS = { 
+          'LaneYield': 'lane_yield',
+          'Cluster': 'cluster', # Raw
+          'ClusterPF': 'cluster_pass_filter',
+          'AverageFirstCycleIntensity': 'average_first_cycle_intensity',
+          'PercentIntensityAfter20Cycles': 'percent_intensity_after_20_cycles',
+          'PercentPassFilterClusters': 'percent_pass_filter_clusters',
+          'PercentPassFilterAlign': 'percent_pass_filter_align',
+          'AverageAlignmentScore': 'average_alignment_score',
+          'PercentErrorRate': 'percent_error_rate'
+        }
+                 
+        def __init__(self, html=None, xml=None):
+            self.lane = None
+            self.lane_yield = None
+            self.cluster = None
+            self.cluster_pass_filter = None
+            self.average_first_cycle_intensity = None
+            self.percent_intensity_after_20_cycles = None
+            self.percent_pass_filter_clusters = None
+            self.percent_pass_filter_align = None
+            self.average_alignment_score = None
+            self.percent_error_rate = None
+
+            if html is not None:
+                self.set_elements_from_html(html)
+            if xml is not None:
+                self.set_elements(xml)
+
+        def set_elements_from_html(self, data):
+            if len(data) not in (8, 10):
+                raise RuntimeError("Summary.htm file format changed")
+
+            # same in pre-0.3.0 Summary file and 0.3 summary file
+            self.lane = data[0]
+
+            if len(data) == 8:
+                parsed_data = [ parse_mean_range(x) for x in data[1:] ]
+                # this is the < 0.3 Pipeline version
+                self.cluster = parsed_data[0]
+                self.average_first_cycle_intensity = parsed_data[1]
+                self.percent_intensity_after_20_cycles = parsed_data[2]
+                self.percent_pass_filter_clusters = parsed_data[3]
+                self.percent_pass_filter_align = parsed_data[4]
+                self.average_alignment_score = parsed_data[5]
+                self.percent_error_rate = parsed_data[6]
+            elif len(data) == 10:
+                parsed_data = [ parse_mean_range(x) for x in data[2:] ]
+                # this is the >= 0.3 summary file
+                self.lane_yield = data[1]
+                self.cluster = parsed_data[0]
+                self.cluster_pass_filter = parsed_data[1]
+                self.average_first_cycle_intensity = parsed_data[2]
+                self.percent_intensity_after_20_cycles = parsed_data[3]
+                self.percent_pass_filter_clusters = parsed_data[4]
+                self.percent_pass_filter_align = parsed_data[5]
+                self.average_alignment_score = parsed_data[6]
+                self.percent_error_rate = parsed_data[7]
+
+        def get_elements(self):
+            lane_result = ElementTree.Element(
+                            Summary.LaneResultSummary.LANE_RESULT_SUMMARY, 
+                            {'lane': self.lane})
+            for tag, variable_name in Summary.LaneResultSummary.TAGS.items():
+                value = getattr(self, variable_name)
+                if value is None:
+                    continue
+                # it looks like a sequence
+                elif type(value) in (types.TupleType, types.ListType):
+                    element = make_mean_range_element(
+                      lane_result,
+                      tag,
+                      *value
+                    )
+                else:
+                    element = ElementTree.SubElement(lane_result, tag)
+                    element.text = value
+            return lane_result
+
+        def set_elements(self, tree):
+            if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY:
+                raise ValueError('Expected %s' % (
+                        Summary.LaneResultSummary.LANE_RESULT_SUMMARY))
+            self.lane = tree.attrib['lane']
+            tags = Summary.LaneResultSummary.TAGS
+            for element in list(tree):
+                try:
+                    variable_name = tags[element.tag]
+                    setattr(self, variable_name, 
+                            parse_summary_element(element))
+                except KeyError, e:
+                    logging.warn('Unrecognized tag %s' % (element.tag,))
+
+    def __init__(self, filename=None, xml=None):
+        self.lane_results = {}
+
+        if filename is not None:
+            self._extract_lane_results(filename)
+        if xml is not None:
+            self.set_elements(xml)
+
+    def __getitem__(self, key):
+        return self.lane_results[key]
+
+    def __len__(self):
+        return len(self.lane_results)
+
+    def keys(self):
+        return self.lane_results.keys()
+
+    def values(self):
+        return self.lane_results.values()
+
+    def items(self):
+        return self.lane_results.items()
+
+    def _flattened_row(self, row):
+        """
+        flatten the children of a <tr>...</tr>
+        """
+        return [flatten(x) for x in row.getchildren() ]
+    
+    def _parse_table(self, table):
+        """
+        assumes the first line is the header of a table, 
+        and that the remaining rows are data
+        """
+        rows = table.getchildren()
+        data = []
+        for r in rows:
+            data.append(self._flattened_row(r))
+        return data
+    
+    def _extract_named_tables(self, pathname):
+        """
+        extract all the 'named' tables from a Summary.htm file
+        and return as a dictionary
+        
+        Named tables are <h2>...</h2><table>...</table> pairs;
+        the contents of the h2 tag are considered to be the name
+        of the table.
+        """
+        tree = ElementTree.parse(pathname).getroot()
+        body = tree.find('body')
+        tables = {}
+        # stop one short so body[i+1] is always a valid index
+        for i in range(len(body) - 1):
+            if body[i].tag == 'h2' and body[i+1].tag == 'table':
+                # we have an interesting table
+                name = flatten(body[i])
+                table = body[i+1]
+                data = self._parse_table(table)
+                tables[name] = data
+        return tables
+
+    def _extract_lane_results(self, pathname):
+        """
+        extract the Lane Results Summary table
+        """
+
+        tables = self._extract_named_tables(pathname)
+
+        # parse lane result summary
+        lane_summary = tables['Lane Results Summary']
+        # this is version 1 of the summary file
+        if len(lane_summary[-1]) == 8:
+            # strip header
+            headers = lane_summary[0]
+            # grab the lane by lane data
+            lane_summary = lane_summary[1:]
+
+        # this is version 2 of the summary file
+        if len(lane_summary[-1]) == 10:
+            # lane_summary[0] is a different less specific header row
+            headers = lane_summary[1]
+            lane_summary = lane_summary[2:10]
+            # after the last lane, there's a set of chip wide averages
+
+        for r in lane_summary:
+            lrs = Summary.LaneResultSummary(html=r)
+            self.lane_results[lrs.lane] = lrs
+
+    def get_elements(self):
+        summary = ElementTree.Element(Summary.SUMMARY, 
+                                      {'version': unicode(Summary.XML_VERSION)})
+        for lane in self.lane_results.values():
+            summary.append(lane.get_elements())
+        return summary
+
+    def set_elements(self, tree):
+        if tree.tag != Summary.SUMMARY:
+            raise ValueError("Expected %s" % (Summary.SUMMARY,))
+        xml_version = int(tree.attrib.get('version', 0))
+        if xml_version > Summary.XML_VERSION:
+            logging.warn('Summary XML tree is a higher version than this class')
+        for element in list(tree):
+            lrs = Summary.LaneResultSummary()
+            lrs.set_elements(element)
+            self.lane_results[lrs.lane] = lrs
+
+    def dump(self):
+        """
+        Debugging function, report current object
+        """
+        pass
+
+
+def build_genome_fasta_map(genome_dir):
+    # build fasta to fasta file map
+    genome = genome_dir.split(os.path.sep)[-1]
+    fasta_map = {}
+    for vld_file in glob(os.path.join(genome_dir, '*.vld')):
+        is_link = False
+        if os.path.islink(vld_file):
+            is_link = True
+        vld_file = os.path.realpath(vld_file)
+        path, vld_name = os.path.split(vld_file)
+        name, ext = os.path.splitext(vld_name)
+        if is_link:
+            fasta_map[name] = name
+        else:
+            fasta_map[name] = os.path.join(genome, name)
+    return fasta_map
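+
+# Sketch of the resulting map (hypothetical genome directory): given
+# /genomes/mm9 containing chr1.fa.vld plus a symlinked spike.fa.vld,
+# build_genome_fasta_map('/genomes/mm9') would return something like
+#   {'chr1.fa': 'mm9/chr1.fa', 'spike.fa': 'spike.fa'}
+# symlinked (spike-in) entries keep their bare name, while real files
+# are prefixed with the genome so summarize_mapped_reads can tell the
+# two apart.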
+    
+class ElandLane(object):
+    """
+    Process an eland result file
+    """
+    XML_VERSION = 1
+    LANE = 'ElandLane'
+    SAMPLE_NAME = 'SampleName'
+    LANE_ID = 'LaneID'
+    GENOME_MAP = 'GenomeMap'
+    GENOME_ITEM = 'GenomeItem'
+    MAPPED_READS = 'MappedReads'
+    MAPPED_ITEM = 'MappedItem'
+    MATCH_CODES = 'MatchCodes'
+    MATCH_ITEM = 'Code'
+    READS = 'Reads'
+
+    def __init__(self, pathname=None, genome_map=None, xml=None):
+        self.pathname = pathname
+        self._sample_name = None
+        self._lane_id = None
+        self._reads = None
+        self._mapped_reads = None
+        self._match_codes = None
+        if genome_map is None:
+            genome_map = {}
+        self.genome_map = genome_map
+        
+        if xml is not None:
+            self.set_elements(xml)
+
+    def _update(self):
+        """
+        Read the eland result file and count the reads.
+        """
+        # can't do anything if we don't have a file to process
+        if self.pathname is None:
+            return
+
+        if os.stat(self.pathname)[stat.ST_SIZE] == 0:
+            raise RuntimeError("Eland isn't done, try again later.")
+
+        reads = 0
+        mapped_reads = {}
+
+        match_codes = {'NM':0, 'QC':0, 'RM':0, 
+                       'U0':0, 'U1':0, 'U2':0,
+                       'R0':0, 'R1':0, 'R2':0,
+                      }
+        for line in autoopen(self.pathname,'r'):
+            reads += 1
+            fields = line.split()
+            # code = fields[2]
+            # match_codes[code] = match_codes.setdefault(code, 0) + 1
+            # the QC/NM etc codes are in the 3rd field and always present
+            match_codes[fields[2]] += 1
+            # ignore lines that don't have a fasta filename
+            if len(fields) < 7:
+                continue
+            fasta = self.genome_map.get(fields[6], fields[6])
+            mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
+        self._match_codes = match_codes
+        self._mapped_reads = mapped_reads
+        self._reads = reads
+
+    def _update_name(self):
+        # extract the sample name
+        if self.pathname is None:
+            return
+
+        path, name = os.path.split(self.pathname)
+        split_name = name.split('_')
+        self._sample_name = split_name[0]
+        self._lane_id = split_name[1]
+
+    def _get_sample_name(self):
+        if self._sample_name is None:
+            self._update_name()
+        return self._sample_name
+    sample_name = property(_get_sample_name)
+
+    def _get_lane_id(self):
+        if self._lane_id is None:
+            self._update_name()
+        return self._lane_id
+    lane_id = property(_get_lane_id)
+
+    def _get_reads(self):
+        if self._reads is None:
+            self._update()
+        return self._reads
+    reads = property(_get_reads)
+
+    def _get_mapped_reads(self):
+        if self._mapped_reads is None:
+            self._update()
+        return self._mapped_reads
+    mapped_reads = property(_get_mapped_reads)
+
+    def _get_match_codes(self):
+        if self._match_codes is None:
+            self._update()
+        return self._match_codes
+    match_codes = property(_get_match_codes)
+
+    def get_elements(self):
+        lane = ElementTree.Element(ElandLane.LANE, 
+                                   {'version': 
+                                    unicode(ElandLane.XML_VERSION)})
+        sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME)
+        sample_tag.text = self.sample_name
+        lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
+        lane_tag.text = self.lane_id
+        genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
+        for k, v in self.genome_map.items():
+            item = ElementTree.SubElement(
+                genome_map, ElandLane.GENOME_ITEM, 
+                {'name':k, 'value':unicode(v)})
+        mapped_reads = ElementTree.SubElement(lane, ElandLane.MAPPED_READS)
+        for k, v in self.mapped_reads.items():
+            item = ElementTree.SubElement(
+                mapped_reads, ElandLane.MAPPED_ITEM, 
+                {'name':k, 'value':unicode(v)})
+        match_codes = ElementTree.SubElement(lane, ElandLane.MATCH_CODES)
+        for k, v in self.match_codes.items():
+            item = ElementTree.SubElement(
+                match_codes, ElandLane.MATCH_ITEM, 
+                {'name':k, 'value':unicode(v)})
+        reads = ElementTree.SubElement(lane, ElandLane.READS)
+        reads.text = unicode(self.reads)
+
+        return lane
+
+    def set_elements(self, tree):
+        if tree.tag != ElandLane.LANE:
+            raise ValueError('Expecting %s' % (ElandLane.LANE,))
+
+        # reset dictionaries
+        self._mapped_reads = {}
+        self._match_codes = {}
+        
+        for element in tree:
+            tag = element.tag.lower()
+            if tag == ElandLane.SAMPLE_NAME.lower():
+                self._sample_name = element.text
+            elif tag == ElandLane.LANE_ID.lower():
+                self._lane_id = element.text
+            elif tag == ElandLane.GENOME_MAP.lower():
+                for child in element:
+                    name = child.attrib['name']
+                    value = child.attrib['value']
+                    self.genome_map[name] = value
+            elif tag == ElandLane.MAPPED_READS.lower():
+                for child in element:
+                    name = child.attrib['name']
+                    value = child.attrib['value']
+                    self._mapped_reads[name] = int(value)
+            elif tag == ElandLane.MATCH_CODES.lower():
+                for child in element:
+                    name = child.attrib['name']
+                    value = int(child.attrib['value'])
+                    self._match_codes[name] = value
+            elif tag == ElandLane.READS.lower():
+                self._reads = int(element.text)
+            else:
+                logging.warn("ElandLane unrecognized tag %s" % (element.tag,))
+
+def extract_eland_sequence(instream, outstream, start, end):
+    """
+    Extract a chunk of sequence out of an eland file
+    """
+    for line in instream:
+        record = line.split()
+        if len(record) > 1:
+            result = [record[0], record[1][start:end]]
+        else:
+            result = [record[0][start:end]]
+        outstream.write("\t".join(result))
+        outstream.write(os.linesep)
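+
+# Example (illustrative only; assumes an eland-style read-name/sequence
+# pair per line):
+#   from StringIO import StringIO
+#   in_stream = StringIO(">read1\tAAACCCGGGTTT\n")
+#   out_stream = StringIO()
+#   extract_eland_sequence(in_stream, out_stream, 0, 6)
+#   # out_stream now holds ">read1\tAAACCC" plus a line separator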
+
+class ELAND(object):
+    """
+    Summarize information from eland files
+    """
+    XML_VERSION = 1
+
+    ELAND = 'ElandCollection'
+    LANE = 'Lane'
+    LANE_ID = 'id'
+
+    def __init__(self, xml=None):
+        # we need information from the gerald config.xml
+        self.results = {}
+        
+        if xml is not None:
+            self.set_elements(xml)
+
+    def __len__(self):
+        return len(self.results)
+
+    def keys(self):
+        return self.results.keys()
+    
+    def values(self):
+        return self.results.values()
+
+    def items(self):
+        return self.results.items()
+
+    def __getitem__(self, key):
+        return self.results[key]
+
+    def get_elements(self):
+        root = ElementTree.Element(ELAND.ELAND, 
+                                   {'version': unicode(ELAND.XML_VERSION)})
+        for lane_id, lane in self.results.items():
+            eland_lane = lane.get_elements()
+            eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
+            root.append(eland_lane)
+        return root
+
+    def set_elements(self, tree):
+        if tree.tag.lower() != ELAND.ELAND.lower():
+            raise ValueError('Expecting %s' % (ELAND.ELAND,))
+        for element in list(tree):
+            lane_id = element.attrib[ELAND.LANE_ID]
+            lane = ElandLane(xml=element)
+            self.results[lane_id] = lane
+
+def eland(basedir, gerald=None, genome_maps=None):
+    e = ELAND()
+
+    file_list = glob(os.path.join(basedir, "*_eland_result.txt"))
+    if len(file_list) == 0:
+        # let's handle compressed eland files too
+        file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
+
+    for pathname in file_list:
+        # yes, the lane_id is also computed in ElandLane._update_name;
+        # I didn't want to clutter up the constructor,
+        # but I needed to persist the sample_name/lane_id for
+        # runfolder's summary_report
+        path, name = os.path.split(pathname)
+        split_name = name.split('_')
+        lane_id = split_name[1]
+
+        if genome_maps is not None:
+            genome_map = genome_maps[lane_id]
+        elif gerald is not None:
+            genome_dir = gerald.lanes[lane_id].eland_genome
+            genome_map = build_genome_fasta_map(genome_dir)
+        else:
+            genome_map = {}
+
+        eland_result = ElandLane(pathname, genome_map)
+        e.results[lane_id] = eland_result
+    return e
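+
+# Usage sketch (hypothetical GERALD directory; gerald and genome_maps
+# are optional ways of resolving each lane's genome directory):
+#   e = eland('/example/GERALD_19-04-2008_diane')
+#   for lane_id, lane in e.items():
+#       print lane_id, lane.reads, lane.match_codes['U0']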
diff --git a/htsworkflow/pipeline/recipe_parser.py b/htsworkflow/pipeline/recipe_parser.py
new file mode 100644 (file)
index 0000000..7f5ced6
--- /dev/null
@@ -0,0 +1,48 @@
+from xml import sax
+
+
+def get_cycles(recipe_xml_filepath):
+  """
+  returns the number of cycles found in Recipe*.xml
+  """
+  handler = CycleXmlHandler()
+  sax.parse(recipe_xml_filepath, handler)
+  return handler.cycle_count
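+
+# For a hypothetical Recipe_GA2_36Cycle.xml whose <Protocol> section
+# contains 36 <Incorporation> elements:
+#   get_cycles('Recipe_GA2_36Cycle.xml')  ->  36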
+
+
+
+class CycleXmlHandler(sax.ContentHandler):
+
+  def __init__(self):
+    self.cycle_count = 0
+    self.in_protocol = False
+    sax.ContentHandler.__init__(self)
+
+
+  def startDocument(self):
+    self.cycle_count = 0
+    self.in_protocol = False
+
+
+  def startElement(self, name, attrs):
+
+    # Only count Incorporations as cycles if within
+    # the protocol section of the xml document.
+    if name == "Incorporation" and self.in_protocol:
+      #print 'Found a cycle!'
+      self.cycle_count += 1
+      return
+    
+    elif name == 'Protocol':
+      #print 'In protocol'
+      self.in_protocol = True
+      return
+
+    #print 'Skipping: %s' % (name)
+    
+
+  def endElement(self, name):
+    
+    if name == 'Protocol':
+      #print 'End protocol'
+      self.in_protocol = False
diff --git a/htsworkflow/pipeline/retrieve_config.py b/htsworkflow/pipeline/retrieve_config.py
new file mode 100644 (file)
index 0000000..72cff17
--- /dev/null
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser, IndentedHelpFormatter
+from ConfigParser import SafeConfigParser
+
+import logging
+import os
+import sys
+import urllib2
+
+CONFIG_SYSTEM = '/etc/ga_frontend/ga_frontend.conf'
+CONFIG_USER = os.path.expanduser('~/.ga_frontend.conf')
+
+#Disable or enable commandline arg parsing; disabled by default.
+DISABLE_CMDLINE = True
+
+class FlowCellNotFound(Exception): pass
+class WebError404(Exception): pass
+
+class DummyOptions:
+  """
+  Used when command line parsing is disabled (the default).
+  """
+  def __init__(self):
+    self.url = None
+    self.output_filepath = None
+    self.flowcell = None
+    self.genome_dir = None
+
+class PreformattedDescriptionFormatter(IndentedHelpFormatter):
+  
+  #def format_description(self, description):
+  #  
+  #  if description:
+  #      return description + "\n"
+  #  else:
+  #     return ""
+      
+  def format_epilog(self, epilog):
+    """
+    optparse was removing the preformatted epilog, so this overrides
+    that behavior! Muhahaha!
+    """
+    if epilog:
+        return "\n" + epilog + "\n"
+    else:
+        return ""
+
+
+def constructOptionParser():
+  """
+  returns a pre-setup optparser
+  """
+  global DISABLE_CMDLINE
+  
+  if DISABLE_CMDLINE:
+    return None
+  
+  parser = OptionParser(formatter=PreformattedDescriptionFormatter())
+
+  parser.set_description('Retrieves eland config file from ga_frontend web frontend.')
+  
+  parser.epilog = """
+Config File:
+  * %s (System wide)
+  * %s (User specific; overrides system)
+  * command line overrides all config file options
+  
+  Example Config File:
+  
+    [config_file_server]
+    base_host_url=http://somewhere.domain:port
+""" % (CONFIG_SYSTEM, CONFIG_USER)
+  
+  #Special formatter for allowing preformatted description.
+  ##parser.format_epilog(PreformattedDescriptionFormatter())
+
+  parser.add_option("-u", "--url",
+                    action="store", type="string", dest="url")
+  
+  parser.add_option("-o", "--output",
+                    action="store", type="string", dest="output_filepath")
+  
+  parser.add_option("-f", "--flowcell",
+                    action="store", type="string", dest="flowcell")
+
+  parser.add_option("-g", "--genome_dir",
+                    action="store", type="string", dest="genome_dir")
+  
+  #parser.set_default("url", "default")
+  
+  return parser
+
+def constructConfigParser():
+  """
+  returns a pre-setup config parser
+  """
+  parser = SafeConfigParser()
+  parser.read([CONFIG_SYSTEM, CONFIG_USER])
+  if not parser.has_section('config_file_server'):
+    parser.add_section('config_file_server')
+  if not parser.has_section('local_setup'):
+    parser.add_section('local_setup')
+  
+  return parser
+
+
+def getCombinedOptions():
+  """
+  Returns optparse options after they have been updated with ConfigParser
+  config files and merged with any parsed command line options.
+  """
+  cl_parser = constructOptionParser()
+  conf_parser = constructConfigParser()
+  
+  if cl_parser is None:
+    options = DummyOptions()
+  else:
+    options, args = cl_parser.parse_args()
+  
+  if options.url is None:
+    if conf_parser.has_option('config_file_server', 'base_host_url'):
+      options.url = conf_parser.get('config_file_server', 'base_host_url')
+
+  if options.genome_dir is None:
+    if conf_parser.has_option('local_setup', 'genome_dir'):
+      options.genome_dir = conf_parser.get('local_setup', 'genome_dir')
+  
+  print 'USING OPTIONS:'
+  print ' URL:', options.url
+  print ' OUT:', options.output_filepath
+  print '  FC:', options.flowcell
+  print 'GDIR:', options.genome_dir
+  print ''
+  
+  return options
+
+
+def saveConfigFile(flowcell, base_host_url, output_filepath):
+  """
+  retrieves the flowcell eland config file, given the base_host_url
+  (i.e. http://sub.domain.edu:port)
+  """
+  url = base_host_url + '/eland_config/%s/' % (flowcell)
+  
+  f = open(output_filepath, 'w')
+  #try:
+  try:
+    web = urllib2.urlopen(url)
+  except urllib2.URLError, e:
+    errmsg = 'URLError: %d' % (e.code,)
+    logging.error(errmsg)
+    logging.error('failed to open %s' % (url,))
+    logging.error('%s' % ( e.read(),))
+    raise IOError(errmsg)
+
+  #except IOError, msg:
+  #  if str(msg).find("Connection refused") >= 0:
+  #    print 'Error: Connection refused for: %s' % (url)
+  #    f.close()
+  #    sys.exit(1)
+  #  elif str(msg).find("Name or service not known") >= 0:
+  #    print 'Error: Invalid domain or ip address for: %s' % (url)
+  #    f.close()
+  #    sys.exit(2)
+  #  else:
+  #    raise IOError, msg
+
+  data = web.read()
+
+  if data.find('Hmm, config file for') >= 0:
+    msg = "Flowcell (%s) not found in DB; full url(%s)" % (flowcell, url)
+    raise FlowCellNotFound, msg
+
+  if data.find('404 - Not Found') >= 0:
+    msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
+          "Did you get right port #?" % (flowcell, base_host_url, url)
+    raise FlowCellNotFound, msg
+  
+  f.write(data)
+  web.close()
+  f.close()
+  logging.info('Wrote config file to %s' % (output_filepath,))
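+
+# e.g. (hypothetical host and flowcell):
+#   saveConfigFile('207BTAAXX', 'http://frontend.example.edu:8000',
+#                  '/tmp/207BTAAXX_eland_config.txt')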
+
+  
diff --git a/htsworkflow/pipeline/run_status.py b/htsworkflow/pipeline/run_status.py
new file mode 100644 (file)
index 0000000..39dc54c
--- /dev/null
@@ -0,0 +1,478 @@
+import glob
+import re
+import os
+import sys
+import time
+import threading
+
+s_comment = re.compile('^#')
+s_general_read_len = re.compile('^READ_LENGTH ')
+s_read_len = re.compile('^[1-8]+:READ_LENGTH ')
+
+s_firecrest = None
+
+def _four_digit_num_in_string(num):
+  if num < 0:
+    pass
+  elif num < 10:
+    return '000' + str(num)
+  elif num < 100:
+    return '00' + str(num)
+  elif num < 1000:
+    return '0' + str(num)
+  elif num < 10000:
+    return str(num)
+
+  msg = 'Invalid number: %s' % (num)
+  raise ValueError, msg
+
+def _two_digit_num_in_string(num):
+  if num < 0:
+    pass
+  elif num < 10:
+    return '0' + str(num)
+  elif num < 100:
+    return str(num)
+
+  msg = 'Invalid number: %s' % (num)
+  raise ValueError, msg
+
+
+# FIRECREST PATTERNS
+# _p2f(<pattern>, lane, tile, cycle)
+PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'
+
+# _p2f(<pattern>, lane, tile)
+PATTERN_FIRECREST_INT = 's_%s_%s_02_int.txt'
+PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
+PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
+PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'
+PATTERN_FIRECREST_CLU1 = 's_%s_%s_01_1_clu.txt'
+PATTERN_FIRECREST_CLU2 = 's_%s_%s_01_2_clu.txt'
+PATTERN_FIRECREST_CLU3 = 's_%s_%s_01_3_clu.txt'
+PATTERN_FIRECREST_CLU4 = 's_%s_%s_01_4_clu.txt'
+
+
+# BUSTARD PATTERNS
+# _p2f(<pattern>, lane, tile)
+PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
+PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'
+
+
+
+# GERALD PATTERNS
+# _p2f(<pattern>, lane, tile)
+PATTERN_GERALD_ALLTMP = 's_%s_%s_all.txt.tmp'
+PATTERN_GERALD_QRAWTMP = 's_%s_%s_qraw.txt.tmp'
+PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
+PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
+PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
+PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
+PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
+PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
+PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
+PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
+PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
+PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
+PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'
+
+# _p2f(<pattern>, lane)
+PATTERN_GERALD_SEQPRETMP = 's_%s_seqpre.txt.tmp'
+PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
+PATTERN_GERALD_SIGMEANSTMP = 's_%s_Signal_Means.txt.tmp'
+PATTERN_GERALD_CALLPNG = 's_%s_call.png'
+PATTERN_GERALD_ALLPNG = 's_%s_all.png'
+PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
+PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
+PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
+PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
+PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
+PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
+PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
+PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
+PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
+PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'
+
+
+
+def _p2f(pattern, lane, tile=None, cycle=None):
+  """
+  Converts a pattern plus info into file names
+  """
+
+  # lane, and cycle provided (INVALID)
+  if tile is None and cycle is not None:
+    msg = "Handling of cycle without tile is not currently implemented."
+    raise ValueError, msg
+
+  # lane, tile, cycle provided
+  elif cycle:
+    return pattern % (lane,
+                      _four_digit_num_in_string(tile),
+                      _two_digit_num_in_string(cycle))
+  
+  # lane, tile provided
+  elif tile:
+    return pattern % (lane, _four_digit_num_in_string(tile))
+
+  # lane provided
+  else:
+    return pattern % (lane)
+    
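+
+# _p2f expansion examples (lane/tile/cycle values invented):
+#   _p2f(PATTERN_FIRECREST_QCM, 1, 5, 2)  -> 's_1_0005_02_qcm.xml'
+#   _p2f(PATTERN_FIRECREST_INT, 1, 5)     -> 's_1_0005_02_int.txt'
+#   _p2f(PATTERN_GERALD_LANEFINISHED, 1)  -> 's_1_finished.txt'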
+
+class GARunStatus(object):
+
+  def __init__(self, conf_filepath):
+    """
+    Given an eland config file in the top level directory
+    of a run, predicts the files that will be generated
+    during a run and provides methods for retrieving
+    (completed, total) for each step or entire run.
+    """
+    #print 'self._conf_filepath = %s' % (conf_filepath)
+    self._conf_filepath = conf_filepath
+    self._base_dir, junk = os.path.split(conf_filepath)
+    self._image_dir = os.path.join(self._base_dir, 'Images')
+    
+    self.lanes = []
+    self.lane_read_length = {}
+    self.tiles = None
+    self.cycles = None
+    
+    self.status = {}
+    self.status['firecrest'] = {}
+    self.status['bustard'] = {}
+    self.status['gerald'] = {}
+    
+    self._process_config()
+    self._count_tiles()
+    self._count_cycles()
+    self._generate_expected()
+
+
+  def _process_config(self):
+    """
+    Grabs info from self._conf_filepath
+    """
+    f = open(self._conf_filepath, 'r')
+
+    for line in f:
+
+      #Skip comment lines for now.
+      if s_comment.search(line):
+        continue
+
+      mo =  s_general_read_len.search(line)
+      if mo:
+        read_length = int(line[mo.end():])
+        #Handle general READ_LENGTH
+        for i in range(1,9):
+          self.lane_read_length[i] = read_length
+      
+      mo = s_read_len.search(line)
+      if mo:
+        read_length = int(line[mo.end():])
+        lanes, junk = line.split(':')
+
+        #Convert lanes from string of lanes to list of lane #s.
+        lanes = [ int(i) for i in lanes ]
+
+        
+        for lane in lanes:
+
+          #Keep track of which lanes are being run.
+          if lane not in self.lanes:
+            self.lanes.append(lane)
+
+          #Update with lane specific read lengths
+          self.lane_read_length[lane] = read_length
+
+        self.lanes.sort()
+
+
+  def _count_tiles(self):
+    """
+    Count the number of tiles being used
+    """
+    self.tiles = len(glob.glob(os.path.join(self._image_dir,
+                                            'L001',
+                                            'C1.1',
+                                            's_1_*_a.tif')))
+
+  def _count_cycles(self):
+    """
+    Figures out the number of cycles that are available
+    """
+    #print 'self._image_dir = %s' % (self._image_dir)
+    cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
+    #print 'cycle_dirs = %s' % (cycle_dirs)
+    cycle_list = []
+    for cycle_dir in cycle_dirs:
+      junk, c = os.path.split(cycle_dir)
+      cycle_list.append(int(c[1:c.find('.')]))
+
+    self.cycles = max(cycle_list)
+    
+
+
+
+  def _generate_expected(self):
+    """
+    generates a list of files we expect to find.
+    """
+
+    firecrest = self.status['firecrest']
+    bustard = self.status['bustard']
+    gerald = self.status['gerald']
+    
+    
+    for lane in self.lanes:
+      for tile in range(1,self.tiles+1):
+        for cycle in range(1, self.cycles+1):
+
+          ##########################
+          # LANE, TILE, CYCLE LAYER
+
+          # FIRECREST
+          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False
+
+
+        ###################
+        # LANE, TILE LAYER
+
+        # FIRECREST
+        firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile)] = False
+
+
+        # BUSTARD
+        bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False
+        bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False
+
+
+        # GERALD
+        #gerald[_p2f(PATTERN_GERALD_ALLTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QRAWTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False
+        gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False
+
+      ###################
+      # LANE LAYER
+
+      # GERALD
+      #gerald[_p2f(PATTERN_GERALD_SEQPRETMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_SIGMEANSTMP, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False
+      
+      
+
+    #################
+    # LOOPS FINISHED
+
+    # FIRECREST
+    firecrest['offsets_finished.txt'] = False
+    firecrest['finished.txt'] = False
+
+    # BUSTARD
+    bustard['finished.txt'] = False
+
+    # GERALD
+    gerald['tiles.txt'] = False
+    gerald['FullAll.htm'] = False
+    #gerald['All.htm.tmp'] = False
+    #gerald['Signal_Means.txt.tmp'] = False
+    #gerald['plotIntensity_for_IVC'] = False
+    #gerald['IVC.htm.tmp'] = False
+    gerald['FullError.htm'] = False
+    gerald['FullPerfect.htm'] = False
+    #gerald['Error.htm.tmp'] = False
+    #gerald['Perfect.htm.tmp'] = False
+    #gerald['Summary.htm.tmp'] = False
+    #gerald['Tile.htm.tmp'] = False
+    gerald['finished.txt'] = False
+    
+  def statusFirecrest(self):
+    """
+    returns (<completed>, <total>)
+    """
+    firecrest = self.status['firecrest']
+    total = len(firecrest)
+    completed = firecrest.values().count(True)
+
+    return (completed, total)
+
+
+  def statusBustard(self):
+    """
+    returns (<completed>, <total>)
+    """
+    bustard = self.status['bustard']
+    total = len(bustard)
+    completed = bustard.values().count(True)
+
+    return (completed, total)
+
+
+  def statusGerald(self):
+    """
+    returns (<completed>, <total>)
+    """
+    gerald = self.status['gerald']
+    total = len(gerald)
+    completed = gerald.values().count(True)
+
+    return (completed, total)
+
+
+  def statusTotal(self):
+    """
+    returns (<completed>, <total>)
+    """
+    #f = firecrest  c = completed
+    #b = bustard    t = total
+    #g = gerald
+    fc, ft = self.statusFirecrest()
+    bc, bt = self.statusBustard()
+    gc, gt = self.statusGerald()
+
+    return (fc+bc+gc, ft+bt+gt)
+
+
+  def statusReport(self):
+    """
+    Generate the basic percent complete report
+    """
+    def _percentCompleted(completed, total):
+      """
+      Returns percent completed as a float
+      """
+      return (completed / float(total)) * 100
+
+    fc, ft = self.statusFirecrest()
+    bc, bt = self.statusBustard()
+    gc, gt = self.statusGerald()
+    tc, tt = self.statusTotal()
+    
+    fp = _percentCompleted(fc, ft)
+    bp = _percentCompleted(bc, bt)
+    gp = _percentCompleted(gc, gt)
+    tp = _percentCompleted(tc, tt)
+    
+    report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
+              '  Bustard: %s%% (%s/%s)' % (bp, bc, bt),
+              '   Gerald: %s%% (%s/%s)' % (gp, gc, gt),
+              '-----------------------',
+              '    Total: %s%% (%s/%s)' % (tp, tc, tt),
+             ]
+    return report
+
+  def updateFirecrest(self, filename):
+    """
+    Marks firecrest filename as being completed.
+    """
+    self.status['firecrest'][filename] = True
+    
+
+  def updateBustard(self, filename):
+    """
+    Marks bustard filename as being completed.
+    """
+    self.status['bustard'][filename] = True
+
+
+  def updateGerald(self, filename):
+    """
+    Marks gerald filename as being completed.
+    """
+    self.status['gerald'][filename] = True
+
+
+
+##################################################
+# Functions to be called by Thread(target=<func>)
+def _cmdLineStatusMonitorFunc(conf_info):
+  """
+  Given a ConfigInfo object, provides status to stdout.
+
+  You should probably use startCmdLineStatusMonitor()
+  instead of this function.
+
+  Use with:
+    t = threading.Thread(target=_cmdLineStatusMonitorFunc,
+                         args=[conf_info])
+    t.setDaemon(True)
+    t.start()
+  """
+  SLEEP_AMOUNT = 30
+
+  while 1:
+    if conf_info.status is None:
+      print "No status object yet."
+      time.sleep(SLEEP_AMOUNT)
+      continue
+
+    report = conf_info.status.statusReport()
+    print os.linesep.join(report)
+    print
+
+    time.sleep(SLEEP_AMOUNT)
+
+
+#############################################
+# Start monitor thread convenience functions
+def startCmdLineStatusMonitor(conf_info):
+  """
+  Starts a command line status monitor given a conf_info object.
+  """
+  t = threading.Thread(target=_cmdLineStatusMonitorFunc, args=[conf_info])
+  t.setDaemon(True)
+  t.start()
+
+from optparse import OptionParser
+def make_parser():
+  usage = "%prog: config file"
+
+  parser = OptionParser(usage)
+  return parser
+  
+def main(cmdline=None):
+  parser = make_parser()
+  opt, args = parser.parse_args(cmdline)
+
+  if len(args) != 1:
+    parser.error("need name of configuration file")
+    
+  status = GARunStatus(args[0])
+  print os.linesep.join(status.statusReport())
+  return 0
+
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
+                   
diff --git a/htsworkflow/pipeline/runfolder.py b/htsworkflow/pipeline/runfolder.py
new file mode 100644 (file)
index 0000000..492d103
--- /dev/null
@@ -0,0 +1,313 @@
+"""
+Core information needed to inspect a runfolder.
+"""
+from glob import glob
+import logging
+import os
+import re
+import shutil
+import stat
+import subprocess
+import sys
+import time
+
+try:
+  from xml.etree import ElementTree
+except ImportError, e:
+  from elementtree import ElementTree
+
+EUROPEAN_STRPTIME = "%d-%m-%Y"
+EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})"
+VERSION_RE = "([0-9\.]+)"
+USER_RE = "([a-zA-Z0-9]+)"
+LANES_PER_FLOWCELL = 8
+
+from htsworkflow.util.alphanum import alphanum
+from htsworkflow.util.ethelp import indent, flatten
+
+
+class PipelineRun(object):
+    """
+    Capture "interesting" information about a pipeline run
+    """
+    XML_VERSION = 1
+    PIPELINE_RUN = 'PipelineRun'
+    FLOWCELL_ID = 'FlowcellID'
+
+    def __init__(self, pathname=None, firecrest=None, bustard=None, gerald=None, xml=None):
+        if pathname is not None:
+          self.pathname = os.path.normpath(pathname)
+        else:
+          self.pathname = None
+        self._name = None
+        self._flowcell_id = None
+        self.firecrest = firecrest
+        self.bustard = bustard
+        self.gerald = gerald
+
+        if xml is not None:
+          self.set_elements(xml)
+    
+    def _get_flowcell_id(self):
+        # extract flowcell ID
+        if self._flowcell_id is None:
+          config_dir = os.path.join(self.pathname, 'Config')
+          flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
+          if os.path.exists(flowcell_id_path):
+            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
+            self._flowcell_id = flowcell_id_tree.findtext('Text')
+          else:
+            path_fields = self.pathname.split('_')
+            if len(path_fields) > 0:
+              # guessing last element of filename
+              flowcell_id = path_fields[-1]
+            else:
+              flowcell_id = 'unknown'
+
+            logging.warning(
+              "Flowcell id was not found, guessing %s" % (
+                flowcell_id))
+            self._flowcell_id = flowcell_id
+        return self._flowcell_id
+    flowcell_id = property(_get_flowcell_id)
+
+    def get_elements(self):
+        """
+        make one master xml file from all of our sub-components.
+        """
+        root = ElementTree.Element(PipelineRun.PIPELINE_RUN)
+        flowcell = ElementTree.SubElement(root, PipelineRun.FLOWCELL_ID)
+        flowcell.text = self.flowcell_id
+        root.append(self.firecrest.get_elements())
+        root.append(self.bustard.get_elements())
+        root.append(self.gerald.get_elements())
+        return root
+
+    def set_elements(self, tree):
+        # this file gets imported by all the others,
+        # so we need to hide the imports to avoid a cyclic imports
+        from htsworkflow.pipeline import firecrest
+        from htsworkflow.pipeline import bustard
+        from htsworkflow.pipeline import gerald
+
+        tag = tree.tag.lower()
+        if tag != PipelineRun.PIPELINE_RUN.lower():
+          raise ValueError('Pipeline Run Expecting %s got %s' % (
+              PipelineRun.PIPELINE_RUN, tag))
+        for element in tree:
+          tag = element.tag.lower()
+          if tag == PipelineRun.FLOWCELL_ID.lower():
+            self._flowcell_id = element.text
+          #ok the xword.Xword.XWORD pattern for module.class.constant is lame
+          elif tag == firecrest.Firecrest.FIRECREST.lower():
+            self.firecrest = firecrest.Firecrest(xml=element)
+          elif tag == bustard.Bustard.BUSTARD.lower():
+            self.bustard = bustard.Bustard(xml=element)
+          elif tag == gerald.Gerald.GERALD.lower():
+            self.gerald = gerald.Gerald(xml=element)
+          else:
+            logging.warn('PipelineRun unrecognized tag %s' % (tag,))
+
+    def _get_run_name(self):
+        """
+        Given a run tuple, find the latest date and use that as our name
+        """
+        if self._name is None:
+          tmax = max(self.firecrest.time, self.bustard.time, self.gerald.time)
+          timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax))
+          self._name = 'run_'+self.flowcell_id+"_"+timestamp+'.xml'
+        return self._name
+    name = property(_get_run_name)
+
+    def save(self, destdir=None):
+        if destdir is None:
+            destdir = ''
+        logging.info("Saving run report "+ self.name)
+        xml = self.get_elements()
+        indent(xml)
+        dest_pathname = os.path.join(destdir, self.name)
+        ElementTree.ElementTree(xml).write(dest_pathname)
+
+    def load(self, filename):
+        logging.info("Loading run report from " + filename)
+        tree = ElementTree.parse(filename).getroot()
+        self.set_elements(tree)
+
+def get_runs(runfolder):
+    """
+    Search through a run folder for all the various sub component runs
+    and then return a PipelineRun for each different combination.
+
+    For example, if there are two different GERALD runs, this will
+    generate two different PipelineRun objects that differ
+    in their gerald component.
+    """
+    from htsworkflow.pipeline import firecrest
+    from htsworkflow.pipeline import bustard
+    from htsworkflow.pipeline import gerald
+
+    datadir = os.path.join(runfolder, 'Data')
+
+    logging.info('Searching for runs in ' + datadir)
+    runs = []
+    for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
+        f = firecrest.firecrest(firecrest_pathname)
+        bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
+        for bustard_pathname in glob(bustard_glob):
+            b = bustard.bustard(bustard_pathname)
+            gerald_glob = os.path.join(bustard_pathname, 'GERALD*')
+            for gerald_pathname in glob(gerald_glob):
+                try:
+                    g = gerald.gerald(gerald_pathname)
+                    runs.append(PipelineRun(runfolder, f, b, g))
+                except IOError, e:
+                    print "Ignoring", str(e)
+    return runs
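+
+# Usage sketch (hypothetical runfolder with the usual
+# Data/*Firecrest*/Bustard*/GERALD* layout):
+#   runs = get_runs('/runfolders/080416_HWI-EAS229_0024_207BTAAXX')
+#   for run in runs:
+#       print run.name, run.flowcell_id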
+                
+    
+def extract_run_parameters(runs):
+    """
+    Search through runfolder_path for various runs and grab their parameters
+    """
+    for run in runs:
+      run.save()
+
+def summarize_mapped_reads(mapped_reads):
+    """
+    Summarize per-chromosome reads into a genome count,
+    but handle spike-in/contamination symlinks separately.
+    """
+    summarized_reads = {}
+    genome_reads = 0
+    genome = 'unknown'
+    for k, v in mapped_reads.items():
+        path, k = os.path.split(k)
+        if len(path) > 0:
+            genome = path
+            genome_reads += v
+        else:
+            summarized_reads[k] = summarized_reads.setdefault(k, 0) + v
+    summarized_reads[genome] = genome_reads
+    return summarized_reads
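+
+# For example (invented counts), using the genome/spike-in split
+# produced by build_genome_fasta_map:
+#   summarize_mapped_reads({'mm9/chr1.fa': 100,
+#                           'mm9/chr2.fa': 50,
+#                           'spike.fa': 7})
+#   ->  {'mm9': 150, 'spike.fa': 7}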
+
+def summary_report(runs):
+    """
+    Summarize cluster numbers and mapped read counts for a runfolder
+    """
+    report = []
+    for run in runs:
+        # print a run name?
+        report.append('Summary for %s' % (run.name,))
+        # sort the report
+        eland_keys = run.gerald.eland_results.results.keys()
+        eland_keys.sort(alphanum)
+
+        lane_results = run.gerald.summary.lane_results
+        for lane_id in eland_keys:
+            result = run.gerald.eland_results.results[lane_id]
+            report.append("Sample name %s" % (result.sample_name))
+            report.append("Lane id %s" % (result.lane_id,))
+            cluster = lane_results[result.lane_id].cluster
+            report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
+            report.append("Total Reads: %d" % (result.reads))
+            mc = result._match_codes
+            nm = mc['NM']
+            nm_percent = float(nm) / result.reads * 100
+            qc = mc['QC']
+            qc_percent = float(qc) / result.reads * 100
+
+            report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
+            report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
+            report.append('Unique (0,1,2 mismatches) %d %d %d' % \
+                          (mc['U0'], mc['U1'], mc['U2']))
+            report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
+                          (mc['R0'], mc['R1'], mc['R2']))
+            report.append("Mapped Reads")
+            mapped_reads = summarize_mapped_reads(result.mapped_reads)
+            for name, counts in mapped_reads.items():
+                report.append("  %s: %d" % (name, counts))
+            report.append('---')
+            report.append('')
+    # return after the loop so the report covers every run
+    return os.linesep.join(report)
+
+def extract_results(runs, output_base_dir=None):
+    if output_base_dir is None:
+        output_base_dir = os.getcwd()
+
+    for r in runs:
+      result_dir = os.path.join(output_base_dir, r.flowcell_id)
+      logging.info("Using %s as result directory" % (result_dir,))
+      if not os.path.exists(result_dir):
+        os.mkdir(result_dir)
+      
+      # create cycle_dir
+      cycle = "C%d-%d" % (r.firecrest.start, r.firecrest.stop)
+      logging.info("Filling in %s" % (cycle,))
+      cycle_dir = os.path.join(result_dir, cycle)
+      if os.path.exists(cycle_dir):
+        logging.error("%s already exists, not overwriting" % (cycle_dir,))
+        continue
+      else:
+        os.mkdir(cycle_dir)
+
+      # copy stuff out of the main run
+      g = r.gerald
+
+      # save run file
+      r.save(cycle_dir)
+
+      # Copy Summary.htm
+      summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
+      if os.path.exists(summary_path):
+          logging.info('Copying %s to %s' % (summary_path, cycle_dir))
+          shutil.copy(summary_path, cycle_dir)
+      else:
+          logging.info('Summary file %s was not found' % (summary_path,))
+
+      # tar score files
+      score_files = []
+      for f in os.listdir(g.pathname):
+          if re.match('.*_score.txt', f):
+              score_files.append(f)
+
+      tar_cmd = ['/bin/tar', 'c'] + score_files
+      bzip_cmd = [ 'bzip2', '-9', '-c' ]
+      tar_dest_name = os.path.join(cycle_dir, 'scores.tar.bz2')
+      tar_dest = open(tar_dest_name, 'w')
+      logging.info("Compressing score files in %s" % (g.pathname,))
+      logging.info("Running tar: " + " ".join(tar_cmd[:10]))
+      logging.info("Running bzip2: " + " ".join(bzip_cmd))
+      logging.info("Writing to %s" %(tar_dest_name))
+      
+      tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, cwd=g.pathname)
+      bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
+      tar.wait()
+      # wait for bzip2 too so scores.tar.bz2 is complete before moving on
+      bzip.wait()
+      tar_dest.close()
+
+      # copy & bzip eland files
+      for eland_lane in g.eland_results.values():
+          source_name = eland_lane.pathname
+          path, name = os.path.split(eland_lane.pathname)
+          dest_name = os.path.join(cycle_dir, name+'.bz2')
+
+          args = ['bzip2', '-9', '-c', source_name]
+          logging.info('Running: %s' % ( " ".join(args) ))
+          bzip_dest = open(dest_name, 'w')
+          bzip = subprocess.Popen(args, stdout=bzip_dest)
+          logging.info('Saving to %s' % (dest_name, ))
+          bzip.wait()
+
+def clean_runs(runs):
+    """
+    Clean up run folders to optimize for compression.
+    """
+    # TODO: implement this.
+    # rm RunLog*.xml
+    # rm pipeline_*.txt
+    # rm gclog.txt
+    # rm NetCopy.log
+    # rm nfn.log
+    # rm Images/L*
+    # cd Data/C1-*_Firecrest*
+    # make clean_intermediate
+
+    pass
diff --git a/htsworkflow/pipeline/test/test_genome_mapper.py b/htsworkflow/pipeline/test/test_genome_mapper.py
new file mode 100644 (file)
index 0000000..8916965
--- /dev/null
@@ -0,0 +1,33 @@
+import unittest
+
+from StringIO import StringIO
+from htsworkflow.pipeline import genome_mapper
+
+class testGenomeMapper(unittest.TestCase):
+    def test_construct_mapper(self):
+        genomes = {
+        'Arabidopsis thaliana': {'v01212004': '/arabidopsis'},
+        'Homo sapiens': {'hg18': '/hg18'},
+        'Mus musculus': {'mm8': '/mm8',
+                        'mm9': '/mm9',
+                        'mm10': '/mm10'},
+        'Phage': {'174': '/phi'},
+        }
+        genome_map = genome_mapper.constructMapperDict(genomes)
+        
+        self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
+        self.failUnlessEqual("%(Phage|174)s" % (genome_map), "/phi")
+        self.failUnlessEqual("%(Mus musculus)s" % (genome_map), "/mm10")
+        self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
+        self.failUnlessEqual("%(Mus musculus|mm10)s" % (genome_map), "/mm10")
+        
+        self.failUnlessEqual(len(genome_map.keys()), 6)
+        self.failUnlessEqual(len(genome_map.values()), 6)
+        self.failUnlessEqual(len(genome_map.items()), 6)
+        
+        
+def suite():
+    return unittest.makeSuite(testGenomeMapper,'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest="suite")
diff --git a/htsworkflow/pipeline/test/test_runfolder026.py b/htsworkflow/pipeline/test/test_runfolder026.py
new file mode 100644 (file)
index 0000000..74247a6
--- /dev/null
@@ -0,0 +1,601 @@
+#!/usr/bin/env python
+
+from datetime import datetime, date
+import os
+import tempfile
+import shutil
+import unittest
+
+from htsworkflow.pipeline import firecrest
+from htsworkflow.pipeline import bustard
+from htsworkflow.pipeline import gerald
+from htsworkflow.pipeline import runfolder
+from htsworkflow.pipeline.runfolder import ElementTree
+
+
+def make_flowcell_id(runfolder_dir, flowcell_id=None):
+    if flowcell_id is None:
+        flowcell_id = '207BTAAXY'
+
+    config = """<?xml version="1.0"?>
+<FlowcellId>
+  <Text>%s</Text>
+</FlowcellId>""" % (flowcell_id,)
+    config_dir = os.path.join(runfolder_dir, 'Config')
+    
+    if not os.path.exists(config_dir):
+        os.mkdir(config_dir)
+    pathname = os.path.join(config_dir, 'FlowcellId.xml')
+    f = open(pathname,'w')
+    f.write(config)
+    f.close()
+
+def make_matrix(matrix_dir):
+    contents = """# Auto-generated frequency response matrix
+> A
+> C
+> G
+> T
+0.77 0.15 -0.04 -0.04 
+0.76 1.02 -0.05 -0.06 
+-0.10 -0.10 1.17 -0.03 
+-0.13 -0.12 0.80 1.27 
+"""
+    s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
+    f = open(s_matrix, 'w')
+    f.write(contents)
+    f.close()
+    
+def make_phasing_params(bustard_dir):
+    for lane in range(1,9):
+        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
+        f = open(pathname, 'w')
+        f.write("""<Parameters>
+  <Phasing>0.009900</Phasing>
+  <Prephasing>0.003500</Prephasing>
+</Parameters>
+""")
+        f.close()
+
+def make_gerald_config(gerald_dir):
+    config_xml = """<RunParameters>
+<ChipWideRunParameters>
+  <ANALYSIS>default</ANALYSIS>
+  <BAD_LANES></BAD_LANES>
+  <BAD_TILES></BAD_TILES>
+  <CONTAM_DIR></CONTAM_DIR>
+  <CONTAM_FILE></CONTAM_FILE>
+  <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
+  <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
+  <ELAND_REPEAT></ELAND_REPEAT>
+  <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
+  <EMAIL_LIST>diane</EMAIL_LIST>
+  <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
+  <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
+  <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
+  <FORCE>1</FORCE>
+  <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
+  <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
+  <HAMSTER_FLAG>genome</HAMSTER_FLAG>
+  <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
+  <POST_RUN_COMMAND></POST_RUN_COMMAND>
+  <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
+  <PURE_BASES>12</PURE_BASES>
+  <QF_PARAMS>'((CHASTITY&gt;=0.6))'</QF_PARAMS>
+  <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
+  <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
+  <READ_LENGTH>32</READ_LENGTH>
+  <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
+  <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
+  <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
+  <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
+  <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
+  <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
+  <TILE_ROOT>s</TILE_ROOT>
+  <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
+  <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
+  <USE_BASES>all</USE_BASES>
+  <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
+</ChipWideRunParameters>
+<LaneSpecificRunParameters>
+  <ANALYSIS>
+    <s_1>eland</s_1>
+    <s_2>eland</s_2>
+    <s_3>eland</s_3>
+    <s_4>eland</s_4>
+    <s_5>eland</s_5>
+    <s_6>eland</s_6>
+    <s_7>eland</s_7>
+    <s_8>eland</s_8>
+  </ANALYSIS>
+  <ELAND_GENOME>
+    <s_1>/g/dm3</s_1>
+    <s_2>/g/equcab1</s_2>
+    <s_3>/g/equcab1</s_3>
+    <s_4>/g/canfam2</s_4>
+    <s_5>/g/hg18</s_5>
+    <s_6>/g/hg18</s_6>
+    <s_7>/g/hg18</s_7>
+    <s_8>/g/hg18</s_8>
+  </ELAND_GENOME>
+  <READ_LENGTH>
+    <s_1>32</s_1>
+    <s_2>32</s_2>
+    <s_3>32</s_3>
+    <s_4>32</s_4>
+    <s_5>32</s_5>
+    <s_6>32</s_6>
+    <s_7>32</s_7>
+    <s_8>32</s_8>
+  </READ_LENGTH>
+  <USE_BASES>
+    <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
+    <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
+    <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
+    <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
+    <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
+    <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
+    <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
+    <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
+  </USE_BASES>
+</LaneSpecificRunParameters>
+</RunParameters>
+"""
+    pathname = os.path.join(gerald_dir, 'config.xml')
+    f = open(pathname,'w')
+    f.write(config_xml)
+    f.close()
+    
+
+def make_summary_htm(gerald_dir):
+    summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
+<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
+<html>
+<body>
+
+<a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
+<h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
+<h2><br></br>Chip Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr><td>Machine</td><td>HWI-EAS229</td></tr>
+<tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
+<tr><td>Chip ID</td><td>unknown</td></tr>
+</table>
+<h2><br></br>Lane Parameter Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane</td>
+<td>Sample ID</td>
+<td>Sample Target</td>
+<td>Sample Type</td>
+<td>Length</td>
+<td>Filter</td>
+<td>Tiles</td>
+</tr>
+<tr>
+<td>1</td>
+<td>unknown</td>
+<td>dm3</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane1">Lane 1</a></td>
+</tr>
+<tr>
+<td>2</td>
+<td>unknown</td>
+<td>equcab1</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane2">Lane 2</a></td>
+</tr>
+<tr>
+<td>3</td>
+<td>unknown</td>
+<td>equcab1</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane3">Lane 3</a></td>
+</tr>
+<tr>
+<td>4</td>
+<td>unknown</td>
+<td>canfam2</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane4">Lane 4</a></td>
+</tr>
+<tr>
+<td>5</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane5">Lane 5</a></td>
+</tr>
+<tr>
+<td>6</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane6">Lane 6</a></td>
+</tr>
+<tr>
+<td>7</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane7">Lane 7</a></td>
+</tr>
+<tr>
+<td>8</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane8">Lane 8</a></td>
+</tr>
+</table>
+<h2><br></br>Lane Results Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+
+<td>Lane </td>
+<td>Clusters </td>
+<td>Av 1st Cycle Int </td>
+<td>% intensity after 20 cycles </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td> % Error Rate (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>17421 +/- 2139</td>
+<td>7230 +/- 801</td>
+<td>23.73 +/- 10.79</td>
+<td>13.00 +/- 22.91</td>
+<td>32.03 +/- 18.45</td>
+<td>6703.57 +/- 3753.85</td>
+<td>4.55 +/- 4.81</td>
+</tr>
+<tr>
+<td>2</td>
+<td>20311 +/- 2402</td>
+<td>7660 +/- 678</td>
+<td>17.03 +/- 4.40</td>
+<td>40.74 +/- 30.33</td>
+<td>29.54 +/- 9.03</td>
+<td>5184.02 +/- 1631.54</td>
+<td>3.27 +/- 3.94</td>
+</tr>
+<tr>
+<td>3</td>
+<td>20193 +/- 2399</td>
+<td>7700 +/- 797</td>
+<td>15.75 +/- 3.30</td>
+<td>56.56 +/- 17.16</td>
+<td>27.33 +/- 7.48</td>
+<td>4803.49 +/- 1313.31</td>
+<td>3.07 +/- 2.86</td>
+</tr>
+<tr>
+<td>4</td>
+<td>15537 +/- 2531</td>
+<td>7620 +/- 1392</td>
+<td>15.37 +/- 3.79</td>
+<td>63.05 +/- 18.30</td>
+<td>15.88 +/- 4.99</td>
+<td>3162.13 +/- 962.59</td>
+<td>3.11 +/- 2.22</td>
+</tr>
+<tr>
+<td>5</td>
+<td>32047 +/- 3356</td>
+<td>8093 +/- 831</td>
+<td>23.79 +/- 6.18</td>
+<td>53.36 +/- 18.06</td>
+<td>48.04 +/- 13.77</td>
+<td>9866.23 +/- 2877.30</td>
+<td>2.26 +/- 1.16</td>
+</tr>
+<tr>
+<td>6</td>
+<td>32946 +/- 4753</td>
+<td>8227 +/- 736</td>
+<td>24.07 +/- 4.69</td>
+<td>54.65 +/- 12.57</td>
+<td>50.98 +/- 10.54</td>
+<td>10468.86 +/- 2228.53</td>
+<td>2.21 +/- 2.33</td>
+</tr>
+<tr>
+<td>7</td>
+<td>39504 +/- 4171</td>
+<td>8401 +/- 785</td>
+<td>22.55 +/- 4.56</td>
+<td>45.22 +/- 10.34</td>
+<td>48.41 +/- 9.67</td>
+<td>9829.40 +/- 1993.20</td>
+<td>2.26 +/- 1.11</td>
+</tr>
+<tr>
+<td>8</td>
+<td>37998 +/- 3792</td>
+<td>8443 +/- 1211</td>
+<td>39.03 +/- 7.52</td>
+<td>42.16 +/- 12.35</td>
+<td>40.98 +/- 14.89</td>
+<td>8128.87 +/- 3055.34</td>
+<td>3.57 +/- 2.77</td>
+</tr>
+</table>
+</body>
+</html>
+"""
+    pathname = os.path.join(gerald_dir, 'Summary.htm')
+    f = open(pathname, 'w')
+    f.write(summary_htm)
+    f.close()
+
+def make_eland_results(gerald_dir):
+    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
+>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
+>HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
+>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
+"""
+    for i in range(1,9):
+        pathname = os.path.join(gerald_dir, 
+                                's_%d_eland_result.txt' % (i,))
+        f = open(pathname, 'w')
+        f.write(eland_result)
+        f.close()
+                     
+class RunfolderTests(unittest.TestCase):
+    """
+    Test components of the runfolder processing code
+    which includes firecrest, bustard, and gerald
+    """
+    def setUp(self):
+        # make a fake runfolder directory
+        self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
+
+        self.runfolder_dir = os.path.join(self.temp_dir, 
+                                          '080102_HWI-EAS229_0010_207BTAAXX')
+        os.mkdir(self.runfolder_dir)
+
+        self.data_dir = os.path.join(self.runfolder_dir, 'Data')
+        os.mkdir(self.data_dir)
+
+        self.firecrest_dir = os.path.join(self.data_dir, 
+                               'C1-33_Firecrest1.8.28_12-04-2008_diane'
+                             )
+        os.mkdir(self.firecrest_dir)
+        self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
+        os.mkdir(self.matrix_dir)
+        make_matrix(self.matrix_dir)
+
+        self.bustard_dir = os.path.join(self.firecrest_dir, 
+                                        'Bustard1.8.28_12-04-2008_diane')
+        os.mkdir(self.bustard_dir)
+        make_phasing_params(self.bustard_dir)
+        
+        self.gerald_dir = os.path.join(self.bustard_dir,
+                                       'GERALD_12-04-2008_diane')
+        os.mkdir(self.gerald_dir)
+        make_gerald_config(self.gerald_dir)
+        make_summary_htm(self.gerald_dir)
+        make_eland_results(self.gerald_dir)
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir)
+
+    def test_firecrest(self):
+        """
+        Construct a firecrest object
+        """
+        f = firecrest.firecrest(self.firecrest_dir)
+        self.failUnlessEqual(f.version, '1.8.28')
+        self.failUnlessEqual(f.start, 1)
+        self.failUnlessEqual(f.stop, 33)
+        self.failUnlessEqual(f.user, 'diane')
+        self.failUnlessEqual(f.date, date(2008,4,12))
+
+        xml = f.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+
+        f2 = firecrest.Firecrest(xml=xml)
+        self.failUnlessEqual(f.version, f2.version)
+        self.failUnlessEqual(f.start,   f2.start)
+        self.failUnlessEqual(f.stop,    f2.stop)
+        self.failUnlessEqual(f.user,    f2.user)
+        self.failUnlessEqual(f.date,    f2.date)
+
+    def test_bustard(self):
+        """
+        construct a bustard object
+        """
+        b = bustard.bustard(self.bustard_dir)
+        self.failUnlessEqual(b.version, '1.8.28')
+        self.failUnlessEqual(b.date,    date(2008,4,12))
+        self.failUnlessEqual(b.user,    'diane')
+        self.failUnlessEqual(len(b.phasing), 8)
+        self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
+        
+        xml = b.get_elements()
+        b2 = bustard.Bustard(xml=xml)
+        self.failUnlessEqual(b.version, b2.version)
+        self.failUnlessEqual(b.date,    b2.date )
+        self.failUnlessEqual(b.user,    b2.user)
+        self.failUnlessEqual(len(b.phasing), len(b2.phasing))
+        for key in b.phasing.keys():
+            self.failUnlessEqual(b.phasing[key].lane, 
+                                 b2.phasing[key].lane)
+            self.failUnlessEqual(b.phasing[key].phasing, 
+                                 b2.phasing[key].phasing)
+            self.failUnlessEqual(b.phasing[key].prephasing, 
+                                 b2.phasing[key].prephasing)
+
+    def test_gerald(self):
+        # need to update gerald and make tests for it
+        g = gerald.gerald(self.gerald_dir) 
+
+        self.failUnlessEqual(g.version, 
+            '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
+        self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
+        self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
+        self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
+
+        
+        # list of genomes, matches what was defined up in 
+        # make_gerald_config.
+        # the first None is to offset the genomes list to be 1..9
+        # instead of pythons default 0..8
+        genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
+                         '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
+
+        # test lane specific parameters from gerald config file
+        for i in range(1,9):
+            cur_lane = g.lanes[str(i)]
+            self.failUnlessEqual(cur_lane.analysis, 'eland')
+            self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
+            self.failUnlessEqual(cur_lane.read_length, '32')
+            self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
+
+        # test data extracted from summary file
+        clusters = [None, 
+                    (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
+                    (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
+
+        for i in range(1,9):
+            summary_lane = g.summary[str(i)]
+            self.failUnlessEqual(summary_lane.cluster, clusters[i])
+            self.failUnlessEqual(summary_lane.lane, str(i))
+
+        xml = g.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+        g2 = gerald.Gerald(xml=xml)
+
+        # do it all again after extracting from the xml file
+        self.failUnlessEqual(g.version, g2.version)
+        self.failUnlessEqual(g.date, g2.date)
+        self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
+        self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
+
+        # test lane specific parameters from gerald config file
+        for i in range(1,9):
+            g_lane = g.lanes[str(i)]
+            g2_lane = g2.lanes[str(i)]
+            self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
+            self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
+            self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
+            self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
+
+        # test (some) summary elements
+        for i in range(1,9):
+            g_summary = g.summary[str(i)]
+            g2_summary = g2.summary[str(i)]
+            self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
+            self.failUnlessEqual(g_summary.lane, g2_summary.lane)
+
+            g_eland = g.eland_results
+            g2_eland = g2.eland_results
+            for lane in g_eland.keys():
+                self.failUnlessEqual(g_eland[lane].reads, 
+                                     g2_eland[lane].reads)
+                self.failUnlessEqual(len(g_eland[lane].mapped_reads), 
+                                     len(g2_eland[lane].mapped_reads))
+                for k in g_eland[lane].mapped_reads.keys():
+                    self.failUnlessEqual(g_eland[lane].mapped_reads[k],
+                                         g2_eland[lane].mapped_reads[k])
+
+                self.failUnlessEqual(len(g_eland[lane].match_codes), 
+                                     len(g2_eland[lane].match_codes))
+                for k in g_eland[lane].match_codes.keys():
+                    self.failUnlessEqual(g_eland[lane].match_codes[k],
+                                         g2_eland[lane].match_codes[k])
+
+
+    def test_eland(self):
+        dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
+                    'chr2L.fa': 'dm3/chr2L.fa',
+                    'Lambda.fa': 'Lambda.fa'}
+        genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
+                        '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+        eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
+        
+        for i in range(1,9):
+            lane = eland[str(i)]
+            self.failUnlessEqual(lane.reads, 4)
+            self.failUnlessEqual(lane.sample_name, "s")
+            self.failUnlessEqual(lane.lane_id, unicode(i))
+            self.failUnlessEqual(len(lane.mapped_reads), 3)
+            self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
+            self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
+            self.failUnlessEqual(lane.match_codes['U1'], 2)
+            self.failUnlessEqual(lane.match_codes['NM'], 1)
+
+        xml = eland.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+        e2 = gerald.ELAND(xml=xml)
+
+        for i in range(1,9):
+            l1 = eland[str(i)]
+            l2 = e2[str(i)]
+            self.failUnlessEqual(l1.reads, l2.reads)
+            self.failUnlessEqual(l1.sample_name, l2.sample_name)
+            self.failUnlessEqual(l1.lane_id, l2.lane_id)
+            self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
+            self.failUnlessEqual(len(l1.mapped_reads), 3)
+            for k in l1.mapped_reads.keys():
+                self.failUnlessEqual(l1.mapped_reads[k],
+                                     l2.mapped_reads[k])
+
+            self.failUnlessEqual(len(l1.match_codes), 9)
+            self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
+            for k in l1.match_codes.keys():
+                self.failUnlessEqual(l1.match_codes[k], 
+                                     l2.match_codes[k])
+
+    def test_runfolder(self):
+        runs = runfolder.get_runs(self.runfolder_dir)
+        
+        # do we get the flowcell id from the filename?
+        self.failUnlessEqual(len(runs), 1)
+        self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
+
+        # do we get the flowcell id from the FlowcellId.xml file?
+        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
+        runs = runfolder.get_runs(self.runfolder_dir)
+        self.failUnlessEqual(len(runs), 1)
+        self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
+        
+        r1 = runs[0]
+        xml = r1.get_elements()
+        xml_str = ElementTree.tostring(xml)
+
+        r2 = runfolder.PipelineRun(xml=xml)
+        self.failUnlessEqual(r1.name, r2.name)
+        self.failIfEqual(r2.firecrest, None)
+        self.failIfEqual(r2.bustard, None)
+        self.failIfEqual(r2.gerald, None)
+        
+
+def suite():
+    return unittest.makeSuite(RunfolderTests,'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest="suite")
+    
diff --git a/htsworkflow/pipeline/test/test_runfolder030.py b/htsworkflow/pipeline/test/test_runfolder030.py
new file mode 100644 (file)
index 0000000..e5d1ea1
--- /dev/null
@@ -0,0 +1,1024 @@
+#!/usr/bin/env python
+
+from datetime import datetime, date
+import os
+import tempfile
+import shutil
+import unittest
+
+from htsworkflow.pipeline import firecrest
+from htsworkflow.pipeline import bustard
+from htsworkflow.pipeline import gerald
+from htsworkflow.pipeline import runfolder
+from htsworkflow.pipeline.runfolder import ElementTree
+
+
+def make_flowcell_id(runfolder_dir, flowcell_id=None):
+    if flowcell_id is None:
+        flowcell_id = '207BTAAXY'
+
+    config = """<?xml version="1.0"?>
+<FlowcellId>
+  <Text>%s</Text>
+</FlowcellId>""" % (flowcell_id,)
+    config_dir = os.path.join(runfolder_dir, 'Config')
+    
+    if not os.path.exists(config_dir):
+        os.mkdir(config_dir)
+    pathname = os.path.join(config_dir, 'FlowcellId.xml')
+    f = open(pathname,'w')
+    f.write(config)
+    f.close()
+
+def make_matrix(matrix_dir):
+    contents = """# Auto-generated frequency response matrix
+> A
+> C
+> G
+> T
+0.77 0.15 -0.04 -0.04 
+0.76 1.02 -0.05 -0.06 
+-0.10 -0.10 1.17 -0.03 
+-0.13 -0.12 0.80 1.27 
+"""
+    s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
+    f = open(s_matrix, 'w')
+    f.write(contents)
+    f.close()
+    
+def make_phasing_params(bustard_dir):
+    for lane in range(1,9):
+        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
+        f = open(pathname, 'w')
+        f.write("""<Parameters>
+  <Phasing>0.009900</Phasing>
+  <Prephasing>0.003500</Prephasing>
+</Parameters>
+""")
+        f.close()
+
+def make_gerald_config(gerald_dir):
+    config_xml = """<RunParameters>
+<ChipWideRunParameters>
+  <ANALYSIS>default</ANALYSIS>
+  <BAD_LANES></BAD_LANES>
+  <BAD_TILES></BAD_TILES>
+  <CONTAM_DIR></CONTAM_DIR>
+  <CONTAM_FILE></CONTAM_FILE>
+  <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
+  <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
+  <ELAND_REPEAT></ELAND_REPEAT>
+  <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
+  <EMAIL_LIST>diane</EMAIL_LIST>
+  <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
+  <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
+  <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
+  <FORCE>1</FORCE>
+  <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
+  <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
+  <HAMSTER_FLAG>genome</HAMSTER_FLAG>
+  <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
+  <POST_RUN_COMMAND></POST_RUN_COMMAND>
+  <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
+  <PURE_BASES>12</PURE_BASES>
+  <QF_PARAMS>'((CHASTITY&gt;=0.6))'</QF_PARAMS>
+  <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
+  <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
+  <READ_LENGTH>32</READ_LENGTH>
+  <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
+  <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
+  <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
+  <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
+  <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
+  <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
+  <TILE_ROOT>s</TILE_ROOT>
+  <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
+  <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
+  <USE_BASES>all</USE_BASES>
+  <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
+</ChipWideRunParameters>
+<LaneSpecificRunParameters>
+  <ANALYSIS>
+    <s_1>eland</s_1>
+    <s_2>eland</s_2>
+    <s_3>eland</s_3>
+    <s_4>eland</s_4>
+    <s_5>eland</s_5>
+    <s_6>eland</s_6>
+    <s_7>eland</s_7>
+    <s_8>eland</s_8>
+  </ANALYSIS>
+  <ELAND_GENOME>
+    <s_1>/g/dm3</s_1>
+    <s_2>/g/equcab1</s_2>
+    <s_3>/g/equcab1</s_3>
+    <s_4>/g/canfam2</s_4>
+    <s_5>/g/hg18</s_5>
+    <s_6>/g/hg18</s_6>
+    <s_7>/g/hg18</s_7>
+    <s_8>/g/hg18</s_8>
+  </ELAND_GENOME>
+  <READ_LENGTH>
+    <s_1>32</s_1>
+    <s_2>32</s_2>
+    <s_3>32</s_3>
+    <s_4>32</s_4>
+    <s_5>32</s_5>
+    <s_6>32</s_6>
+    <s_7>32</s_7>
+    <s_8>32</s_8>
+  </READ_LENGTH>
+  <USE_BASES>
+    <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
+    <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
+    <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
+    <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
+    <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
+    <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
+    <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
+    <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
+  </USE_BASES>
+</LaneSpecificRunParameters>
+</RunParameters>
+"""
+    pathname = os.path.join(gerald_dir, 'config.xml')
+    f = open(pathname,'w')
+    f.write(config_xml)
+    f.close()
+    
+def make_summary_htm(gerald_dir):
+    summary_htm="""<!--RUN_TIME Wed Jul  2 06:47:44 2008 -->
+<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
+<html>
+<body>
+
+<a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
+<h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
+<h2><br></br>Chip Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr><td>Machine</td><td>HWI-EAS229</td></tr>
+<tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
+<tr><td>Chip ID</td><td>unknown</td></tr>
+</table>
+<h2><br></br>Chip Results Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td>Clusters</td>
+<td>Clusters (PF)</td>
+<td>Yield (kbases)</td>
+</tr>
+<tr><td>80933224</td>
+<td>43577803</td>
+<td>1133022</td>
+</tr>
+</table>
+<h2><br></br>Lane Parameter Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane</td>
+<td>Sample ID</td>
+<td>Sample Target</td>
+<td>Sample Type</td>
+<td>Length</td>
+<td>Filter</td>
+<td>Num Tiles</td>
+<td>Tiles</td>
+</tr>
+<tr>
+<td>1</td>
+<td>unknown</td>
+<td>mm9</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane1">Lane 1</a></td>
+</tr>
+<tr>
+<td>2</td>
+<td>unknown</td>
+<td>mm9</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane2">Lane 2</a></td>
+</tr>
+<tr>
+<td>3</td>
+<td>unknown</td>
+<td>mm9</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane3">Lane 3</a></td>
+</tr>
+<tr>
+<td>4</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane4">Lane 4</a></td>
+</tr>
+<tr>
+<td>5</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane5">Lane 5</a></td>
+</tr>
+<tr>
+<td>6</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane6">Lane 6</a></td>
+</tr>
+<tr>
+<td>7</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane7">Lane 7</a></td>
+</tr>
+<tr>
+<td>8</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane8">Lane 8</a></td>
+</tr>
+</table>
+<h2><br></br>Lane Results Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td colspan="2">Lane Info</td>
+<td colspan="8">Tile Mean +/- SD for Lane</td>
+</tr>
+<tr>
+<td>Lane </td>
+<td>Lane Yield (kbases) </td>
+<td>Clusters (raw)</td>
+<td>Clusters (PF) </td>
+<td>1st Cycle Int (PF) </td>
+<td>% intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Alignment Score (PF) </td>
+<td> % Error Rate (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>158046</td>
+<td>96483 +/- 9074</td>
+<td>60787 +/- 4240</td>
+<td>329 +/- 35</td>
+<td>101.88 +/- 6.03</td>
+<td>63.21 +/- 3.29</td>
+<td>70.33 +/- 0.24</td>
+<td>9054.08 +/- 59.16</td>
+<td>0.46 +/- 0.18</td>
+</tr>
+<tr>
+<td>2</td>
+<td>156564</td>
+<td>133738 +/- 7938</td>
+<td>60217 +/- 1926</td>
+<td>444 +/- 39</td>
+<td>92.62 +/- 7.58</td>
+<td>45.20 +/- 3.31</td>
+<td>51.98 +/- 0.74</td>
+<td>6692.04 +/- 92.49</td>
+<td>0.46 +/- 0.09</td>
+</tr>
+<tr>
+<td>3</td>
+<td>185818</td>
+<td>152142 +/- 10002</td>
+<td>71468 +/- 2827</td>
+<td>366 +/- 36</td>
+<td>91.53 +/- 8.66</td>
+<td>47.19 +/- 3.80</td>
+<td>82.24 +/- 0.44</td>
+<td>10598.68 +/- 64.13</td>
+<td>0.41 +/- 0.04</td>
+</tr>
+<tr>
+<td>4</td>
+<td>34953</td>
+<td>15784 +/- 2162</td>
+<td>13443 +/- 1728</td>
+<td>328 +/- 40</td>
+<td>97.53 +/- 9.87</td>
+<td>85.29 +/- 1.91</td>
+<td>80.02 +/- 0.53</td>
+<td>10368.82 +/- 71.08</td>
+<td>0.15 +/- 0.05</td>
+</tr>
+<tr>
+<td>5</td>
+<td>167936</td>
+<td>119735 +/- 8465</td>
+<td>64590 +/- 2529</td>
+<td>417 +/- 37</td>
+<td>88.69 +/- 14.79</td>
+<td>54.10 +/- 2.59</td>
+<td>76.95 +/- 0.32</td>
+<td>9936.47 +/- 65.75</td>
+<td>0.28 +/- 0.02</td>
+</tr>
+<tr>
+<td>6</td>
+<td>173463</td>
+<td>152177 +/- 8146</td>
+<td>66716 +/- 2493</td>
+<td>372 +/- 39</td>
+<td>87.06 +/- 9.86</td>
+<td>43.98 +/- 3.12</td>
+<td>78.80 +/- 0.43</td>
+<td>10162.28 +/- 49.65</td>
+<td>0.38 +/- 0.03</td>
+</tr>
+<tr>
+<td>7</td>
+<td>149287</td>
+<td>84649 +/- 7325</td>
+<td>57418 +/- 3617</td>
+<td>295 +/- 28</td>
+<td>89.40 +/- 8.23</td>
+<td>67.97 +/- 1.82</td>
+<td>33.38 +/- 0.25</td>
+<td>4247.92 +/- 32.37</td>
+<td>1.00 +/- 0.03</td>
+</tr>
+<tr>
+<td>8</td>
+<td>106953</td>
+<td>54622 +/- 4812</td>
+<td>41136 +/- 3309</td>
+<td>284 +/- 37</td>
+<td>90.21 +/- 9.10</td>
+<td>75.39 +/- 2.27</td>
+<td>48.33 +/- 0.29</td>
+<td>6169.21 +/- 169.50</td>
+<td>0.86 +/- 1.22</td>
+</tr>
+<tr><td colspan="13">Tile mean across chip</td></tr>
+<tr>
+<td>Av.</td>
+<td></td>
+<td>101166</td>
+<td>54472</td>
+<td>354</td>
+<td>92.36</td>
+<td>60.29</td>
+<td>65.25</td>
+<td>8403.69</td>
+<td>0.50</td>
+</tr>
+</table>
+<h2><br></br>Expanded Lane Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+
+<tr><td colspan="2">Lane Info</td>
+<td colspan="2">Phasing Info</td>
+<td colspan="2">Raw Data (tile mean)</td>
+<td colspan="7">Filtered Data (tile mean)</td></tr>
+<td>Lane </td>
+<td>Clusters (tile mean) (raw)</td>
+<td>% Phasing </td>
+<td>% Prephasing </td>
+<td>% Error Rate (raw) </td>
+<td> Equiv Perfect Clusters (raw) </td>
+<td>% retained </td>
+<td>Cycle 2-4 Av Int (PF) </td>
+<td>Cycle 2-10 Av % Loss (PF) </td>
+<td>Cycle 10-20 Av % Loss (PF) </td>
+<td>% Align (PF) </td>
+<td>% Error Rate (PF) </td>
+<td> Equiv Perfect Clusters (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>96483</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.00</td>
+<td>49676</td>
+<td>63.21</td>
+<td>317 +/- 32</td>
+<td>0.13 +/- 0.44</td>
+<td>-1.14 +/- 0.34</td>
+<td>70.33</td>
+<td>0.46</td>
+<td>41758</td>
+</tr>
+<tr>
+<td>2</td>
+<td>133738</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.22</td>
+<td>40467</td>
+<td>45.20</td>
+<td>415 +/- 33</td>
+<td>0.29 +/- 0.40</td>
+<td>-0.79 +/- 0.35</td>
+<td>51.98</td>
+<td>0.46</td>
+<td>30615</td>
+</tr>
+<tr>
+<td>3</td>
+<td>152142</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.30</td>
+<td>78588</td>
+<td>47.19</td>
+<td>344 +/- 26</td>
+<td>0.68 +/- 0.51</td>
+<td>-0.77 +/- 0.42</td>
+<td>82.24</td>
+<td>0.41</td>
+<td>57552</td>
+</tr>
+<tr>
+<td>4</td>
+<td>15784</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>0.29</td>
+<td>11095</td>
+<td>85.29</td>
+<td>306 +/- 34</td>
+<td>0.20 +/- 0.69</td>
+<td>-1.28 +/- 0.66</td>
+<td>80.02</td>
+<td>0.15</td>
+<td>10671</td>
+</tr>
+<tr>
+<td>5</td>
+<td>119735</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>0.85</td>
+<td>60335</td>
+<td>54.10</td>
+<td>380 +/- 32</td>
+<td>0.34 +/- 0.49</td>
+<td>-1.55 +/- 4.69</td>
+<td>76.95</td>
+<td>0.28</td>
+<td>49015</td>
+</tr>
+<tr>
+<td>6</td>
+<td>152177</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.21</td>
+<td>70905</td>
+<td>43.98</td>
+<td>333 +/- 27</td>
+<td>0.57 +/- 0.50</td>
+<td>-0.91 +/- 0.39</td>
+<td>78.80</td>
+<td>0.38</td>
+<td>51663</td>
+</tr>
+<tr>
+<td>7</td>
+<td>84649</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.38</td>
+<td>21069</td>
+<td>67.97</td>
+<td>272 +/- 20</td>
+<td>1.15 +/- 0.52</td>
+<td>-0.84 +/- 0.58</td>
+<td>33.38</td>
+<td>1.00</td>
+<td>18265</td>
+</tr>
+<tr>
+<td>8</td>
+<td>54622</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.17</td>
+<td>21335</td>
+<td>75.39</td>
+<td>262 +/- 31</td>
+<td>1.10 +/- 0.59</td>
+<td>-1.01 +/- 0.47</td>
+<td>48.33</td>
+<td>0.86</td>
+<td>19104</td>
+</tr>
+</table>
+<b><br></br>IVC Plots</b>
+<p> <a href='IVC.htm' target="_blank"> IVC.htm
+ </a></p>
+<b><br></br>All Intensity Plots</b>
+<p> <a href='All.htm' target="_blank"> All.htm
+ </a></p>
+<b><br></br>Error graphs: </b>
+<p> <a href='Error.htm' target="_blank"> Error.htm
+ </a></p>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>0001</td>
+<td>114972</td>
+<td>326.48</td>
+<td>94.39</td>
+<td>57.44</td>
+<td>70.2</td>
+<td>9038.6</td>
+<td>0.44</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>2</td>
+<td>0001</td>
+<td>147793</td>
+<td>448.12</td>
+<td>83.68</td>
+<td>38.57</td>
+<td>53.7</td>
+<td>6905.4</td>
+<td>0.54</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>3</td>
+<td>0001</td>
+<td>167904</td>
+<td>374.05</td>
+<td>86.91</td>
+<td>40.36</td>
+<td>81.3</td>
+<td>10465.0</td>
+<td>0.47</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>4</td>
+<td>0001</td>
+<td>20308</td>
+<td>276.85</td>
+<td>92.87</td>
+<td>84.26</td>
+<td>80.4</td>
+<td>10413.8</td>
+<td>0.16</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>6</td>
+<td>0001</td>
+<td>166844</td>
+<td>348.12</td>
+<td>77.59</td>
+<td>38.13</td>
+<td>79.7</td>
+<td>10264.4</td>
+<td>0.44</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>7</td>
+<td>0001</td>
+<td>98913</td>
+<td>269.90</td>
+<td>86.66</td>
+<td>64.55</td>
+<td>33.2</td>
+<td>4217.5</td>
+<td>1.02</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>8</td>
+<td>0001</td>
+<td>64972</td>
+<td>243.60</td>
+<td>89.40</td>
+<td>73.17</td>
+<td>48.3</td>
+<td>6182.8</td>
+<td>0.71</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+</body>
+</html>
+"""
+    pathname = os.path.join(gerald_dir, 'Summary.htm')
+    f = open(pathname, 'w')
+    f.write(summary_htm)
+    f.close()
+
+def make_eland_results(gerald_dir):
+    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
+>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
+>HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
+>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
+"""
+    for i in range(1,9):
+        pathname = os.path.join(gerald_dir, 
+                                's_%d_eland_result.txt' % (i,))
+        f = open(pathname, 'w')
+        f.write(eland_result)
+        f.close()
+
+def make_runfolder(obj=None):
+    """
+    Make a fake runfolder, attach all the directories to obj if defined
+    """
+    # make a fake runfolder directory
+    temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
+
+    runfolder_dir = os.path.join(temp_dir, 
+                                 '080102_HWI-EAS229_0010_207BTAAXX')
+    os.mkdir(runfolder_dir)
+
+    data_dir = os.path.join(runfolder_dir, 'Data')
+    os.mkdir(data_dir)
+
+    firecrest_dir = os.path.join(data_dir, 
+                                 'C1-33_Firecrest1.8.28_12-04-2008_diane'
+                                 )
+    os.mkdir(firecrest_dir)
+    matrix_dir = os.path.join(firecrest_dir, 'Matrix')
+    os.mkdir(matrix_dir)
+    make_matrix(matrix_dir)
+
+    bustard_dir = os.path.join(firecrest_dir, 
+                               'Bustard1.8.28_12-04-2008_diane')
+    os.mkdir(bustard_dir)
+    make_phasing_params(bustard_dir)
+
+    gerald_dir = os.path.join(bustard_dir,
+                              'GERALD_12-04-2008_diane')
+    os.mkdir(gerald_dir)
+    make_gerald_config(gerald_dir)
+    make_summary_htm(gerald_dir)
+    make_eland_results(gerald_dir)
+
+    if obj is not None:
+        obj.temp_dir = temp_dir
+        obj.runfolder_dir = runfolder_dir
+        obj.data_dir = data_dir
+        obj.firecrest_dir = firecrest_dir
+        obj.matrix_dir = matrix_dir
+        obj.bustard_dir = bustard_dir
+        obj.gerald_dir = gerald_dir
+        
+                     
+class RunfolderTests(unittest.TestCase):
+    """
+    Test components of the runfolder processing code
+    which includes firecrest, bustard, and gerald
+    """
+    def setUp(self):
+        # attaches all the directories to the object passed in
+        make_runfolder(self)
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir)
+
+    def test_firecrest(self):
+        """
+        Construct a firecrest object
+        """
+        f = firecrest.firecrest(self.firecrest_dir)
+        self.failUnlessEqual(f.version, '1.8.28')
+        self.failUnlessEqual(f.start, 1)
+        self.failUnlessEqual(f.stop, 33)
+        self.failUnlessEqual(f.user, 'diane')
+        self.failUnlessEqual(f.date, date(2008,4,12))
+
+        xml = f.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+
+        f2 = firecrest.Firecrest(xml=xml)
+        self.failUnlessEqual(f.version, f2.version)
+        self.failUnlessEqual(f.start,   f2.start)
+        self.failUnlessEqual(f.stop,    f2.stop)
+        self.failUnlessEqual(f.user,    f2.user)
+        self.failUnlessEqual(f.date,    f2.date)
+
+    def test_bustard(self):
+        """
+        construct a bustard object
+        """
+        b = bustard.bustard(self.bustard_dir)
+        self.failUnlessEqual(b.version, '1.8.28')
+        self.failUnlessEqual(b.date,    date(2008,4,12))
+        self.failUnlessEqual(b.user,    'diane')
+        self.failUnlessEqual(len(b.phasing), 8)
+        self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
+        
+        xml = b.get_elements()
+        b2 = bustard.Bustard(xml=xml)
+        self.failUnlessEqual(b.version, b2.version)
+        self.failUnlessEqual(b.date,    b2.date )
+        self.failUnlessEqual(b.user,    b2.user)
+        self.failUnlessEqual(len(b.phasing), len(b2.phasing))
+        for key in b.phasing.keys():
+            self.failUnlessEqual(b.phasing[key].lane, 
+                                 b2.phasing[key].lane)
+            self.failUnlessEqual(b.phasing[key].phasing, 
+                                 b2.phasing[key].phasing)
+            self.failUnlessEqual(b.phasing[key].prephasing, 
+                                 b2.phasing[key].prephasing)
+
+    def test_gerald(self):
+        # need to update gerald and make tests for it
+        g = gerald.gerald(self.gerald_dir) 
+
+        self.failUnlessEqual(g.version, 
+            '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
+        self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
+        self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
+        self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
+
+        
+        # list of genomes, matches what was defined up in 
+        # make_gerald_config.
+        # the first None is to offset the genomes list to be 1..9
+        # instead of pythons default 0..8
+        genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
+                         '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
+
+        # test lane specific parameters from gerald config file
+        for i in range(1,9):
+            cur_lane = g.lanes[str(i)]
+            self.failUnlessEqual(cur_lane.analysis, 'eland')
+            self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
+            self.failUnlessEqual(cur_lane.read_length, '32')
+            self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
+
+        # test data extracted from summary file
+        clusters = [None, 
+                    (96483, 9074), (133738, 7938), 
+                    (152142, 10002), (15784, 2162), 
+                    (119735, 8465), (152177, 8146),
+                    (84649, 7325), (54622, 4812),]
+
+        for i in range(1,9):
+            summary_lane = g.summary[str(i)]
+            self.failUnlessEqual(summary_lane.cluster, clusters[i])
+            self.failUnlessEqual(summary_lane.lane, str(i))
+
+        xml = g.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+        g2 = gerald.Gerald(xml=xml)
+
+        # do it all again after extracting from the xml file
+        self.failUnlessEqual(g.version, g2.version)
+        self.failUnlessEqual(g.date, g2.date)
+        self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
+        self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
+
+        # test lane specific parameters from gerald config file
+        for i in range(1,9):
+            g_lane = g.lanes[str(i)]
+            g2_lane = g2.lanes[str(i)]
+            self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
+            self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
+            self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
+            self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
+
+        # test (some) summary elements
+        for i in range(1,9):
+            g_summary = g.summary[str(i)]
+            g2_summary = g2.summary[str(i)]
+            self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
+            self.failUnlessEqual(g_summary.lane, g2_summary.lane)
+
+            g_eland = g.eland_results
+            g2_eland = g2.eland_results
+            for lane in g_eland.keys():
+                self.failUnlessEqual(g_eland[lane].reads, 
+                                     g2_eland[lane].reads)
+                self.failUnlessEqual(len(g_eland[lane].mapped_reads), 
+                                     len(g2_eland[lane].mapped_reads))
+                for k in g_eland[lane].mapped_reads.keys():
+                    self.failUnlessEqual(g_eland[lane].mapped_reads[k],
+                                         g2_eland[lane].mapped_reads[k])
+
+                self.failUnlessEqual(len(g_eland[lane].match_codes), 
+                                     len(g2_eland[lane].match_codes))
+                for k in g_eland[lane].match_codes.keys():
+                    self.failUnlessEqual(g_eland[lane].match_codes[k],
+                                         g2_eland[lane].match_codes[k])
+
+
+    def test_eland(self):
+        dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
+                    'chr2L.fa': 'dm3/chr2L.fa',
+                    'Lambda.fa': 'Lambda.fa'}
+        genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
+                        '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+        eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
+        
+        for i in range(1,9):
+            lane = eland[str(i)]
+            self.failUnlessEqual(lane.reads, 4)
+            self.failUnlessEqual(lane.sample_name, "s")
+            self.failUnlessEqual(lane.lane_id, unicode(i))
+            self.failUnlessEqual(len(lane.mapped_reads), 3)
+            self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
+            self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
+            self.failUnlessEqual(lane.match_codes['U1'], 2)
+            self.failUnlessEqual(lane.match_codes['NM'], 1)
+
+        xml = eland.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+        e2 = gerald.ELAND(xml=xml)
+
+        for i in range(1,9):
+            l1 = eland[str(i)]
+            l2 = e2[str(i)]
+            self.failUnlessEqual(l1.reads, l2.reads)
+            self.failUnlessEqual(l1.sample_name, l2.sample_name)
+            self.failUnlessEqual(l1.lane_id, l2.lane_id)
+            self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
+            self.failUnlessEqual(len(l1.mapped_reads), 3)
+            for k in l1.mapped_reads.keys():
+                self.failUnlessEqual(l1.mapped_reads[k],
+                                     l2.mapped_reads[k])
+
+            self.failUnlessEqual(len(l1.match_codes), 9)
+            self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
+            for k in l1.match_codes.keys():
+                self.failUnlessEqual(l1.match_codes[k], 
+                                     l2.match_codes[k])
+
+    def test_runfolder(self):
+        runs = runfolder.get_runs(self.runfolder_dir)
+        
+        # do we get the flowcell id from the filename?
+        self.failUnlessEqual(len(runs), 1)
+        self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
+
+        # do we get the flowcell id from the FlowcellId.xml file?
+        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
+        runs = runfolder.get_runs(self.runfolder_dir)
+        self.failUnlessEqual(len(runs), 1)
+        self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
+        
+        r1 = runs[0]
+        xml = r1.get_elements()
+        xml_str = ElementTree.tostring(xml)
+
+        r2 = runfolder.PipelineRun(xml=xml)
+        self.failUnlessEqual(r1.name, r2.name)
+        self.failIfEqual(r2.firecrest, None)
+        self.failIfEqual(r2.bustard, None)
+        self.failIfEqual(r2.gerald, None)
+        
+
+def suite():
+    return unittest.makeSuite(RunfolderTests,'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest="suite")
+    
diff --git a/htsworkflow/util/__init__.py b/htsworkflow/util/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/htsworkflow/util/alphanum.py b/htsworkflow/util/alphanum.py
new file mode 100644 (file)
index 0000000..8893bdb
--- /dev/null
@@ -0,0 +1,61 @@
+#
+# The Alphanum Algorithm is an improved sorting algorithm for strings
+# containing numbers.  Instead of sorting numbers in ASCII order like
+# a standard sort, this algorithm sorts numbers in numeric order.
+#
+# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
+#
+#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com)
+#* Distributed under same license as original
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+#
+
+import re
+
+#
+# TODO: Make decimal points be considered in the same class as digits
+#
+
+def chunkify(s):
+    """return a list of numbers and non-numeric substrings of +s+
+
+    the numeric substrings are converted to integer, non-numeric are left as is
+    """
+    chunks = re.findall(r"(\d+|\D+)", s)
+    # convert the numeric strings to real ints; isdigit() avoids the
+    # and/or idiom, which misclassifies the chunk "0"
+    chunks = [int(x) if x.isdigit() else x for x in chunks]
+    return chunks
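+
+# Example of chunkify on two of the demo strings below, based on the
+# regex split above:
+#   chunkify("Xiph Xlater 58") -> ['Xiph Xlater ', 58]
+#   chunkify("Alpha 2A-8000")  -> ['Alpha ', 2, 'A-', 8000]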
+
+def alphanum(a, b):
+    """breaks +a+ and +b+ into pieces and returns left-to-right comparison of the pieces
+
+    +a+ and +b+ are expected to be strings (for example file names) with numbers and non-numeric characters.
+    Splitting the values into lists of numbers and non-numeric substrings means comparing the numbers
+    gives numeric sorting while comparing the non-numeric parts gives lexicographic order.
+    """
+    # split strings into chunks
+    aChunks = chunkify(a)
+    bChunks = chunkify(b)
+
+    return cmp(aChunks, bChunks) # built-in comparison works once data is prepared
+
+
+
+if __name__ == "__main__":
+    unsorted = ["1000X Radonius Maximus","10X Radonius","200X Radonius","20X Radonius","20X Radonius Prime","30X Radonius","40X Radonius","Allegia 50 Clasteron","Allegia 500 Clasteron","Allegia 51 Clasteron","Allegia 51B Clasteron","Allegia 52 Clasteron","Allegia 60 Clasteron","Alpha 100","Alpha 2","Alpha 200","Alpha 2A","Alpha 2A-8000","Alpha 2A-900","Callisto Morphamax","Callisto Morphamax 500","Callisto Morphamax 5000","Callisto Morphamax 600","Callisto Morphamax 700","Callisto Morphamax 7000","Callisto Morphamax 7000 SE","Callisto Morphamax 7000 SE2","QRS-60 Intrinsia Machine","QRS-60F Intrinsia Machine","QRS-62 Intrinsia Machine","QRS-62F Intrinsia Machine","Xiph Xlater 10000","Xiph Xlater 2000","Xiph Xlater 300","Xiph Xlater 40","Xiph Xlater 5","Xiph Xlater 50","Xiph Xlater 500","Xiph Xlater 5000","Xiph Xlater 58"]
+    sorted = unsorted[:]
+    sorted.sort(alphanum)
+    print '+++++Sorted...++++'
+    print '\n'.join(sorted)
diff --git a/htsworkflow/util/ethelp.py b/htsworkflow/util/ethelp.py
new file mode 100644 (file)
index 0000000..19f6c9f
--- /dev/null
@@ -0,0 +1,32 @@
+"""
+ElementTree helper functions
+"""
+def indent(elem, level=0):
+    """
+    reformat an element tree to be 'pretty' (indented)
+    """
+    i = "\n" + level*"  "
+    if len(elem):
+        if not elem.text or not elem.text.strip():
+            elem.text = i + "  "
+        for child in elem:
+            indent(child, level+1)
+        # we don't want the closing tag indented too far
+        child.tail = i
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = i
+    else:
+        if level and (not elem.tail or not elem.tail.strip()):
+            elem.tail = i
+
+def flatten(elem, include_tail=0):
+    """
+    Extract the text from an element tree 
+    (AKA extract the text that is not part of XML tags)
+    """
+    text = elem.text or ""
+    for e in elem:
+        text += flatten(e, 1)
+    if include_tail and elem.tail: text += elem.tail
+    return text
+
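+
+# A hand-traced example of the two helpers (assumes the python 2.5
+# xml.etree.ElementTree API):
+#
+#   >>> from xml.etree import ElementTree
+#   >>> root = ElementTree.fromstring('<a><b>hi</b></a>')
+#   >>> indent(root)
+#   >>> flatten(root)
+#   '\n  hi\n'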
diff --git a/htsworkflow/util/fctracker.py b/htsworkflow/util/fctracker.py
new file mode 100644 (file)
index 0000000..57b5dcf
--- /dev/null
@@ -0,0 +1,201 @@
+"""
+Provide some quick and dirty access and reporting for the fctracker database.
+
+The advantage to this code is that it doesn't depend on django being
+installed, so it can run on machines other than the webserver.
+"""
+import datetime
+import os
+import re
+import sys
+import time
+
+if sys.version_info >= (2, 5):
+  # python 2.5 and later ship sqlite3 in the standard library
+  import sqlite3
+else:
+  import pysqlite2.dbapi2 as sqlite3
+
+
+class fctracker:
+    """
+    provide a simple way to interact with the flowcell data in fctracker.db
+    """
+    def __init__(self, database):
+        # default to the current directory
+        if database is None: 
+            self.database = self._guess_fctracker_path()
+        else:
+            self.database = database
+        self.conn = sqlite3.connect(self.database)
+        self._get_library()
+        self._get_species()
+
+    def _guess_fctracker_path(self):
+        """
+        Guess a few obvious places for the database
+        """
+        fctracker = 'fctracker.db'
+        name = fctracker
+        # is it in the current dir?
+        if os.path.exists(name): 
+            return name
+        name = os.path.expanduser(os.path.join('~', fctracker))
+        if os.path.exists(name):
+            return name
+        raise RuntimeError("Can't find fctracker")
+
+    def _make_dict_from_table(self, table_name, pkey_name):
+        """
+        Convert a django table into a dictionary indexed by the primary key.
+        Yes, it really does just load everything into memory; hopefully
+        we stay under a few tens of thousands of runs for a while.
+        """
+        table = {}
+        c = self.conn.cursor()
+        c.execute('select * from %s;' % (table_name))
+        # extract just the field name
+        description = [ f[0] for f in c.description]
+        for row in c:
+            row_dict = dict(zip(description, row))
+            table[row_dict[pkey_name]] = row_dict
+        c.close()
+        return table
+
+    def _add_lanes_to_libraries(self):
+        """
+        add flowcell/lane ids to new attribute 'lanes' in the library dictionary
+        """
+        library_id_re = re.compile(r'lane_\d_library_id')
+
+        for fc_id, fc in self.flowcells.items():
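+            # keys look like 'lane_3_library_id', so character 5 of
+            # the key is the lane number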
+            lane_library = [ (x[0][5], x[1]) for x in fc.items() 
+                                             if library_id_re.match(x[0]) ]
+            for lane, library_id in lane_library:
+                lanes = self.library[library_id].setdefault('lanes', [])
+                lanes.append((fc_id, lane))
+
+    def _get_library(self):
+        """
+        attach the library dictionary to the instance
+        """
+        self.library = self._make_dict_from_table(
+                         'fctracker_library', 
+                         'library_id')
+                                                  
+        
+    def _get_species(self):
+        """
+        attach the species dictionary to the instance
+        """
+        self.species = self._make_dict_from_table(
+                         'fctracker_species',
+                         'id'
+                       )
+        
+    def _get_flowcells(self, where=None):
+        """
+        attach the flowcell dictionary to the instance
+
+        where is a sql where clause (eg "where run_date > '2008-1-1'")
+        that can be used to limit which flowcells we select
+        FIXME: please add sanitization code
+        """
+        if where is None:
+            where = ""
+        self.flowcells = {}
+        c = self.conn.cursor()
+        c.execute('select * from fctracker_flowcell %s;' % (where))
+        # extract just the field name
+        description = [ f[0] for f in c.description ]
+        for row in c:
+            row_dict = dict(zip(description, row))
+            fcid, status = self._parse_flowcell_id(row_dict)
+            row_dict['flowcell_id'] = fcid
+            row_dict['flowcell_status'] = status
+
+            for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
+                lane_library = self.library[row_dict[lane+"_id"]]
+                species_id = lane_library['library_species_id']
+                lane_library['library_species'] = self.species[species_id]
+                row_dict[lane] = lane_library
+            # some useful parsing
+            run_date = time.strptime(row_dict['run_date'],  '%Y-%m-%d %H:%M:%S')
+            run_date = datetime.datetime(*run_date[:6])
+            row_dict['run_date'] = run_date
+            self.flowcells[row_dict['flowcell_id']] = row_dict
+
+        self._add_lanes_to_libraries()
+        return self.flowcells
+
+    def _parse_flowcell_id(self, flowcell_row):
+        """
+        Return flowcell id and status
+
+        We stored the status information in the flowcell id name.
+        This was dumb, but database schemas are hard to update.
+        """
+        fields = flowcell_row['flowcell_id'].split()
+        fcid = None
+        status = None
+        if len(fields) > 0:
+            fcid = fields[0]
+        if len(fields) > 1:
+            status = fields[1]
+        return fcid, status
+
+
+def flowcell_gone(cell):
+    """
+    Use a variety of heuristics to determine if the flowcell drive
+    has been deleted.
+    """
+    status = cell['flowcell_status']
+    if status is None:
+        return False
+    failures = ['failed', 'deleted', 'not run']
+    for f in failures:
+        if re.search(f, status):
+            return True
+    return False
+
+def recoverable_drive_report(flowcells):
+    """
+    Attempt to report what flowcells are still on a hard drive
+    """
+    def format_status(status):
+        if status is None:
+            return ""
+        else:
+            return status + " "
+
+    # sort flowcells by run date
+    flowcell_list = []
+    for key, cell in flowcells.items():
+        flowcell_list.append( (cell['run_date'], key) )
+    flowcell_list.sort()
+
+    report = []
+    line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) "
+    line += "%(species)s"
+    for run_date, flowcell_id in flowcell_list:
+        cell = flowcells[flowcell_id]
+        if flowcell_gone(cell):
+            continue
+        for l in range(1,9):
+            lane = 'lane_%d' % (l)
+            cell_library = cell['%s_library'%(lane)]
+            fields = {
+              'date': cell['run_date'].strftime('%y-%b-%d'),
+              'id': cell['flowcell_id'],
+              'lane': l,
+              'library_name': cell_library['library_name'],
+              'library_id': cell['%s_library_id'%(lane)],
+              'species': cell_library['library_species']['scientific_name'],
+              'status': format_status(cell['flowcell_status']),
+            }
+            report.append(line % (fields))
+    return os.linesep.join(report)
+
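
A minimal sketch of the intended use (fctracker(None) falls back to the
path search in _guess_fctracker_path; _get_flowcells must be called first
because it is what populates the flowcell dictionary):

    from htsworkflow.util.fctracker import fctracker, recoverable_drive_report

    fc = fctracker(None)             # None: try ./fctracker.db, then ~/
    flowcells = fc._get_flowcells()  # also wires libraries to species/lanes
    print recoverable_drive_report(flowcells)
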
diff --git a/htsworkflow/util/makebed.py b/htsworkflow/util/makebed.py
new file mode 100755 (executable)
index 0000000..4f2b17f
--- /dev/null
@@ -0,0 +1,142 @@
+"""
+Utility functions to make bedfiles.
+"""
+import os
+import re
+
+# map eland_result.txt sense values (F/R) onto bed strand and color
+sense_map = { 'F': '+', 'R': '-'}
+sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
+
+def write_bed_header(outstream, name, description):
+  """
+  Produce the headerline for a bedfile
+  """
+  # provide default track names
+  if name is None: name = "track"
+  if description is None: description = "eland result file"
+  bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
+  bed_header += os.linesep
+  outstream.write(bed_header % (name, description))
+
+def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'):
+  """
+  read an eland result file from instream and write a bedfile to outstream
+  """
+  # indexes into fields in eland_result.txt file
+  SEQ = 1
+  CHR = 6
+  START = 7
+  SENSE = 8
+
+  write_bed_header(outstream, name, description)
+
+  for line in instream:
+    fields = line.split()
+    # we need more than the CHR field, and it needs to match a chromosome
+    if len(fields) <= CHR or \
+          (chromosome_prefix is not None and \
+             not fields[CHR].startswith(chromosome_prefix)):
+      continue
+    start = fields[START]
+    stop = int(start) + len(fields[SEQ])
+    chromosome, extension = fields[CHR].split('.')
+    assert extension == "fa"
+    outstream.write('%s %s %d read 0 %s - - %s%s' % (
+      chromosome,
+      start,
+      stop,
+      sense_map[fields[SENSE]], 
+      sense_color[fields[SENSE]],
+      os.linesep  
+    ))
+
+
+def make_bed_from_multi_eland_stream(
+  instream, 
+  outstream, 
+  name, 
+  description, 
+  chr_prefix='chr', 
+  max_reads=255
+  ):
+  """
+  read a multi eland stream and write a bedfile
+  """
+  write_bed_header(outstream, name, description)
+  parse_multi_eland(instream, outstream, chr_prefix, max_reads)
+
+def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
+
+  loc_pattern = '(?P<fullloc>(?P<start>[0-9]+)(?P<dir>[FR])(?P<count>[0-9]+))'
+  other_pattern = '(?P<chr>[^:,]+)'
+  split_re = re.compile('(%s|%s)' % (loc_pattern, other_pattern))
+
+  for line in instream:
+    rec = line.split()
+    if len(rec) > 3:
+      # colony_id = rec[0]
+      seq = rec[1]
+      # number of matches for 0, 1, and 2 mismatches
+      # m0, m1, m2 = [int(x) for x in rec[2].split(':')]
+      compressed_reads = rec[3]
+      cur_chr = ""
+      reads = {0: [], 1: [], 2:[]}
+
+      for token in split_re.finditer(compressed_reads):
+        if token.group('chr') is not None:
+          cur_chr =  token.group('chr')[:-3] # strip off .fa
+        elif token.group('fullloc') is not None:
+          matches = int(token.group('count'))
+          # only emit a bed line if 
+          #  our current chromosome starts with chromosome pattern
+          if chr_prefix is None or cur_chr.startswith(chr_prefix):
+            start = int(token.group('start'))
+            stop = start + len(seq)
+            orientation = token.group('dir')
+            strand = sense_map[orientation]
+            color = sense_color[orientation]
+            # build up list of reads for this record
+            reads[matches].append((cur_chr, start, stop, strand, color))
+
+      # report up to our max_reads threshold, reporting the fewer-mismatch
+      # matches first; skip a whole mismatch class once the threshold
+      # would be crossed
+      reported_reads = 0
+      for mismatch in [0, 1, 2]:
+        read_list = reads[mismatch]
+        reported_reads += len(read_list)
+        if reported_reads <= max_reads:
+          for cur_chr, start, stop, strand, color in read_list:
+            # use the strand/color stored with this read rather than
+            # the leftover loop variables from the parsing pass
+            outstream.write('%s %d %d read 0 %s - - %s%s' % (
+                cur_chr,
+                start,
+                stop,
+                strand,
+                color,
+                os.linesep
+            ))
+
+def make_description(database, flowcell_id, lane):
+    """
+    compute a bedfile name and description from the fctracker database
+    """
+    from htsworkflow.util.fctracker import fctracker
+
+    fc = fctracker(database)
+    cells = fc._get_flowcells("where flowcell_id='%s'" % (flowcell_id))
+    if len(cells) != 1:
+        raise RuntimeError("couldn't find flowcell id %s" % (flowcell_id))
+    lane = int(lane)
+    if lane < 1 or lane > 8:
+        raise RuntimeError("flowcells only have lanes 1-8")
+
+    name = "%s-%s" % (flowcell_id, lane)
+
+    cell_id, cell = cells.items()[0]
+    assert cell_id == flowcell_id
+
+    cell_library_id = cell['lane_%d_library_id' %(lane,)]
+    cell_library = cell['lane_%d_library' %(lane,)]
+    description = "%s-%s" % (cell_library['library_name'], cell_library_id)
+    return name, description
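
A short sketch of the single-match path (the file names are hypothetical):

    from htsworkflow.util.makebed import make_bed_from_eland_stream

    instream = open('s_1_eland_result.txt')   # hypothetical eland output
    outstream = open('s_1.bed', 'w')
    make_bed_from_eland_stream(instream, outstream,
                               'flowcell-lane1',   # track name
                               'my library')       # track description
    outstream.close()
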
diff --git a/htsworkflow/util/mount.py b/htsworkflow/util/mount.py
new file mode 100644 (file)
index 0000000..75dbe0a
--- /dev/null
@@ -0,0 +1,65 @@
+"""
+Utilities for working with unix-style mounts.
+"""
+import os
+import subprocess
+
+def list_mount_points():
+    """
+    Return list of current mount points
+
+    Note: unix-like OS specific
+    """
+    mount_points = []
+    likely_locations = ['/sbin/mount', '/bin/mount']
+    for mount in likely_locations:
+        if os.path.exists(mount):
+            p = subprocess.Popen(mount, stdout=subprocess.PIPE)
+            p.wait()
+            for l in p.stdout.readlines():
+                rec = l.split()
+                device = rec[0]
+                mount_point = rec[2]
+                assert rec[1] == 'on'
+                # looking at the output of mount on linux, osx, and
+                # sunos, the first 3 elements are always the same:
+                #   devicename on path
+                # everything after that displays the attributes
+                # of the mount points in wildly differing formats
+                mount_points.append(mount_point)
+            return mount_points
+    raise RuntimeError("Couldn't find a mount executable")
+
+def is_mounted(point_to_check):
+    """
+    Return true if argument exactly matches a current mount point.
+    """
+    for mount_point in list_mount_points():
+        if point_to_check == mount_point:
+            return True
+    return False
+
+def find_mount_point_for(pathname):
+    """
+    Find the deepest mount point pathname is located on
+    """
+    realpath = os.path.realpath(pathname)
+    mount_points = list_mount_points()
+
+    prefixes = set()
+    for current_mount in mount_points:
+        cp = os.path.commonprefix([current_mount, realpath])
+        prefixes.add((len(cp), cp))
+
+    prefixes = list(prefixes)
+    prefixes.sort()
+    if len(prefixes) == 0:
+        return None
+    else:
+        # prefixes was sorted above, so the last entry holds the
+        # longest common prefix
+        return prefixes[-1][1]
+
+
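
For example (output naturally varies by machine):

    from htsworkflow.util import mount

    print mount.list_mount_points()     # e.g. ['/', '/home', ...]
    print mount.is_mounted('/home')     # True only on an exact match
    print mount.find_mount_point_for('/home/diane/runfolders')
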
diff --git a/htsworkflow/util/opener.py b/htsworkflow/util/opener.py
new file mode 100644 (file)
index 0000000..035bb24
--- /dev/null
@@ -0,0 +1,57 @@
+"""
+Helpful utilities for turning random names/objects into streams.
+"""
+import os
+import gzip
+import bz2
+import types
+import urllib2
+
+def isfilelike(file_ref, mode):
+    """Does file_ref have the core file operations?
+    """
+    # if mode is w/a, check that we have the write operations,
+    # but always check that we can read
+    read_operations = ['read', 'readline', 'readlines']
+    write_operations = [ 'write', 'writelines' ]
+    #random_operations = [ 'seek', 'tell' ]
+    if mode[0] in ('w', 'a'):
+        for o in write_operations:
+            if not hasattr(file_ref, o):
+                return False
+    for o in read_operations:
+        if not hasattr(file_ref, o):
+            return False
+          
+    return True
+
+def isurllike(file_ref, mode):
+    """
+    does file_ref look like a url?
+    (AKA does it start with protocol:// ?)
+    """
+    #what if mode is 'w'?
+    parsed = urllib2.urlparse.urlparse(file_ref)
+    scheme, netloc, path, params, query, fragment = parsed
+
+    return len(scheme) > 0
+
+def autoopen(file_ref, mode='r'):
+    """
+    Attempt to intelligently turn file_ref into a readable stream
+    """
+    # catch being passed a file
+    if type(file_ref) is types.FileType:
+        return file_ref
+    # does it look like a file?
+    elif isfilelike(file_ref, mode):
+        return file_ref
+    elif isurllike(file_ref, mode):
+        return urllib2.urlopen(file_ref)
+    elif os.path.splitext(file_ref)[1] == ".gz":
+        return gzip.open(file_ref, mode)
+    elif os.path.splitext(file_ref)[1] == '.bz2':
+        return bz2.BZ2File(file_ref, mode)
+    else:
+        return open(file_ref,mode)
+
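
A sketch of the dispatch behavior (the names and URL are illustrative;
each branch just hands back an appropriate stream):

    from htsworkflow.util.opener import autoopen

    plain = autoopen('eland_result.txt')                # builtin open()
    zipped = autoopen('eland_result.txt.gz')            # gzip.open()
    squeezed = autoopen('eland_result.txt.bz2')         # bz2.BZ2File()
    remote = autoopen('http://example.com/result.txt')  # urllib2.urlopen()
    same = autoopen(plain)                              # file-likes pass through
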
diff --git a/htsworkflow/util/queuecommands.py b/htsworkflow/util/queuecommands.py
new file mode 100644 (file)
index 0000000..78728ae
--- /dev/null
@@ -0,0 +1,87 @@
+"""
+Run up to N simultaneous jobs from a provided list of commands.
+"""
+
+import logging
+from subprocess import PIPE
+import subprocess
+import select
+import sys
+import time
+
+class QueueCommands(object):
+    """
+    Queue up N commands from cmd_list, launching more jobs as earlier
+    ones finish.
+    """
+
+    def __init__(self, cmd_list, N=0, cwd=None):
+        """
+        cmd_list is a list of elements suitable for subprocess
+        N is the number of simultaneous processes to run.
+        0 means run all of them at once.
+
+        WARNING: this will not work on windows
+        (It depends on being able to pass local file descriptors to the
+        select call, which isn't supported by the Win32 API)
+        """
+        self.to_run = cmd_list[:]
+        self.running = {}
+        self.N = N
+        self.cwd = cwd
+
+    def under_process_limit(self):
+        """
+        are we still under the total number of allowable jobs?
+        """
+        if self.N == 0:
+            return True
+
+        if len(self.running) < self.N:
+            return True
+
+        return False
+
+    def start_jobs(self):
+        """
+        Launch jobs until we have the maximum allowable running
+        (or have run out of jobs)
+        """
+        queue_log = logging.getLogger('queue')
+        queue_log.info('using %s as cwd' % (self.cwd,))
+
+        while (len(self.to_run) > 0) and self.under_process_limit():
+            queue_log.info('%d left to run', len(self.to_run))
+            cmd = self.to_run.pop(0)
+            p = subprocess.Popen(cmd, stdout=PIPE, cwd=self.cwd, shell=True)
+            self.running[p.stdout] = p
+            queue_log.info("Created process %d from %s" % (p.pid, str(cmd)))
+
+    def run(self):
+        """
+        run up to N jobs until we run out of jobs
+        """
+        queue_log = logging.getLogger('queue')
+
+        # to_run slowly gets consumed by start_jobs
+        while len(self.to_run) > 0 or len(self.running) > 0:
+            # fill any empty spots in our job queue
+            self.start_jobs()
+
+            # build a list of file descriptors to wait on
+            # (fds = file descriptors)
+            fds = [ x.stdout for x in self.running.values() ]
+
+            # wait for something to finish
+            # wl = write list, xl = exception list (unused, hence the
+            # throwaway names)
+            read_list, wl, xl = select.select(fds, [], fds)
+
+            # for everything that might have finished...
+            for pending_fd in read_list:
+                pending = self.running[pending_fd]
+                # if it really did finish, remove it from running jobs
+                if pending.poll() is not None:
+                    queue_log.info("Process %d finished [%d]",
+                                   pending.pid, pending.returncode)
+                    del self.running[pending_fd]
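+            # the "1 sec delay hack": pause between polls so we don't
+            # spin; the queuecommands tests compensate for this second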
+            time.sleep(1)
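
A short usage sketch (the commands are placeholders; with N=2 the third
sleep only starts once one of the first two exits):

    from htsworkflow.util.queuecommands import QueueCommands

    cmds = ['/bin/sleep 1', '/bin/sleep 2', '/bin/sleep 3']
    q = QueueCommands(cmds, 2)   # at most two jobs in flight at once
    q.run()                      # blocks until every command has finished
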
diff --git a/htsworkflow/util/test/test_ethelp.py b/htsworkflow/util/test/test_ethelp.py
new file mode 100644 (file)
index 0000000..63f0ac3
--- /dev/null
@@ -0,0 +1,35 @@
+import os
+import unittest
+
+try:
+  from xml.etree import ElementTree
+except ImportError, e:
+  from elementtree import ElementTree
+
+from htsworkflow.util.ethelp import indent, flatten
+
+class testETHelper(unittest.TestCase):
+    def setUp(self):
+        self.foo = '<foo><bar>asdf</bar><br/></foo>'
+        self.foo_tree = ElementTree.fromstring(self.foo)
+
+    def test_indent(self):
+        flat_foo = ElementTree.tostring(self.foo_tree)
+        self.failUnlessEqual(len(flat_foo.split('\n')), 1)
+
+        indent(self.foo_tree)
+        pretty_foo = ElementTree.tostring(self.foo_tree)
+        self.failUnlessEqual(len(pretty_foo.split('\n')), 5)
+
+    def test_flatten(self):
+        self.failUnlessEqual(flatten(self.foo_tree), 'asdf')
+
+def suite():
+    return unittest.makeSuite(testETHelper, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest='suite')
+
+
+
+
diff --git a/htsworkflow/util/test/test_makebed.py b/htsworkflow/util/test/test_makebed.py
new file mode 100644 (file)
index 0000000..03c7919
--- /dev/null
@@ -0,0 +1,51 @@
+import os
+from StringIO import StringIO
+import unittest
+
+from htsworkflow.util import makebed
+
+class testMakeBed(unittest.TestCase):
+    def test_multi_1_0_0_limit_1(self):
+      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
+      out = StringIO()
+
+      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
+      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+
+    def test_multi_1_0_0_limit_255(self):
+      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
+      out = StringIO()
+
+      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
+      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+
+
+    def test_multi_2_0_0_limit_1(self):
+      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
+      out = StringIO()
+
+      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
+      self.failUnlessEqual(out.len, 0)
+
+    def test_multi_2_0_0_limit_255(self):
+      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
+      out = StringIO()
+
+      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
+      self.failUnlessEqual(out.len, 98)
+
+    def test_multi_0_2_0_limit_1(self):
+      instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:115:495    TCTCCCTGAAAAATANAAGTGNTGTTGGTGAG        0:2:1   mm9_chr14.fa:104434729F2,mm9_chr16.fa:63263818R1,mm9_chr2.fa:52265438R1')
+      out = StringIO()
+
+      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
+      self.failUnlessEqual(out.len, 0)
+
+def suite():
+    return unittest.makeSuite(testMakeBed, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest='suite')
+
+
diff --git a/htsworkflow/util/test/test_queuecommands.py b/htsworkflow/util/test/test_queuecommands.py
new file mode 100644 (file)
index 0000000..424da88
--- /dev/null
@@ -0,0 +1,58 @@
+import os
+import logging
+import time
+import unittest
+
+
+from htsworkflow.util.queuecommands import QueueCommands
+
+class testQueueCommands(unittest.TestCase):
+    def setUp(self):
+        logging.basicConfig(level=logging.DEBUG,
+                            format='%(asctime)s %(name)-8s %(message)s')
+
+    def test_unlimited_run(self):
+        """
+        Run everything at once
+        """
+        cmds = ['/bin/sleep 0',
+                '/bin/sleep 1',
+                '/bin/sleep 2',]
+
+        q = QueueCommands(cmds)
+        start = time.time()
+        q.run()
+        end = time.time()-start
+        # we should only take the length of the longest sleep
+        # pity I had to add a 1 second sleep
+        self.failUnless( end > 2.9 and end < 3.1,
+                         "took %s seconds, exected ~3" % (end,))
+
+    def test_limited_run(self):
+        """
+        Run a limited number of jobs
+        """
+        cmds = ['/bin/sleep 1',
+                '/bin/sleep 2',
+                '/bin/sleep 3',]
+
+        q = QueueCommands(cmds, 2)
+
+        start = time.time()
+        q.run()
+        end = time.time()-start
+        # pity I had to add a 1 second sleep
+        self.failUnless( end > 5.9 and end < 6.1,
+                         "took %s seconds, expected ~6" % (end,)) 
+
+def suite():
+    return unittest.makeSuite(testQueueCommands, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest='suite')
+
+
+
+
index 6c94b914db834f1dcb5eb2d034591ed4ed19cb74..4b2e3a05e8c4ba89f6fc03919c831ba42f63eed5 100644 (file)
@@ -2,9 +2,9 @@
 import os
 import sys
 import re
-from gaworkflow.pipeline.configure_run import *
-from gaworkflow.pipeline import retrieve_config as _rc
-from gaworkflow.pipeline.run_status import startCmdLineStatusMonitor
+from htsworkflow.pipeline.configure_run import *
+from htsworkflow.pipeline import retrieve_config as _rc
+from htsworkflow.pipeline.run_status import startCmdLineStatusMonitor
 
 logging.basicConfig(level=logging.DEBUG,
                     format='%(asctime)s %(levelname)-8s %(message)s',
index 7025d57a72ceb85ba1b2b0a040abe34716056d91..9338b077fd53d549cddcecde549cb07d15de577f 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import sys
-from gaworkflow.automation.copier import main
+from htsworkflow.automation.copier import main
 
 if __name__ == "__main__":
   sys.exit(main(sys.argv[1:]))
index ece4bf3acbd8b71f62a3135bff602b48c888b2eb..787fd98c7246ab8bd0b3f34057b51907d99f6955 100755 (executable)
@@ -3,7 +3,7 @@ import optparse
 import os
 import sys
 
-from gaworkflow.pipeline.gerald import extract_eland_sequence
+from htsworkflow.pipeline.gerald import extract_eland_sequence
 
 def make_parser():
   usage = "usage: %prog [options] infile [outfile]"
index c49cc9c823d8483828e679e7b4aa8c54d0f02d99..7a726e72fe327cbc1777f3dc34ed1927a1c7c81e 100644 (file)
@@ -8,7 +8,7 @@ import optparse
 import sys
 import os
 
-from gaworkflow.util.makebed import make_bed_from_eland_stream, make_description
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
 
 def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
     """
index eeeed90e3f47e9f7b2fedafcfaef20a0a5d4f77d..35532f41903a678f51b9d456ba65d1069c10f994 100644 (file)
@@ -7,7 +7,7 @@ installed, so it can run on machines other than the webserver.
 from optparse import OptionParser
 import sys
 
-from gaworkflow.util import fctracker
+from htsworkflow.util import fctracker
 
 def make_parser():
     """
index a4a414b27d57608d89cc5dca4b9b93f629c2fb62..4878697b603970d0ab950d273493041884f82e7a 100755 (executable)
@@ -3,7 +3,7 @@ import optparse
 import sys
 import os
 
-from gaworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
 
 def make_parser():
   parser = optparse.OptionParser()
index c271970b3a4b5790cd2bdbb6bfdefb83e6fca743..7eb30bbf12b119f16354d6561a69da2a2354a1b8 100644 (file)
@@ -6,8 +6,8 @@ import os
 import subprocess
 import sys
 
-from gaworkflow.pipeline import gerald
-from gaworkflow.pipeline import runfolder
+from htsworkflow.pipeline import gerald
+from htsworkflow.pipeline import runfolder
 
 def make_query_filename(eland_obj, output_dir):
     query_name = '%s_%s_eland_query.txt' 
index b0ddd46de7ea721a005a47507ec5cfaf76a156e9..cdd2c5b9c46df2c461681139c8974da1b6937778 100644 (file)
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 import sys
-from gaworkflow.pipeline.retrieve_config import *
-from gaworkflow.pipeline import retrieve_config
-from gaworkflow.pipeline.genome_mapper import getAvailableGenomes
-from gaworkflow.pipeline.genome_mapper import constructMapperDict
+from htsworkflow.pipeline.retrieve_config import *
+from htsworkflow.pipeline import retrieve_config
+from htsworkflow.pipeline.genome_mapper import getAvailableGenomes
+from htsworkflow.pipeline.genome_mapper import constructMapperDict
 
 #Turn on built-in command-line parsing.
 retrieve_config.DISABLE_CMDLINE = False
index d54bf60f3ce9734fcfe5644b6b9e1201ca18e344..8b4444a2c7808d8a0a5dbff7eddca6376acf41be 100644 (file)
@@ -31,8 +31,8 @@ import logging
 import optparse
 import sys
 
-from gaworkflow.pipeline import runfolder
-from gaworkflow.pipeline.runfolder import ElementTree
+from htsworkflow.pipeline import runfolder
+from htsworkflow.pipeline.runfolder import ElementTree
         
 def make_parser():
     usage = 'usage: %prog [options] runfolder_root_dir'
index 6b6da11e1a85b5e55b89cd965471912ff694e05f..560299f3b7db31ca70b92a0089363408ae63b0f6 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import sys
-from gaworkflow.automation.runner import main
+from htsworkflow.automation.runner import main
 
 if __name__ == "__main__":
   sys.exit(main(sys.argv[1:]))
index 269292e606b36c2706ac33cdd867d15cb36f8ce9..b2f833ee45d7fd9802c8b8f72c30d719676002f9 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 import sys
-from gaworkflow.automation.spoolwatcher import main
+from htsworkflow.automation.spoolwatcher import main
 
 if __name__ == "__main__":
     sys.exit(main(sys.argv[1:]))
index f8617eaf3dda492073c3c6cb42013a053afca54a..0f11514c5cf00a607f82202a05405545508cdba3 100644 (file)
@@ -7,7 +7,7 @@ import os
 import subprocess
 import sys
 
-from gaworkflow.util import queuecommands
+from htsworkflow.util import queuecommands
 
 def make_commands(run_name, lanes, site_name, destdir):
   """
index 2229ed6b023ede2c73a331889cb79c8263311e12..0cc5fb50980a25db286f5a6e5858da00ad334679 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -1,15 +1,15 @@
 from setuptools import setup
 
 setup(
-  name="gaworkflow",
+  name="htsworkflow",
   description="some bots and other utilities to help deal with data from an illumina sequencer",
   author="Diane Trout & Brandon King",
   author_email="diane@caltech.edu",
-  packages=["gaworkflow", 
-            "gaworkflow.pipeline",
-            "gaworkflow.frontend",
-            "gaworkflow.frontend.fctracker",
-            "gaworkflow.frontend.eland_config"           
+  packages=["htsworkflow", 
+            "htsworkflow.pipeline",
+            "htsworkflow.frontend",
+            "htsworkflow.frontend.fctracker",
+            "htsworkflow.frontend.eland_config"           
              ],
   scripts=[
         'scripts/configure_pipeline',
index 910f3ea6c16cfe4d974d4177e441dc0cb262d4fb..9a1b73884af6bee909e2805eb5fb770ada0b1631 100644 (file)
@@ -1,7 +1,7 @@
 import unittest
 
 from StringIO import StringIO
-from gaworkflow.automation import copier
+from htsworkflow.automation import copier
 
 class testCopier(unittest.TestCase):
     def test_runfolder_validate(self):