+++ /dev/null
-import ConfigParser
-import copy
-import logging
-import logging.handlers
-import os
-import re
-import subprocess
-import sys
-import time
-import traceback
-
-from benderjab import bot, rpc
-
-def runfolder_validate(fname):
- """
- Return True if fname looks like a runfolder name
- """
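-    # Added note: names like "070924_USI-EAS44_0022_FC12150" (the example run
-    # directory used elsewhere in this tree) match this pattern; anything not
-    # starting with a six-digit date does not.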
- if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname):
- return True
- else:
- return False
-
-class rsync(object):
- def __init__(self, source, dest, pwfile):
- self.pwfile = os.path.expanduser(pwfile)
- self.cmd = ['/usr/bin/rsync', ]
- self.cmd.append('--password-file=%s' % (self.pwfile))
- self.source_base = source
- self.dest_base = dest
- self.processes = {}
- self.exit_code = None
-
- def list(self):
- """Get a directory listing"""
- args = copy.copy(self.cmd)
- args.append(self.source_base)
-
- logging.debug("Rsync cmd:" + " ".join(args))
- short_process = subprocess.Popen(args, stdout=subprocess.PIPE)
- return self.list_filter(short_process.stdout)
-
- def list_filter(self, lines):
- """
- parse rsync directory listing
- """
- dirs_to_copy = []
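-        # The rsync listing is parsed by fixed position: the first 42
-        # characters are split into permissions, size, date and time, and the
-        # filename is taken from character 43 onward; x[43:-1] also drops the
-        # trailing newline.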
- direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ]
- for permissions, size, filedate, filetime, filename in direntries:
- if permissions[0] == 'd':
-                # it's a directory, the first requirement for something we
-                # want to copy
-                if re.match("[0-9]{6}", filename):
-                    # it starts with what looks like a six-digit date,
-                    # which is good enough to treat it as a runfolder
- dirs_to_copy.append(filename)
- return dirs_to_copy
-
- def create_copy_process(self, dirname):
- args = copy.copy(self.cmd)
- # we want to copy everything
- args.append('-rlt')
- # from here
- args.append(os.path.join(self.source_base, dirname))
- # to here
- args.append(self.dest_base)
- logging.debug("Rsync cmd:" + " ".join(args))
- return subprocess.Popen(args)
-
- def copy(self):
- """
- copy any interesting looking directories over
- return list of items that we started copying.
- """
- # clean up any lingering non-running processes
- self.poll()
-
- # what's available to copy?
- dirs_to_copy = self.list()
-
- # lets start copying
- started = []
- for d in dirs_to_copy:
- process = self.processes.get(d, None)
-
- if process is None:
- # we don't have a process, so make one
- logging.info("rsyncing %s" % (d))
- self.processes[d] = self.create_copy_process(d)
- started.append(d)
- return started
-
- def poll(self):
- """
- check currently running processes to see if they're done
-
- return path roots that have finished.
- """
- for dir_key, proc_value in self.processes.items():
- retcode = proc_value.poll()
- if retcode is None:
- # process hasn't finished yet
- pass
- elif retcode == 0:
- logging.info("finished rsyncing %s, exitcode %d" %( dir_key, retcode))
- del self.processes[dir_key]
- else:
- logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode))
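-                # note: a failed transfer stays in self.processes, so copy()
-                # will not relaunch rsync for this directory until the bot
-                # is restarted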
-
- def __len__(self):
- """
- Return how many active rsync processes we currently have
-
- Call poll first to close finished processes.
- """
- return len(self.processes)
-
- def keys(self):
- """
- Return list of current run folder names
- """
- return self.processes.keys()
-
-class CopierBot(rpc.XmlRpcBot):
- def __init__(self, section=None, configfile=None):
- #if configfile is None:
- # configfile = '~/.gaworkflow'
-
- super(CopierBot, self).__init__(section, configfile)
-
- # options for rsync command
- self.cfg['rsync_password_file'] = None
- self.cfg['rsync_source'] = None
- self.cfg['rsync_destination'] = None
-
- # options for reporting we're done
- self.cfg['notify_users'] = None
- self.cfg['notify_runner'] = None
-
- self.pending = []
- self.rsync = None
- self.notify_users = None
- self.notify_runner = None
-
- self.register_function(self.startCopy)
- self.register_function(self.sequencingFinished)
- self.eventTasks.append(self.update)
-
- def read_config(self, section=None, configfile=None):
- """
- read the config file
- """
- super(CopierBot, self).read_config(section, configfile)
-
- password = self._check_required_option('rsync_password_file')
- source = self._check_required_option('rsync_source')
- destination = self._check_required_option('rsync_destination')
- self.rsync = rsync(source, destination, password)
-
- self.notify_users = self._parse_user_list(self.cfg['notify_users'])
- try:
- self.notify_runner = \
- self._parse_user_list(self.cfg['notify_runner'],
- require_resource=True)
- except bot.JIDMissingResource:
- msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.critical(msg)
- raise bot.JIDMissingResource(msg)
-
- def startCopy(self, *args):
- """
- start our copy
- """
- logging.info("starting copy scan")
- started = self.rsync.copy()
- logging.info("copying:" + " ".join(started)+".")
- return started
-
- def sequencingFinished(self, runDir, *args):
- """
-        The run finished; once we're done copying, pass the message on.
- """
- # close any open processes
- self.rsync.poll()
-
- # see if we're still copying
- if runfolder_validate(runDir):
-            logging.info("received sequencing finished for %s" % (runDir))
- self.pending.append(runDir)
- self.startCopy()
- return "PENDING"
- else:
- errmsg = "received bad runfolder name (%s)" % (runDir)
- logging.warning(errmsg)
- # maybe I should use a different error message
- raise RuntimeError(errmsg)
-
- def reportSequencingFinished(self, runDir):
- """
- Send the sequencingFinished message to the interested parties
- """
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, 'Sequencing run %s finished' % (runDir))
- if self.notify_runner is not None:
- for r in self.notify_runner:
- self.rpc_send(r, (runDir,), 'sequencingFinished')
-        logging.info("forwarding sequencingFinished message for %s" % (runDir))
-
- def update(self, *args):
- """
- Update our current status.
- Report if we've finished copying files.
- """
- self.rsync.poll()
-        # iterate over a copy so entries can be removed safely
-        for p in self.pending[:]:
-            if p not in self.rsync.keys():
-                self.reportSequencingFinished(p)
-                self.pending.remove(p)
-
- def _parser(self, msg, who):
- """
- Parse xmpp chat messages
- """
- help = u"I can [copy], or report current [status]"
- if re.match(u"help", msg):
- reply = help
- elif re.match("copy", msg):
- started = self.startCopy()
- reply = u"started copying " + ", ".join(started)
-        elif re.match(u"status", msg):
-            # poll first so finished copies are not counted (see __len__)
-            self.rsync.poll()
-            msg = [u"Currently %d rsync processes are running." % (len(self.rsync))]
- for d in self.rsync.keys():
- msg.append(u" " + d)
- reply = os.linesep.join(msg)
- else:
- reply = u"I didn't understand '%s'" % (unicode(msg))
- return reply
-
-def main(args=None):
- bot = CopierBot()
- bot.main(args)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/env python
-from glob import glob
-import logging
-import os
-import re
-import sys
-import time
-import threading
-
-from benderjab import rpc
-
-from gaworkflow.pipeline.configure_run import *
-
-#s_fc = re.compile('FC[0-9]+')
-s_fc = re.compile('_[0-9a-zA-Z]*$')
-
-
-def _get_flowcell_from_rundir(run_dir):
- """
- Returns flowcell string based on run_dir.
- Returns None and logs error if flowcell can't be found.
- """
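-    # Added illustration: with the run-directory naming used elsewhere in
-    # this tree, e.g. ".../070924_USI-EAS44_0022_FC12150", the trailing
-    # "_FC12150" matches s_fc and "FC12150" is returned.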
- junk, dirname = os.path.split(run_dir)
- mo = s_fc.search(dirname)
- if not mo:
-        logging.error('Unable to determine flowcell from run_dir: %s' % (run_dir))
- return None
-
- return dirname[mo.start()+1:]
-
-
-
-class Runner(rpc.XmlRpcBot):
- """
- Manage running pipeline jobs.
- """
- def __init__(self, section=None, configfile=None):
- #if configfile is None:
- # self.configfile = "~/.gaworkflow"
- super(Runner, self).__init__(section, configfile)
-
- self.cfg['notify_users'] = None
- self.cfg['genome_dir'] = None
- self.cfg['base_analysis_dir'] = None
-
-        self.cfg['notify_postanalysis'] = None
-
- self.conf_info_dict = {}
-
- self.register_function(self.sequencingFinished)
- #self.eventTasks.append(self.update)
-
-
- def read_config(self, section=None, configfile=None):
- super(Runner, self).read_config(section, configfile)
-
- self.genome_dir = self._check_required_option('genome_dir')
- self.base_analysis_dir = self._check_required_option('base_analysis_dir')
-
- self.notify_users = self._parse_user_list(self.cfg['notify_users'])
- #FIXME: process notify_postpipeline cfg
-
-
- def _parser(self, msg, who):
- """
- Parse xmpp chat messages
- """
- help = u"I can send [start] a run, or report [status]"
- if re.match(u"help", msg):
- reply = help
- elif re.match("status", msg):
- words = msg.split()
- if len(words) == 2:
- reply = self.getStatusReport(words[1])
- else:
- reply = u"Status available for: %s" \
- % (', '.join([k for k in self.conf_info_dict.keys()]))
- elif re.match(u"start", msg):
- words = msg.split()
- if len(words) == 2:
- self.sequencingFinished(words[1])
- reply = u"starting run for %s" % (words[1])
- else:
- reply = u"need runfolder name"
- else:
- reply = u"I didn't understand '%s'" %(msg)
-
- logging.debug("reply: " + str(reply))
- return reply
-
-
- def getStatusReport(self, fc_num):
- """
- Returns text status report for flow cell number
- """
- if fc_num not in self.conf_info_dict:
- return "No record of a %s run." % (fc_num)
-
- status = self.conf_info_dict[fc_num].status
-
- if status is None:
- return "No status information for %s yet." \
- " Probably still in configure step. Try again later." % (fc_num)
-
- output = status.statusReport()
-
- return '\n'.join(output)
-
-
- def sequencingFinished(self, run_dir):
- """
-        Sequencing (and copying) is finished; time to start the pipeline.
- """
- logging.debug("received sequencing finished message")
-
- # Setup config info object
- ci = ConfigInfo()
- ci.base_analysis_dir = self.base_analysis_dir
- ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir)
-
- # get flowcell from run_dir name
- flowcell = _get_flowcell_from_rundir(run_dir)
-
- # Store ci object in dictionary
- self.conf_info_dict[flowcell] = ci
-
-
-        # Launch the job in its own thread and return.
- self.launchJob(run_dir, flowcell, ci)
- return "started"
-
-
- def pipelineFinished(self, run_dir):
- # need to strip off self.watch_dir from rundir I suspect.
- logging.info("pipeline finished in" + str(run_dir))
- #pattern = self.watch_dir
- #if pattern[-1] != os.path.sep:
- # pattern += os.path.sep
- #stripped_run_dir = re.sub(pattern, "", run_dir)
- #logging.debug("stripped to " + stripped_run_dir)
-
- # Notify each user that the run has finished.
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, 'Pipeline run %s finished' % (run_dir))
-
- #if self.notify_runner is not None:
- # for r in self.notify_runner:
- # self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-
- def reportMsg(self, msg):
-
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, msg)
-
-
- def _runner(self, run_dir, flowcell, conf_info):
-
- # retrieve config step
- cfg_filepath = os.path.join(conf_info.analysis_dir,
- 'config32auto.txt')
- status_retrieve_cfg = retrieve_config(conf_info,
- flowcell,
- cfg_filepath,
- self.genome_dir)
- if status_retrieve_cfg:
- logging.info("Runner: Retrieve config: success")
- self.reportMsg("Retrieve config (%s): success" % (run_dir))
- else:
- logging.error("Runner: Retrieve config: failed")
- self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
-
-
- # configure step
- if status_retrieve_cfg:
- status = configure(conf_info)
- if status:
- logging.info("Runner: Configure: success")
- self.reportMsg("Configure (%s): success" % (run_dir))
- self.reportMsg(
- os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
- )
- else:
- logging.error("Runner: Configure: failed")
- self.reportMsg("Configure (%s): FAILED" % (run_dir))
-
- #if successful, continue
- if status:
- # Setup status cmdline status monitor
- #startCmdLineStatusMonitor(ci)
-
- # running step
- print 'Running pipeline now!'
- run_status = run_pipeline(conf_info)
- if run_status is True:
- logging.info('Runner: Pipeline: success')
- self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
- else:
-                logging.error('Runner: Pipeline: failed')
- self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))
-
-
- def launchJob(self, run_dir, flowcell, conf_info):
- """
- Starts up a thread for running the pipeline
- """
- t = threading.Thread(target=self._runner,
- args=[run_dir, flowcell, conf_info])
- t.setDaemon(True)
- t.start()
-
-
-
-def main(args=None):
- bot = Runner()
- return bot.main(args)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/env python
-import logging
-import os
-import re
-import sys
-import time
-#import glob
-
-from gaworkflow.util import mount
-
-# this uses pyinotify
-import pyinotify
-from pyinotify import EventsCodes
-
-from benderjab import bot, rpc
-
-
-class WatcherEvents(object):
-    # Two events need to be tracked:
-    # one to send startCopy,
-    # one to report that the run is broken.
-    # The "broken" notification needs to stop once we've seen
-    # enough cycles, and should be tracked per runfolder
-    # (by reading the xml files).
- def __init__(self):
- pass
-
-
-class Handler(pyinotify.ProcessEvent):
- def __init__(self, watchmanager, bot):
- self.last_event_time = None
- self.watchmanager = watchmanager
- self.bot = bot
-
- def process_IN_CREATE(self, event):
- self.last_event_time = time.time()
- msg = "Create: %s" % os.path.join(event.path, event.name)
- if event.name.lower() == "run.completed":
- try:
- self.bot.sequencingFinished(event.path)
-            except IOError, e:
-                logging.error("Couldn't send sequencingFinished: %s" % (e,))
- logging.debug(msg)
-
- def process_IN_DELETE(self, event):
- logging.debug("Remove: %s" % os.path.join(event.path, event.name))
-
- def process_IN_UNMOUNT(self, event):
- pathname = os.path.join(event.path, event.name)
- logging.debug("IN_UNMOUNT: %s" % (pathname,))
- self.bot.unmount_watch()
-
-class SpoolWatcher(rpc.XmlRpcBot):
-    """
-    Watch a directory and send a message when another process is done writing.
-
-    This monitors a directory tree using inotify (Linux specific) and,
-    after some files have been written, sends a message once <write_timeout>
-    seconds pass with no further writes.
-
-    (Basically, when the Solexa machine finishes dumping a round of data,
-    this will hopefully send out a message saying new data is available.)
-    """
- # these params need to be in the config file
- # I wonder where I should put the documentation
- #:Parameters:
- # `watchdir` - which directory tree to monitor for modifications
- # `profile` - specify which .gaworkflow profile to use
- # `write_timeout` - how many seconds to wait for writes to finish to
- # the spool
- # `notify_timeout` - how often to timeout from notify
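-    # A rough sketch of such a config section (illustrative only; the section
-    # name, option spellings, and list separators are assumptions, not taken
-    # from benderjab):
-    #
-    #   [spoolwatcher]
-    #   watchdir = /solexa/spool
-    #   write_timeout = 10
-    #   notify_users = alice@example.com
-    #   notify_runner = runner@example.com/bot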
-
- def __init__(self, section=None, configfile=None):
- #if configfile is None:
- # self.configfile = "~/.gaworkflow"
- super(SpoolWatcher, self).__init__(section, configfile)
-
- self.cfg['watchdir'] = None
- self.cfg['write_timeout'] = 10
- self.cfg['notify_users'] = None
- self.cfg['notify_runner'] = None
-
- self.notify_timeout = 0.001
- self.wm = pyinotify.WatchManager()
- self.handler = Handler(self.wm, self)
- self.notifier = pyinotify.Notifier(self.wm, self.handler)
- self.wdd = None
- self.mount_point = None
- self.mounted = True
-
- self.notify_users = None
- self.notify_runner = None
-
- self.eventTasks.append(self.process_notify)
-
- def read_config(self, section=None, configfile=None):
- super(SpoolWatcher, self).read_config(section, configfile)
-
- self.watch_dir = self._check_required_option('watchdir')
- self.write_timeout = int(self.cfg['write_timeout'])
-
- self.notify_users = self._parse_user_list(self.cfg['notify_users'])
- try:
- self.notify_runner = \
- self._parse_user_list(self.cfg['notify_runner'],
- require_resource=True)
- except bot.JIDMissingResource:
- msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.critical(msg)
- raise bot.JIDMissingResource(msg)
-
- def add_watch(self, watchdir=None):
- """
- start watching watchdir or self.watch_dir
- we're currently limited to watching one directory tree.
- """
- # the one tree limit is mostly because self.wdd is a single item
- # but managing it as a list might be a bit more annoying
- if watchdir is None:
- watchdir = self.watch_dir
- logging.info("Watching:"+str(watchdir))
-
- self.mount_point = mount.find_mount_point_for(watchdir)
-
- mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
- # rec traverses the tree and adds all the directories that are there
- # at the start.
- # auto_add will add in new directories as they are created
- self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
-
- def unmount_watch(self):
- if self.wdd is not None:
- self.wm.rm_watch(self.wdd.values())
- self.wdd = None
- self.mounted = False
-
- def process_notify(self, *args):
- # process the queue of events as explained above
- self.notifier.process_events()
-        # check_events waits up to notify_timeout for new events
-        if self.notifier.check_events(self.notify_timeout):
-            # read notified events and enqueue them
- self.notifier.read_events()
- # should we do something?
- # has something happened?
- last_event_time = self.handler.last_event_time
- if last_event_time is not None:
- time_delta = time.time() - last_event_time
- if time_delta > self.write_timeout:
- self.startCopy()
- self.handler.last_event_time = None
- # handle unmounted filesystems
- if not self.mounted:
- if mount.is_mounted(self.mount_point):
- # we've been remounted. Huzzah!
- # restart the watch
- self.add_watch()
- self.mounted = True
- logging.info(
- "%s was remounted, restarting watch" % \
- (self.mount_point)
- )
-
- def _parser(self, msg, who):
- """
- Parse xmpp chat messages
- """
-        help = u"I can send a [copy] message, or a sequencer [finished] message"
- if re.match(u"help", msg):
- reply = help
- elif re.match("copy", msg):
- self.startCopy()
- reply = u"sent copy message"
- elif re.match(u"finished", msg):
- words = msg.split()
- if len(words) == 2:
- self.sequencingFinished(words[1])
- reply = u"sending sequencing finished for %s" % (words[1])
- else:
- reply = u"need runfolder name"
- else:
- reply = u"I didn't understand '%s'" %(msg)
- return reply
-
- def start(self, daemonize):
- """
- Start application
- """
- self.add_watch()
- super(SpoolWatcher, self).start(daemonize)
-
- def stop(self):
- """
- shutdown application
- """
- # destroy the inotify's instance on this interrupt (stop monitoring)
- self.notifier.stop()
- super(SpoolWatcher, self).stop()
-
- def startCopy(self):
- logging.debug("writes seem to have stopped")
- if self.notify_runner is not None:
- for r in self.notify_runner:
- self.rpc_send(r, tuple(), 'startCopy')
-
- def sequencingFinished(self, run_dir):
- # need to strip off self.watch_dir from rundir I suspect.
- logging.info("run.completed in " + str(run_dir))
- pattern = self.watch_dir
- if pattern[-1] != os.path.sep:
- pattern += os.path.sep
- stripped_run_dir = re.sub(pattern, "", run_dir)
- logging.debug("stripped to " + stripped_run_dir)
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
- if self.notify_runner is not None:
- for r in self.notify_runner:
- self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-
-def main(args=None):
- bot = SpoolWatcher()
- return bot.main(args)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-from django import newforms as forms
-from django.newforms.util import ErrorList
-
-
-SPECIES_LIST = [#('--choose--', '--Choose--'),
- ('hg18', 'Homo sapiens (Hg18)'),
- ('Mm8', 'Mus musculus (Mm8)'),
-                ('arabv6', 'Arabidopsis thaliana v6'),
- ('other', 'Other species (Include in description)')]
-
-
-class DivErrorList(ErrorList):
- def __unicode__(self):
- return self.as_divs()
-
- def as_divs(self):
- if not self: return u''
- return u'<div class="errorlist">%s</div>' % (''.join([u'<div class="error">%s</div>' % e for e in self]))
-
-
-
-class ConfigForm(forms.Form):
-
- flow_cell_number = forms.CharField(min_length=2)
- run_date = forms.DateTimeField()
- advanced_run = forms.BooleanField(required=False)
- read_length = forms.IntegerField(min_value=1, initial=32)
- #eland_repeat = forms.BooleanField()
-
- #needs a for loop or something to allow for n configurations
- #analysis_type = forms.ChoiceField(choices=[('eland','eland')])
- lane1_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane1_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- lane2_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane2_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- lane3_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane3_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- lane4_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane4_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- lane5_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane5_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- lane6_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane6_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- lane7_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane7_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- lane8_species = forms.ChoiceField(choices=SPECIES_LIST)
- lane8_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
-
- notes = forms.CharField(widget=forms.Textarea(attrs={'cols':'70'}), required=False)
-
- #lane_specific_read_length = forms.IntegerField(min_value=1)
-
- #eland_genome_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
- # ('lane2','2'),
- # ('lane3','3'),
- # ('lane4','4'),
- # ('lane5','5'),
- # ('lane6','6'),
- # ('lane7','7'),
- # ('lane8','8') ])
-
- #eland_genome = forms.ChoiceField(choices=)
-
- #use_bases_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
- # ('lane2','2'),
- # ('lane3','3'),
- # ('lane4','4'),
- # ('lane5','5'),
- # ('lane6','6'),
- # ('lane7','7'),
- # ('lane8','8') ])
-
- #use_bases_mask = forms.CharField()
-
- #sequence_format = forms.ChoiceField(choices=[('scarf', 'scarf')])
-
-
-
- #subject = forms.CharField(max_length=100)
- #message = forms.CharField()
- #sender = forms.EmailField()
- #cc_myself = forms.BooleanField()
-
- def as_custom(self):
- """
- Displays customized html output
- """
- html = []
-
- fcn = self['flow_cell_number']
-
- html.append(fcn.label_tag() + ': ' + str(fcn) + str(fcn.errors) + '<br />')
-
- run_date = self['run_date']
- html.append(run_date.label_tag() + ': ' + str(run_date) + str(run_date.errors) + '<br />')
-
- arun = self['advanced_run']
- html.append(arun.label_tag() + ': ' + str(arun) + str(arun.errors) + '<br />')
-
- rl = self['read_length']
- html.append(rl.label_tag() + ': ' + str(rl) + str(rl.errors) + '<br /><br />')
-
- html.append('<table border="0">')
- html.append(' <tr><td>%s</td><td>%s</td><td>%s</td></tr>' \
- % ('Lane', 'Species', 'Description'))
-
- l1s = self['lane1_species']
- l1d = self['lane1_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('1', str(l1s), str(l1s.errors), str(l1d), str(l1d.errors)))
-
- l2s = self['lane2_species']
- l2d = self['lane2_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('2', str(l2s), str(l2s.errors), str(l2d), str(l2d.errors)))
-
- l3s = self['lane3_species']
- l3d = self['lane3_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('3', str(l3s), str(l3s.errors), str(l3d), str(l3d.errors)))
-
- l4s = self['lane4_species']
- l4d = self['lane4_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('4', str(l4s), str(l4s.errors), str(l4d), str(l4d.errors)))
-
- l5s = self['lane5_species']
- l5d = self['lane5_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('5', str(l5s), str(l5s.errors), str(l5d), str(l5d.errors)))
-
- l6s = self['lane6_species']
- l6d = self['lane6_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('6', str(l6s), str(l6s.errors), str(l6d), str(l6d.errors)))
-
- l7s = self['lane7_species']
- l7d = self['lane7_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('7', str(l7s), str(l7s.errors), str(l7d), str(l7d.errors)))
-
- l8s = self['lane8_species']
- l8d = self['lane8_description']
- html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
- % ('8', str(l8s), str(l8s.errors), str(l8d), str(l8d.errors)))
-
- html.append('</table><br />')
-
- notes = self['notes']
- html.append('<p>Notes:</p>')
- html.append(' %s<br />' % (str(notes)))
-
- return '\n'.join(html)
-
-
-
\ No newline at end of file
+++ /dev/null
-from django.db import models
-
-# Create your models here.
+++ /dev/null
-from django.conf.urls.defaults import *
-
-urlpatterns = patterns('',
- # Example:
-
- (r'^(?P<flowcell>\w+)/$', 'gaworkflow.frontend.eland_config.views.config'),
- (r'^$', 'gaworkflow.frontend.eland_config.views.config'),
- #(r'^$', 'gaworkflow.frontend.eland_config.views.index')
-
-)
+++ /dev/null
-from django.http import HttpResponse
-from django.shortcuts import render_to_response
-from django.core.exceptions import ObjectDoesNotExist
-
-from gaworkflow.frontend.eland_config import forms
-from gaworkflow.frontend import settings
-from gaworkflow.frontend.fctracker import models
-
-import os
-import glob
-# Create your views here.
-
-
-def _validate_input(data):
- #if data.find('..') == -1 or data.find('/') == -1 or data.find('\\') == -1:
- return data.replace('..', '').replace('/', '_').replace('\\', '_')
-
-#def contact(request):
-# if request.method == 'POST':
-# form = ContactForm(request.POST)
-# if form.is_valid():
-# # Do form processing here...
-# return HttpResponseRedirect('/url/on_success/')
-# else:
-# form = ContactForm()
-# return
-
-
-
-#def _saveConfigFile(form):
-# """
-# Given a valid form, save eland config to file based on flowcell number.
-# """
-# assert form.is_valid()
-#
-# clean_data = form.cleaned_data
-# flowcell = clean_data['flow_cell_number'].replace('/','_').replace('..', '__')
-#
-# file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
-#
-# f = open(file_path, 'w')
-# cfg = generateElandConfig(form)
-# f.write(cfg)
-# f.close()
-#
-#
-#def _saveToDb(form):
-# """
-# Save info to the database.
-# """
-# clean_data = form.cleaned_data
-#
-# fc_id = clean_data['flow_cell_number']
-#
-# try:
-# fc = models.FlowCell.objects.get(flowcell_id=fc_id)
-# except models.FlowCell.DoesNotExist:
-# fc = models.FlowCell()
-#
-# fc.flowcell_id = fc_id
-# fc.run_date = clean_data['run_date']
-#
-# #LANE 1
-# fc.lane1_sample = clean_data['lane1_description']
-# species_name = clean_data['lane1_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane1_species = specie
-#
-# #LANE 2
-# fc.lane2_sample = clean_data['lane2_description']
-# species_name = clean_data['lane2_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane2_species = specie
-#
-# #LANE 3
-# fc.lane3_sample = clean_data['lane3_description']
-# species_name = clean_data['lane3_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane3_species = specie
-#
-# #LANE 4
-# fc.lane4_sample = clean_data['lane4_description']
-# species_name = clean_data['lane4_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane4_species = specie
-#
-# #LANE 5
-# fc.lane5_sample = clean_data['lane5_description']
-# species_name = clean_data['lane5_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane5_species = specie
-#
-# #LANE 6
-# fc.lane6_sample = clean_data['lane6_description']
-# species_name = clean_data['lane6_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane6_species = specie
-#
-# #LANE 7
-# fc.lane7_sample = clean_data['lane7_description']
-# species_name = clean_data['lane7_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane7_species = specie
-#
-# #LANE 8
-# fc.lane8_sample = clean_data['lane8_description']
-# species_name = clean_data['lane8_species']
-# try:
-# specie = models.Specie.objects.get(scientific_name=species_name)
-# except models.Specie.DoesNotExist:
-# specie = models.Specie(scientific_name=species_name)
-# specie.save()
-# fc.lane8_species = specie
-#
-# fc.notes = clean_data['notes']
-#
-# fc.save()
-#
-# return fc
-#
-#
-#def generateElandConfig(form):
-# data = []
-#
-# form = form.cleaned_data
-#
-# BASE_DIR = '/data-store01/compbio/genomes'
-#
-# data.append("# FLOWCELL: %s" % (form['flow_cell_number']))
-# data.append("#")
-#
-# notes = form['notes'].replace('\r\n', '\n').replace('\r', '\n')
-# notes = notes.replace('\n', '\n# ')
-# data.append("# NOTES:")
-# data.append("# %s\n#" % (notes))
-#
-# #Convert all newline conventions to unix style
-# l1d = form['lane1_description'].replace('\r\n', '\n').replace('\r', '\n')
-# l2d = form['lane2_description'].replace('\r\n', '\n').replace('\r', '\n')
-# l3d = form['lane3_description'].replace('\r\n', '\n').replace('\r', '\n')
-# l4d = form['lane4_description'].replace('\r\n', '\n').replace('\r', '\n')
-# l5d = form['lane5_description'].replace('\r\n', '\n').replace('\r', '\n')
-# l6d = form['lane6_description'].replace('\r\n', '\n').replace('\r', '\n')
-# l7d = form['lane7_description'].replace('\r\n', '\n').replace('\r', '\n')
-# l8d = form['lane8_description'].replace('\r\n', '\n').replace('\r', '\n')
-#
-# # Turn new lines into indented commented newlines
-# l1d = l1d.replace('\n', '\n# ')
-# l2d = l2d.replace('\n', '\n# ')
-# l3d = l3d.replace('\n', '\n# ')
-# l4d = l4d.replace('\n', '\n# ')
-# l5d = l5d.replace('\n', '\n# ')
-# l6d = l6d.replace('\n', '\n# ')
-# l7d = l7d.replace('\n', '\n# ')
-# l8d = l8d.replace('\n', '\n# ')
-#
-# data.append("# Lane1: %s" % (l1d))
-# data.append("# Lane2: %s" % (l2d))
-# data.append("# Lane3: %s" % (l3d))
-# data.append("# Lane4: %s" % (l4d))
-# data.append("# Lane5: %s" % (l5d))
-# data.append("# Lane6: %s" % (l6d))
-# data.append("# Lane7: %s" % (l7d))
-# data.append("# Lane8: %s" % (l8d))
-#
-# #data.append("GENOME_DIR %s" % (BASE_DIR))
-# #data.append("CONTAM_DIR %s" % (BASE_DIR))
-# read_length = form['read_length']
-# data.append("READ_LENGTH %d" % (read_length))
-# #data.append("ELAND_REPEAT")
-# data.append("ELAND_MULTIPLE_INSTANCES 8")
-#
-# #Construct genome dictionary to figure out what lanes to put
-# # in the config file.
-# genome_dict = {}
-# l1s = form['lane1_species']
-# genome_dict.setdefault(l1s, []).append('1')
-# l2s = form['lane2_species']
-# genome_dict.setdefault(l2s, []).append('2')
-# l3s = form['lane3_species']
-# genome_dict.setdefault(l3s, []).append('3')
-# l4s = form['lane4_species']
-# genome_dict.setdefault(l4s, []).append('4')
-# l5s = form['lane5_species']
-# genome_dict.setdefault(l5s, []).append('5')
-# l6s = form['lane6_species']
-# genome_dict.setdefault(l6s, []).append('6')
-# l7s = form['lane7_species']
-# genome_dict.setdefault(l7s, []).append('7')
-# l8s = form['lane8_species']
-# genome_dict.setdefault(l8s, []).append('8')
-#
-# genome_list = genome_dict.keys()
-# genome_list.sort()
-#
-# #Loop through and create entries for each species.
-# for genome in genome_list:
-# lanes = ''.join(genome_dict[genome])
-# data.append('%s:ANALYSIS eland' % (lanes))
-# data.append('%s:READ_LENGTH %s' % (lanes, read_length))
-# data.append('%s:ELAND_GENOME %s' % (lanes, os.path.join(BASE_DIR, genome)))
-# data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
-#
-# data.append('SEQUENCE_FORMAT --scarf')
-#
-# return '\n'.join(data)
-
-
-def getElandConfig(flowcell, regenerate=False):
-
- file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
-
-    # If we are regenerating the config file, skip reading the existing
-    # file. If the file doesn't exist, try to generate it from the DB.
- if not regenerate and os.path.isfile(file_path):
- f = open(file_path, 'r')
- data = f.read()
- f.close()
- return data
-
- try:
- fcObj = models.FlowCell.objects.get(flowcell_id__iexact=flowcell)
- except ObjectDoesNotExist:
- return None
-
- data = []
-
- #form = form.cleaned_data
-
- BASE_DIR = '/data-store01/compbio/genomes'
-
- data.append("# FLOWCELL: %s" % (fcObj.flowcell_id))
- data.append("#")
-
- notes = fcObj.notes.replace('\r\n', '\n').replace('\r', '\n')
- notes = notes.replace('\n', '\n# ')
- data.append("# NOTES:")
- data.append("# %s\n#" % (notes))
-
- #Convert all newline conventions to unix style
- l1d = str(fcObj.lane_1_library.library_id) + '|' \
- + fcObj.lane_1_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
- l2d = str(fcObj.lane_2_library.library_id) + '|' \
- + fcObj.lane_2_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
- l3d = str(fcObj.lane_3_library.library_id) + '|' \
- + fcObj.lane_3_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
- l4d = str(fcObj.lane_4_library.library_id) + '|' \
- + fcObj.lane_4_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-
- l5d = str(fcObj.lane_5_library.library_id) + '|' \
- + fcObj.lane_5_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
- l6d = str(fcObj.lane_6_library.library_id) + '|' \
- + fcObj.lane_6_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
- l7d = str(fcObj.lane_7_library.library_id) + '|' \
- + fcObj.lane_7_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
- l8d = str(fcObj.lane_8_library.library_id) + '|' \
- + fcObj.lane_8_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
-
- # Turn new lines into indented commented newlines
- l1d = l1d.replace('\n', '\n# ')
- l2d = l2d.replace('\n', '\n# ')
- l3d = l3d.replace('\n', '\n# ')
- l4d = l4d.replace('\n', '\n# ')
- l5d = l5d.replace('\n', '\n# ')
- l6d = l6d.replace('\n', '\n# ')
- l7d = l7d.replace('\n', '\n# ')
- l8d = l8d.replace('\n', '\n# ')
-
- data.append("# Lane1: %s" % (l1d))
- data.append("# Lane2: %s" % (l2d))
- data.append("# Lane3: %s" % (l3d))
- data.append("# Lane4: %s" % (l4d))
- data.append("# Lane5: %s" % (l5d))
- data.append("# Lane6: %s" % (l6d))
- data.append("# Lane7: %s" % (l7d))
- data.append("# Lane8: %s" % (l8d))
-
- #data.append("GENOME_DIR %s" % (BASE_DIR))
- #data.append("CONTAM_DIR %s" % (BASE_DIR))
- read_length = fcObj.read_length
- data.append("READ_LENGTH %d" % (read_length))
- #data.append("ELAND_REPEAT")
- data.append("ELAND_MULTIPLE_INSTANCES 8")
-
- #Construct genome dictionary to figure out what lanes to put
- # in the config file.
- genome_dict = {}
-
- #l1s = form['lane1_species']
- l1s = fcObj.lane_1_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_1_library.library_species.use_genome_build
- genome_dict.setdefault(l1s, []).append('1')
- l2s = fcObj.lane_2_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_2_library.library_species.use_genome_build
- genome_dict.setdefault(l2s, []).append('2')
- l3s = fcObj.lane_3_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_3_library.library_species.use_genome_build
- genome_dict.setdefault(l3s, []).append('3')
- l4s = fcObj.lane_4_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_4_library.library_species.use_genome_build
- genome_dict.setdefault(l4s, []).append('4')
- l5s = fcObj.lane_5_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_5_library.library_species.use_genome_build
- genome_dict.setdefault(l5s, []).append('5')
- l6s = fcObj.lane_6_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_6_library.library_species.use_genome_build
- genome_dict.setdefault(l6s, []).append('6')
- l7s = fcObj.lane_7_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_7_library.library_species.use_genome_build
- genome_dict.setdefault(l7s, []).append('7')
- l8s = fcObj.lane_8_library.library_species.scientific_name #+ '|' + \
- #fcObj.lane_8_library.library_species.use_genome_build
- genome_dict.setdefault(l8s, []).append('8')
-
- genome_list = genome_dict.keys()
- genome_list.sort()
-
- #Loop through and create entries for each species.
- for genome in genome_list:
- lanes = ''.join(genome_dict[genome])
- data.append('%s:ANALYSIS eland' % (lanes))
- data.append('%s:READ_LENGTH %s' % (lanes, read_length))
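-        # '%%(%s)s' leaves a literal "%(<species name>)s" placeholder in the
-        # config, presumably so the genome path can be filled in later (the
-        # pipeline side imports constructMapperDict from genome_mapper)
-        # instead of hard-coding a BASE_DIR path here.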
- data.append('%s:ELAND_GENOME %s' % (lanes, '%%(%s)s' % (genome)))
- data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
-
- data.append('SEQUENCE_FORMAT --scarf')
-
- data = '\n'.join(data)
-
- f = open(file_path, 'w')
- f.write(data)
- f.close()
-
- return data
-
-
-
-def config(request, flowcell=None):
- """
- Returns eland config file for a given flowcell number,
- or returns a list of available flowcell numbers.
- """
-
- # Provide INDEX of available Flowcell config files.
- if flowcell is None:
- #Find all FC* config files and report an index html file
- #fc_list = [ os.path.split(file_path)[1] for file_path in glob.glob(os.path.join(settings.UPLOADTO_CONFIG_FILE, 'FC*')) ]
- fc_list = [ fc.flowcell_id for fc in models.FlowCell.objects.all() ]
-
- #Convert FC* list to html links
- fc_html = [ '<a href="/eland_config/%s/">%s</a>' % (fc_name, fc_name) for fc_name in fc_list ]
-
- return HttpResponse('<br />'.join(fc_html))
-
- #FIXME: Should validate flowcell input before using.
- flowcell = _validate_input(flowcell)
- cfg = getElandConfig(flowcell, regenerate=True)
-
- if not cfg:
- return HttpResponse("Hmm, config file for %s does not seem to exist." % (flowcell))
-
-
- return HttpResponse(cfg, mimetype="text/plain")
-
-
-
-
-#def index(request):
-# """
-# Return a form for filling out information about the flowcell
-# """
-# if request.method == 'POST':
-# form = forms.ConfigForm(request.POST, error_class=forms.DivErrorList)
-# if form.is_valid():
-# #cfg = generateElandConfig(form)
-# _saveConfigFile(form)
-# _saveToDb(form)
-# return HttpResponse("Eland Config Saved!", mimetype="text/plain")
-# else:
-# return render_to_response('config_form.html', {'form': form })
-#
-# else:
-# fm = forms.ConfigForm(error_class=forms.DivErrorList)
-# return render_to_response('config_form.html', {'form': fm })
+++ /dev/null
-from django.db import models
-from django.contrib.auth.models import User
-from gaworkflow.frontend import settings
-
-# Create your models here.
-
-class Antibody(models.Model):
- antigene = models.CharField(max_length=500, db_index=True)
- catalog = models.CharField(max_length=50, unique=True, db_index=True)
- antibodies = models.CharField(max_length=500, db_index=True)
- source = models.CharField(max_length=500, blank=True, db_index=True)
- biology = models.TextField(blank=True)
- notes = models.TextField(blank=True)
- def __str__(self):
- return '%s - %s (%s)' % (self.antigene, self.antibodies, self.catalog)
- class Meta:
- verbose_name_plural = "antibodies"
- ordering = ["antigene"]
- class Admin:
- list_display = ('antigene','antibodies','catalog','source','biology','notes')
- list_filter = ('antibodies','source')
- fields = (
- (None, {
- 'fields': (('antigene','antibodies'),('catalog','source'),('biology'),('notes'))
- }),
- )
-
-class Cellline(models.Model):
- cellline_name = models.CharField(max_length=100, unique=True, db_index=True)
- notes = models.TextField(blank=True)
- def __str__(self):
- return '%s' % (self.cellline_name)
-
- class Meta:
- ordering = ["cellline_name"]
-
- class Admin:
- fields = (
- (None, {
- 'fields': (('cellline_name'),('notes'),)
- }),
- )
-
-class Condition(models.Model):
- condition_name = models.CharField(max_length=2000, unique=True, db_index=True)
- notes = models.TextField(blank=True)
- def __str__(self):
- return '%s' % (self.condition_name)
-
- class Meta:
- ordering = ["condition_name"]
-
- class Admin:
- fields = (
- (None, {
- 'fields': (('condition_name'),('notes'),)
- }),
- )
-
-class Species(models.Model):
-
- scientific_name = models.CharField(max_length=256, unique=False, db_index=True, core=True)
- common_name = models.CharField(max_length=256, blank=True)
- use_genome_build = models.CharField(max_length=100, blank=False, null=False)
-
- def __str__(self):
- return '%s (%s)|%s' % (self.scientific_name, self.common_name, self.use_genome_build)
-
- class Meta:
- verbose_name_plural = "species"
- ordering = ["scientific_name"]
-
- class Admin:
- fields = (
- (None, {
- 'fields': (('scientific_name', 'common_name'), ('use_genome_build'))
- }),
- )
-
-class Lab(models.Model):
-
- name = models.CharField(max_length=100, blank=False, unique=True)
-
- def __str__(self):
- return self.name
-
- class Admin:
- pass
-
-class UserProfile(models.Model):
-
- # This allows you to use user.get_profile() to get this object
- user = models.ForeignKey(User, unique=True)
-
- lab = models.ForeignKey(Lab)
- #email = models.CharField(max_length=50, blank=True, null=True)
-
- def __str__(self):
- return '%s (%s lab)' % (self.user, self.lab)
-
- class Meta:
- #verbose_name_plural = "people"
- #ordering = ["lab"]
- pass
-
- class Admin:
- #fields = (
- # (None, {
- # 'fields': (('email', 'lab'), ('email'))
- # }),
- #)
- pass
-
-
-class Library(models.Model):
-
- library_id = models.CharField(max_length=30, primary_key=True, db_index=True, core=True)
- library_name = models.CharField(max_length=100, unique=True, core=True)
- library_species = models.ForeignKey(Species, core=True)
- cell_line = models.ForeignKey(Cellline,core=True)
- condition = models.ForeignKey(Condition,core=True)
- antibody = models.ForeignKey(Antibody,blank=True,null=True,core=True)
-
- EXPERIMENT_TYPES = (
- ('INPUT_RXLCh','INPUT_RXLCh'),
- ('ChIP-seq', 'ChIP-seq'),
- ('Sheared', 'Sheared'),
- ('RNA-seq', 'RNA-seq'),
- ('Methyl-seq', 'Methyl-seq'),
- ('DIP-seq', 'DIP-seq'),
- )
- experiment_type = models.CharField(max_length=50, choices=EXPERIMENT_TYPES,
- default='RNA-seq')
-
- creation_date = models.DateField(blank=True, null=True)
- made_for = models.ForeignKey(User)
- made_by = models.CharField(max_length=50, blank=True, default="Lorian")
-
- PROTOCOL_END_POINTS = (
- ('?', 'Unknown'),
- ('Sample', 'Raw sample'),
- ('Progress', 'In progress'),
- ('1A', 'Ligation, then gel'),
- ('PCR', 'Ligation, then PCR'),
- ('1Ab', 'Ligation, PCR, then gel'),
- ('1Aa', 'Ligation, gel, then PCR'),
- ('2A', 'Ligation, PCR, gel, PCR'),
- ('Done', 'Completed'),
- )
- stopping_point = models.CharField(max_length=25, choices=PROTOCOL_END_POINTS, default='Done')
- amplified_from_sample = models.ForeignKey('self', blank=True, null=True)
-
- undiluted_concentration = models.DecimalField("Undiluted concentration (ng/ul)", max_digits=5, decimal_places=2, default=0, blank=True, null=True)
- successful_pM = models.DecimalField(max_digits=5, decimal_places=2, blank=True, null=True)
- ten_nM_dilution = models.BooleanField()
- avg_lib_size = models.IntegerField(default=225, blank=True, null=True)
- notes = models.TextField(blank=True)
-
- def __str__(self):
- return '#%s: %s' % (self.library_id, self.library_name)
-
- class Meta:
- verbose_name_plural = "libraries"
- ordering = ["-library_id"]
-
- class Admin:
- date_hierarchy = "creation_date"
- save_as = True
- save_on_top = True
- search_fields = ['library_name', 'library_id']
- list_display = ('library_id', 'library_name', 'made_for', 'creation_date', 'stopping_point')
- list_display_links = ('library_id', 'library_name')
- list_filter = ('stopping_point', 'library_species', 'made_for', 'made_by', 'experiment_type')
- fields = (
- (None, {
- 'fields': (('library_id', 'library_name'), ('library_species', 'experiment_type'),)
- }),
- ('Creation Information:', {
-              'fields' : (('made_for', 'made_by', 'creation_date'), ('stopping_point', 'amplified_from_sample'), ('undiluted_concentration', 'avg_lib_size'), 'notes',)
- }),
- ('Run Information:', {
- 'fields' : (('ten_nM_dilution','successful_pM'),)
- }),
- )
-
-class FlowCell(models.Model):
-
- flowcell_id = models.CharField(max_length=20, unique=True, db_index=True, core=True)
- run_date = models.DateTimeField(core=True)
- advanced_run = models.BooleanField(default=False)
- read_length = models.IntegerField(default=32)
-
-
- FLOWCELL_STATUSES = (
- ('No', 'Not run'),
- ('F', 'Failed'),
- ('Del', 'Data deleted'),
- ('A', 'Data available'),
- ('In', 'In progress'),
- )
- flowcell_status = models.CharField(max_length=10, choices=FLOWCELL_STATUSES)
-
- lane_1_library = models.ForeignKey(Library, related_name="lane_1_library")
- lane_2_library = models.ForeignKey(Library, related_name="lane_2_library")
- lane_3_library = models.ForeignKey(Library, related_name="lane_3_library")
- lane_4_library = models.ForeignKey(Library, related_name="lane_4_library")
- lane_5_library = models.ForeignKey(Library, related_name="lane_5_library")
- lane_6_library = models.ForeignKey(Library, related_name="lane_6_library")
- lane_7_library = models.ForeignKey(Library, related_name="lane_7_library")
- lane_8_library = models.ForeignKey(Library, related_name="lane_8_library")
-
- lane_1_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
- lane_2_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
- lane_3_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
- lane_4_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
- lane_5_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
- lane_6_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
- lane_7_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
- lane_8_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
-
- lane_1_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
- lane_2_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
- lane_3_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
- lane_4_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
- lane_5_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
- lane_6_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
- lane_7_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
- lane_8_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
-
- kit_1000148 = models.IntegerField(blank=True, null=True)
- kit_1000147 = models.IntegerField(blank=True, null=True)
- kit_1000183 = models.IntegerField(blank=True, null=True)
- kit_1001625 = models.IntegerField(blank=True, null=True)
-
- cluster_station_id = models.CharField(max_length=50, blank=True, null=True)
- sequencer_id = models.CharField(max_length=50, blank=True, null=True)
-
- notes = models.TextField(blank=True)
-
- def __str__(self):
- return '%s (%s)' % (self.flowcell_id, self.run_date)
-
- class Meta:
- ordering = ["-run_date"]
-
- class Admin:
- date_hierarchy = "run_date"
- save_as = True
- save_on_top = True
- search_fields = ['flowcell_id', 'lane_1_library__library_id', 'lane_1_library__library_name', 'lane_2_library__library_id', 'lane_2_library__library_name', 'lane_3_library__library_id', 'lane_3_library__library_name', 'lane_4_library__library_id', 'lane_4_library__library_name', 'lane_5_library__library_id', 'lane_5_library__library_name', 'lane_6_library__library_id', 'lane_6_library__library_name', 'lane_7_library__library_id', 'lane_7_library__library_name', 'lane_8_library__library_id', 'lane_8_library__library_name']
- list_display = ('run_date', 'flowcell_status', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
- list_display_links = ('run_date', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
- fields = (
- (None, {
- 'fields': ('run_date', ('flowcell_id', 'flowcell_status'), ('read_length', 'advanced_run'),)
- }),
- ('Lanes:', {
- 'fields' : (('lane_1_library', 'lane_1_pM'), ('lane_2_library', 'lane_2_pM'), ('lane_3_library', 'lane_3_pM'), ('lane_4_library', 'lane_4_pM'), ('lane_5_library', 'lane_5_pM'), ('lane_6_library', 'lane_6_pM'), ('lane_7_library', 'lane_7_pM'), ('lane_8_library', 'lane_8_pM'),)
- }),
- (None, {
- 'fields' : ('notes',)
- }),
- ('Kits & Machines:', {
- 'classes': 'collapse',
- 'fields' : (('kit_1000148', 'kit_1000147', 'kit_1000183', 'kit_1001625'), ('cluster_station_id', 'sequencer_id'),)
- }),
- ('Cluster Estimates:', {
- 'classes': 'collapse',
- 'fields' : (('lane_1_cluster_estimate', 'lane_2_cluster_estimate'), ('lane_3_cluster_estimate', 'lane_4_cluster_estimate'), ('lane_5_cluster_estimate', 'lane_6_cluster_estimate'), ('lane_7_cluster_estimate', 'lane_8_cluster_estimate',),)
- }),
- )
-
-# Did not finish implementing, removing to avoid further confusion.
-#class ElandResult(models.Model):
-#
-# class Admin: pass
-#
-# flow_cell = models.ForeignKey(FlowCell)
-# config_file = models.FileField(upload_to=settings.UPLOADTO_CONFIG_FILE)
-# eland_result_pack = models.FileField(upload_to=settings.UPLOADTO_ELAND_RESULT_PACKS)
-# bed_file_pack = models.FileField(upload_to=settings.UPLOADTO_BED_PACKS)
-#
-# notes = models.TextField(blank=True)
+++ /dev/null
-# Create your views here.
\ No newline at end of file
+++ /dev/null
-#!/usr/bin/env python
-from django.core.management import execute_manager
-try:
- import settings # Assumed to be in the same directory.
-except ImportError:
- import sys
- sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
- sys.exit(1)
-
-if __name__ == "__main__":
- execute_manager(settings)
+++ /dev/null
-import os
-
-# Django settings for elandifier project.
-
-DEBUG = True
-TEMPLATE_DEBUG = DEBUG
-
-ADMINS = (
- # ('Your Name', 'your_email@domain.com'),
-)
-
-MANAGERS = ADMINS
-
-DATABASE_ENGINE = 'sqlite3' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'.
-DATABASE_NAME = os.path.abspath('../../fctracker.db') # Or path to database file if using sqlite3.
-DATABASE_USER = '' # Not used with sqlite3.
-DATABASE_PASSWORD = '' # Not used with sqlite3.
-DATABASE_HOST = '' # Set to empty string for localhost. Not used with sqlite3.
-DATABASE_PORT = '' # Set to empty string for default. Not used with sqlite3.
-
-# Local time zone for this installation. Choices can be found here:
-# http://www.postgresql.org/docs/8.1/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE
-# although not all variations may be possible on all operating systems.
-# If running in a Windows environment this must be set to the same as your
-# system time zone.
-TIME_ZONE = 'America/Los_Angeles'
-
-# Language code for this installation. All choices can be found here:
-# http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
-# http://blogs.law.harvard.edu/tech/stories/storyReader$15
-LANGUAGE_CODE = 'en-us'
-
-SITE_ID = 1
-
-# If you set this to False, Django will make some optimizations so as not
-# to load the internationalization machinery.
-USE_I18N = True
-
-# Absolute path to the directory that holds media.
-# Example: "/home/media/media.lawrence.com/"
-MEDIA_ROOT = ''
-
-# URL that handles the media served from MEDIA_ROOT.
-# Example: "http://media.lawrence.com"
-MEDIA_URL = ''
-
-# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
-# trailing slash.
-# Examples: "http://foo.com/media/", "/media/".
-ADMIN_MEDIA_PREFIX = '/media/'
-
-# Make this unique, and don't share it with anybody.
-SECRET_KEY = '(ekv^=gf(j9f(x25@a7r+8)hqlz%&_1!tw^75l%^041#vi=@4n'
-
-# List of callables that know how to import templates from various sources.
-TEMPLATE_LOADERS = (
- 'django.template.loaders.filesystem.load_template_source',
- 'django.template.loaders.app_directories.load_template_source',
-# 'django.template.loaders.eggs.load_template_source',
-)
-
-MIDDLEWARE_CLASSES = (
- 'django.middleware.common.CommonMiddleware',
- 'django.contrib.sessions.middleware.SessionMiddleware',
- 'django.contrib.auth.middleware.AuthenticationMiddleware',
- 'django.middleware.doc.XViewMiddleware',
-)
-
-ROOT_URLCONF = 'gaworkflow.frontend.urls'
-
-TEMPLATE_DIRS = (
- # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
- # Always use forward slashes, even on Windows.
- # Don't forget to use absolute paths, not relative paths.
- os.path.abspath("../../templates"),
-)
-
-INSTALLED_APPS = (
- 'django.contrib.admin',
- 'django.contrib.auth',
- 'django.contrib.contenttypes',
- 'django.contrib.sessions',
- 'django.contrib.sites',
- 'gaworkflow.frontend.eland_config',
- 'gaworkflow.frontend.fctracker',
- 'django.contrib.databrowse',
-)
-
-# Project specific settings
-UPLOADTO_HOME = os.path.abspath('../../uploads')
-UPLOADTO_CONFIG_FILE = os.path.join(UPLOADTO_HOME, 'eland_config')
-UPLOADTO_ELAND_RESULT_PACKS = os.path.join(UPLOADTO_HOME, 'eland_results')
-UPLOADTO_BED_PACKS = os.path.join(UPLOADTO_HOME, 'bed_packs')
-
+++ /dev/null
-from django.conf.urls.defaults import *
-
-# Databrowser:
-from django.contrib import databrowse
-from fctracker.models import Library, FlowCell
-databrowse.site.register(Library)
-databrowse.site.register(FlowCell)
-
-urlpatterns = patterns('',
- # Base:
- (r'^eland_config/', include('gaworkflow.frontend.eland_config.urls')),
- # Admin:
- (r'^admin/', include('django.contrib.admin.urls')),
- # Databrowser:
- (r'^databrowse/(.*)', databrowse.site.root),
-)
+++ /dev/null
-
-from datetime import date
-from glob import glob
-import logging
-import os
-import time
-import re
-
-from gaworkflow.pipeline.runfolder import \
- ElementTree, \
- VERSION_RE, \
- EUROPEAN_STRPTIME
-
-class Phasing(object):
- PHASING = 'Phasing'
- PREPHASING = 'Prephasing'
-
- def __init__(self, fromfile=None, xml=None):
- self.lane = None
- self.phasing = None
- self.prephasing = None
-
- if fromfile is not None:
- self._initialize_from_file(fromfile)
- elif xml is not None:
- self.set_elements(xml)
-
- def _initialize_from_file(self, pathname):
- path, name = os.path.split(pathname)
- basename, ext = os.path.splitext(name)
- # the last character of the param base filename should be the
- # lane number
- tree = ElementTree.parse(pathname).getroot()
- self.set_elements(tree)
- self.lane = int(basename[-1])
-
- def get_elements(self):
- root = ElementTree.Element(Phasing.PHASING, {'lane': str(self.lane)})
- phasing = ElementTree.SubElement(root, Phasing.PHASING)
- phasing.text = str(self.phasing)
- prephasing = ElementTree.SubElement(root, Phasing.PREPHASING)
- prephasing.text = str(self.prephasing)
- return root
-
- def set_elements(self, tree):
- if tree.tag not in ('Phasing', 'Parameters'):
-            raise ValueError('expected Phasing or Parameters')
- lane = tree.attrib.get('lane', None)
- if lane is not None:
- self.lane = int(lane)
- for element in list(tree):
- if element.tag == Phasing.PHASING:
- self.phasing = float(element.text)
- elif element.tag == Phasing.PREPHASING:
- self.prephasing = float(element.text)
-
-class Bustard(object):
- XML_VERSION = 1
-
- # Xml Tags
- BUSTARD = 'Bustard'
- SOFTWARE_VERSION = 'version'
- DATE = 'run_time'
- USER = 'user'
- PARAMETERS = 'Parameters'
-
- def __init__(self, xml=None):
- self.version = None
- self.date = date.today()
- self.user = None
- self.phasing = {}
-
- if xml is not None:
- self.set_elements(xml)
-
- def _get_time(self):
- return time.mktime(self.date.timetuple())
- time = property(_get_time, doc='return run time as seconds since epoch')
-
- def dump(self):
- print "Bustard version:", self.version
-        print "Run date:", self.date
- print "user:", self.user
- for lane, tree in self.phasing.items():
- print lane
- print tree
-
- def get_elements(self):
- root = ElementTree.Element('Bustard',
- {'version': str(Bustard.XML_VERSION)})
- version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION)
- version.text = self.version
- run_date = ElementTree.SubElement(root, Bustard.DATE)
- run_date.text = str(self.time)
- user = ElementTree.SubElement(root, Bustard.USER)
- user.text = self.user
- params = ElementTree.SubElement(root, Bustard.PARAMETERS)
- for p in self.phasing.values():
- params.append(p.get_elements())
- return root
-
- def set_elements(self, tree):
- if tree.tag != Bustard.BUSTARD:
- raise ValueError('Expected "Bustard" SubElements')
- xml_version = int(tree.attrib.get('version', 0))
- if xml_version > Bustard.XML_VERSION:
- logging.warn('Bustard XML tree is a higher version than this class')
- for element in list(tree):
- if element.tag == Bustard.SOFTWARE_VERSION:
- self.version = element.text
- elif element.tag == Bustard.DATE:
- self.date = date.fromtimestamp(float(element.text))
- elif element.tag == Bustard.USER:
- self.user = element.text
- elif element.tag == Bustard.PARAMETERS:
- for param in element:
- p = Phasing(xml=param)
- self.phasing[p.lane] = p
- else:
- raise ValueError("Unrecognized tag: %s" % (element.tag,))
-
-
-
-def bustard(pathname):
- """
- Construct a Bustard object from pathname
- """
- b = Bustard()
- path, name = os.path.split(pathname)
- groups = name.split("_")
- version = re.search(VERSION_RE, groups[0])
- b.version = version.group(1)
- t = time.strptime(groups[1], EUROPEAN_STRPTIME)
- b.date = date(*t[0:3])
- b.user = groups[2]
- paramfiles = glob(os.path.join(pathname, "params?.xml"))
- for paramfile in paramfiles:
- phasing = Phasing(paramfile)
- assert (phasing.lane >= 1 and phasing.lane <= 8)
- b.phasing[phasing.lane] = phasing
- return b
-
-def fromxml(tree):
- b = Bustard()
- b.set_elements(tree)
- return b
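For orientation, a minimal usage sketch of the bustard() factory above; the module path, run date, and user are assumed/illustrative (the directory-name shape matches the example path quoted in the run-status ignore-list comments):

# Illustrative sketch; gaworkflow.pipeline.bustard as the module path is an assumption,
# and the run date/user are made up.
from gaworkflow.pipeline import bustard

b = bustard.bustard('/tmp/run/Data/C1-36_Firecrest1.9.1_14-11-2007_king/Bustard1.9.1_14-11-2007_king')
print b.version                # '1.9.1', assuming VERSION_RE captures the version number
print b.date                   # datetime.date(2007, 11, 14), assuming EUROPEAN_STRPTIME is dd-mm-yyyy
print b.user                   # 'king'
print sorted(b.phasing.keys()) # lanes that had a params<N>.xml file in the directory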
+++ /dev/null
-#!/usr/bin/python
-import subprocess
-import logging
-import time
-import re
-import os
-
-from gaworkflow.pipeline.retrieve_config import getCombinedOptions, saveConfigFile
-from gaworkflow.pipeline.retrieve_config import FlowCellNotFound, WebError404
-from gaworkflow.pipeline.genome_mapper import DuplicateGenome, getAvailableGenomes, constructMapperDict
-from gaworkflow.pipeline.run_status import GARunStatus
-
-from pyinotify import WatchManager, ThreadedNotifier
-from pyinotify import EventsCodes, ProcessEvent
-
-class ConfigInfo:
-
- def __init__(self):
- #run_path = firecrest analysis directory to run analysis from
- self.run_path = None
- self.bustard_path = None
- self.config_filepath = None
- self.status = None
-
- #top level directory where all analyses are placed
- self.base_analysis_dir = None
- #analysis_dir, top level analysis dir...
- # base_analysis_dir + '/070924_USI-EAS44_0022_FC12150'
- self.analysis_dir = None
-
-
- def createStatusObject(self):
- """
- Creates a status object which can be queried for
- status of running the pipeline
-
- returns True if object created
- returns False if object cannot be created
- """
- if self.config_filepath is None:
- return False
-
- self.status = GARunStatus(self.config_filepath)
- return True
-
-
-
-####################################
-# inotify event processor
-
-s_firecrest_finished = re.compile('Firecrest[0-9\._\-A-Za-z]+/finished.txt')
-s_bustard_finished = re.compile('Bustard[0-9\._\-A-Za-z]+/finished.txt')
-s_gerald_finished = re.compile('GERALD[0-9\._\-A-Za-z]+/finished.txt')
-
-s_gerald_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/GERALD[0-9\._\-A-Za-z]+/')
-s_bustard_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/')
-s_firecrest_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/')
-
-class RunEvent(ProcessEvent):
-
- def __init__(self, conf_info):
-
- self.run_status_dict = {'firecrest': False,
- 'bustard': False,
- 'gerald': False}
-
- self._ci = conf_info
-
- ProcessEvent.__init__(self)
-
-
- def process_IN_CREATE(self, event):
- fullpath = os.path.join(event.path, event.name)
- if s_finished.search(fullpath):
- logging.info("File Found: %s" % (fullpath))
-
- if s_firecrest_finished.search(fullpath):
- self.run_status_dict['firecrest'] = True
- self._ci.status.updateFirecrest(event.name)
- elif s_bustard_finished.search(fullpath):
- self.run_status_dict['bustard'] = True
- self._ci.status.updateBustard(event.name)
- elif s_gerald_finished.search(fullpath):
- self.run_status_dict['gerald'] = True
- self._ci.status.updateGerald(event.name)
-
- #WARNING: The following order is important!!
- # Firecrest regex will catch all gerald, bustard, and firecrest
- # Bustard regex will catch all gerald and bustard
- # Gerald regex will catch all gerald
- # So, order needs to be Gerald, Bustard, Firecrest, or this
- # won't work properly.
- elif s_gerald_all.search(fullpath):
- self._ci.status.updateGerald(event.name)
- elif s_bustard_all.search(fullpath):
- self._ci.status.updateBustard(event.name)
- elif s_firecrest_all.search(fullpath):
- self._ci.status.updateFirecrest(event.name)
-
- #print "Create: %s" % (os.path.join(event.path, event.name))
-
- def process_IN_DELETE(self, event):
- #print "Remove %s" % (os.path.join(event.path, event.name))
- pass
-
-
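The ordering caveat inside process_IN_CREATE above can be checked directly: a path inside a GERALD directory matches all three `*_all` patterns, so the most specific pattern must be tested first. A minimal sketch (the run date and user in the path are illustrative):

import re

s_gerald_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/GERALD[0-9\._\-A-Za-z]+/')
s_bustard_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/')
s_firecrest_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/')

# A file created deep inside a GERALD directory matches every pattern,
# which is why the handler must test gerald, then bustard, then firecrest.
path = 'C1-36_Firecrest1.9.1_14-11-2007_king/Bustard1.9.1_14-11-2007_king/GERALD_15-11-2007_king/finished.txt'
assert s_firecrest_all.search(path)
assert s_bustard_all.search(path)
assert s_gerald_all.search(path)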
-
-
-#FLAGS
-# Config Step Error
-RUN_ABORT = 'abort'
-# Run Step Error
-RUN_FAILED = 'failed'
-
-
-#####################################
-# Configure Step (goat_pipeline.py)
-#Info
-s_start = re.compile('Starting Genome Analyzer Pipeline')
-s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
-s_generating = re.compile('^Generating journals, Makefiles')
-s_seq_folder = re.compile('^Sequence folder: ')
-s_seq_folder_sub = re.compile('want to make ')
-s_stderr_taskcomplete = re.compile('^Task complete, exiting')
-
-#Errors
-s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
-s_species_dir_err = re.compile('Error: Lane [1-8]:')
-s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
-s_missing_cycles = re.compile('^Error: Tile s_[1-8]_[0-9]+: Different number of cycles: [0-9]+ instead of [0-9]+')
-
-SUPPRESS_MISSING_CYCLES = False
-
-
-##Ignore - Example of output above each ignore regex.
-#NOTE: Commenting out an ignore will cause it to be
-# logged as DEBUG with the logging module.
-#CF_STDERR_IGNORE_LIST = []
-s_skip = re.compile('s_[0-8]_[0-9]+')
-
-
-##########################################
-# Pipeline Run Step (make -j8 recursive)
-
-##Info
-s_finished = re.compile('finished')
-
-##Errors
-s_make_error = re.compile('^make[\S\s]+Error')
-s_no_gnuplot = re.compile('gnuplot: command not found')
-s_no_convert = re.compile('^Can\'t exec "convert"')
-s_no_ghostscript = re.compile('gs: command not found')
-
-##Ignore - Example of output above each ignore regex.
-#NOTE: Commenting out an ignore will cause it to be
-# logged as DEBUG with the logging module.
-#
-PL_STDERR_IGNORE_LIST = []
-# Info: PF 11802
-PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') )
-# About to analyse intensity file s_4_0101_sig2.txt
-PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') )
-# Will send output to standard output
-PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') )
-# Found 31877 clusters
-PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') )
-# Will use quality criterion ((CHASTITY>=0.6)
-PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') )
-# Quality criterion translated to (($F[5]>=0.6))
-PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') )
-# opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt
-# AND
-# opened s_4_0103_qhg.txt
-PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') )
-# 81129 sequences out of 157651 passed filter criteria
-PL_STDERR_IGNORE_LIST.append( re.compile('^[0-9]+ sequences out of [0-9]+ passed filter criteria') )
-
-
-def pl_stderr_ignore(line):
- """
- Checks whether a line matches one of the ignore patterns (i.e. should not be logged)
-
- returns True if line should be ignored
- returns False if line should NOT be ignored
- """
- for s in PL_STDERR_IGNORE_LIST:
- if s.search(line):
- return True
- return False
-
-
-def config_stdout_handler(line, conf_info):
- """
- Processes each line of output from GOAT
- and stores useful information using the logging module
-
- Loads useful information into conf_info as well, for future
- use outside the function.
-
- returns True if found condition that signifies success.
- """
-
- # Skip irrelevant line (without logging)
- if s_skip.search(line):
- pass
-
- # Detect invalid command-line arguments
- elif s_invalid_cmdline.search(line):
- logging.error("Invalid commandline options!")
-
- # Detect starting of configuration
- elif s_start.search(line):
- logging.info('START: Configuring pipeline')
-
- # Detect it made it past invalid arguments
- elif s_gerald.search(line):
- logging.info('Running make now')
-
- # Detect that make files have been generated (based on output)
- elif s_generating.search(line):
- logging.info('Makefiles generated')
- return True
-
- # Capture run directory
- elif s_seq_folder.search(line):
- mo = s_seq_folder_sub.search(line)
- #Output changed when using --tiles=<tiles>
- # at least in pipeline v0.3.0b2
- if mo:
- firecrest_bustard_gerald_makefile = line[mo.end():]
- firecrest_bustard_gerald, junk = \
- os.path.split(firecrest_bustard_gerald_makefile)
- firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald)
- firecrest, junk = os.path.split(firecrest_bustard)
-
- conf_info.bustard_path = firecrest_bustard
- conf_info.run_path = firecrest
-
- #Standard output handling
- else:
- print 'Sequence line:', line
- mo = s_seq_folder.search(line)
- conf_info.bustard_path = line[mo.end():]
- conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
-
- # Log all other output for debugging purposes
- else:
- logging.warning('CONF:?: %s' % (line))
-
- return False
-
-
-
-def config_stderr_handler(line, conf_info):
- """
- Processes each line of output from GOAT
- and stores useful information using the logging module
-
- Loads useful information into conf_info as well, for future
- use outside the function.
-
- returns RUN_ABORT upon detecting failure;
- True on success message;
- False if neutral message
- (i.e. doesn't signify failure or success)
- """
- global SUPPRESS_MISSING_CYCLES
-
- # Detect invalid species directory error
- if s_species_dir_err.search(line):
- logging.error(line)
- return RUN_ABORT
- # Detect goat_pipeline.py traceback
- elif s_goat_traceb.search(line):
- logging.error("Goat config script died, traceback in debug output")
- return RUN_ABORT
- # Detect indication of successful configuration (from stderr; odd, but ok)
- elif s_stderr_taskcomplete.search(line):
- logging.info('Configure step successful (from: stderr)')
- return True
- # Detect missing cycles
- elif s_missing_cycles.search(line):
-
- # Only display error once
- if not SUPPRESS_MISSING_CYCLES:
- logging.error("Missing cycles detected; Not all cycles copied?")
- logging.debug("CONF:STDERR:MISSING_CYCLES: %s" % (line))
- SUPPRESS_MISSING_CYCLES = True
- return RUN_ABORT
-
- # Log all other output as debug output
- else:
- logging.debug('CONF:STDERR:?: %s' % (line))
-
- # Neutral (not failure; nor success)
- return False
-
-
-#def pipeline_stdout_handler(line, conf_info):
-# """
-# Processes each line of output from running the pipeline
-# and stores useful information using the logging module
-#
-# Loads useful information into conf_info as well, for future
-# use outside the function.
-#
-# returns True if found condition that signifies success.
-# """
-#
-# #f.write(line + '\n')
-#
-# return True
-
-
-
-def pipeline_stderr_handler(line, conf_info):
- """
- Processes each line of stderr from the pipeline run
- and stores useful information using the logging module
-
- ##FIXME: Future feature (doesn't actually do this yet)
- #Loads useful information into conf_info as well, for future
- #use outside the function.
-
- returns RUN_FAILED upon detecting failure;
- #True on success message; (no clear success state)
- False if neutral message
- (i.e. doesn't signify failure or success)
- """
-
- if pl_stderr_ignore(line):
- pass
- elif s_make_error.search(line):
- logging.error("make error detected; run failed")
- return RUN_FAILED
- elif s_no_gnuplot.search(line):
- logging.error("gnuplot not found")
- return RUN_FAILED
- elif s_no_convert.search(line):
- logging.error("imagemagick's convert command not found")
- return RUN_FAILED
- elif s_no_ghostscript.search(line):
- logging.error("ghostscript not found")
- return RUN_FAILED
- else:
- logging.debug('PIPE:STDERR:?: %s' % (line))
-
- return False
-
-
-def retrieve_config(conf_info, flowcell, cfg_filepath, genome_dir):
- """
- Gets the config file from server...
- requires config file in:
- /etc/ga_frontend/ga_frontend.conf
- or
- ~/.ga_frontend.conf
-
- with:
- [config_file_server]
- base_host_url: http://host:port
-
- return True if successful, False if failure
- """
- options = getCombinedOptions()
-
- if options.url is None:
- logging.error("~/.ga_frontend.conf or /etc/ga_frontend/ga_frontend.conf" \
- " missing base_host_url option")
- return False
-
- try:
- saveConfigFile(flowcell, options.url, cfg_filepath)
- conf_info.config_filepath = cfg_filepath
- except FlowCellNotFound, e:
- logging.error(e)
- return False
- except WebError404, e:
- logging.error(e)
- return False
- except IOError, e:
- logging.error(e)
- return False
- except Exception, e:
- logging.error(e)
- return False
-
- f = open(cfg_filepath, 'r')
- data = f.read()
- f.close()
-
- genome_dict = getAvailableGenomes(genome_dir)
- mapper_dict = constructMapperDict(genome_dict)
-
- logging.debug(data)
-
- f = open(cfg_filepath, 'w')
- f.write(data % (mapper_dict))
- f.close()
-
- return True
-
-
-
-def configure(conf_info):
- """
- Attempts to configure the GA pipeline using goat.
-
- Uses logging module to store information about status.
-
- returns True if configuration successful, otherwise False.
- """
- #ERROR Test:
- #pipe = subprocess.Popen(['goat_pipeline.py',
- # '--GERALD=config32bk.txt',
- # '--make .',],
- # #'.'],
- # stdout=subprocess.PIPE,
- # stderr=subprocess.PIPE)
-
- #ERROR Test (2), causes goat_pipeline.py traceback
- #pipe = subprocess.Popen(['goat_pipeline.py',
- # '--GERALD=%s' % (conf_info.config_filepath),
- # '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
- # '--make',
- # '.'],
- # stdout=subprocess.PIPE,
- # stderr=subprocess.PIPE)
-
- ##########################
- # Run configuration step
- # Not a test; actual configure attempt.
- #pipe = subprocess.Popen(['goat_pipeline.py',
- # '--GERALD=%s' % (conf_info.config_filepath),
- # '--make',
- # '.'],
- # stdout=subprocess.PIPE,
- # stderr=subprocess.PIPE)
-
-
- stdout_filepath = os.path.join(conf_info.analysis_dir,
- "pipeline_configure_stdout.txt")
- stderr_filepath = os.path.join(conf_info.analysis_dir,
- "pipeline_configure_stderr.txt")
-
- fout = open(stdout_filepath, 'w')
- ferr = open(stderr_filepath, 'w')
-
- pipe = subprocess.Popen(['goat_pipeline.py',
- '--GERALD=%s' % (conf_info.config_filepath),
- #'--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_0103,s_4_0104',
- '--make',
- conf_info.analysis_dir],
- stdout=fout,
- stderr=ferr)
-
- print "Configuring pipeline: %s" % (time.ctime())
- error_code = pipe.wait()
-
- # Clean up
- fout.close()
- ferr.close()
-
-
- ##################
- # Process stdout
- fout = open(stdout_filepath, 'r')
-
- stdout_line = fout.readline()
-
- complete = False
- while stdout_line != '':
- # Handle stdout
- if config_stdout_handler(stdout_line, conf_info):
- complete = True
- stdout_line = fout.readline()
-
- fout.close()
-
-
- #error_code = pipe.wait()
- if error_code:
- logging.error('Received error_code: %s' % (error_code))
- else:
- logging.info('We are go for launch!')
-
- #Process stderr
- ferr = open(stderr_filepath, 'r')
- stderr_line = ferr.readline()
-
- abort = 'NO!'
- stderr_success = False
- while stderr_line != '':
- stderr_status = config_stderr_handler(stderr_line, conf_info)
- if stderr_status == RUN_ABORT:
- abort = RUN_ABORT
- elif stderr_status is True:
- stderr_success = True
- stderr_line = ferr.readline()
-
- ferr.close()
-
-
- #Success requirements:
- # 1) stdout reported that configuration completed
- # 2) The program exited with status 0
- # 3) No abort condition was detected in stderr
- # 4) stderr reported the configure step succeeded
- print '#Expect: True, False, True, True'
- print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
- status = complete is True and \
- bool(error_code) is False and \
- abort != RUN_ABORT and \
- stderr_success is True
-
- # If everything was successful, but for some reason
- # we didn't retrieve the path info, log it.
- if status is True:
- if conf_info.bustard_path is None or conf_info.run_path is None:
- logging.error("Failed to retrieve run_path")
- return False
-
- return status
-
-
-def run_pipeline(conf_info):
- """
- Run the pipeline and monitor status.
- """
- # Fail if the run_path doesn't actually exist
- if not os.path.exists(conf_info.run_path):
- logging.error('Run path does not exist: %s' \
- % (conf_info.run_path))
- return False
-
- # Change cwd to run_path
- stdout_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stdout.txt')
- stderr_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stderr.txt')
-
- # Create status object
- conf_info.createStatusObject()
-
- # Monitor file creation
- wm = WatchManager()
- mask = EventsCodes.IN_DELETE | EventsCodes.IN_CREATE
- event = RunEvent(conf_info)
- notifier = ThreadedNotifier(wm, event)
- notifier.start()
- wdd = wm.add_watch(conf_info.run_path, mask, rec=True)
-
- # Log pipeline starting
- logging.info('STARTING PIPELINE @ %s' % (time.ctime()))
-
- # Start the pipeline (and hide!)
- #pipe = subprocess.Popen(['make',
- # '-j8',
- # 'recursive'],
- # stdout=subprocess.PIPE,
- # stderr=subprocess.PIPE)
-
- fout = open(stdout_filepath, 'w')
- ferr = open(stderr_filepath, 'w')
-
- pipe = subprocess.Popen(['make',
- '--directory=%s' % (conf_info.run_path),
- '-j8',
- 'recursive'],
- stdout=fout,
- stderr=ferr)
- #shell=True)
- # Wait for run to finish
- retcode = pipe.wait()
-
-
- # Clean up
- notifier.stop()
- fout.close()
- ferr.close()
-
- # Process stderr
- ferr = open(stderr_filepath, 'r')
-
- run_failed_stderr = False
- for line in ferr:
- err_status = pipeline_stderr_handler(line, conf_info)
- if err_status == RUN_FAILED:
- run_failed_stderr = True
-
- ferr.close()
-
- # Finished file check!
- print 'RUN SUCCESS CHECK:'
- for key, value in event.run_status_dict.items():
- print ' %s: %s' % (key, value)
-
- dstatus = event.run_status_dict
-
- # Success or failure check
- status = (retcode == 0) and \
- run_failed_stderr is False and \
- dstatus['firecrest'] is True and \
- dstatus['bustard'] is True and \
- dstatus['gerald'] is True
-
- return status
-
-
+++ /dev/null
-"""
-Extract information about the Firecrest run
-
-Firecrest - class holding the properties we found
-firecrest - Firecrest factory function initialized from a directory name
-fromxml - Firecrest factory function initialized from an xml dump from
- the Firecrest object.
-"""
-
-from datetime import date
-import logging
-import os
-import re
-import time
-
-from gaworkflow.pipeline.runfolder import \
- ElementTree, \
- VERSION_RE, \
- EUROPEAN_STRPTIME
-
-class Firecrest(object):
- XML_VERSION=1
-
- # xml tag names
- FIRECREST = 'Firecrest'
- SOFTWARE_VERSION = 'version'
- START = 'FirstCycle'
- STOP = 'LastCycle'
- DATE = 'run_time'
- USER = 'user'
- MATRIX = 'matrix'
-
- def __init__(self, xml=None):
- self.start = None
- self.stop = None
- self.version = None
- self.date = date.today()
- self.user = None
- self.matrix = None
-
- if xml is not None:
- self.set_elements(xml)
-
- def _get_time(self):
- return time.mktime(self.date.timetuple())
- time = property(_get_time, doc='return run time as seconds since epoch')
-
- def dump(self):
- print "Starting cycle:", self.start
- print "Ending cycle:", self.stop
- print "Firecrest version:", self.version
- print "Run date:", self.date
- print "user:", self.user
-
- def get_elements(self):
- attribs = {'version': str(Firecrest.XML_VERSION) }
- root = ElementTree.Element(Firecrest.FIRECREST, attrib=attribs)
- version = ElementTree.SubElement(root, Firecrest.SOFTWARE_VERSION)
- version.text = self.version
- start_cycle = ElementTree.SubElement(root, Firecrest.START)
- start_cycle.text = str(self.start)
- stop_cycle = ElementTree.SubElement(root, Firecrest.STOP)
- stop_cycle.text = str(self.stop)
- run_date = ElementTree.SubElement(root, Firecrest.DATE)
- run_date.text = str(self.time)
- user = ElementTree.SubElement(root, Firecrest.USER)
- user.text = self.user
- matrix = ElementTree.SubElement(root, Firecrest.MATRIX)
- matrix.text = self.matrix
- return root
-
- def set_elements(self, tree):
- if tree.tag != Firecrest.FIRECREST:
- raise ValueError('Expected "Firecrest" SubElements')
- xml_version = int(tree.attrib.get('version', 0))
- if xml_version > Firecrest.XML_VERSION:
- logging.warn('Firecrest XML tree is a higher version than this class')
- for element in list(tree):
- if element.tag == Firecrest.SOFTWARE_VERSION:
- self.version = element.text
- elif element.tag == Firecrest.START:
- self.start = int(element.text)
- elif element.tag == Firecrest.STOP:
- self.stop = int(element.text)
- elif element.tag == Firecrest.DATE:
- self.date = date.fromtimestamp(float(element.text))
- elif element.tag == Firecrest.USER:
- self.user = element.text
- elif element.tag == Firecrest.MATRIX:
- self.matrix = element.text
- else:
- raise ValueError("Unrecognized tag: %s" % (element.tag,))
-
-def firecrest(pathname):
- """
- Examine the directory at pathname and initialize a Firecrest object
- """
- f = Firecrest()
-
- # parse firecrest directory name
- path, name = os.path.split(pathname)
- groups = name.split('_')
- # grab the start/stop cycle information
- cycle = re.match("C([0-9]+)-([0-9]+)", groups[0])
- f.start = int(cycle.group(1))
- f.stop = int(cycle.group(2))
- # firecrest version
- version = re.search(VERSION_RE, groups[1])
- f.version = (version.group(1))
- # datetime
- t = time.strptime(groups[2], EUROPEAN_STRPTIME)
- f.date = date(*t[0:3])
- # username
- f.user = groups[3]
-
- # should I parse this deeper than just stashing the
- # contents of the matrix file?
- matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
- f.matrix = open(matrix_pathname, 'r').read()
- return f
-
-def fromxml(tree):
- """
- Initialize a Firecrest object from an element tree node
- """
- f = Firecrest()
- f.set_elements(tree)
- return f
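A brief usage sketch of the firecrest() factory above; the module path is assumed and the date/user are illustrative:

# Illustrative sketch; gaworkflow.pipeline.firecrest as the module path is an assumption.
from gaworkflow.pipeline import firecrest

f = firecrest.firecrest('/tmp/run/Data/C1-36_Firecrest1.9.1_14-11-2007_king')
print f.start, f.stop   # 1 36, parsed from the leading 'C1-36'
print f.version         # '1.9.1', assuming VERSION_RE captures the version number
print f.user            # 'king'
# Note: the factory also reads Matrix/s_matrix.txt inside that directory.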
+++ /dev/null
-#!/usr/bin/python
-import glob
-import sys
-import os
-import re
-
-import logging
-
-from gaworkflow.util.alphanum import alphanum
-
-class DuplicateGenome(Exception): pass
-
-
-def _has_metainfo(genome_dir):
- metapath = os.path.join(genome_dir, '_metainfo_')
- if os.path.isfile(metapath):
- return True
- else:
- return False
-
-def getAvailableGenomes(genome_base_dir):
- """
- raises IOError (on genome_base_dir not found)
- raises DuplicateGenome on duplicate genomes found.
-
- returns a double dictionary (i.e. d[species][build] = path)
- """
-
- # Need valid directory
- if not os.path.exists(genome_base_dir):
- msg = "Directory does not exist: %s" % (genome_base_dir)
- raise IOError, msg
-
- # Find all subdirectories
- filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
- potential_genome_dirs = \
- [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
-
- # Get list of metadata files
- genome_dir_list = \
- [ dirpath \
- for dirpath in potential_genome_dirs \
- if _has_metainfo(dirpath) ]
-
- # Genome double dictionary
- d = {}
-
- for genome_dir in genome_dir_list:
- line = open(os.path.join(genome_dir, '_metainfo_'), 'r').readline().strip()
-
- # Get species, build... log and skip on failure
- try:
- species, build = line.split('|')
- except ValueError:
- logging.warning('Skipping: Invalid _metainfo_ line in %s: %s' \
- % (genome_dir, line))
- continue
-
- build_dict = d.setdefault(species, {})
- if build in build_dict:
- msg = "Duplicate genome for %s|%s" % (species, build)
- raise DuplicateGenome, msg
-
- build_dict[build] = genome_dir
-
- return d
-
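The directory layout getAvailableGenomes expects can be sketched with a small, self-contained example (the species and build names are illustrative):

import os
import tempfile
from gaworkflow.pipeline.genome_mapper import getAvailableGenomes

# Build a throwaway genome_base_dir with one _metainfo_ file per build.
base = tempfile.mkdtemp()
for species, build in [('Homo sapiens', 'hg18'), ('Mus musculus', 'mm9')]:
    build_dir = os.path.join(base, build)
    os.mkdir(build_dir)
    meta = open(os.path.join(build_dir, '_metainfo_'), 'w')
    meta.write('%s|%s\n' % (species, build))
    meta.close()

d = getAvailableGenomes(base)
# d is the double dictionary described above, e.g.
# d['Homo sapiens']['hg18'] == os.path.join(base, 'hg18')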
-
-class constructMapperDict(object):
- """
- Emulate a dictionary to map genome|build names to paths.
-
- It uses the dictionary generated by getAvailableGenomes.
- """
- def __init__(self, genome_dict):
- self.genome_dict = genome_dict
-
- def __getitem__(self, key):
- """
- Return the best match for key
- """
- elements = re.split("\|", key)
-
- if len(elements) == 1:
- # we just have the species name
- # get the set of builds
- builds = self.genome_dict[elements[0]]
-
- # sort build names the way humans would
- keys = builds.keys()
- keys.sort(cmp=alphanum)
-
- # return the path from the 'last' build name
- return builds[keys[-1]]
-
- elif len(elements) == 2:
- # we have species, and build name
- return self.genome_dict[elements[0]][elements[1]]
- else:
- raise KeyError("Unrecognized key")
-
- def keys(self):
- keys = []
- for species in self.genome_dict.keys():
- for build in self.genome_dict[species]:
- keys.append(species+'|'+build)
- return keys
-
- def values(self):
- values = []
- for species in self.genome_dict.keys():
- for build in self.genome_dict[species]:
- values.append(self.genome_dict[species][build])
- return values
-
- def items(self):
- items = []
- for species in self.genome_dict.keys():
- for build in self.genome_dict[species]:
- key = species+'|'+build
- value = self.genome_dict[species][build]
- items.append((key, value))
- return items
-
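A short sketch of how constructMapperDict resolves keys (species and build names are illustrative):

from gaworkflow.pipeline.genome_mapper import getAvailableGenomes, constructMapperDict

d = getAvailableGenomes('/genomes')        # path is illustrative
mapper = constructMapperDict(d)

# 'species|build' returns that exact build's directory.
path_hg18 = mapper['Homo sapiens|hg18']
# A bare species name returns the path for the last build after a
# human-style (alphanum) sort of the build names.
path_last_build = mapper['Homo sapiens']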
-if __name__ == '__main__':
-
- if len(sys.argv) != 2:
- print 'usage: %s <base_genome_dir>' % (sys.argv[0])
- sys.exit(1)
-
- d = getAvailableGenomes(sys.argv[1])
- d2 = constructMapperDict(d)
-
- for k,v in d2.items():
- print '%s: %s' % (k,v)
-
-
+++ /dev/null
-"""
-Provide access to information stored in the GERALD directory.
-"""
-from datetime import datetime, date
-from glob import glob
-import logging
-import os
-import stat
-import time
-import types
-
-from gaworkflow.pipeline.runfolder import \
- ElementTree, \
- EUROPEAN_STRPTIME, \
- LANES_PER_FLOWCELL, \
- VERSION_RE
-from gaworkflow.util.ethelp import indent, flatten
-from gaworkflow.util.opener import autoopen
-
-class Gerald(object):
- """
- Capture meaning out of the GERALD directory
- """
- XML_VERSION = 1
- GERALD='Gerald'
- RUN_PARAMETERS='RunParameters'
- SUMMARY='Summary'
-
- class LaneParameters(object):
- """
- Make it easy to access elements of LaneSpecificRunParameters from python
- """
- def __init__(self, gerald, key):
- self._gerald = gerald
- self._key = key
-
- def __get_attribute(self, xml_tag):
- subtree = self._gerald.tree.find('LaneSpecificRunParameters')
- container = subtree.find(xml_tag)
- if container is None:
- return None
- if len(container.getchildren()) > LANES_PER_FLOWCELL:
- raise RuntimeError('GERALD config.xml file changed')
- lanes = [x.tag.split('_')[1] for x in container.getchildren()]
- index = lanes.index(self._key)
- element = container[index]
- return element.text
- def _get_analysis(self):
- return self.__get_attribute('ANALYSIS')
- analysis = property(_get_analysis)
-
- def _get_eland_genome(self):
- genome = self.__get_attribute('ELAND_GENOME')
- # default to the chipwide parameters if there isn't an
- # entry in the lane specific parameters
- if genome is None:
- subtree = self._gerald.tree.find('ChipWideRunParameters')
- container = subtree.find('ELAND_GENOME')
- genome = container.text
- return genome
- eland_genome = property(_get_eland_genome)
-
- def _get_read_length(self):
- return self.__get_attribute('READ_LENGTH')
- read_length = property(_get_read_length)
-
- def _get_use_bases(self):
- return self.__get_attribute('USE_BASES')
- use_bases = property(_get_use_bases)
-
- class LaneSpecificRunParameters(object):
- """
- Provide access to LaneSpecificRunParameters
- """
- def __init__(self, gerald):
- self._gerald = gerald
- self._keys = None
- def __getitem__(self, key):
- return Gerald.LaneParameters(self._gerald, key)
- def keys(self):
- if self._keys is None:
- tree = self._gerald.tree
- analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
- # according to the pipeline specs I think their fields
- # are sampleName_laneID, with sampleName defaulting to s
- # since laneIDs are constant let's just try using
- # those consistently.
- self._keys = [ x.tag.split('_')[1] for x in analysis]
- return self._keys
- def values(self):
- return [ self[x] for x in self.keys() ]
- def items(self):
- return zip(self.keys(), self.values())
- def __len__(self):
- return len(self.keys())
-
- def __init__(self, xml=None):
- self.pathname = None
- self.tree = None
-
- # parse lane parameters out of the config.xml file
- self.lanes = Gerald.LaneSpecificRunParameters(self)
-
- self.summary = None
- self.eland_results = None
-
- if xml is not None:
- self.set_elements(xml)
-
- def _get_date(self):
- if self.tree is None:
- return datetime.today()
- timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
- epochstamp = time.mktime(time.strptime(timestamp, '%c'))
- return datetime.fromtimestamp(epochstamp)
- date = property(_get_date)
-
- def _get_time(self):
- return time.mktime(self.date.timetuple())
- time = property(_get_time, doc='return run time as seconds since epoch')
-
- def _get_version(self):
- if self.tree is None:
- return None
- return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION')
- version = property(_get_version)
-
- def dump(self):
- """
- Debugging function, report current object
- """
- print 'Gerald version:', self.version
- print 'Gerald run date:', self.date
- print 'Gerald config.xml:', self.tree
- self.summary.dump()
-
- def get_elements(self):
- if self.tree is None or self.summary is None:
- return None
-
- gerald = ElementTree.Element(Gerald.GERALD,
- {'version': unicode(Gerald.XML_VERSION)})
- gerald.append(self.tree)
- gerald.append(self.summary.get_elements())
- if self.eland_results:
- gerald.append(self.eland_results.get_elements())
- return gerald
-
- def set_elements(self, tree):
- if tree.tag != Gerald.GERALD:
- raise ValueError('expected Gerald')
- xml_version = int(tree.attrib.get('version', 0))
- if xml_version > Gerald.XML_VERSION:
- logging.warn('XML tree is a higher version than this class')
- for element in list(tree):
- tag = element.tag.lower()
- if tag == Gerald.RUN_PARAMETERS.lower():
- self.tree = element
- elif tag == Gerald.SUMMARY.lower():
- self.summary = Summary(xml=element)
- elif tag == ELAND.ELAND.lower():
- self.eland_results = ELAND(xml=element)
- else:
- logging.warn("Unrecognized tag %s" % (element.tag,))
-
-
-def gerald(pathname):
- g = Gerald()
- g.pathname = pathname
- path, name = os.path.split(pathname)
- config_pathname = os.path.join(pathname, 'config.xml')
- g.tree = ElementTree.parse(config_pathname).getroot()
-
- # parse Summary.htm file
- summary_pathname = os.path.join(pathname, 'Summary.htm')
- g.summary = Summary(summary_pathname)
- # parse eland files
- g.eland_results = eland(g.pathname, g)
- return g
-
-def tonumber(v):
- """
- Convert a value to an int if possible, otherwise to a float.
- """
- try:
- v = int(v)
- except ValueError, e:
- v = float(v)
- return v
-
-def parse_mean_range(value):
- """
- Parse values like 123 +/- 4.5
- """
- if value.strip() == 'unknown':
- return 0, 0
-
- average, pm, deviation = value.split()
- if pm != '+/-':
- raise RuntimeError("Summary.htm file format changed")
- return tonumber(average), tonumber(deviation)
-
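A quick illustrative check of the value format parse_mean_range handles (with the function above in scope):

# parse_mean_range splits strings of the form '<mean> +/- <deviation>'.
assert parse_mean_range('123 +/- 4.5') == (123, 4.5)
assert parse_mean_range('unknown') == (0, 0)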
-def make_mean_range_element(parent, name, mean, deviation):
- """
- Make an ElementTree subelement <Name mean='mean', deviation='deviation'/>
- """
- element = ElementTree.SubElement(parent, name,
- { 'mean': unicode(mean),
- 'deviation': unicode(deviation)})
- return element
-
-def parse_mean_range_element(element):
- """
- Grab mean/deviation out of element
- """
- return (tonumber(element.attrib['mean']),
- tonumber(element.attrib['deviation']))
-
-def parse_summary_element(element):
- """
- Determine if we have a simple element or a mean/deviation element
- """
- if len(element.attrib) > 0:
- return parse_mean_range_element(element)
- else:
- return element.text
-
-class Summary(object):
- """
- Extract some useful information from the Summary.htm file
- """
- XML_VERSION = 2
- SUMMARY = 'Summary'
-
- class LaneResultSummary(object):
- """
- Parse the LaneResultSummary table out of Summary.htm
- Mostly for the cluster number
- """
- LANE_RESULT_SUMMARY = 'LaneResultSummary'
- TAGS = {
- 'LaneYield': 'lane_yield',
- 'Cluster': 'cluster', # Raw
- 'ClusterPF': 'cluster_pass_filter',
- 'AverageFirstCycleIntensity': 'average_first_cycle_intensity',
- 'PercentIntensityAfter20Cycles': 'percent_intensity_after_20_cycles',
- 'PercentPassFilterClusters': 'percent_pass_filter_clusters',
- 'PercentPassFilterAlign': 'percent_pass_filter_align',
- 'AverageAlignmentScore': 'average_alignment_score',
- 'PercentErrorRate': 'percent_error_rate'
- }
-
- def __init__(self, html=None, xml=None):
- self.lane = None
- self.lane_yield = None
- self.cluster = None
- self.cluster_pass_filter = None
- self.average_first_cycle_intensity = None
- self.percent_intensity_after_20_cycles = None
- self.percent_pass_filter_clusters = None
- self.percent_pass_filter_align = None
- self.average_alignment_score = None
- self.percent_error_rate = None
-
- if html is not None:
- self.set_elements_from_html(html)
- if xml is not None:
- self.set_elements(xml)
-
- def set_elements_from_html(self, data):
- if not len(data) in (8,10):
- raise RuntimeError("Summary.htm file format changed")
-
- # same in pre-0.3.0 Summary file and 0.3 summary file
- self.lane = data[0]
-
- if len(data) == 8:
- parsed_data = [ parse_mean_range(x) for x in data[1:] ]
- # this is the < 0.3 Pipeline version
- self.cluster = parsed_data[0]
- self.average_first_cycle_intensity = parsed_data[1]
- self.percent_intensity_after_20_cycles = parsed_data[2]
- self.percent_pass_filter_clusters = parsed_data[3]
- self.percent_pass_filter_align = parsed_data[4]
- self.average_alignment_score = parsed_data[5]
- self.percent_error_rate = parsed_data[6]
- elif len(data) == 10:
- parsed_data = [ parse_mean_range(x) for x in data[2:] ]
- # this is the >= 0.3 summary file
- self.lane_yield = data[1]
- self.cluster = parsed_data[0]
- self.cluster_pass_filter = parsed_data[1]
- self.average_first_cycle_intensity = parsed_data[2]
- self.percent_intensity_after_20_cycles = parsed_data[3]
- self.percent_pass_filter_clusters = parsed_data[4]
- self.percent_pass_filter_align = parsed_data[5]
- self.average_alignment_score = parsed_data[6]
- self.percent_error_rate = parsed_data[7]
-
- def get_elements(self):
- lane_result = ElementTree.Element(
- Summary.LaneResultSummary.LANE_RESULT_SUMMARY,
- {'lane': self.lane})
- for tag, variable_name in Summary.LaneResultSummary.TAGS.items():
- value = getattr(self, variable_name)
- if value is None:
- continue
- # it looks like a sequence
- elif type(value) in (types.TupleType, types.ListType):
- element = make_mean_range_element(
- lane_result,
- tag,
- *value
- )
- else:
- element = ElementTree.SubElement(lane_result, tag)
- element.text = value
- return lane_result
-
- def set_elements(self, tree):
- if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY:
- raise ValueError('Expected %s' % (
- Summary.LaneResultSummary.LANE_RESULT_SUMMARY))
- self.lane = tree.attrib['lane']
- tags = Summary.LaneResultSummary.TAGS
- for element in list(tree):
- try:
- variable_name = tags[element.tag]
- setattr(self, variable_name,
- parse_summary_element(element))
- except KeyError, e:
- logging.warn('Unrecognized tag %s' % (element.tag,))
-
- def __init__(self, filename=None, xml=None):
- self.lane_results = {}
-
- if filename is not None:
- self._extract_lane_results(filename)
- if xml is not None:
- self.set_elements(xml)
-
- def __getitem__(self, key):
- return self.lane_results[key]
-
- def __len__(self):
- return len(self.lane_results)
-
- def keys(self):
- return self.lane_results.keys()
-
- def values(self):
- return self.lane_results.values()
-
- def items(self):
- return self.lane_results.items()
-
- def _flattened_row(self, row):
- """
- flatten the children of a <tr>...</tr>
- """
- return [flatten(x) for x in row.getchildren() ]
-
- def _parse_table(self, table):
- """
- assumes the first line is the header of a table,
- and that the remaining rows are data
- """
- rows = table.getchildren()
- data = []
- for r in rows:
- data.append(self._flattened_row(r))
- return data
-
- def _extract_named_tables(self, pathname):
- """
- extract all the 'named' tables from a Summary.htm file
- and return as a dictionary
-
- Named tables are <h2>...</h2><table>...</table> pairs
- The contents of the h2 tag are taken to be the name
- of the table.
- """
- tree = ElementTree.parse(pathname).getroot()
- body = tree.find('body')
- tables = {}
- for i in range(len(body) - 1):
- if body[i].tag == 'h2' and body[i+1].tag == 'table':
- # we have an interesting table
- name = flatten(body[i])
- table = body[i+1]
- data = self._parse_table(table)
- tables[name] = data
- return tables
-
- def _extract_lane_results(self, pathname):
- """
- extract the Lane Results Summary table
- """
-
- tables = self._extract_named_tables(pathname)
-
- # parse lane result summary
- lane_summary = tables['Lane Results Summary']
- # this is version 1 of the summary file
- if len(lane_summary[-1]) == 8:
- # strip header
- headers = lane_summary[0]
- # grab the lane by lane data
- lane_summary = lane_summary[1:]
-
- # this is version 2 of the summary file
- if len(lane_summary[-1]) == 10:
- # lane_summary[0] is a different less specific header row
- headers = lane_summary[1]
- lane_summary = lane_summary[2:10]
- # after the last lane, there's a set of chip wide averages
-
- for r in lane_summary:
- lrs = Summary.LaneResultSummary(html=r)
- self.lane_results[lrs.lane] = lrs
-
- def get_elements(self):
- summary = ElementTree.Element(Summary.SUMMARY,
- {'version': unicode(Summary.XML_VERSION)})
- for lane in self.lane_results.values():
- summary.append(lane.get_elements())
- return summary
-
- def set_elements(self, tree):
- if tree.tag != Summary.SUMMARY:
- raise ValueError("Expected %s" % (Summary.SUMMARY,))
- xml_version = int(tree.attrib.get('version', 0))
- if xml_version > Summary.XML_VERSION:
- logging.warn('Summary XML tree is a higher version than this class')
- for element in list(tree):
- lrs = Summary.LaneResultSummary()
- lrs.set_elements(element)
- self.lane_results[lrs.lane] = lrs
-
- def dump(self):
- """
- Debugging function, report current object
- """
- pass
-
-
-def build_genome_fasta_map(genome_dir):
- # build fasta to fasta file map
- genome = genome_dir.split(os.path.sep)[-1]
- fasta_map = {}
- for vld_file in glob(os.path.join(genome_dir, '*.vld')):
- is_link = False
- if os.path.islink(vld_file):
- is_link = True
- vld_file = os.path.realpath(vld_file)
- path, vld_name = os.path.split(vld_file)
- name, ext = os.path.splitext(vld_name)
- if is_link:
- fasta_map[name] = name
- else:
- fasta_map[name] = os.path.join(genome, name)
- return fasta_map
-
-class ElandLane(object):
- """
- Process an eland result file
- """
- XML_VERSION = 1
- LANE = 'ElandLane'
- SAMPLE_NAME = 'SampleName'
- LANE_ID = 'LaneID'
- GENOME_MAP = 'GenomeMap'
- GENOME_ITEM = 'GenomeItem'
- MAPPED_READS = 'MappedReads'
- MAPPED_ITEM = 'MappedItem'
- MATCH_CODES = 'MatchCodes'
- MATCH_ITEM = 'Code'
- READS = 'Reads'
-
- def __init__(self, pathname=None, genome_map=None, xml=None):
- self.pathname = pathname
- self._sample_name = None
- self._lane_id = None
- self._reads = None
- self._mapped_reads = None
- self._match_codes = None
- if genome_map is None:
- genome_map = {}
- self.genome_map = genome_map
-
- if xml is not None:
- self.set_elements(xml)
-
- def _update(self):
- """
- Actually read the file and actually count the reads
- """
- # can't do anything if we don't have a file to process
- if self.pathname is None:
- return
-
- if os.stat(self.pathname)[stat.ST_SIZE] == 0:
- raise RuntimeError("Eland isn't done, try again later.")
-
- reads = 0
- mapped_reads = {}
-
- match_codes = {'NM':0, 'QC':0, 'RM':0,
- 'U0':0, 'U1':0, 'U2':0,
- 'R0':0, 'R1':0, 'R2':0,
- }
- for line in autoopen(self.pathname,'r'):
- reads += 1
- fields = line.split()
- # code = fields[2]
- # match_codes[code] = match_codes.setdefault(code, 0) + 1
- # the QC/NM etc codes are in the 3rd field and always present
- match_codes[fields[2]] += 1
- # ignore lines that don't have a fasta filename
- if len(fields) < 7:
- continue
- fasta = self.genome_map.get(fields[6], fields[6])
- mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
- self._match_codes = match_codes
- self._mapped_reads = mapped_reads
- self._reads = reads
-
- def _update_name(self):
- # extract the sample name
- if self.pathname is None:
- return
-
- path, name = os.path.split(self.pathname)
- split_name = name.split('_')
- self._sample_name = split_name[0]
- self._lane_id = split_name[1]
-
- def _get_sample_name(self):
- if self._sample_name is None:
- self._update_name()
- return self._sample_name
- sample_name = property(_get_sample_name)
-
- def _get_lane_id(self):
- if self._lane_id is None:
- self._update_name()
- return self._lane_id
- lane_id = property(_get_lane_id)
-
- def _get_reads(self):
- if self._reads is None:
- self._update()
- return self._reads
- reads = property(_get_reads)
-
- def _get_mapped_reads(self):
- if self._mapped_reads is None:
- self._update()
- return self._mapped_reads
- mapped_reads = property(_get_mapped_reads)
-
- def _get_match_codes(self):
- if self._match_codes is None:
- self._update()
- return self._match_codes
- match_codes = property(_get_match_codes)
-
- def get_elements(self):
- lane = ElementTree.Element(ElandLane.LANE,
- {'version':
- unicode(ElandLane.XML_VERSION)})
- sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME)
- sample_tag.text = self.sample_name
- lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
- lane_tag.text = self.lane_id
- genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
- for k, v in self.genome_map.items():
- item = ElementTree.SubElement(
- genome_map, ElandLane.GENOME_ITEM,
- {'name':k, 'value':unicode(v)})
- mapped_reads = ElementTree.SubElement(lane, ElandLane.MAPPED_READS)
- for k, v in self.mapped_reads.items():
- item = ElementTree.SubElement(
- mapped_reads, ElandLane.MAPPED_ITEM,
- {'name':k, 'value':unicode(v)})
- match_codes = ElementTree.SubElement(lane, ElandLane.MATCH_CODES)
- for k, v in self.match_codes.items():
- item = ElementTree.SubElement(
- match_codes, ElandLane.MATCH_ITEM,
- {'name':k, 'value':unicode(v)})
- reads = ElementTree.SubElement(lane, ElandLane.READS)
- reads.text = unicode(self.reads)
-
- return lane
-
- def set_elements(self, tree):
- if tree.tag != ElandLane.LANE:
- raise ValueError('Expecting %s' % (ElandLane.LANE,))
-
- # reset dictionaries
- self._mapped_reads = {}
- self._match_codes = {}
-
- for element in tree:
- tag = element.tag.lower()
- if tag == ElandLane.SAMPLE_NAME.lower():
- self._sample_name = element.text
- elif tag == ElandLane.LANE_ID.lower():
- self._lane_id = element.text
- elif tag == ElandLane.GENOME_MAP.lower():
- for child in element:
- name = child.attrib['name']
- value = child.attrib['value']
- self.genome_map[name] = value
- elif tag == ElandLane.MAPPED_READS.lower():
- for child in element:
- name = child.attrib['name']
- value = child.attrib['value']
- self._mapped_reads[name] = int(value)
- elif tag == ElandLane.MATCH_CODES.lower():
- for child in element:
- name = child.attrib['name']
- value = int(child.attrib['value'])
- self._match_codes[name] = value
- elif tag == ElandLane.READS.lower():
- self._reads = int(element.text)
- else:
- logging.warn("ElandLane unrecognized tag %s" % (element.tag,))
-
-def extract_eland_sequence(instream, outstream, start, end):
- """
- Extract a chunk of sequence out of an eland file
- """
- for line in instream:
- record = line.split()
- if len(record) > 1:
- result = [record[0], record[1][start:end]]
- else:
- result = [record[0][start:end]]
- outstream.write("\t".join(result))
- outstream.write(os.linesep)
-
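A small sketch of what extract_eland_sequence does to each record (the read name and sequence are illustrative; the function simply keeps the first whitespace-separated column and slices the second):

from StringIO import StringIO

instream = StringIO(">HWI-EAS_example\tAAACCCGGGTTT\n")
outstream = StringIO()
extract_eland_sequence(instream, outstream, 0, 6)
# outstream.getvalue() is ">HWI-EAS_example\tAAACCC" followed by os.linesep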
-class ELAND(object):
- """
- Summarize information from eland files
- """
- XML_VERSION = 1
-
- ELAND = 'ElandCollection'
- LANE = 'Lane'
- LANE_ID = 'id'
-
- def __init__(self, xml=None):
- # we need information from the gerald config.xml
- self.results = {}
-
- if xml is not None:
- self.set_elements(xml)
-
- def __len__(self):
- return len(self.results)
-
- def keys(self):
- return self.results.keys()
-
- def values(self):
- return self.results.values()
-
- def items(self):
- return self.results.items()
-
- def __getitem__(self, key):
- return self.results[key]
-
- def get_elements(self):
- root = ElementTree.Element(ELAND.ELAND,
- {'version': unicode(ELAND.XML_VERSION)})
- for lane_id, lane in self.results.items():
- eland_lane = lane.get_elements()
- eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
- root.append(eland_lane)
- return root
-
- def set_elements(self, tree):
- if tree.tag.lower() != ELAND.ELAND.lower():
- raise ValueError('Expecting %s' % (ELAND.ELAND,))
- for element in list(tree):
- lane_id = element.attrib[ELAND.LANE_ID]
- lane = ElandLane(xml=element)
- self.results[lane_id] = lane
-
-def eland(basedir, gerald=None, genome_maps=None):
- e = ELAND()
-
- file_list = glob(os.path.join(basedir, "*_eland_result.txt"))
- if len(file_list) == 0:
- # lets handle compressed eland files too
- file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
-
- for pathname in file_list:
- # yes the lane_id is also being computed in ElandLane._update
- # I didn't want to clutter up my constructor
- # but I needed to persist the sample_name/lane_id for
- # runfolder summary_report
- path, name = os.path.split(pathname)
- split_name = name.split('_')
- lane_id = split_name[1]
-
- if genome_maps is not None:
- genome_map = genome_maps[lane_id]
- elif gerald is not None:
- genome_dir = gerald.lanes[lane_id].eland_genome
- genome_map = build_genome_fasta_map(genome_dir)
- else:
- genome_map = {}
-
- eland_result = ElandLane(pathname, genome_map)
- e.results[lane_id] = eland_result
- return e
+++ /dev/null
-from xml import sax
-
-
-def get_cycles(recipe_xml_filepath):
- """
- returns the number of cycles found in Recipe*.xml
- """
- handler = CycleXmlHandler()
- sax.parse(recipe_xml_filepath, handler)
- return handler.cycle_count
-
-
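A minimal illustration of what the cycle counter counts; real Recipe*.xml files contain much more structure, and the root tag used here is assumed:

from xml import sax
from StringIO import StringIO

recipe = StringIO("""<Recipe>
  <Protocol>
    <Incorporation/>
    <Incorporation/>
    <Incorporation/>
  </Protocol>
  <Incorporation/>
</Recipe>""")

# Only Incorporation elements inside Protocol are counted, so this prints 3.
handler = CycleXmlHandler()   # defined just below
sax.parse(recipe, handler)
print handler.cycle_count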
-
-class CycleXmlHandler(sax.ContentHandler):
-
- def __init__(self):
- self.cycle_count = 0
- self.in_protocol = False
- sax.ContentHandler.__init__(self)
-
-
- def startDocument(self):
- self.cycle_count = 0
- self.in_protocol = False
-
-
- def startElement(self, name, attrs):
-
- #Only count Incorporations as cycles if within
- # the protocol section of the xml document.
- if name == "Incorporation" and self.in_protocol:
- #print 'Found a cycle!'
- self.cycle_count += 1
- return
-
- elif name == 'Protocol':
- #print 'In protocol'
- self.in_protocol = True
- return
-
- #print 'Skipping: %s' % (name)
-
-
- def endElement(self, name):
-
- if name == 'Protocol':
- #print 'End protocol'
- self.in_protocol = False
+++ /dev/null
-#!/usr/bin/env python
-
-from optparse import OptionParser, IndentedHelpFormatter
-from ConfigParser import SafeConfigParser
-
-import logging
-import os
-import sys
-import urllib2
-
-CONFIG_SYSTEM = '/etc/ga_frontend/ga_frontend.conf'
-CONFIG_USER = os.path.expanduser('~/.ga_frontend.conf')
-
-#Disable or enable commandline arg parsing; disabled by default.
-DISABLE_CMDLINE = True
-
-class FlowCellNotFound(Exception): pass
-class WebError404(Exception): pass
-
-class DummyOptions:
- """
- Used when command line parsing is disabled; default
- """
- def __init__(self):
- self.url = None
- self.output_filepath = None
- self.flowcell = None
- self.genome_dir = None
-
-class PreformattedDescriptionFormatter(IndentedHelpFormatter):
-
- #def format_description(self, description):
- #
- # if description:
- # return description + "\n"
- # else:
- # return ""
-
- def format_epilog(self, epilog):
- """
- It was removing my preformatted epilog, so this should override
- that behavior! Muhahaha!
- """
- if epilog:
- return "\n" + epilog + "\n"
- else:
- return ""
-
-
-def constructOptionParser():
- """
- returns a pre-setup optparser
- """
- global DISABLE_CMDLINE
-
- if DISABLE_CMDLINE:
- return None
-
- parser = OptionParser(formatter=PreformattedDescriptionFormatter())
-
- parser.set_description('Retrieves eland config file from ga_frontend web frontend.')
-
- parser.epilog = """
-Config File:
- * %s (System wide)
- * %s (User specific; overrides system)
- * command line overrides all config file options
-
- Example Config File:
-
- [config_file_server]
- base_host_url=http://somewhere.domain:port
-""" % (CONFIG_SYSTEM, CONFIG_USER)
-
- #Special formatter for allowing preformatted description.
- ##parser.format_epilog(PreformattedDescriptionFormatter())
-
- parser.add_option("-u", "--url",
- action="store", type="string", dest="url")
-
- parser.add_option("-o", "--output",
- action="store", type="string", dest="output_filepath")
-
- parser.add_option("-f", "--flowcell",
- action="store", type="string", dest="flowcell")
-
- parser.add_option("-g", "--genome_dir",
- action="store", type="string", dest="genome_dir")
-
- #parser.set_default("url", "default")
-
- return parser
-
-def constructConfigParser():
- """
- returns a pre-setup config parser
- """
- parser = SafeConfigParser()
- parser.read([CONFIG_SYSTEM, CONFIG_USER])
- if not parser.has_section('config_file_server'):
- parser.add_section('config_file_server')
- if not parser.has_section('local_setup'):
- parser.add_section('local_setup')
-
- return parser
-
-
-def getCombinedOptions():
- """
- Returns optparse options after it has been updated with ConfigParser
- config files and merged with parsed commandline options.
- """
- cl_parser = constructOptionParser()
- conf_parser = constructConfigParser()
-
- if cl_parser is None:
- options = DummyOptions()
- else:
- options, args = cl_parser.parse_args()
-
- if options.url is None:
- if conf_parser.has_option('config_file_server', 'base_host_url'):
- options.url = conf_parser.get('config_file_server', 'base_host_url')
-
- if options.genome_dir is None:
- if conf_parser.has_option('local_setup', 'genome_dir'):
- options.genome_dir = conf_parser.get('local_setup', 'genome_dir')
-
- print 'USING OPTIONS:'
- print ' URL:', options.url
- print ' OUT:', options.output_filepath
- print ' FC:', options.flowcell
- print 'GDIR:', options.genome_dir
- print ''
-
- return options
-
-
-def saveConfigFile(flowcell, base_host_url, output_filepath):
- """
- retrieves the flowcell eland config file, given the base_host_url
- (i.e. http://sub.domain.edu:port)
- """
- url = base_host_url + '/eland_config/%s/' % (flowcell)
-
- f = open(output_filepath, 'w')
- #try:
- try:
- web = urllib2.urlopen(url)
- except urllib2.HTTPError, e:
- errmsg = 'HTTPError: %d' % (e.code,)
- logging.error(errmsg)
- logging.error('opened %s' % (url,))
- logging.error('%s' % (e.read(),))
- raise IOError(errmsg)
- except urllib2.URLError, e:
- # plain URLErrors (e.g. connection refused) have no code or read()
- errmsg = 'URLError: %s' % (e.reason,)
- logging.error(errmsg)
- logging.error('opened %s' % (url,))
- raise IOError(errmsg)
-
- #except IOError, msg:
- # if str(msg).find("Connection refused") >= 0:
- # print 'Error: Connection refused for: %s' % (url)
- # f.close()
- # sys.exit(1)
- # elif str(msg).find("Name or service not known") >= 0:
- # print 'Error: Invalid domain or ip address for: %s' % (url)
- # f.close()
- # sys.exit(2)
- # else:
- # raise IOError, msg
-
- data = web.read()
-
- if data.find('Hmm, config file for') >= 0:
- msg = "Flowcell (%s) not found in DB; full url(%s)" % (flowcell, url)
- raise FlowCellNotFound, msg
-
- if data.find('404 - Not Found') >= 0:
- msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
- "Did you get right port #?" % (flowcell, base_host_url, url)
- raise FlowCellNotFound, msg
-
- f.write(data)
- web.close()
- f.close()
- logging.info('Wrote config file to %s' % (output_filepath,))
-
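A sketch of the intended call sequence for this module (the flowcell ID and output path are illustrative):

from gaworkflow.pipeline.retrieve_config import getCombinedOptions, saveConfigFile

# Merges ~/.ga_frontend.conf and /etc/ga_frontend/ga_frontend.conf; options.url
# comes from base_host_url in the [config_file_server] section.
options = getCombinedOptions()
saveConfigFile('FC12150', options.url, '/tmp/eland_config_FC12150.txt')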
-
+++ /dev/null
-import glob
-import re
-import os
-import sys
-import time
-import threading
-
-s_comment = re.compile('^#')
-s_general_read_len = re.compile('^READ_LENGTH ')
-s_read_len = re.compile('^[1-8]+:READ_LENGTH ')
-
-s_firecrest = None
-
-def _four_digit_num_in_string(num):
- if num < 0:
- pass
- elif num < 10:
- return '000' + str(num)
- elif num < 100:
- return '00' + str(num)
- elif num < 1000:
- return '0' + str(num)
- elif num < 10000:
- return str(num)
-
- msg = 'Invalid number: %s' % (num)
- raise ValueError, msg
-
-def _two_digit_num_in_string(num):
- if num < 0:
- pass
- elif num < 10:
- return '0' + str(num)
- elif num < 100:
- return str(num)
-
- msg = 'Invalid number: %s' % (num)
- raise ValueError, msg
-
-
-# FIRECREST PATTERNS
-# _p2f(<pattern>, lane, tile, cycle)
-PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'
-
-# _p2f(<pattern>, lane, tile)
-PATTERN_FIRECREST_INT = 's_%s_%s_02_int.txt'
-PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
-PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
-PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'
-PATTERN_FIRECREST_CLU1 = 's_%s_%s_01_1_clu.txt'
-PATTERN_FIRECREST_CLU2 = 's_%s_%s_01_2_clu.txt'
-PATTERN_FIRECREST_CLU3 = 's_%s_%s_01_3_clu.txt'
-PATTERN_FIRECREST_CLU4 = 's_%s_%s_01_4_clu.txt'
-
-
-# BUSTARD PATTERNS
-# _p2f(<pattern>, lane, tile)
-PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
-PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'
-
-
-
-# GERALD PATTERNS
-# _p2f(<pattern>, lane, tile)
-PATTERN_GERALD_ALLTMP = 's_%s_%s_all.txt.tmp'
-PATTERN_GERALD_QRAWTMP = 's_%s_%s_qraw.txt.tmp'
-PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
-PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
-PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
-PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
-PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
-PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
-PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
-PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
-PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
-PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
-PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'
-
-# _p2f(<pattern>, lane)
-PATTERN_GERALD_SEQPRETMP = 's_%s_seqpre.txt.tmp'
-PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
-PATTERN_GERALD_SIGMEANSTMP = 's_%s_Signal_Means.txt.tmp'
-PATTERN_GERALD_CALLPNG = 's_%s_call.png'
-PATTERN_GERALD_ALLPNG = 's_%s_all.png'
-PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
-PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
-PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
-PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
-PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
-PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
-PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
-PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
-PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
-PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'
-
-
-
-def _p2f(pattern, lane, tile=None, cycle=None):
- """
- Converts a pattern plus info into file names
- """
-
- # lane, and cycle provided (INVALID)
- if tile is None and cycle is not None:
- msg = "Handling of cycle without tile is not currently implemented."
- raise ValueError, msg
-
- # lane, tile, cycle provided
- elif cycle:
- return pattern % (lane,
- _four_digit_num_in_string(tile),
- _two_digit_num_in_string(cycle))
-
- # lane, tile provided
- elif tile:
- return pattern % (lane, _four_digit_num_in_string(tile))
-
- # lane provided
- else:
- return pattern % (lane)
-
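A few illustrative expansions of _p2f with the patterns defined above:

# Tile and cycle numbers are zero-padded by the helper functions above.
assert _p2f(PATTERN_FIRECREST_QCM, 1, 5, 2) == 's_1_0005_02_qcm.xml'
assert _p2f(PATTERN_BUSTARD_PRB, 2, 47) == 's_2_0047_prb.txt'
assert _p2f(PATTERN_GERALD_LANEFINISHED, 3) == 's_3_finished.txt'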
-
-class GARunStatus(object):
-
- def __init__(self, conf_filepath):
- """
- Given an eland config file in the top level directory
- of a run, predicts the files that will be generated
- during a run and provides methods for retrieving
- (completed, total) for each step or entire run.
- """
- #print 'self._conf_filepath = %s' % (conf_filepath)
- self._conf_filepath = conf_filepath
- self._base_dir, junk = os.path.split(conf_filepath)
- self._image_dir = os.path.join(self._base_dir, 'Images')
-
- self.lanes = []
- self.lane_read_length = {}
- self.tiles = None
- self.cycles = None
-
- self.status = {}
- self.status['firecrest'] = {}
- self.status['bustard'] = {}
- self.status['gerald'] = {}
-
- self._process_config()
- self._count_tiles()
- self._count_cycles()
- self._generate_expected()
-
-
- def _process_config(self):
- """
- Grabs info from self._conf_filepath
- """
- f = open(self._conf_filepath, 'r')
-
- for line in f:
-
- #Skip comment lines for now.
- if s_comment.search(line):
- continue
-
- mo = s_general_read_len.search(line)
- if mo:
- read_length = int(line[mo.end():])
- #Handle general READ_LENGTH
- for i in range(1,9):
- self.lane_read_length[i] = read_length
-
- mo = s_read_len.search(line)
- if mo:
- read_length = int(line[mo.end():])
- lanes, junk = line.split(':')
-
- #Convert lanes from string of lanes to list of lane #s.
- lanes = [ int(i) for i in lanes ]
-
-
- for lane in lanes:
-
- #Keep track of which lanes are being run.
- if lane not in self.lanes:
- self.lanes.append(lane)
-
- #Update with lane specific read lengths
- self.lane_read_length[lane] = read_length
-
- self.lanes.sort()
-
-
- def _count_tiles(self):
- """
- Count the number of tiles being used
- """
- self.tiles = len(glob.glob(os.path.join(self._image_dir,
- 'L001',
- 'C1.1',
- 's_1_*_a.tif')))
-
- def _count_cycles(self):
- """
- Figures out the number of cycles that are available
- """
- #print 'self._image_dir = %s' % (self._image_dir)
- cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
- #print 'cycle_dirs = %s' % (cycle_dirs)
- cycle_list = []
- for cycle_dir in cycle_dirs:
- junk, c = os.path.split(cycle_dir)
- cycle_list.append(int(c[1:c.find('.')]))
-
- self.cycles = max(cycle_list)
-
-
-
-
- def _generate_expected(self):
- """
- generates a list of files we expect to find.
- """
-
- firecrest = self.status['firecrest']
- bustard = self.status['bustard']
- gerald = self.status['gerald']
-
-
- for lane in self.lanes:
- for tile in range(1,self.tiles+1):
- for cycle in range(1, self.cycles+1):
-
- ##########################
- # LANE, TILE, CYCLE LAYER
-
- # FIRECREST
- firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False
-
-
- ###################
- # LANE, TILE LAYER
-
- # FIRECREST
- firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile)] = False
- firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False
- firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False
- firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False
- firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile)] = False
- firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile)] = False
- firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile)] = False
- firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile)] = False
-
-
- # BUSTARD
- bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False
- bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False
-
-
- # GERALD
- #gerald[_p2f(PATTERN_GERALD_ALLTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_QRAWTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False
- gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False
- #gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False
-
- ###################
- # LANE LAYER
-
- # GERALD
- #gerald[_p2f(PATTERN_GERALD_SEQPRETMP, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_SIGMEANSTMP, lane)] = False
- gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False
- gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False
- gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False
- gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False
- gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False
- #gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False
- gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False
-
-
-
- #################
- # LOOPS FINISHED
-
- # FIRECREST
- firecrest['offsets_finished.txt'] = False
- firecrest['finished.txt'] = False
-
- # BUSTARD
- bustard['finished.txt'] = False
-
- # GERALD
- gerald['tiles.txt'] = False
- gerald['FullAll.htm'] = False
- #gerald['All.htm.tmp'] = False
- #gerald['Signal_Means.txt.tmp'] = False
- #gerald['plotIntensity_for_IVC'] = False
- #gerald['IVC.htm.tmp'] = False
- gerald['FullError.htm'] = False
- gerald['FullPerfect.htm'] = False
- #gerald['Error.htm.tmp'] = False
- #gerald['Perfect.htm.tmp'] = False
- #gerald['Summary.htm.tmp'] = False
- #gerald['Tile.htm.tmp'] = False
- gerald['finished.txt'] = False
-
- def statusFirecrest(self):
- """
- returns (<completed>, <total>)
- """
- firecrest = self.status['firecrest']
- total = len(firecrest)
- completed = firecrest.values().count(True)
-
- return (completed, total)
-
-
- def statusBustard(self):
- """
- returns (<completed>, <total>)
- """
- bustard = self.status['bustard']
- total = len(bustard)
- completed = bustard.values().count(True)
-
- return (completed, total)
-
-
- def statusGerald(self):
- """
- returns (<completed>, <total>)
- """
- gerald = self.status['gerald']
- total = len(gerald)
- completed = gerald.values().count(True)
-
- return (completed, total)
-
-
- def statusTotal(self):
- """
- returns (<completed>, <total>)
- """
- #f = firecrest c = completed
- #b = bustard t = total
- #g = gerald
- fc, ft = self.statusFirecrest()
- bc, bt = self.statusBustard()
- gc, gt = self.statusGerald()
-
- return (fc+bc+gc, ft+bt+gt)
-
-
- def statusReport(self):
- """
- Generate the basic percent complete report
- """
- def _percentCompleted(completed, total):
- """
-            Returns percent completed as a float
- """
- return (completed / float(total)) * 100
-
- fc, ft = self.statusFirecrest()
- bc, bt = self.statusBustard()
- gc, gt = self.statusGerald()
- tc, tt = self.statusTotal()
-
- fp = _percentCompleted(fc, ft)
- bp = _percentCompleted(bc, bt)
- gp = _percentCompleted(gc, gt)
- tp = _percentCompleted(tc, tt)
-
- report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
- ' Bustard: %s%% (%s/%s)' % (bp, bc, bt),
- ' Gerald: %s%% (%s/%s)' % (gp, gc, gt),
- '-----------------------',
- ' Total: %s%% (%s/%s)' % (tp, tc, tt),
- ]
- return report
-
- def updateFirecrest(self, filename):
- """
- Marks firecrest filename as being completed.
- """
- self.status['firecrest'][filename] = True
-
-
- def updateBustard(self, filename):
- """
- Marks bustard filename as being completed.
- """
- self.status['bustard'][filename] = True
-
-
- def updateGerald(self, filename):
- """
- Marks gerald filename as being completed.
- """
- self.status['gerald'][filename] = True
-
-
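-# Typical use of GARunStatus (a sketch mirroring main() below): point it at the
-# GERALD/eland config file at the top of a run and print the completion report.
-#
-#   status = GARunStatus('/path/to/runfolder/config.txt')   # hypothetical path
-#   print os.linesep.join(status.statusReport())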
-
-##################################################
-# Functions to be called by Thread(target=<func>)
-def _cmdLineStatusMonitorFunc(conf_info):
- """
- Given a ConfigInfo object, provides status to stdout.
-
- You should probably use startCmdLineStatusMonitor()
-    instead of this function.
-
- Use with:
- t = threading.Thread(target=_cmdLineStatusMonitorFunc,
- args=[conf_info])
- t.setDaemon(True)
- t.start()
- """
- SLEEP_AMOUNT = 30
-
- while 1:
- if conf_info.status is None:
- print "No status object yet."
- time.sleep(SLEEP_AMOUNT)
- continue
-
- report = conf_info.status.statusReport()
- print os.linesep.join(report)
- print
-
- time.sleep(SLEEP_AMOUNT)
-
-
-#############################################
-# Start monitor thread convenience functions
-def startCmdLineStatusMonitor(conf_info):
- """
- Starts a command line status monitor given a conf_info object.
- """
- t = threading.Thread(target=_cmdLineStatusMonitorFunc, args=[conf_info])
- t.setDaemon(True)
- t.start()
-
-from optparse import OptionParser
-def make_parser():
- usage = "%prog: config file"
-
-    parser = OptionParser(usage)
- return parser
-
-def main(cmdline=None):
- parser = make_parser()
- opt, args = parser.parse_args(cmdline)
-
- if len(args) != 1:
- parser.error("need name of configuration file")
-
- status = GARunStatus(args[0])
- print os.linesep.join(status.statusReport())
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-"""
-Core information needed to inspect a runfolder.
-"""
-from glob import glob
-import logging
-import os
-import re
-import shutil
-import stat
-import subprocess
-import sys
-import time
-
-try:
- from xml.etree import ElementTree
-except ImportError, e:
- from elementtree import ElementTree
-
-EUROPEAN_STRPTIME = "%d-%m-%Y"
-EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})"
-VERSION_RE = "([0-9\.]+)"
-USER_RE = "([a-zA-Z0-9]+)"
-LANES_PER_FLOWCELL = 8
-
-from gaworkflow.util.alphanum import alphanum
-from gaworkflow.util.ethelp import indent, flatten
-
-
-class PipelineRun(object):
- """
- Capture "interesting" information about a pipeline run
- """
- XML_VERSION = 1
- PIPELINE_RUN = 'PipelineRun'
- FLOWCELL_ID = 'FlowcellID'
-
- def __init__(self, pathname=None, firecrest=None, bustard=None, gerald=None, xml=None):
- if pathname is not None:
- self.pathname = os.path.normpath(pathname)
- else:
- self.pathname = None
- self._name = None
- self._flowcell_id = None
- self.firecrest = firecrest
- self.bustard = bustard
- self.gerald = gerald
-
- if xml is not None:
- self.set_elements(xml)
-
- def _get_flowcell_id(self):
- # extract flowcell ID
- if self._flowcell_id is None:
- config_dir = os.path.join(self.pathname, 'Config')
- flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
- if os.path.exists(flowcell_id_path):
- flowcell_id_tree = ElementTree.parse(flowcell_id_path)
- self._flowcell_id = flowcell_id_tree.findtext('Text')
- else:
- path_fields = self.pathname.split('_')
- if len(path_fields) > 0:
- # guessing last element of filename
- flowcell_id = path_fields[-1]
- else:
- flowcell_id = 'unknown'
-
- logging.warning(
- "Flowcell id was not found, guessing %s" % (
- flowcell_id))
- self._flowcell_id = flowcell_id
- return self._flowcell_id
- flowcell_id = property(_get_flowcell_id)
-
- def get_elements(self):
- """
- make one master xml file from all of our sub-components.
- """
- root = ElementTree.Element(PipelineRun.PIPELINE_RUN)
- flowcell = ElementTree.SubElement(root, PipelineRun.FLOWCELL_ID)
- flowcell.text = self.flowcell_id
- root.append(self.firecrest.get_elements())
- root.append(self.bustard.get_elements())
- root.append(self.gerald.get_elements())
- return root
-
- def set_elements(self, tree):
- # this file gets imported by all the others,
-        # so we need to hide the imports here to avoid cyclic imports
- from gaworkflow.pipeline import firecrest
- from gaworkflow.pipeline import bustard
- from gaworkflow.pipeline import gerald
-
- tag = tree.tag.lower()
- if tag != PipelineRun.PIPELINE_RUN.lower():
- raise ValueError('Pipeline Run Expecting %s got %s' % (
- PipelineRun.PIPELINE_RUN, tag))
- for element in tree:
- tag = element.tag.lower()
- if tag == PipelineRun.FLOWCELL_ID.lower():
- self._flowcell_id = element.text
- #ok the xword.Xword.XWORD pattern for module.class.constant is lame
- elif tag == firecrest.Firecrest.FIRECREST.lower():
- self.firecrest = firecrest.Firecrest(xml=element)
- elif tag == bustard.Bustard.BUSTARD.lower():
- self.bustard = bustard.Bustard(xml=element)
- elif tag == gerald.Gerald.GERALD.lower():
- self.gerald = gerald.Gerald(xml=element)
- else:
- logging.warn('PipelineRun unrecognized tag %s' % (tag,))
-
- def _get_run_name(self):
- """
-        Use the latest date of our firecrest/bustard/gerald components as part of the run name
- """
- if self._name is None:
- tmax = max(self.firecrest.time, self.bustard.time, self.gerald.time)
- timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax))
- self._name = 'run_'+self.flowcell_id+"_"+timestamp+'.xml'
- return self._name
- name = property(_get_run_name)
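-    # For example, a flowcell 207BTAAXX whose newest component finished on
-    # 2008-04-19 produces the report name 'run_207BTAAXX_2008-04-19.xml'
-    # (the runfolder tests below check exactly this form).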
-
- def save(self, destdir=None):
- if destdir is None:
- destdir = ''
- logging.info("Saving run report "+ self.name)
- xml = self.get_elements()
- indent(xml)
- dest_pathname = os.path.join(destdir, self.name)
- ElementTree.ElementTree(xml).write(dest_pathname)
-
- def load(self, filename):
- logging.info("Loading run report from " + filename)
- tree = ElementTree.parse(filename).getroot()
- self.set_elements(tree)
-
-def get_runs(runfolder):
- """
- Search through a run folder for all the various sub component runs
- and then return a PipelineRun for each different combination.
-
-    For example, if there are two different GERALD runs, this will
-    generate two different PipelineRun objects that differ
-    in their GERALD component.
- """
- from gaworkflow.pipeline import firecrest
- from gaworkflow.pipeline import bustard
- from gaworkflow.pipeline import gerald
-
- datadir = os.path.join(runfolder, 'Data')
-
- logging.info('Searching for runs in ' + datadir)
- runs = []
- for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
- f = firecrest.firecrest(firecrest_pathname)
- bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
- for bustard_pathname in glob(bustard_glob):
- b = bustard.bustard(bustard_pathname)
- gerald_glob = os.path.join(bustard_pathname, 'GERALD*')
- for gerald_pathname in glob(gerald_glob):
- try:
- g = gerald.gerald(gerald_pathname)
- runs.append(PipelineRun(runfolder, f, b, g))
- except IOError, e:
- print "Ignoring", str(e)
- return runs
-
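-# get_runs() relies on the standard pipeline nesting under Data/, e.g. (a
-# sketch, names are illustrative only):
-#
-#   <runfolder>/Data/C1-33_Firecrest1.8.28_12-04-2008_diane/
-#                       Bustard1.8.28_12-04-2008_diane/
-#                           GERALD_12-04-2008_diane/
-#
-# Every Firecrest/Bustard/GERALD combination found by the globs above becomes
-# its own PipelineRun.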
-
-def extract_run_parameters(runs):
- """
-    Save an xml report of the run parameters for each PipelineRun in runs
- """
- for run in runs:
- run.save()
-
-def summarize_mapped_reads(mapped_reads):
- """
-    Summarize per-chromosome reads into a genome count,
-    but handle spike-in/contamination symlinks separately.
- """
- summarized_reads = {}
- genome_reads = 0
- genome = 'unknown'
- for k, v in mapped_reads.items():
- path, k = os.path.split(k)
- if len(path) > 0:
- genome = path
- genome_reads += v
- else:
- summarized_reads[k] = summarized_reads.setdefault(k, 0) + v
- summarized_reads[genome] = genome_reads
- return summarized_reads
-
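-# Worked example for summarize_mapped_reads() (a sketch, the counts are made
-# up): reads mapped under a genome subdirectory are pooled under that genome
-# name, while bare entries (spike-in/contamination symlinks) pass through:
-#
-#   summarize_mapped_reads({'dm3/chr2L.fa': 10, 'dm3/chrX.fa': 5, 'Lambda.fa': 2})
-#   # -> {'Lambda.fa': 2, 'dm3': 15}
-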
-def summary_report(runs):
- """
- Summarize cluster numbers and mapped read counts for a runfolder
- """
- report = []
- for run in runs:
- # print a run name?
- report.append('Summary for %s' % (run.name,))
- # sort the report
- eland_keys = run.gerald.eland_results.results.keys()
- eland_keys.sort(alphanum)
-
- lane_results = run.gerald.summary.lane_results
- for lane_id in eland_keys:
- result = run.gerald.eland_results.results[lane_id]
- report.append("Sample name %s" % (result.sample_name))
- report.append("Lane id %s" % (result.lane_id,))
- cluster = lane_results[result.lane_id].cluster
- report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
- report.append("Total Reads: %d" % (result.reads))
- mc = result._match_codes
- nm = mc['NM']
- nm_percent = float(nm)/result.reads * 100
- qc = mc['QC']
- qc_percent = float(qc)/result.reads * 100
-
- report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
- report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
- report.append('Unique (0,1,2 mismatches) %d %d %d' % \
- (mc['U0'], mc['U1'], mc['U2']))
- report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
- (mc['R0'], mc['R1'], mc['R2']))
- report.append("Mapped Reads")
- mapped_reads = summarize_mapped_reads(result.mapped_reads)
- for name, counts in mapped_reads.items():
- report.append(" %s: %d" % (name, counts))
- report.append('---')
- report.append('')
- return os.linesep.join(report)
-
-def extract_results(runs, output_base_dir=None):
- if output_base_dir is None:
- output_base_dir = os.getcwd()
-
- for r in runs:
- result_dir = os.path.join(output_base_dir, r.flowcell_id)
- logging.info("Using %s as result directory" % (result_dir,))
- if not os.path.exists(result_dir):
- os.mkdir(result_dir)
-
- # create cycle_dir
- cycle = "C%d-%d" % (r.firecrest.start, r.firecrest.stop)
- logging.info("Filling in %s" % (cycle,))
- cycle_dir = os.path.join(result_dir, cycle)
- if os.path.exists(cycle_dir):
- logging.error("%s already exists, not overwriting" % (cycle_dir,))
- continue
- else:
- os.mkdir(cycle_dir)
-
- # copy stuff out of the main run
- g = r.gerald
-
- # save run file
- r.save(cycle_dir)
-
- # Copy Summary.htm
- summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
- if os.path.exists(summary_path):
- logging.info('Copying %s to %s' % (summary_path, cycle_dir))
- shutil.copy(summary_path, cycle_dir)
- else:
- logging.info('Summary file %s was not found' % (summary_path,))
-
- # tar score files
- score_files = []
- for f in os.listdir(g.pathname):
- if re.match('.*_score.txt', f):
- score_files.append(f)
-
- tar_cmd = ['/bin/tar', 'c'] + score_files
- bzip_cmd = [ 'bzip2', '-9', '-c' ]
-        tar_dest_name = os.path.join(cycle_dir, 'scores.tar.bz2')
- tar_dest = open(tar_dest_name, 'w')
- logging.info("Compressing score files in %s" % (g.pathname,))
- logging.info("Running tar: " + " ".join(tar_cmd[:10]))
- logging.info("Running bzip2: " + " ".join(bzip_cmd))
- logging.info("Writing to %s" %(tar_dest_name))
-
-        tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, cwd=g.pathname)
-        bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
-        tar.wait()
-        # also wait for the compressor, so the archive is complete before moving on
-        bzip.wait()
-
- # copy & bzip eland files
- for eland_lane in g.eland_results.values():
- source_name = eland_lane.pathname
- path, name = os.path.split(eland_lane.pathname)
- dest_name = os.path.join(cycle_dir, name+'.bz2')
-
- args = ['bzip2', '-9', '-c', source_name]
- logging.info('Running: %s' % ( " ".join(args) ))
- bzip_dest = open(dest_name, 'w')
- bzip = subprocess.Popen(args, stdout=bzip_dest)
- logging.info('Saving to %s' % (dest_name, ))
- bzip.wait()
-
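-# The result directory produced by extract_results() ends up looking roughly
-# like this (a sketch based on the code above, flowcell/cycle names vary):
-#
-#   <output_base_dir>/<flowcell_id>/C<start>-<stop>/
-#       run_<flowcell_id>_<date>.xml
-#       Summary.htm
-#       scores.tar.bz2
-#       s_*_eland_result.txt.bz2
-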
-def clean_runs(runs):
- """
- Clean up run folders to optimize for compression.
- """
- # TODO: implement this.
- # rm RunLog*.xml
- # rm pipeline_*.txt
- # rm gclog.txt
- # rm NetCopy.log
- # rm nfn.log
- # rm Images/L*
- # cd Data/C1-*_Firecrest*
- # make clean_intermediate
-
- pass
+++ /dev/null
-import unittest
-
-from StringIO import StringIO
-from gaworkflow.pipeline import genome_mapper
-
-class testGenomeMapper(unittest.TestCase):
- def test_construct_mapper(self):
- genomes = {
- 'Arabidopsis thaliana': {'v01212004': '/arabidopsis'},
- 'Homo sapiens': {'hg18': '/hg18'},
- 'Mus musculus': {'mm8': '/mm8',
- 'mm9': '/mm9',
- 'mm10': '/mm10'},
- 'Phage': {'174': '/phi'},
- }
- genome_map = genome_mapper.constructMapperDict(genomes)
-
- self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
- self.failUnlessEqual("%(Phage|174)s" % (genome_map), "/phi")
- self.failUnlessEqual("%(Mus musculus)s" % (genome_map), "/mm10")
- self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
- self.failUnlessEqual("%(Mus musculus|mm10)s" % (genome_map), "/mm10")
-
- self.failUnlessEqual(len(genome_map.keys()), 6)
- self.failUnlessEqual(len(genome_map.values()), 6)
- self.failUnlessEqual(len(genome_map.items()), 6)
-
-
-def suite():
- return unittest.makeSuite(testGenomeMapper,'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest="suite")
+++ /dev/null
-#!/usr/bin/env python
-
-from datetime import datetime, date
-import os
-import tempfile
-import shutil
-import unittest
-
-from gaworkflow.pipeline import firecrest
-from gaworkflow.pipeline import bustard
-from gaworkflow.pipeline import gerald
-from gaworkflow.pipeline import runfolder
-from gaworkflow.pipeline.runfolder import ElementTree
-
-
-def make_flowcell_id(runfolder_dir, flowcell_id=None):
- if flowcell_id is None:
- flowcell_id = '207BTAAXY'
-
- config = """<?xml version="1.0"?>
-<FlowcellId>
- <Text>%s</Text>
-</FlowcellId>""" % (flowcell_id,)
- config_dir = os.path.join(runfolder_dir, 'Config')
-
- if not os.path.exists(config_dir):
- os.mkdir(config_dir)
- pathname = os.path.join(config_dir, 'FlowcellId.xml')
- f = open(pathname,'w')
- f.write(config)
- f.close()
-
-def make_matrix(matrix_dir):
- contents = """# Auto-generated frequency response matrix
-> A
-> C
-> G
-> T
-0.77 0.15 -0.04 -0.04
-0.76 1.02 -0.05 -0.06
--0.10 -0.10 1.17 -0.03
--0.13 -0.12 0.80 1.27
-"""
- s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
- f = open(s_matrix, 'w')
- f.write(contents)
- f.close()
-
-def make_phasing_params(bustard_dir):
- for lane in range(1,9):
- pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
- f = open(pathname, 'w')
- f.write("""<Parameters>
- <Phasing>0.009900</Phasing>
- <Prephasing>0.003500</Prephasing>
-</Parameters>
-""")
- f.close()
-
-def make_gerald_config(gerald_dir):
- config_xml = """<RunParameters>
-<ChipWideRunParameters>
- <ANALYSIS>default</ANALYSIS>
- <BAD_LANES></BAD_LANES>
- <BAD_TILES></BAD_TILES>
- <CONTAM_DIR></CONTAM_DIR>
- <CONTAM_FILE></CONTAM_FILE>
- <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
- <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
- <ELAND_REPEAT></ELAND_REPEAT>
- <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
- <EMAIL_LIST>diane</EMAIL_LIST>
- <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
- <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
- <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
- <FORCE>1</FORCE>
- <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
- <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
- <HAMSTER_FLAG>genome</HAMSTER_FLAG>
- <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
- <POST_RUN_COMMAND></POST_RUN_COMMAND>
- <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
- <PURE_BASES>12</PURE_BASES>
- <QF_PARAMS>'((CHASTITY>=0.6))'</QF_PARAMS>
- <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
- <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
- <READ_LENGTH>32</READ_LENGTH>
- <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
- <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
- <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
- <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
- <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
- <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
- <TILE_ROOT>s</TILE_ROOT>
- <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
- <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
- <USE_BASES>all</USE_BASES>
- <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
-</ChipWideRunParameters>
-<LaneSpecificRunParameters>
- <ANALYSIS>
- <s_1>eland</s_1>
- <s_2>eland</s_2>
- <s_3>eland</s_3>
- <s_4>eland</s_4>
- <s_5>eland</s_5>
- <s_6>eland</s_6>
- <s_7>eland</s_7>
- <s_8>eland</s_8>
- </ANALYSIS>
- <ELAND_GENOME>
- <s_1>/g/dm3</s_1>
- <s_2>/g/equcab1</s_2>
- <s_3>/g/equcab1</s_3>
- <s_4>/g/canfam2</s_4>
- <s_5>/g/hg18</s_5>
- <s_6>/g/hg18</s_6>
- <s_7>/g/hg18</s_7>
- <s_8>/g/hg18</s_8>
- </ELAND_GENOME>
- <READ_LENGTH>
- <s_1>32</s_1>
- <s_2>32</s_2>
- <s_3>32</s_3>
- <s_4>32</s_4>
- <s_5>32</s_5>
- <s_6>32</s_6>
- <s_7>32</s_7>
- <s_8>32</s_8>
- </READ_LENGTH>
- <USE_BASES>
- <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
- <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
- <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
- <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
- <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
- <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
- <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
- <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
- </USE_BASES>
-</LaneSpecificRunParameters>
-</RunParameters>
-"""
- pathname = os.path.join(gerald_dir, 'config.xml')
- f = open(pathname,'w')
- f.write(config_xml)
- f.close()
-
-
-def make_summary_htm(gerald_dir):
- summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
-<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
-<html>
-<body>
-
-<a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
-<h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
-<h2><br></br>Chip Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr><td>Machine</td><td>HWI-EAS229</td></tr>
-<tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
-<tr><td>Chip ID</td><td>unknown</td></tr>
-</table>
-<h2><br></br>Lane Parameter Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane</td>
-<td>Sample ID</td>
-<td>Sample Target</td>
-<td>Sample Type</td>
-<td>Length</td>
-<td>Filter</td>
-<td>Tiles</td>
-</tr>
-<tr>
-<td>1</td>
-<td>unknown</td>
-<td>dm3</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane1">Lane 1</a></td>
-</tr>
-<tr>
-<td>2</td>
-<td>unknown</td>
-<td>equcab1</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane2">Lane 2</a></td>
-</tr>
-<tr>
-<td>3</td>
-<td>unknown</td>
-<td>equcab1</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane3">Lane 3</a></td>
-</tr>
-<tr>
-<td>4</td>
-<td>unknown</td>
-<td>canfam2</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane4">Lane 4</a></td>
-</tr>
-<tr>
-<td>5</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane5">Lane 5</a></td>
-</tr>
-<tr>
-<td>6</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane6">Lane 6</a></td>
-</tr>
-<tr>
-<td>7</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane7">Lane 7</a></td>
-</tr>
-<tr>
-<td>8</td>
-<td>unknown</td>
-<td>hg18</td>
-<td>ELAND</td>
-<td>32</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td><a href="#Lane8">Lane 8</a></td>
-</tr>
-</table>
-<h2><br></br>Lane Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-
-<td>Lane </td>
-<td>Clusters </td>
-<td>Av 1st Cycle Int </td>
-<td>% intensity after 20 cycles </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td> % Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>17421 +/- 2139</td>
-<td>7230 +/- 801</td>
-<td>23.73 +/- 10.79</td>
-<td>13.00 +/- 22.91</td>
-<td>32.03 +/- 18.45</td>
-<td>6703.57 +/- 3753.85</td>
-<td>4.55 +/- 4.81</td>
-</tr>
-<tr>
-<td>2</td>
-<td>20311 +/- 2402</td>
-<td>7660 +/- 678</td>
-<td>17.03 +/- 4.40</td>
-<td>40.74 +/- 30.33</td>
-<td>29.54 +/- 9.03</td>
-<td>5184.02 +/- 1631.54</td>
-<td>3.27 +/- 3.94</td>
-</tr>
-<tr>
-<td>3</td>
-<td>20193 +/- 2399</td>
-<td>7700 +/- 797</td>
-<td>15.75 +/- 3.30</td>
-<td>56.56 +/- 17.16</td>
-<td>27.33 +/- 7.48</td>
-<td>4803.49 +/- 1313.31</td>
-<td>3.07 +/- 2.86</td>
-</tr>
-<tr>
-<td>4</td>
-<td>15537 +/- 2531</td>
-<td>7620 +/- 1392</td>
-<td>15.37 +/- 3.79</td>
-<td>63.05 +/- 18.30</td>
-<td>15.88 +/- 4.99</td>
-<td>3162.13 +/- 962.59</td>
-<td>3.11 +/- 2.22</td>
-</tr>
-<tr>
-<td>5</td>
-<td>32047 +/- 3356</td>
-<td>8093 +/- 831</td>
-<td>23.79 +/- 6.18</td>
-<td>53.36 +/- 18.06</td>
-<td>48.04 +/- 13.77</td>
-<td>9866.23 +/- 2877.30</td>
-<td>2.26 +/- 1.16</td>
-</tr>
-<tr>
-<td>6</td>
-<td>32946 +/- 4753</td>
-<td>8227 +/- 736</td>
-<td>24.07 +/- 4.69</td>
-<td>54.65 +/- 12.57</td>
-<td>50.98 +/- 10.54</td>
-<td>10468.86 +/- 2228.53</td>
-<td>2.21 +/- 2.33</td>
-</tr>
-<tr>
-<td>7</td>
-<td>39504 +/- 4171</td>
-<td>8401 +/- 785</td>
-<td>22.55 +/- 4.56</td>
-<td>45.22 +/- 10.34</td>
-<td>48.41 +/- 9.67</td>
-<td>9829.40 +/- 1993.20</td>
-<td>2.26 +/- 1.11</td>
-</tr>
-<tr>
-<td>8</td>
-<td>37998 +/- 3792</td>
-<td>8443 +/- 1211</td>
-<td>39.03 +/- 7.52</td>
-<td>42.16 +/- 12.35</td>
-<td>40.98 +/- 14.89</td>
-<td>8128.87 +/- 3055.34</td>
-<td>3.57 +/- 2.77</td>
-</tr>
-</table>
-</body>
-</html>
-"""
- pathname = os.path.join(gerald_dir, 'Summary.htm')
- f = open(pathname, 'w')
- f.write(summary_htm)
- f.close()
-
-def make_eland_results(gerald_dir):
- eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
->HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
->HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
->HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
-"""
- for i in range(1,9):
- pathname = os.path.join(gerald_dir,
- 's_%d_eland_result.txt' % (i,))
- f = open(pathname, 'w')
- f.write(eland_result)
- f.close()
-
-class RunfolderTests(unittest.TestCase):
- """
- Test components of the runfolder processing code
- which includes firecrest, bustard, and gerald
- """
- def setUp(self):
- # make a fake runfolder directory
- self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
-
- self.runfolder_dir = os.path.join(self.temp_dir,
- '080102_HWI-EAS229_0010_207BTAAXX')
- os.mkdir(self.runfolder_dir)
-
- self.data_dir = os.path.join(self.runfolder_dir, 'Data')
- os.mkdir(self.data_dir)
-
- self.firecrest_dir = os.path.join(self.data_dir,
- 'C1-33_Firecrest1.8.28_12-04-2008_diane'
- )
- os.mkdir(self.firecrest_dir)
- self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
- os.mkdir(self.matrix_dir)
- make_matrix(self.matrix_dir)
-
- self.bustard_dir = os.path.join(self.firecrest_dir,
- 'Bustard1.8.28_12-04-2008_diane')
- os.mkdir(self.bustard_dir)
- make_phasing_params(self.bustard_dir)
-
- self.gerald_dir = os.path.join(self.bustard_dir,
- 'GERALD_12-04-2008_diane')
- os.mkdir(self.gerald_dir)
- make_gerald_config(self.gerald_dir)
- make_summary_htm(self.gerald_dir)
- make_eland_results(self.gerald_dir)
-
- def tearDown(self):
- shutil.rmtree(self.temp_dir)
-
- def test_firecrest(self):
- """
- Construct a firecrest object
- """
- f = firecrest.firecrest(self.firecrest_dir)
- self.failUnlessEqual(f.version, '1.8.28')
- self.failUnlessEqual(f.start, 1)
- self.failUnlessEqual(f.stop, 33)
- self.failUnlessEqual(f.user, 'diane')
- self.failUnlessEqual(f.date, date(2008,4,12))
-
- xml = f.get_elements()
- # just make sure that element tree can serialize the tree
- xml_str = ElementTree.tostring(xml)
-
- f2 = firecrest.Firecrest(xml=xml)
- self.failUnlessEqual(f.version, f2.version)
- self.failUnlessEqual(f.start, f2.start)
- self.failUnlessEqual(f.stop, f2.stop)
- self.failUnlessEqual(f.user, f2.user)
- self.failUnlessEqual(f.date, f2.date)
-
- def test_bustard(self):
- """
- construct a bustard object
- """
- b = bustard.bustard(self.bustard_dir)
- self.failUnlessEqual(b.version, '1.8.28')
- self.failUnlessEqual(b.date, date(2008,4,12))
- self.failUnlessEqual(b.user, 'diane')
- self.failUnlessEqual(len(b.phasing), 8)
- self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
-
- xml = b.get_elements()
- b2 = bustard.Bustard(xml=xml)
- self.failUnlessEqual(b.version, b2.version)
- self.failUnlessEqual(b.date, b2.date )
- self.failUnlessEqual(b.user, b2.user)
- self.failUnlessEqual(len(b.phasing), len(b2.phasing))
- for key in b.phasing.keys():
- self.failUnlessEqual(b.phasing[key].lane,
- b2.phasing[key].lane)
- self.failUnlessEqual(b.phasing[key].phasing,
- b2.phasing[key].phasing)
- self.failUnlessEqual(b.phasing[key].prephasing,
- b2.phasing[key].prephasing)
-
- def test_gerald(self):
- # need to update gerald and make tests for it
- g = gerald.gerald(self.gerald_dir)
-
- self.failUnlessEqual(g.version,
- '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
- self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
- self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
- self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
-
-
- # list of genomes, matches what was defined up in
- # make_gerald_config.
- # the first None is to offset the genomes list to be 1..9
-        # instead of Python's default 0..8
- genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
- '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
-
- # test lane specific parameters from gerald config file
- for i in range(1,9):
- cur_lane = g.lanes[str(i)]
- self.failUnlessEqual(cur_lane.analysis, 'eland')
- self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
- self.failUnlessEqual(cur_lane.read_length, '32')
- self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
-
- # test data extracted from summary file
- clusters = [None,
- (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
- (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
-
- for i in range(1,9):
- summary_lane = g.summary[str(i)]
- self.failUnlessEqual(summary_lane.cluster, clusters[i])
- self.failUnlessEqual(summary_lane.lane, str(i))
-
- xml = g.get_elements()
- # just make sure that element tree can serialize the tree
- xml_str = ElementTree.tostring(xml)
- g2 = gerald.Gerald(xml=xml)
-
- # do it all again after extracting from the xml file
- self.failUnlessEqual(g.version, g2.version)
- self.failUnlessEqual(g.date, g2.date)
- self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
- self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
-
- # test lane specific parameters from gerald config file
- for i in range(1,9):
- g_lane = g.lanes[str(i)]
- g2_lane = g2.lanes[str(i)]
- self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
- self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
- self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
- self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
-
- # test (some) summary elements
- for i in range(1,9):
- g_summary = g.summary[str(i)]
- g2_summary = g2.summary[str(i)]
- self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
- self.failUnlessEqual(g_summary.lane, g2_summary.lane)
-
- g_eland = g.eland_results
- g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
-
-
- def test_eland(self):
- dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
- 'chr2L.fa': 'dm3/chr2L.fa',
- 'Lambda.fa': 'Lambda.fa'}
- genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
- '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
- eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
-
- for i in range(1,9):
- lane = eland[str(i)]
- self.failUnlessEqual(lane.reads, 4)
- self.failUnlessEqual(lane.sample_name, "s")
- self.failUnlessEqual(lane.lane_id, unicode(i))
- self.failUnlessEqual(len(lane.mapped_reads), 3)
- self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
- self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
- self.failUnlessEqual(lane.match_codes['U1'], 2)
- self.failUnlessEqual(lane.match_codes['NM'], 1)
-
- xml = eland.get_elements()
- # just make sure that element tree can serialize the tree
- xml_str = ElementTree.tostring(xml)
- e2 = gerald.ELAND(xml=xml)
-
- for i in range(1,9):
- l1 = eland[str(i)]
- l2 = e2[str(i)]
- self.failUnlessEqual(l1.reads, l2.reads)
- self.failUnlessEqual(l1.sample_name, l2.sample_name)
- self.failUnlessEqual(l1.lane_id, l2.lane_id)
- self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
- self.failUnlessEqual(len(l1.mapped_reads), 3)
- for k in l1.mapped_reads.keys():
- self.failUnlessEqual(l1.mapped_reads[k],
- l2.mapped_reads[k])
-
- self.failUnlessEqual(len(l1.match_codes), 9)
- self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
- for k in l1.match_codes.keys():
- self.failUnlessEqual(l1.match_codes[k],
- l2.match_codes[k])
-
- def test_runfolder(self):
- runs = runfolder.get_runs(self.runfolder_dir)
-
- # do we get the flowcell id from the filename?
- self.failUnlessEqual(len(runs), 1)
- self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
-
- # do we get the flowcell id from the FlowcellId.xml file
- make_flowcell_id(self.runfolder_dir, '207BTAAXY')
- runs = runfolder.get_runs(self.runfolder_dir)
- self.failUnlessEqual(len(runs), 1)
- self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
-
- r1 = runs[0]
- xml = r1.get_elements()
- xml_str = ElementTree.tostring(xml)
-
- r2 = runfolder.PipelineRun(xml=xml)
- self.failUnlessEqual(r1.name, r2.name)
- self.failIfEqual(r2.firecrest, None)
- self.failIfEqual(r2.bustard, None)
- self.failIfEqual(r2.gerald, None)
-
-
-def suite():
- return unittest.makeSuite(RunfolderTests,'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest="suite")
-
+++ /dev/null
-#!/usr/bin/env python
-
-from datetime import datetime, date
-import os
-import tempfile
-import shutil
-import unittest
-
-from gaworkflow.pipeline import firecrest
-from gaworkflow.pipeline import bustard
-from gaworkflow.pipeline import gerald
-from gaworkflow.pipeline import runfolder
-from gaworkflow.pipeline.runfolder import ElementTree
-
-
-def make_flowcell_id(runfolder_dir, flowcell_id=None):
- if flowcell_id is None:
- flowcell_id = '207BTAAXY'
-
- config = """<?xml version="1.0"?>
-<FlowcellId>
- <Text>%s</Text>
-</FlowcellId>""" % (flowcell_id,)
- config_dir = os.path.join(runfolder_dir, 'Config')
-
- if not os.path.exists(config_dir):
- os.mkdir(config_dir)
- pathname = os.path.join(config_dir, 'FlowcellId.xml')
- f = open(pathname,'w')
- f.write(config)
- f.close()
-
-def make_matrix(matrix_dir):
- contents = """# Auto-generated frequency response matrix
-> A
-> C
-> G
-> T
-0.77 0.15 -0.04 -0.04
-0.76 1.02 -0.05 -0.06
--0.10 -0.10 1.17 -0.03
--0.13 -0.12 0.80 1.27
-"""
- s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
- f = open(s_matrix, 'w')
- f.write(contents)
- f.close()
-
-def make_phasing_params(bustard_dir):
- for lane in range(1,9):
- pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
- f = open(pathname, 'w')
- f.write("""<Parameters>
- <Phasing>0.009900</Phasing>
- <Prephasing>0.003500</Prephasing>
-</Parameters>
-""")
- f.close()
-
-def make_gerald_config(gerald_dir):
- config_xml = """<RunParameters>
-<ChipWideRunParameters>
- <ANALYSIS>default</ANALYSIS>
- <BAD_LANES></BAD_LANES>
- <BAD_TILES></BAD_TILES>
- <CONTAM_DIR></CONTAM_DIR>
- <CONTAM_FILE></CONTAM_FILE>
- <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
- <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
- <ELAND_REPEAT></ELAND_REPEAT>
- <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
- <EMAIL_LIST>diane</EMAIL_LIST>
- <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
- <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
- <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
- <FORCE>1</FORCE>
- <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
- <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
- <HAMSTER_FLAG>genome</HAMSTER_FLAG>
- <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
- <POST_RUN_COMMAND></POST_RUN_COMMAND>
- <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
- <PURE_BASES>12</PURE_BASES>
- <QF_PARAMS>'((CHASTITY>=0.6))'</QF_PARAMS>
- <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
- <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
- <READ_LENGTH>32</READ_LENGTH>
- <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
- <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
- <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
- <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
- <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
- <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
- <TILE_ROOT>s</TILE_ROOT>
- <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
- <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
- <USE_BASES>all</USE_BASES>
- <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
-</ChipWideRunParameters>
-<LaneSpecificRunParameters>
- <ANALYSIS>
- <s_1>eland</s_1>
- <s_2>eland</s_2>
- <s_3>eland</s_3>
- <s_4>eland</s_4>
- <s_5>eland</s_5>
- <s_6>eland</s_6>
- <s_7>eland</s_7>
- <s_8>eland</s_8>
- </ANALYSIS>
- <ELAND_GENOME>
- <s_1>/g/dm3</s_1>
- <s_2>/g/equcab1</s_2>
- <s_3>/g/equcab1</s_3>
- <s_4>/g/canfam2</s_4>
- <s_5>/g/hg18</s_5>
- <s_6>/g/hg18</s_6>
- <s_7>/g/hg18</s_7>
- <s_8>/g/hg18</s_8>
- </ELAND_GENOME>
- <READ_LENGTH>
- <s_1>32</s_1>
- <s_2>32</s_2>
- <s_3>32</s_3>
- <s_4>32</s_4>
- <s_5>32</s_5>
- <s_6>32</s_6>
- <s_7>32</s_7>
- <s_8>32</s_8>
- </READ_LENGTH>
- <USE_BASES>
- <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
- <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
- <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
- <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
- <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
- <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
- <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
- <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
- </USE_BASES>
-</LaneSpecificRunParameters>
-</RunParameters>
-"""
- pathname = os.path.join(gerald_dir, 'config.xml')
- f = open(pathname,'w')
- f.write(config_xml)
- f.close()
-
-def make_summary_htm(gerald_dir):
- summary_htm="""<!--RUN_TIME Wed Jul 2 06:47:44 2008 -->
-<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
-<html>
-<body>
-
-<a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
-<h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
-<h2><br></br>Chip Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr><td>Machine</td><td>HWI-EAS229</td></tr>
-<tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
-<tr><td>Chip ID</td><td>unknown</td></tr>
-</table>
-<h2><br></br>Chip Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Clusters</td>
-<td>Clusters (PF)</td>
-<td>Yield (kbases)</td>
-</tr>
-<tr><td>80933224</td>
-<td>43577803</td>
-<td>1133022</td>
-</tr>
-</table>
-<h2><br></br>Lane Parameter Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane</td>
-<td>Sample ID</td>
-<td>Sample Target</td>
-<td>Sample Type</td>
-<td>Length</td>
-<td>Filter</td>
-<td>Num Tiles</td>
-<td>Tiles</td>
-</tr>
-<tr>
-<td>1</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane1">Lane 1</a></td>
-</tr>
-<tr>
-<td>2</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane2">Lane 2</a></td>
-</tr>
-<tr>
-<td>3</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane3">Lane 3</a></td>
-</tr>
-<tr>
-<td>4</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane4">Lane 4</a></td>
-</tr>
-<tr>
-<td>5</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane5">Lane 5</a></td>
-</tr>
-<tr>
-<td>6</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane6">Lane 6</a></td>
-</tr>
-<tr>
-<td>7</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane7">Lane 7</a></td>
-</tr>
-<tr>
-<td>8</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane8">Lane 8</a></td>
-</tr>
-</table>
-<h2><br></br>Lane Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td colspan="2">Lane Info</td>
-<td colspan="8">Tile Mean +/- SD for Lane</td>
-</tr>
-<tr>
-<td>Lane </td>
-<td>Lane Yield (kbases) </td>
-<td>Clusters (raw)</td>
-<td>Clusters (PF) </td>
-<td>1st Cycle Int (PF) </td>
-<td>% intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Alignment Score (PF) </td>
-<td> % Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>158046</td>
-<td>96483 +/- 9074</td>
-<td>60787 +/- 4240</td>
-<td>329 +/- 35</td>
-<td>101.88 +/- 6.03</td>
-<td>63.21 +/- 3.29</td>
-<td>70.33 +/- 0.24</td>
-<td>9054.08 +/- 59.16</td>
-<td>0.46 +/- 0.18</td>
-</tr>
-<tr>
-<td>2</td>
-<td>156564</td>
-<td>133738 +/- 7938</td>
-<td>60217 +/- 1926</td>
-<td>444 +/- 39</td>
-<td>92.62 +/- 7.58</td>
-<td>45.20 +/- 3.31</td>
-<td>51.98 +/- 0.74</td>
-<td>6692.04 +/- 92.49</td>
-<td>0.46 +/- 0.09</td>
-</tr>
-<tr>
-<td>3</td>
-<td>185818</td>
-<td>152142 +/- 10002</td>
-<td>71468 +/- 2827</td>
-<td>366 +/- 36</td>
-<td>91.53 +/- 8.66</td>
-<td>47.19 +/- 3.80</td>
-<td>82.24 +/- 0.44</td>
-<td>10598.68 +/- 64.13</td>
-<td>0.41 +/- 0.04</td>
-</tr>
-<tr>
-<td>4</td>
-<td>34953</td>
-<td>15784 +/- 2162</td>
-<td>13443 +/- 1728</td>
-<td>328 +/- 40</td>
-<td>97.53 +/- 9.87</td>
-<td>85.29 +/- 1.91</td>
-<td>80.02 +/- 0.53</td>
-<td>10368.82 +/- 71.08</td>
-<td>0.15 +/- 0.05</td>
-</tr>
-<tr>
-<td>5</td>
-<td>167936</td>
-<td>119735 +/- 8465</td>
-<td>64590 +/- 2529</td>
-<td>417 +/- 37</td>
-<td>88.69 +/- 14.79</td>
-<td>54.10 +/- 2.59</td>
-<td>76.95 +/- 0.32</td>
-<td>9936.47 +/- 65.75</td>
-<td>0.28 +/- 0.02</td>
-</tr>
-<tr>
-<td>6</td>
-<td>173463</td>
-<td>152177 +/- 8146</td>
-<td>66716 +/- 2493</td>
-<td>372 +/- 39</td>
-<td>87.06 +/- 9.86</td>
-<td>43.98 +/- 3.12</td>
-<td>78.80 +/- 0.43</td>
-<td>10162.28 +/- 49.65</td>
-<td>0.38 +/- 0.03</td>
-</tr>
-<tr>
-<td>7</td>
-<td>149287</td>
-<td>84649 +/- 7325</td>
-<td>57418 +/- 3617</td>
-<td>295 +/- 28</td>
-<td>89.40 +/- 8.23</td>
-<td>67.97 +/- 1.82</td>
-<td>33.38 +/- 0.25</td>
-<td>4247.92 +/- 32.37</td>
-<td>1.00 +/- 0.03</td>
-</tr>
-<tr>
-<td>8</td>
-<td>106953</td>
-<td>54622 +/- 4812</td>
-<td>41136 +/- 3309</td>
-<td>284 +/- 37</td>
-<td>90.21 +/- 9.10</td>
-<td>75.39 +/- 2.27</td>
-<td>48.33 +/- 0.29</td>
-<td>6169.21 +/- 169.50</td>
-<td>0.86 +/- 1.22</td>
-</tr>
-<tr><td colspan="13">Tile mean across chip</td></tr>
-<tr>
-<td>Av.</td>
-<td></td>
-<td>101166</td>
-<td>54472</td>
-<td>354</td>
-<td>92.36</td>
-<td>60.29</td>
-<td>65.25</td>
-<td>8403.69</td>
-<td>0.50</td>
-</tr>
-</table>
-<h2><br></br>Expanded Lane Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-
-<tr><td colspan="2">Lane Info</td>
-<td colspan="2">Phasing Info</td>
-<td colspan="2">Raw Data (tile mean)</td>
-<td colspan="7">Filtered Data (tile mean)</td></tr>
-<td>Lane </td>
-<td>Clusters (tile mean) (raw)</td>
-<td>% Phasing </td>
-<td>% Prephasing </td>
-<td>% Error Rate (raw) </td>
-<td> Equiv Perfect Clusters (raw) </td>
-<td>% retained </td>
-<td>Cycle 2-4 Av Int (PF) </td>
-<td>Cycle 2-10 Av % Loss (PF) </td>
-<td>Cycle 10-20 Av % Loss (PF) </td>
-<td>% Align (PF) </td>
-<td>% Error Rate (PF) </td>
-<td> Equiv Perfect Clusters (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>96483</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.00</td>
-<td>49676</td>
-<td>63.21</td>
-<td>317 +/- 32</td>
-<td>0.13 +/- 0.44</td>
-<td>-1.14 +/- 0.34</td>
-<td>70.33</td>
-<td>0.46</td>
-<td>41758</td>
-</tr>
-<tr>
-<td>2</td>
-<td>133738</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.22</td>
-<td>40467</td>
-<td>45.20</td>
-<td>415 +/- 33</td>
-<td>0.29 +/- 0.40</td>
-<td>-0.79 +/- 0.35</td>
-<td>51.98</td>
-<td>0.46</td>
-<td>30615</td>
-</tr>
-<tr>
-<td>3</td>
-<td>152142</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.30</td>
-<td>78588</td>
-<td>47.19</td>
-<td>344 +/- 26</td>
-<td>0.68 +/- 0.51</td>
-<td>-0.77 +/- 0.42</td>
-<td>82.24</td>
-<td>0.41</td>
-<td>57552</td>
-</tr>
-<tr>
-<td>4</td>
-<td>15784</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>0.29</td>
-<td>11095</td>
-<td>85.29</td>
-<td>306 +/- 34</td>
-<td>0.20 +/- 0.69</td>
-<td>-1.28 +/- 0.66</td>
-<td>80.02</td>
-<td>0.15</td>
-<td>10671</td>
-</tr>
-<tr>
-<td>5</td>
-<td>119735</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>0.85</td>
-<td>60335</td>
-<td>54.10</td>
-<td>380 +/- 32</td>
-<td>0.34 +/- 0.49</td>
-<td>-1.55 +/- 4.69</td>
-<td>76.95</td>
-<td>0.28</td>
-<td>49015</td>
-</tr>
-<tr>
-<td>6</td>
-<td>152177</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.21</td>
-<td>70905</td>
-<td>43.98</td>
-<td>333 +/- 27</td>
-<td>0.57 +/- 0.50</td>
-<td>-0.91 +/- 0.39</td>
-<td>78.80</td>
-<td>0.38</td>
-<td>51663</td>
-</tr>
-<tr>
-<td>7</td>
-<td>84649</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.38</td>
-<td>21069</td>
-<td>67.97</td>
-<td>272 +/- 20</td>
-<td>1.15 +/- 0.52</td>
-<td>-0.84 +/- 0.58</td>
-<td>33.38</td>
-<td>1.00</td>
-<td>18265</td>
-</tr>
-<tr>
-<td>8</td>
-<td>54622</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.17</td>
-<td>21335</td>
-<td>75.39</td>
-<td>262 +/- 31</td>
-<td>1.10 +/- 0.59</td>
-<td>-1.01 +/- 0.47</td>
-<td>48.33</td>
-<td>0.86</td>
-<td>19104</td>
-</tr>
-</table>
-<b><br></br>IVC Plots</b>
-<p> <a href='IVC.htm' target="_blank"> IVC.htm
- </a></p>
-<b><br></br>All Intensity Plots</b>
-<p> <a href='All.htm' target="_blank"> All.htm
- </a></p>
-<b><br></br>Error graphs: </b>
-<p> <a href='Error.htm' target="_blank"> Error.htm
- </a></p>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>0001</td>
-<td>114972</td>
-<td>326.48</td>
-<td>94.39</td>
-<td>57.44</td>
-<td>70.2</td>
-<td>9038.6</td>
-<td>0.44</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>2</td>
-<td>0001</td>
-<td>147793</td>
-<td>448.12</td>
-<td>83.68</td>
-<td>38.57</td>
-<td>53.7</td>
-<td>6905.4</td>
-<td>0.54</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>3</td>
-<td>0001</td>
-<td>167904</td>
-<td>374.05</td>
-<td>86.91</td>
-<td>40.36</td>
-<td>81.3</td>
-<td>10465.0</td>
-<td>0.47</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>4</td>
-<td>0001</td>
-<td>20308</td>
-<td>276.85</td>
-<td>92.87</td>
-<td>84.26</td>
-<td>80.4</td>
-<td>10413.8</td>
-<td>0.16</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>6</td>
-<td>0001</td>
-<td>166844</td>
-<td>348.12</td>
-<td>77.59</td>
-<td>38.13</td>
-<td>79.7</td>
-<td>10264.4</td>
-<td>0.44</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>7</td>
-<td>0001</td>
-<td>98913</td>
-<td>269.90</td>
-<td>86.66</td>
-<td>64.55</td>
-<td>33.2</td>
-<td>4217.5</td>
-<td>1.02</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>8</td>
-<td>0001</td>
-<td>64972</td>
-<td>243.60</td>
-<td>89.40</td>
-<td>73.17</td>
-<td>48.3</td>
-<td>6182.8</td>
-<td>0.71</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-</body>
-</html>
-"""
- pathname = os.path.join(gerald_dir, 'Summary.htm')
- f = open(pathname, 'w')
- f.write(summary_htm)
- f.close()
-
-def make_eland_results(gerald_dir):
- eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
->HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
->HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
->HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
-"""
- for i in range(1,9):
- pathname = os.path.join(gerald_dir,
- 's_%d_eland_result.txt' % (i,))
- f = open(pathname, 'w')
- f.write(eland_result)
- f.close()
-
-def make_runfolder(obj=None):
- """
- Make a fake runfolder, attach all the directories to obj if defined
- """
- # make a fake runfolder directory
- temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
-
- runfolder_dir = os.path.join(temp_dir,
- '080102_HWI-EAS229_0010_207BTAAXX')
- os.mkdir(runfolder_dir)
-
- data_dir = os.path.join(runfolder_dir, 'Data')
- os.mkdir(data_dir)
-
- firecrest_dir = os.path.join(data_dir,
- 'C1-33_Firecrest1.8.28_12-04-2008_diane'
- )
- os.mkdir(firecrest_dir)
- matrix_dir = os.path.join(firecrest_dir, 'Matrix')
- os.mkdir(matrix_dir)
- make_matrix(matrix_dir)
-
- bustard_dir = os.path.join(firecrest_dir,
- 'Bustard1.8.28_12-04-2008_diane')
- os.mkdir(bustard_dir)
- make_phasing_params(bustard_dir)
-
- gerald_dir = os.path.join(bustard_dir,
- 'GERALD_12-04-2008_diane')
- os.mkdir(gerald_dir)
- make_gerald_config(gerald_dir)
- make_summary_htm(gerald_dir)
- make_eland_results(gerald_dir)
-
- if obj is not None:
- obj.temp_dir = temp_dir
- obj.runfolder_dir = runfolder_dir
- obj.data_dir = data_dir
- obj.firecrest_dir = firecrest_dir
- obj.matrix_dir = matrix_dir
- obj.bustard_dir = bustard_dir
- obj.gerald_dir = gerald_dir
-
-
-class RunfolderTests(unittest.TestCase):
- """
- Test components of the runfolder processing code
- which includes firecrest, bustard, and gerald
- """
- def setUp(self):
- # attaches all the directories to the object passed in
- make_runfolder(self)
-
- def tearDown(self):
- shutil.rmtree(self.temp_dir)
-
- def test_firecrest(self):
- """
- Construct a firecrest object
- """
- f = firecrest.firecrest(self.firecrest_dir)
- self.failUnlessEqual(f.version, '1.8.28')
- self.failUnlessEqual(f.start, 1)
- self.failUnlessEqual(f.stop, 33)
- self.failUnlessEqual(f.user, 'diane')
- self.failUnlessEqual(f.date, date(2008,4,12))
-
- xml = f.get_elements()
- # just make sure that element tree can serialize the tree
- xml_str = ElementTree.tostring(xml)
-
- f2 = firecrest.Firecrest(xml=xml)
- self.failUnlessEqual(f.version, f2.version)
- self.failUnlessEqual(f.start, f2.start)
- self.failUnlessEqual(f.stop, f2.stop)
- self.failUnlessEqual(f.user, f2.user)
- self.failUnlessEqual(f.date, f2.date)
-
- def test_bustard(self):
- """
- construct a bustard object
- """
- b = bustard.bustard(self.bustard_dir)
- self.failUnlessEqual(b.version, '1.8.28')
- self.failUnlessEqual(b.date, date(2008,4,12))
- self.failUnlessEqual(b.user, 'diane')
- self.failUnlessEqual(len(b.phasing), 8)
- self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
-
- xml = b.get_elements()
- b2 = bustard.Bustard(xml=xml)
- self.failUnlessEqual(b.version, b2.version)
- self.failUnlessEqual(b.date, b2.date )
- self.failUnlessEqual(b.user, b2.user)
- self.failUnlessEqual(len(b.phasing), len(b2.phasing))
- for key in b.phasing.keys():
- self.failUnlessEqual(b.phasing[key].lane,
- b2.phasing[key].lane)
- self.failUnlessEqual(b.phasing[key].phasing,
- b2.phasing[key].phasing)
- self.failUnlessEqual(b.phasing[key].prephasing,
- b2.phasing[key].prephasing)
-
- def test_gerald(self):
- # need to update gerald and make tests for it
- g = gerald.gerald(self.gerald_dir)
-
- self.failUnlessEqual(g.version,
- '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
- self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
- self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
- self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
-
-
- # list of genomes, matches what was defined up in
- # make_gerald_config.
- # the first None is to offset the genomes list to be 1..9
- # instead of pythons default 0..8
- genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
- '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
-
- # test lane specific parameters from gerald config file
- for i in range(1,9):
- cur_lane = g.lanes[str(i)]
- self.failUnlessEqual(cur_lane.analysis, 'eland')
- self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
- self.failUnlessEqual(cur_lane.read_length, '32')
- self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
-
- # test data extracted from summary file
- clusters = [None,
- (96483, 9074), (133738, 7938),
- (152142, 10002), (15784, 2162),
- (119735, 8465), (152177, 8146),
- (84649, 7325), (54622, 4812),]
-
- for i in range(1,9):
- summary_lane = g.summary[str(i)]
- self.failUnlessEqual(summary_lane.cluster, clusters[i])
- self.failUnlessEqual(summary_lane.lane, str(i))
-
- xml = g.get_elements()
- # just make sure that element tree can serialize the tree
- xml_str = ElementTree.tostring(xml)
- g2 = gerald.Gerald(xml=xml)
-
- # do it all again after extracting from the xml file
- self.failUnlessEqual(g.version, g2.version)
- self.failUnlessEqual(g.date, g2.date)
- self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
- self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
-
- # test lane specific parameters from gerald config file
- for i in range(1,9):
- g_lane = g.lanes[str(i)]
- g2_lane = g2.lanes[str(i)]
- self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
- self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
- self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
- self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
-
- # test (some) summary elements
- for i in range(1,9):
- g_summary = g.summary[str(i)]
- g2_summary = g2.summary[str(i)]
- self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
- self.failUnlessEqual(g_summary.lane, g2_summary.lane)
-
- g_eland = g.eland_results
- g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
-
-
- def test_eland(self):
- dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
- 'chr2L.fa': 'dm3/chr2L.fa',
- 'Lambda.fa': 'Lambda.fa'}
- genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
- '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
- eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
-
- for i in range(1,9):
- lane = eland[str(i)]
- self.failUnlessEqual(lane.reads, 4)
- self.failUnlessEqual(lane.sample_name, "s")
- self.failUnlessEqual(lane.lane_id, unicode(i))
- self.failUnlessEqual(len(lane.mapped_reads), 3)
- self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
- self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
- self.failUnlessEqual(lane.match_codes['U1'], 2)
- self.failUnlessEqual(lane.match_codes['NM'], 1)
-
- xml = eland.get_elements()
- # just make sure that element tree can serialize the tree
- xml_str = ElementTree.tostring(xml)
- e2 = gerald.ELAND(xml=xml)
-
- for i in range(1,9):
- l1 = eland[str(i)]
- l2 = e2[str(i)]
- self.failUnlessEqual(l1.reads, l2.reads)
- self.failUnlessEqual(l1.sample_name, l2.sample_name)
- self.failUnlessEqual(l1.lane_id, l2.lane_id)
- self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
- self.failUnlessEqual(len(l1.mapped_reads), 3)
- for k in l1.mapped_reads.keys():
- self.failUnlessEqual(l1.mapped_reads[k],
- l2.mapped_reads[k])
-
- self.failUnlessEqual(len(l1.match_codes), 9)
- self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
- for k in l1.match_codes.keys():
- self.failUnlessEqual(l1.match_codes[k],
- l2.match_codes[k])
-
- def test_runfolder(self):
- runs = runfolder.get_runs(self.runfolder_dir)
-
- # do we get the flowcell id from the filename?
- self.failUnlessEqual(len(runs), 1)
- self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
-
- # do we get the flowcell id from the FlowcellId.xml file
- make_flowcell_id(self.runfolder_dir, '207BTAAXY')
- runs = runfolder.get_runs(self.runfolder_dir)
- self.failUnlessEqual(len(runs), 1)
- self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
-
- r1 = runs[0]
- xml = r1.get_elements()
- xml_str = ElementTree.tostring(xml)
-
- r2 = runfolder.PipelineRun(xml=xml)
- self.failUnlessEqual(r1.name, r2.name)
- self.failIfEqual(r2.firecrest, None)
- self.failIfEqual(r2.bustard, None)
- self.failIfEqual(r2.gerald, None)
-
-
-def suite():
- return unittest.makeSuite(RunfolderTests,'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest="suite")
-
+++ /dev/null
-#\r
-# The Alphanum Algorithm is an improved sorting algorithm for strings\r
-# containing numbers. Instead of sorting numbers in ASCII order like\r
-# a standard sort, this algorithm sorts numbers in numeric order.\r
-#\r
-# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com\r
-#\r
-#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com)\r
-#* Distributed under same license as original\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
-#\r
-\r
-import re\r
-\r
-#\r
-# TODO: Make decimal points be considered in the same class as digits\r
-#\r
-\r
-def chunkify(str):\r
- """return a list of numbers and non-numeric substrings of +str+\r
-\r
- the numeric substrings are converted to integer, non-numeric are left as is\r
- """\r
- chunks = re.findall("(\d+|\D+)",str)\r
- chunks = [re.match('\d',x) and int(x) or x for x in chunks] #convert numeric strings to numbers\r
- return chunks\r
-\r
-def alphanum(a,b):\r
- """breaks +a+ and +b+ into pieces and returns left-to-right comparison of the pieces\r
-\r
- +a+ and +b+ are expected to be strings (for example file names) with numbers and non-numeric characters\r
- Split the values into list of numbers and non numeric sub-strings and so comparison of numbers gives\r
- Numeric sorting, comparison of non-numeric gives Lexicographic order\r
- """\r
- # split strings into chunks\r
- aChunks = chunkify(a)\r
- bChunks = chunkify(b)\r
-\r
- return cmp(aChunks,bChunks) #built in comparison works once data is prepared\r
-\r
-\r
-\r
-if __name__ == "__main__":\r
- unsorted = ["1000X Radonius Maximus","10X Radonius","200X Radonius","20X Radonius","20X Radonius Prime","30X Radonius","40X Radonius","Allegia 50 Clasteron","Allegia 500 Clasteron","Allegia 51 Clasteron","Allegia 51B Clasteron","Allegia 52 Clasteron","Allegia 60 Clasteron","Alpha 100","Alpha 2","Alpha 200","Alpha 2A","Alpha 2A-8000","Alpha 2A-900","Callisto Morphamax","Callisto Morphamax 500","Callisto Morphamax 5000","Callisto Morphamax 600","Callisto Morphamax 700","Callisto Morphamax 7000","Callisto Morphamax 7000 SE","Callisto Morphamax 7000 SE2","QRS-60 Intrinsia Machine","QRS-60F Intrinsia Machine","QRS-62 Intrinsia Machine","QRS-62F Intrinsia Machine","Xiph Xlater 10000","Xiph Xlater 2000","Xiph Xlater 300","Xiph Xlater 40","Xiph Xlater 5","Xiph Xlater 50","Xiph Xlater 500","Xiph Xlater 5000","Xiph Xlater 58"]\r
- sorted = unsorted[:]\r
- sorted.sort(alphanum)\r
- print '+++++Sorted...++++'\r
- print '\n'.join(sorted)\r
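For illustration, a minimal sketch of how the alphanum comparison above differs from a plain ASCII sort (names taken from the demo list in __main__; Python 2 cmp-style sort):

    names = ['Xiph Xlater 40', 'Xiph Xlater 5', 'Xiph Xlater 300']
    names.sort()          # ASCII order:    ['Xiph Xlater 300', 'Xiph Xlater 40', 'Xiph Xlater 5']
    names.sort(alphanum)  # alphanum order: ['Xiph Xlater 5', 'Xiph Xlater 40', 'Xiph Xlater 300']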
+++ /dev/null
-"""
-ElementTree helper functions
-"""
-def indent(elem, level=0):
- """
- reformat an element tree to be 'pretty' (indented)
- """
- i = "\n" + level*" "
- if len(elem):
- if not elem.text or not elem.text.strip():
- elem.text = i + " "
- for child in elem:
- indent(child, level+1)
- # we don't want the closing tag indented too far
- child.tail = i
- if not elem.tail or not elem.tail.strip():
- elem.tail = i
- else:
- if level and (not elem.tail or not elem.tail.strip()):
- elem.tail = i
-
-def flatten(elem, include_tail=0):
- """
- Extract the text from an element tree
-    (AKA extract the text that is not part of XML tags)
- """
- text = elem.text or ""
- for e in elem:
- text += flatten(e, 1)
- if include_tail and elem.tail: text += elem.tail
- return text
-
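A minimal usage sketch of the two helpers above, mirroring the test_ethelp unit test further down:

    from xml.etree import ElementTree
    from gaworkflow.util.ethelp import indent, flatten

    tree = ElementTree.fromstring('<foo><bar>asdf</bar><br/></foo>')
    print flatten(tree)                 # 'asdf' -- just the character data
    indent(tree)                        # rewrites .text/.tail in place
    print ElementTree.tostring(tree)    # now serializes across several lines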
+++ /dev/null
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-import datetime
-import os
-import re
-import sys
-import time
-
-if sys.version_info[0] + sys.version_info[1] * 0.1 >= 2.5:
- # we're python 2.5
- import sqlite3
-else:
- import pysqlite2.dbapi2 as sqlite3
-
-
-class fctracker:
- """
- provide a simple way to interact with the flowcell data in fctracker.db
- """
- def __init__(self, database):
- # default to the current directory
- if database is None:
- self.database = self._guess_fctracker_path()
- else:
- self.database = database
- self.conn = sqlite3.connect(self.database)
- self._get_library()
- self._get_species()
-
- def _guess_fctracker_path(self):
- """
- Guess a few obvious places for the database
- """
- fctracker = 'fctracker.db'
- name = fctracker
- # is it in the current dir?
- if os.path.exists(name):
- return name
- name = os.path.expanduser(os.path.join('~', fctracker))
- if os.path.exists(name):
- return name
- raise RuntimeError("Can't find fctracker")
-
- def _make_dict_from_table(self, table_name, pkey_name):
- """
- Convert a django table into a dictionary indexed by the primary key.
- Yes, it really does just load everything into memory, hopefully
- we stay under a few tens of thousands of runs for a while.
- """
- table = {}
- c = self.conn.cursor()
- c.execute('select * from %s;' % (table_name))
- # extract just the field name
- description = [ f[0] for f in c.description]
- for row in c:
- row_dict = dict(zip(description, row))
- table[row_dict[pkey_name]] = row_dict
- c.close()
- return table
-
- def _add_lanes_to_libraries(self):
- """
- add flowcell/lane ids to new attribute 'lanes' in the library dictionary
- """
- library_id_re = re.compile('lane_\d_library_id')
-
- for fc_id, fc in self.flowcells.items():
- lane_library = [ (x[0][5], x[1]) for x in fc.items()
- if library_id_re.match(x[0]) ]
- for lane, library_id in lane_library:
- if not self.library[library_id].has_key('lanes'):
- self.library[library_id]['lanes'] = []
- self.library[library_id]['lanes'].append((fc_id, lane))
-
- def _get_library(self):
- """
- attach the library dictionary to the instance
- """
- self.library = self._make_dict_from_table(
- 'fctracker_library',
- 'library_id')
-
-
- def _get_species(self):
- """
- attach the species dictionary to the instance
- """
- self.species = self._make_dict_from_table(
- 'fctracker_species',
- 'id'
- )
-
- def _get_flowcells(self, where=None):
- """
- attach the flowcell dictionary to the instance
-
- where is a sql where clause. (eg "where run_date > '2008-1-1'")
- that can be used to limit what flowcells we select
- FIXME: please add sanitization code
- """
- if where is None:
- where = ""
- self.flowcells = {}
- c = self.conn.cursor()
- c.execute('select * from fctracker_flowcell %s;' % (where))
- # extract just the field name
- description = [ f[0] for f in c.description ]
- for row in c:
- row_dict = dict(zip(description, row))
- fcid, status = self._parse_flowcell_id(row_dict)
- row_dict['flowcell_id'] = fcid
- row_dict['flowcell_status'] = status
-
- for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
- lane_library = self.library[row_dict[lane+"_id"]]
- species_id = lane_library['library_species_id']
- lane_library['library_species'] = self.species[species_id]
- row_dict[lane] = lane_library
- # some useful parsing
- run_date = time.strptime(row_dict['run_date'], '%Y-%m-%d %H:%M:%S')
- run_date = datetime.datetime(*run_date[:6])
- row_dict['run_date'] = run_date
- self.flowcells[row_dict['flowcell_id']] = row_dict
-
- self._add_lanes_to_libraries()
- return self.flowcells
-
- def _parse_flowcell_id(self, flowcell_row):
- """
- Return flowcell id and status
-
- We stored the status information in the flowcell id name.
- this was dumb, but database schemas are hard to update.
- """
- fields = flowcell_row['flowcell_id'].split()
- fcid = None
- status = None
- if len(fields) > 0:
- fcid = fields[0]
- if len(fields) > 1:
- status = fields[1]
- return fcid, status
-
-
-def flowcell_gone(cell):
- """
- Use a variety of heuristics to determine if the flowcell drive
- has been deleted.
- """
- status = cell['flowcell_status']
- if status is None:
- return False
- failures = ['failed', 'deleted', 'not run']
- for f in failures:
- if re.search(f, status):
- return True
- else:
- return False
-
-def recoverable_drive_report(flowcells):
- """
- Attempt to report what flowcells are still on a hard drive
- """
- def format_status(status):
- if status is None:
- return ""
- else:
- return status+" "
-
- # sort flowcells by run date
- flowcell_list = []
- for key, cell in flowcells.items():
- flowcell_list.append( (cell['run_date'], key) )
- flowcell_list.sort()
-
- report = []
- line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) "
- line += "%(species)s"
- for run_date, flowcell_id in flowcell_list:
- cell = flowcells[flowcell_id]
- if flowcell_gone(cell):
- continue
- for l in range(1,9):
- lane = 'lane_%d' % (l)
- cell_library = cell['%s_library'%(lane)]
- fields = {
- 'date': cell['run_date'].strftime('%y-%b-%d'),
- 'id': cell['flowcell_id'],
- 'lane': l,
- 'library_name': cell_library['library_name'],
- 'library_id': cell['%s_library_id'%(lane)],
- 'species': cell_library['library_species']['scientific_name'],
- 'status': format_status(cell['flowcell_status']),
- }
- report.append(line % (fields))
- return os.linesep.join(report)
-
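A minimal sketch of how this module is meant to be driven (the where clause mirrors the docstring example above; it assumes an fctracker.db can be found in the current directory or the home directory):

    from gaworkflow.util.fctracker import fctracker, recoverable_drive_report

    fc = fctracker(None)    # None falls back to ./fctracker.db or ~/fctracker.db
    flowcells = fc._get_flowcells("where run_date > '2008-1-1'")
    print recoverable_drive_report(flowcells)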
+++ /dev/null
-"""
-Utility functions to make bedfiles.
-"""
-import os
-import re
-
-# map eland_result.txt sense
-sense_map = { 'F': '+', 'R': '-'}
-sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
-
-def write_bed_header(outstream, name, description):
- """
- Produce the headerline for a bedfile
- """
- # provide default track names
- if name is None: name = "track"
- if description is None: description = "eland result file"
- bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
- bed_header += os.linesep
- outstream.write(bed_header % (name, description))
-
-def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'):
- """
- read an eland result file from instream and write a bedfile to outstream
- """
- # indexes into fields in eland_result.txt file
- SEQ = 1
- CHR = 6
- START = 7
- SENSE = 8
-
- write_bed_header(outstream, name, description)
-
- for line in instream:
- fields = line.split()
- # we need more than the CHR field, and it needs to match a chromosome
- if len(fields) <= CHR or \
- (chromosome_prefix is not None and \
- fields[CHR][:3] != chromosome_prefix):
- continue
- start = fields[START]
- stop = int(start) + len(fields[SEQ])
- chromosome, extension = fields[CHR].split('.')
- assert extension == "fa"
- outstream.write('%s %s %d read 0 %s - - %s%s' % (
- chromosome,
- start,
- stop,
- sense_map[fields[SENSE]],
- sense_color[fields[SENSE]],
- os.linesep
- ))
-
-
-def make_bed_from_multi_eland_stream(
- instream,
- outstream,
- name,
- description,
- chr_prefix='chr',
- max_reads=255
- ):
- """
- read a multi eland stream and write a bedfile
- """
- write_bed_header(outstream, name, description)
- parse_multi_eland(instream, outstream, chr_prefix, max_reads)
-
-def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
-
- loc_pattern = '(?P<fullloc>(?P<start>[0-9]+)(?P<dir>[FR])(?P<count>[0-9]+))'
- other_pattern = '(?P<chr>[^:,]+)'
- split_re = re.compile('(%s|%s)' % (loc_pattern, other_pattern))
-
- for line in instream:
- rec = line.split()
- if len(rec) > 3:
- # colony_id = rec[0]
- seq = rec[1]
- # number of matches for 0, 1, and 2 mismatches
- # m0, m1, m2 = [int(x) for x in rec[2].split(':')]
- compressed_reads = rec[3]
- cur_chr = ""
- reads = {0: [], 1: [], 2:[]}
-
- for token in split_re.finditer(compressed_reads):
- if token.group('chr') is not None:
- cur_chr = token.group('chr')[:-3] # strip off .fa
- elif token.group('fullloc') is not None:
- matches = int(token.group('count'))
- # only emit a bed line if
- # our current chromosome starts with chromosome pattern
- if chr_prefix is None or cur_chr.startswith(chr_prefix):
- start = int(token.group('start'))
- stop = start + len(seq)
- orientation = token.group('dir')
- strand = sense_map[orientation]
- color = sense_color[orientation]
- # build up list of reads for this record
- reads[matches].append((cur_chr, start, stop, strand, color))
-
- # report up to our max_read threshold reporting the fewer-mismatch
- # matches first
- reported_reads = 0
- keys = [0,1,2]
- for mismatch, read_list in ((k, reads[k]) for k in keys):
- reported_reads += len(read_list)
- if reported_reads <= max_reads:
- for cur_chr, start, stop, strand, color in read_list:
- reported_reads += 1
- outstream.write('%s %d %d read 0 %s - - %s%s' % (
- cur_chr,
- start,
- stop,
- sense_map[orientation],
- sense_color[orientation],
- os.linesep
- ))
-
-def make_description(database, flowcell_id, lane):
- """
- compute a bedfile name and description from the fctracker database
- """
- from gaworkflow.util.fctracker import fctracker
-
- fc = fctracker(database)
- cells = fc._get_flowcells("where flowcell_id='%s'" % (flowcell_id))
- if len(cells) != 1:
- raise RuntimeError("couldn't find flowcell id %s" % (flowcell_id))
- lane = int(lane)
- if lane < 1 or lane > 8:
- raise RuntimeError("flowcells only have lanes 1-8")
-
- name = "%s-%s" % (flowcell_id, lane)
-
- cell_id, cell = cells.items()[0]
- assert cell_id == flowcell_id
-
- cell_library_id = cell['lane_%d_library_id' %(lane,)]
- cell_library = cell['lane_%d_library' %(lane,)]
- description = "%s-%s" % (cell_library['library_name'], cell_library_id)
- return name, description
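A minimal sketch of converting a single-lane eland result file into a bedfile with the helpers above (file names and track name are illustrative only):

    from gaworkflow.util import makebed

    instream = open('s_1_eland_result.txt')
    outstream = open('s_1.bed', 'w')
    makebed.make_bed_from_eland_stream(instream, outstream,
                                       name='207BTAAXX lane 1',
                                       description='eland result file')
    outstream.close()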
+++ /dev/null
-"""
-Utilities for working with unix-style mounts.
-"""
-import os
-import subprocess
-
-def list_mount_points():
- """
- Return list of current mount points
-
- Note: unix-like OS specific
- """
- mount_points = []
- likely_locations = ['/sbin/mount', '/bin/mount']
- for mount in likely_locations:
- if os.path.exists(mount):
- p = subprocess.Popen(mount, stdout=subprocess.PIPE)
- p.wait()
- for l in p.stdout.readlines():
- rec = l.split()
- device = rec[0]
- mount_point = rec[2]
- assert rec[1] == 'on'
- # looking at the output of mount on linux, osx, and
- # sunos, the first 3 elements are always the same
- # devicename on path
- # everything after that displays the attributes
- # of the mount points in wildly differing formats
- mount_points.append(mount_point)
- return mount_points
- else:
- raise RuntimeError("Couldn't find a mount executable")
-
-def is_mounted(point_to_check):
- """
- Return true if argument exactly matches a current mount point.
- """
- for mount_point in list_mount_points():
- if point_to_check == mount_point:
- return True
- else:
- return False
-
-def find_mount_point_for(pathname):
- """
- Find the deepest mount point pathname is located on
- """
- realpath = os.path.realpath(pathname)
- mount_points = list_mount_points()
-
- prefixes = set()
- for current_mount in mount_points:
- cp = os.path.commonprefix([current_mount, realpath])
- prefixes.add((len(cp), cp))
-
- prefixes = list(prefixes)
- prefixes.sort()
- if len(prefixes) == 0:
- return None
- else:
- print prefixes
- # return longest common prefix
- return prefixes[-1][1]
-
-
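A minimal sketch using the three functions defined above (unix-like systems only, since they shell out to mount; the path is illustrative):

    print list_mount_points()                        # e.g. ['/', '/home', ...]
    print is_mounted('/home')                        # True only on an exact match
    print find_mount_point_for('/home/diane/tmp_runfolder')   # deepest containing mount point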
+++ /dev/null
-"""
-Helpful utilities for turning random names/objects into streams.
-"""
-import os
-import gzip
-import bz2
-import types
-import urllib2
-
-def isfilelike(file_ref, mode):
- """Does file_ref have the core file operations?
- """
-    # if mode is w/a, check to make sure we have writeable ops
- # but always check to see if we can read
- read_operations = ['read', 'readline', 'readlines']
- write_operations = [ 'write', 'writelines' ]
- #random_operations = [ 'seek', 'tell' ]
- if mode[0] in ('w', 'a'):
- for o in write_operations:
- if not hasattr(file_ref, o):
- return False
- for o in read_operations:
- if not hasattr(file_ref, o):
- return False
-
- return True
-
-def isurllike(file_ref, mode):
- """
- does file_ref look like a url?
- (AKA does it start with protocol:// ?)
- """
- #what if mode is 'w'?
- parsed = urllib2.urlparse.urlparse(file_ref)
- schema, netloc, path, params, query, fragment = parsed
-
- return len(schema) > 0
-
-def autoopen(file_ref, mode='r'):
- """
- Attempt to intelligently turn file_ref into a readable stream
- """
- # catch being passed a file
- if type(file_ref) is types.FileType:
- return file_ref
- # does it look like a file?
- elif isfilelike(file_ref, mode):
- return file_ref
- elif isurllike(file_ref, mode):
- return urllib2.urlopen(file_ref)
- elif os.path.splitext(file_ref)[1] == ".gz":
- return gzip.open(file_ref, mode)
- elif os.path.splitext(file_ref)[1] == '.bz2':
- return bz2.BZ2File(file_ref, mode)
- else:
- return open(file_ref,mode)
-
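A minimal sketch of how autoopen, defined above, dispatches on its argument (file names and URL are illustrative):

    stream = autoopen('results.txt.gz')            # .gz extension  -> gzip stream
    stream = autoopen('results.txt.bz2')           # .bz2 extension -> bz2 stream
    stream = autoopen('http://example.com/data')   # looks like a URL -> urllib2.urlopen
    stream = autoopen(open('results.txt'))         # already file-like -> returned unchanged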
+++ /dev/null
-"""
-Run up to N simultaneous jobs from a provided list of commands
-"""
-
-import logging
-from subprocess import PIPE
-import subprocess
-import select
-import sys
-import time
-
-class QueueCommands(object):
- """
- Queue up N commands from cmd_list, launching more jobs as the first
- finish.
- """
-
- def __init__(self, cmd_list, N=0, cwd=None):
- """
- cmd_list is a list of elements suitable for subprocess
-        N is the number of simultaneous processes to run.
- 0 is all of them.
-
- WARNING: this will not work on windows
- (It depends on being able to pass local file descriptors to the
-        select call which isn't supported by the Win32 API)
- """
- self.to_run = cmd_list[:]
- self.running = {}
- self.N = N
- self.cwd = cwd
-
- def under_process_limit(self):
- """
- are we still under the total number of allowable jobs?
- """
- if self.N == 0:
- return True
-
- if len(self.running) < self.N:
- return True
-
- return False
-
- def start_jobs(self):
- """
- Launch jobs until we have the maximum allowable running
- (or have run out of jobs)
- """
- queue_log = logging.getLogger('queue')
- queue_log.info('using %s as cwd' % (self.cwd,))
-
- while (len(self.to_run) > 0) and self.under_process_limit():
- queue_log.info('%d left to run', len(self.to_run))
- cmd = self.to_run.pop(0)
- p = subprocess.Popen(cmd, stdout=PIPE, cwd=self.cwd, shell=True)
- self.running[p.stdout] = p
- queue_log.info("Created process %d from %s" % (p.pid, str(cmd)))
-
- def run(self):
- """
- run up to N jobs until we run out of jobs
- """
- queue_log = logging.getLogger('queue')
-
- # to_run slowly gets consumed by start_jobs
- while len(self.to_run) > 0 or len(self.running) > 0:
- # fill any empty spots in our job queue
- self.start_jobs()
-
- # build a list of file descriptors
- # fds=file desciptors
- fds = [ x.stdout for x in self.running.values()]
-
- # wait for something to finish
- # wl= write list, xl=exception list (not used so get bad names)
- read_list, wl, xl = select.select(fds, [], fds)
-
- # for everything that might have finished...
- for pending_fd in read_list:
- pending = self.running[pending_fd]
- # if it really did finish, remove it from running jobs
- if pending.poll() is not None:
- queue_log.info("Process %d finished [%d]",
- pending.pid, pending.returncode)
- del self.running[pending_fd]
- time.sleep(1)
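A minimal sketch, mirroring the test_queuecommands unit test further down: run at most two of the three sleeps at any one time.

    from gaworkflow.util.queuecommands import QueueCommands

    q = QueueCommands(['/bin/sleep 1', '/bin/sleep 2', '/bin/sleep 3'], N=2)
    q.run()    # returns once every queued command has exited (roughly 4 seconds here)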
+++ /dev/null
-import os
-import unittest
-
-try:
- from xml.etree import ElementTree
-except ImportError, e:
- from elementtree import ElementTree
-
-from gaworkflow.util.ethelp import indent, flatten
-
-class testETHelper(unittest.TestCase):
- def setUp(self):
- self.foo = '<foo><bar>asdf</bar><br/></foo>'
- self.foo_tree = ElementTree.fromstring(self.foo)
-
- def test_indent(self):
- flat_foo = ElementTree.tostring(self.foo_tree)
- self.failUnlessEqual(len(flat_foo.split('\n')), 1)
-
- indent(self.foo_tree)
- pretty_foo = ElementTree.tostring(self.foo_tree)
- self.failUnlessEqual(len(pretty_foo.split('\n')), 5)
-
- def test_flatten(self):
-        self.failUnlessEqual(flatten(self.foo_tree), 'asdf')
-
-def suite():
- return unittest.makeSuite(testETHelper, 'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest='suite')
-
-
-
-
+++ /dev/null
-import os
-from StringIO import StringIO
-import unittest
-
-from gaworkflow.util import makebed
-
-class testMakeBed(unittest.TestCase):
- def test_multi_1_0_0_limit_1(self):
- instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383 TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0 mm9_chr13_random.fa:1240R0')
- out = StringIO()
-
- makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
- self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
-
- def test_multi_1_0_0_limit_255(self):
- instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383 TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0 mm9_chr13_random.fa:1240R0')
- out = StringIO()
-
- makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
- self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
-
-
- def test_multi_2_0_0_limit_1(self):
- instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586 GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0 mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
- out = StringIO()
-
- makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
- self.failUnlessEqual(out.len, 0)
-
- def test_multi_2_0_0_limit_255(self):
- instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586 GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0 mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
- out = StringIO()
-
- makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
- self.failUnlessEqual(out.len, 98)
-
- def test_multi_0_2_0_limit_1(self):
- instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:115:495 TCTCCCTGAAAAATANAAGTGNTGTTGGTGAG 0:2:1 mm9_chr14.fa:104434729F2,mm9_chr16.fa:63263818R1,mm9_chr2.fa:52265438R1')
- out = StringIO()
-
- makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
- print out.getvalue()
- self.failUnlessEqual(out.len, 0)
-
-def suite():
- return unittest.makeSuite(testMakeBed, 'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest='suite')
-
-
+++ /dev/null
-import os
-import logging
-import time
-import unittest
-
-
-from gaworkflow.util.queuecommands import QueueCommands
-
-class testQueueCommands(unittest.TestCase):
- def setUp(self):
- logging.basicConfig(level=logging.DEBUG,
- format='%(asctime)s %(name)-8s %(message)s')
-
-
-
- def test_unlimited_run(self):
- """
- Run everything at once
- """
- cmds = ['/bin/sleep 0',
- '/bin/sleep 1',
- '/bin/sleep 2',]
-
- q = QueueCommands(cmds)
- start = time.time()
- q.run()
- end = time.time()-start
- # we should only take the length of the longest sleep
- self.failUnless( end > 1.9 and end < 2.1,
-                         "took %s seconds, expected ~2" % (end,))
-
- def test_limited_run(self):
- """
- Run a limited number of jobs
- """
- cmds = ['/bin/sleep 1',
- '/bin/sleep 2',
- '/bin/sleep 3',]
-
- q = QueueCommands(cmds, 2)
-
- start = time.time()
- q.run()
- end = time.time()-start
- self.failUnless( end > 3.9 and end < 4.1,
-                         "took %s seconds, expected ~4" % (end,))
-
-def suite():
- return unittest.makeSuite(testQueueCommands, 'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest='suite')
-
-
-
-
--- /dev/null
+import ConfigParser
+import copy
+import logging
+import logging.handlers
+import os
+import re
+import subprocess
+import sys
+import time
+import traceback
+
+from benderjab import bot, rpc
+
+def runfolder_validate(fname):
+ """
+ Return True if fname looks like a runfolder name
+ """
+ if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname):
+ return True
+ else:
+ return False
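For example, the pattern above accepts the standard date-prefixed runfolder names used throughout this changeset and rejects anything else:

    runfolder_validate('080102_HWI-EAS229_0010_207BTAAXX')   # True
    runfolder_validate('HWI-EAS229_0010_207BTAAXX')          # False -- no leading YYMMDD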
+
+class rsync(object):
+ def __init__(self, source, dest, pwfile):
+ self.pwfile = os.path.expanduser(pwfile)
+ self.cmd = ['/usr/bin/rsync', ]
+ self.cmd.append('--password-file=%s' % (self.pwfile))
+ self.source_base = source
+ self.dest_base = dest
+ self.processes = {}
+ self.exit_code = None
+
+ def list(self):
+ """Get a directory listing"""
+ args = copy.copy(self.cmd)
+ args.append(self.source_base)
+
+ logging.debug("Rsync cmd:" + " ".join(args))
+ short_process = subprocess.Popen(args, stdout=subprocess.PIPE)
+ return self.list_filter(short_process.stdout)
+
+ def list_filter(self, lines):
+ """
+ parse rsync directory listing
+ """
+ dirs_to_copy = []
+ direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ]
+ for permissions, size, filedate, filetime, filename in direntries:
+ if permissions[0] == 'd':
+ # hey its a directory, the first step to being something we want to
+ # copy
+ if re.match("[0-9]{6}", filename):
+ # it starts with something that looks like a 6 digit date
+ # aka good enough for me
+ dirs_to_copy.append(filename)
+ return dirs_to_copy
+
+ def create_copy_process(self, dirname):
+ args = copy.copy(self.cmd)
+ # we want to copy everything
+ args.append('-rlt')
+ # from here
+ args.append(os.path.join(self.source_base, dirname))
+ # to here
+ args.append(self.dest_base)
+ logging.debug("Rsync cmd:" + " ".join(args))
+ return subprocess.Popen(args)
+
+ def copy(self):
+ """
+ copy any interesting looking directories over
+ return list of items that we started copying.
+ """
+ # clean up any lingering non-running processes
+ self.poll()
+
+ # what's available to copy?
+ dirs_to_copy = self.list()
+
+ # lets start copying
+ started = []
+ for d in dirs_to_copy:
+ process = self.processes.get(d, None)
+
+ if process is None:
+ # we don't have a process, so make one
+ logging.info("rsyncing %s" % (d))
+ self.processes[d] = self.create_copy_process(d)
+ started.append(d)
+ return started
+
+ def poll(self):
+ """
+ check currently running processes to see if they're done
+
+ return path roots that have finished.
+ """
+ for dir_key, proc_value in self.processes.items():
+ retcode = proc_value.poll()
+ if retcode is None:
+ # process hasn't finished yet
+ pass
+ elif retcode == 0:
+ logging.info("finished rsyncing %s, exitcode %d" %( dir_key, retcode))
+ del self.processes[dir_key]
+ else:
+ logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode))
+
+ def __len__(self):
+ """
+ Return how many active rsync processes we currently have
+
+ Call poll first to close finished processes.
+ """
+ return len(self.processes)
+
+ def keys(self):
+ """
+ Return list of current run folder names
+ """
+ return self.processes.keys()
+
+class CopierBot(rpc.XmlRpcBot):
+ def __init__(self, section=None, configfile=None):
+ #if configfile is None:
+ # configfile = '~/.htsworkflow'
+
+ super(CopierBot, self).__init__(section, configfile)
+
+ # options for rsync command
+ self.cfg['rsync_password_file'] = None
+ self.cfg['rsync_source'] = None
+ self.cfg['rsync_destination'] = None
+
+ # options for reporting we're done
+ self.cfg['notify_users'] = None
+ self.cfg['notify_runner'] = None
+
+ self.pending = []
+ self.rsync = None
+ self.notify_users = None
+ self.notify_runner = None
+
+ self.register_function(self.startCopy)
+ self.register_function(self.sequencingFinished)
+ self.eventTasks.append(self.update)
+
+ def read_config(self, section=None, configfile=None):
+ """
+ read the config file
+ """
+ super(CopierBot, self).read_config(section, configfile)
+
+ password = self._check_required_option('rsync_password_file')
+ source = self._check_required_option('rsync_source')
+ destination = self._check_required_option('rsync_destination')
+ self.rsync = rsync(source, destination, password)
+
+ self.notify_users = self._parse_user_list(self.cfg['notify_users'])
+ try:
+ self.notify_runner = \
+ self._parse_user_list(self.cfg['notify_runner'],
+ require_resource=True)
+ except bot.JIDMissingResource:
+ msg = 'need a full jabber ID + resource for xml-rpc destinations'
+            logging.fatal(msg)
+ raise bot.JIDMissingResource(msg)
+
+ def startCopy(self, *args):
+ """
+ start our copy
+ """
+ logging.info("starting copy scan")
+ started = self.rsync.copy()
+ logging.info("copying:" + " ".join(started)+".")
+ return started
+
+ def sequencingFinished(self, runDir, *args):
+ """
+ The run was finished, if we're done copying, pass the message on
+ """
+ # close any open processes
+ self.rsync.poll()
+
+ # see if we're still copying
+ if runfolder_validate(runDir):
+            logging.info("received sequencing finished for %s" % (runDir))
+ self.pending.append(runDir)
+ self.startCopy()
+ return "PENDING"
+ else:
+ errmsg = "received bad runfolder name (%s)" % (runDir)
+ logging.warning(errmsg)
+ # maybe I should use a different error message
+ raise RuntimeError(errmsg)
+
+ def reportSequencingFinished(self, runDir):
+ """
+ Send the sequencingFinished message to the interested parties
+ """
+ if self.notify_users is not None:
+ for u in self.notify_users:
+ self.send(u, 'Sequencing run %s finished' % (runDir))
+ if self.notify_runner is not None:
+ for r in self.notify_runner:
+ self.rpc_send(r, (runDir,), 'sequencingFinished')
+        logging.info("forwarding sequencingFinished message for %s" % (runDir))
+
+ def update(self, *args):
+ """
+ Update our current status.
+ Report if we've finished copying files.
+ """
+ self.rsync.poll()
+ for p in self.pending:
+ if p not in self.rsync.keys():
+ self.reportSequencingFinished(p)
+ self.pending.remove(p)
+
+ def _parser(self, msg, who):
+ """
+ Parse xmpp chat messages
+ """
+ help = u"I can [copy], or report current [status]"
+ if re.match(u"help", msg):
+ reply = help
+ elif re.match("copy", msg):
+ started = self.startCopy()
+ reply = u"started copying " + ", ".join(started)
+ elif re.match(u"status", msg):
+ msg = [u"Currently %d rsync processes are running." % (len(self.rsync))]
+ for d in self.rsync.keys():
+ msg.append(u" " + d)
+ reply = os.linesep.join(msg)
+ else:
+ reply = u"I didn't understand '%s'" % (unicode(msg))
+ return reply
+
+def main(args=None):
+ bot = CopierBot()
+ bot.main(args)
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+#!/usr/bin/env python
+from glob import glob
+import logging
+import os
+import re
+import sys
+import time
+import threading
+
+from benderjab import rpc
+
+from htsworkflow.pipeline.configure_run import *
+
+#s_fc = re.compile('FC[0-9]+')
+s_fc = re.compile('_[0-9a-zA-Z]*$')
+
+
+def _get_flowcell_from_rundir(run_dir):
+ """
+ Returns flowcell string based on run_dir.
+ Returns None and logs error if flowcell can't be found.
+ """
+ junk, dirname = os.path.split(run_dir)
+ mo = s_fc.search(dirname)
+ if not mo:
+ logging.error('RunDir 2 FlowCell error: %s' % (run_dir))
+ return None
+
+ return dirname[mo.start()+1:]
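As a worked example (using the fake runfolder name from the tests above), the regex keeps everything after the last underscore that is followed only by alphanumerics:

    _get_flowcell_from_rundir('/tmp/080102_HWI-EAS229_0010_207BTAAXX')   # -> '207BTAAXX'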
+
+
+
+class Runner(rpc.XmlRpcBot):
+ """
+ Manage running pipeline jobs.
+ """
+ def __init__(self, section=None, configfile=None):
+ #if configfile is None:
+ # self.configfile = "~/.htsworkflow"
+ super(Runner, self).__init__(section, configfile)
+
+ self.cfg['notify_users'] = None
+ self.cfg['genome_dir'] = None
+ self.cfg['base_analysis_dir'] = None
+
+ self.cfg['notify_users'] = None
+ self.cfg['notify_postanalysis'] = None
+
+ self.conf_info_dict = {}
+
+ self.register_function(self.sequencingFinished)
+ #self.eventTasks.append(self.update)
+
+
+ def read_config(self, section=None, configfile=None):
+ super(Runner, self).read_config(section, configfile)
+
+ self.genome_dir = self._check_required_option('genome_dir')
+ self.base_analysis_dir = self._check_required_option('base_analysis_dir')
+
+ self.notify_users = self._parse_user_list(self.cfg['notify_users'])
+ #FIXME: process notify_postpipeline cfg
+
+
+ def _parser(self, msg, who):
+ """
+ Parse xmpp chat messages
+ """
+        help = u"I can [start] a run, or report [status]"
+ if re.match(u"help", msg):
+ reply = help
+ elif re.match("status", msg):
+ words = msg.split()
+ if len(words) == 2:
+ reply = self.getStatusReport(words[1])
+ else:
+ reply = u"Status available for: %s" \
+ % (', '.join([k for k in self.conf_info_dict.keys()]))
+ elif re.match(u"start", msg):
+ words = msg.split()
+ if len(words) == 2:
+ self.sequencingFinished(words[1])
+ reply = u"starting run for %s" % (words[1])
+ else:
+ reply = u"need runfolder name"
+ else:
+ reply = u"I didn't understand '%s'" %(msg)
+
+ logging.debug("reply: " + str(reply))
+ return reply
+
+
+ def getStatusReport(self, fc_num):
+ """
+ Returns text status report for flow cell number
+ """
+ if fc_num not in self.conf_info_dict:
+ return "No record of a %s run." % (fc_num)
+
+ status = self.conf_info_dict[fc_num].status
+
+ if status is None:
+ return "No status information for %s yet." \
+ " Probably still in configure step. Try again later." % (fc_num)
+
+ output = status.statusReport()
+
+ return '\n'.join(output)
+
+
+ def sequencingFinished(self, run_dir):
+ """
+        Sequencing (and copying) is finished, time to start the pipeline
+ """
+ logging.debug("received sequencing finished message")
+
+ # Setup config info object
+ ci = ConfigInfo()
+ ci.base_analysis_dir = self.base_analysis_dir
+ ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir)
+
+ # get flowcell from run_dir name
+ flowcell = _get_flowcell_from_rundir(run_dir)
+
+ # Store ci object in dictionary
+ self.conf_info_dict[flowcell] = ci
+
+
+        # Launch the job in its own thread and return.
+ self.launchJob(run_dir, flowcell, ci)
+ return "started"
+
+
+ def pipelineFinished(self, run_dir):
+ # need to strip off self.watch_dir from rundir I suspect.
+        logging.info("pipeline finished in " + str(run_dir))
+ #pattern = self.watch_dir
+ #if pattern[-1] != os.path.sep:
+ # pattern += os.path.sep
+ #stripped_run_dir = re.sub(pattern, "", run_dir)
+ #logging.debug("stripped to " + stripped_run_dir)
+
+ # Notify each user that the run has finished.
+ if self.notify_users is not None:
+ for u in self.notify_users:
+ self.send(u, 'Pipeline run %s finished' % (run_dir))
+
+ #if self.notify_runner is not None:
+ # for r in self.notify_runner:
+ # self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
+
+ def reportMsg(self, msg):
+
+ if self.notify_users is not None:
+ for u in self.notify_users:
+ self.send(u, msg)
+
+
+ def _runner(self, run_dir, flowcell, conf_info):
+
+ # retrieve config step
+ cfg_filepath = os.path.join(conf_info.analysis_dir,
+ 'config32auto.txt')
+ status_retrieve_cfg = retrieve_config(conf_info,
+ flowcell,
+ cfg_filepath,
+ self.genome_dir)
+ if status_retrieve_cfg:
+ logging.info("Runner: Retrieve config: success")
+ self.reportMsg("Retrieve config (%s): success" % (run_dir))
+ else:
+ logging.error("Runner: Retrieve config: failed")
+ self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
+
+
+ # configure step
+ if status_retrieve_cfg:
+ status = configure(conf_info)
+ if status:
+ logging.info("Runner: Configure: success")
+ self.reportMsg("Configure (%s): success" % (run_dir))
+ self.reportMsg(
+ os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
+ )
+ else:
+ logging.error("Runner: Configure: failed")
+ self.reportMsg("Configure (%s): FAILED" % (run_dir))
+
+ #if successful, continue
+ if status:
+ # Setup status cmdline status monitor
+ #startCmdLineStatusMonitor(ci)
+
+ # running step
+ print 'Running pipeline now!'
+ run_status = run_pipeline(conf_info)
+ if run_status is True:
+ logging.info('Runner: Pipeline: success')
+ self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
+ else:
+ logging.info('Runner: Pipeline: failed')
+ self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))
+
+
+ def launchJob(self, run_dir, flowcell, conf_info):
+ """
+ Starts up a thread for running the pipeline
+ """
+ t = threading.Thread(target=self._runner,
+ args=[run_dir, flowcell, conf_info])
+ t.setDaemon(True)
+ t.start()
+
+
+
+def main(args=None):
+ bot = Runner()
+ return bot.main(args)
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+#!/usr/bin/env python
+import logging
+import os
+import re
+import sys
+import time
+#import glob
+
+from htsworkflow.util import mount
+
+# this uses pyinotify
+import pyinotify
+from pyinotify import EventsCodes
+
+from benderjab import bot, rpc
+
+
+class WatcherEvents(object):
+ # two events need to be tracked
+ # one to send startCopy
+ # one to send OMG its broken
+ # OMG its broken needs to stop when we've seen enough
+ # cycles
+ # this should be per runfolder.
+ # read the xml files
+ def __init__(self):
+ pass
+
+
+class Handler(pyinotify.ProcessEvent):
+ def __init__(self, watchmanager, bot):
+ self.last_event_time = None
+ self.watchmanager = watchmanager
+ self.bot = bot
+
+ def process_IN_CREATE(self, event):
+ self.last_event_time = time.time()
+ msg = "Create: %s" % os.path.join(event.path, event.name)
+ if event.name.lower() == "run.completed":
+ try:
+ self.bot.sequencingFinished(event.path)
+ except IOError, e:
+ logging.error("Couldn't send sequencingFinished")
+ logging.debug(msg)
+
+ def process_IN_DELETE(self, event):
+ logging.debug("Remove: %s" % os.path.join(event.path, event.name))
+
+ def process_IN_UNMOUNT(self, event):
+ pathname = os.path.join(event.path, event.name)
+ logging.debug("IN_UNMOUNT: %s" % (pathname,))
+ self.bot.unmount_watch()
+
+class SpoolWatcher(rpc.XmlRpcBot):
+ """
+ Watch a directory and send a message when another process is done writing.
+
+    This monitors a directory tree using inotify (linux specific); once some
+    files have been written, it sends a message after <timeout>
+ seconds of no file writing.
+
+    (Basically, when the solexa machine finishes dumping a round of data,
+    this will hopefully send out a message saying "hey look, there's data available".)
+
+ """
+ # these params need to be in the config file
+ # I wonder where I should put the documentation
+ #:Parameters:
+ # `watchdir` - which directory tree to monitor for modifications
+ # `profile` - specify which .htsworkflow profile to use
+ # `write_timeout` - how many seconds to wait for writes to finish to
+ # the spool
+ # `notify_timeout` - how often to timeout from notify
+
+ def __init__(self, section=None, configfile=None):
+ #if configfile is None:
+ # self.configfile = "~/.htsworkflow"
+ super(SpoolWatcher, self).__init__(section, configfile)
+
+ self.cfg['watchdir'] = None
+ self.cfg['write_timeout'] = 10
+ self.cfg['notify_users'] = None
+ self.cfg['notify_runner'] = None
+
+ self.notify_timeout = 0.001
+ self.wm = pyinotify.WatchManager()
+ self.handler = Handler(self.wm, self)
+ self.notifier = pyinotify.Notifier(self.wm, self.handler)
+ self.wdd = None
+ self.mount_point = None
+ self.mounted = True
+
+ self.notify_users = None
+ self.notify_runner = None
+
+ self.eventTasks.append(self.process_notify)
+
+ def read_config(self, section=None, configfile=None):
+ super(SpoolWatcher, self).read_config(section, configfile)
+
+ self.watch_dir = self._check_required_option('watchdir')
+ self.write_timeout = int(self.cfg['write_timeout'])
+
+ self.notify_users = self._parse_user_list(self.cfg['notify_users'])
+ try:
+ self.notify_runner = \
+ self._parse_user_list(self.cfg['notify_runner'],
+ require_resource=True)
+ except bot.JIDMissingResource:
+ msg = 'need a full jabber ID + resource for xml-rpc destinations'
+            logging.fatal(msg)
+ raise bot.JIDMissingResource(msg)
+
+ def add_watch(self, watchdir=None):
+ """
+ start watching watchdir or self.watch_dir
+ we're currently limited to watching one directory tree.
+ """
+ # the one tree limit is mostly because self.wdd is a single item
+ # but managing it as a list might be a bit more annoying
+ if watchdir is None:
+ watchdir = self.watch_dir
+ logging.info("Watching:"+str(watchdir))
+
+ self.mount_point = mount.find_mount_point_for(watchdir)
+
+ mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
+ # rec traverses the tree and adds all the directories that are there
+ # at the start.
+ # auto_add will add in new directories as they are created
+ self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
+
+ def unmount_watch(self):
+ if self.wdd is not None:
+ self.wm.rm_watch(self.wdd.values())
+ self.wdd = None
+ self.mounted = False
+
+ def process_notify(self, *args):
+ # process the queue of events as explained above
+ self.notifier.process_events()
+ #check events waits timeout
+ if self.notifier.check_events(self.notify_timeout):
+            # read notified events and enqueue them
+ self.notifier.read_events()
+ # should we do something?
+ # has something happened?
+ last_event_time = self.handler.last_event_time
+ if last_event_time is not None:
+ time_delta = time.time() - last_event_time
+ if time_delta > self.write_timeout:
+ self.startCopy()
+ self.handler.last_event_time = None
+ # handle unmounted filesystems
+ if not self.mounted:
+ if mount.is_mounted(self.mount_point):
+ # we've been remounted. Huzzah!
+ # restart the watch
+ self.add_watch()
+ self.mounted = True
+ logging.info(
+ "%s was remounted, restarting watch" % \
+ (self.mount_point)
+ )
+
+ def _parser(self, msg, who):
+ """
+ Parse xmpp chat messages
+ """
+        help = u"I can send a [copy] message, or sequencer [finished]"
+ if re.match(u"help", msg):
+ reply = help
+ elif re.match("copy", msg):
+ self.startCopy()
+ reply = u"sent copy message"
+ elif re.match(u"finished", msg):
+ words = msg.split()
+ if len(words) == 2:
+ self.sequencingFinished(words[1])
+ reply = u"sending sequencing finished for %s" % (words[1])
+ else:
+ reply = u"need runfolder name"
+ else:
+ reply = u"I didn't understand '%s'" %(msg)
+ return reply
+
+ def start(self, daemonize):
+ """
+ Start application
+ """
+ self.add_watch()
+ super(SpoolWatcher, self).start(daemonize)
+
+ def stop(self):
+ """
+ shutdown application
+ """
+ # destroy the inotify's instance on this interrupt (stop monitoring)
+ self.notifier.stop()
+ super(SpoolWatcher, self).stop()
+
+ def startCopy(self):
+ logging.debug("writes seem to have stopped")
+ if self.notify_runner is not None:
+ for r in self.notify_runner:
+ self.rpc_send(r, tuple(), 'startCopy')
+
+ def sequencingFinished(self, run_dir):
+ # need to strip off self.watch_dir from rundir I suspect.
+ logging.info("run.completed in " + str(run_dir))
+ pattern = self.watch_dir
+ if pattern[-1] != os.path.sep:
+ pattern += os.path.sep
+ stripped_run_dir = re.sub(pattern, "", run_dir)
+ logging.debug("stripped to " + stripped_run_dir)
+ if self.notify_users is not None:
+ for u in self.notify_users:
+ self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
+ if self.notify_runner is not None:
+ for r in self.notify_runner:
+ self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
+
+def main(args=None):
+ bot = SpoolWatcher()
+ return bot.main(args)
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+from django import newforms as forms
+from django.newforms.util import ErrorList
+
+
+SPECIES_LIST = [#('--choose--', '--Choose--'),
+ ('hg18', 'Homo sapiens (Hg18)'),
+ ('Mm8', 'Mus musculus (Mm8)'),
+                ('arabv6', 'Arabidopsis thaliana v6'),
+ ('other', 'Other species (Include in description)')]
+
+
+class DivErrorList(ErrorList):
+ def __unicode__(self):
+ return self.as_divs()
+
+ def as_divs(self):
+ if not self: return u''
+ return u'<div class="errorlist">%s</div>' % (''.join([u'<div class="error">%s</div>' % e for e in self]))
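For reference, a sketch of the markup as_divs() yields for a single error (this assumes ErrorList can be built from a plain list of messages, as it can in django's newforms):

    errors = DivErrorList([u'This field is required.'])
    print errors.as_divs()
    # <div class="errorlist"><div class="error">This field is required.</div></div>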
+
+
+
+class ConfigForm(forms.Form):
+
+ flow_cell_number = forms.CharField(min_length=2)
+ run_date = forms.DateTimeField()
+ advanced_run = forms.BooleanField(required=False)
+ read_length = forms.IntegerField(min_value=1, initial=32)
+ #eland_repeat = forms.BooleanField()
+
+ #needs a for loop or something to allow for n configurations
+ #analysis_type = forms.ChoiceField(choices=[('eland','eland')])
+ lane1_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane1_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ lane2_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane2_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ lane3_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane3_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ lane4_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane4_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ lane5_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane5_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ lane6_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane6_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ lane7_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane7_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ lane8_species = forms.ChoiceField(choices=SPECIES_LIST)
+ lane8_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'}))
+
+ notes = forms.CharField(widget=forms.Textarea(attrs={'cols':'70'}), required=False)
+
+ #lane_specific_read_length = forms.IntegerField(min_value=1)
+
+ #eland_genome_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
+ # ('lane2','2'),
+ # ('lane3','3'),
+ # ('lane4','4'),
+ # ('lane5','5'),
+ # ('lane6','6'),
+ # ('lane7','7'),
+ # ('lane8','8') ])
+
+ #eland_genome = forms.ChoiceField(choices=)
+
+ #use_bases_lanes = forms.MultipleChoiceField(choices=[('lane1','1'),
+ # ('lane2','2'),
+ # ('lane3','3'),
+ # ('lane4','4'),
+ # ('lane5','5'),
+ # ('lane6','6'),
+ # ('lane7','7'),
+ # ('lane8','8') ])
+
+ #use_bases_mask = forms.CharField()
+
+ #sequence_format = forms.ChoiceField(choices=[('scarf', 'scarf')])
+
+
+
+ #subject = forms.CharField(max_length=100)
+ #message = forms.CharField()
+ #sender = forms.EmailField()
+ #cc_myself = forms.BooleanField()
+
+ def as_custom(self):
+ """
+ Displays customized html output
+ """
+ html = []
+
+ fcn = self['flow_cell_number']
+
+ html.append(fcn.label_tag() + ': ' + str(fcn) + str(fcn.errors) + '<br />')
+
+ run_date = self['run_date']
+ html.append(run_date.label_tag() + ': ' + str(run_date) + str(run_date.errors) + '<br />')
+
+ arun = self['advanced_run']
+ html.append(arun.label_tag() + ': ' + str(arun) + str(arun.errors) + '<br />')
+
+ rl = self['read_length']
+ html.append(rl.label_tag() + ': ' + str(rl) + str(rl.errors) + '<br /><br />')
+
+ html.append('<table border="0">')
+ html.append(' <tr><td>%s</td><td>%s</td><td>%s</td></tr>' \
+ % ('Lane', 'Species', 'Description'))
+
+ l1s = self['lane1_species']
+ l1d = self['lane1_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('1', str(l1s), str(l1s.errors), str(l1d), str(l1d.errors)))
+
+ l2s = self['lane2_species']
+ l2d = self['lane2_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('2', str(l2s), str(l2s.errors), str(l2d), str(l2d.errors)))
+
+ l3s = self['lane3_species']
+ l3d = self['lane3_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('3', str(l3s), str(l3s.errors), str(l3d), str(l3d.errors)))
+
+ l4s = self['lane4_species']
+ l4d = self['lane4_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('4', str(l4s), str(l4s.errors), str(l4d), str(l4d.errors)))
+
+ l5s = self['lane5_species']
+ l5d = self['lane5_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('5', str(l5s), str(l5s.errors), str(l5d), str(l5d.errors)))
+
+ l6s = self['lane6_species']
+ l6d = self['lane6_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('6', str(l6s), str(l6s.errors), str(l6d), str(l6d.errors)))
+
+ l7s = self['lane7_species']
+ l7d = self['lane7_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('7', str(l7s), str(l7s.errors), str(l7d), str(l7d.errors)))
+
+ l8s = self['lane8_species']
+ l8d = self['lane8_description']
+ html.append(' <tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('8', str(l8s), str(l8s.errors), str(l8d), str(l8d.errors)))
+
+ html.append('</table><br />')
+
+ notes = self['notes']
+ html.append('<p>Notes:</p>')
+ html.append(' %s<br />' % (str(notes)))
+
+ return '\n'.join(html)
+
+
+
\ No newline at end of file
--- /dev/null
+from django.db import models
+
+# Create your models here.
--- /dev/null
+from django.conf.urls.defaults import *
+
+urlpatterns = patterns('',
+ # Example:
+
+ (r'^(?P<flowcell>\w+)/$', 'htsworkflow.frontend.eland_config.views.config'),
+ (r'^$', 'htsworkflow.frontend.eland_config.views.config'),
+ #(r'^$', 'htsworkflow.frontend.eland_config.views.index')
+
+)
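Under standard Django URL dispatching, and relative to wherever this urlconf is included, the two patterns resolve roughly as follows (the flowcell value is illustrative):

    # GET .../207BTAAXX/  -> eland_config.views.config(request, flowcell='207BTAAXX')
    # GET .../            -> eland_config.views.config(request)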
--- /dev/null
+from django.http import HttpResponse
+from django.shortcuts import render_to_response
+from django.core.exceptions import ObjectDoesNotExist
+
+from htsworkflow.frontend.eland_config import forms
+from htsworkflow.frontend import settings
+from htsworkflow.frontend.fctracker import models
+
+import os
+import glob
+# Create your views here.
+
+
+def _validate_input(data):
+ #if data.find('..') == -1 or data.find('/') == -1 or data.find('\\') == -1:
+ return data.replace('..', '').replace('/', '_').replace('\\', '_')
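For example, the substitutions above neutralize path components rather than rejecting them outright:

    _validate_input('../207BTAAXX/config')   # -> '_207BTAAXX_config'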
+
+#def contact(request):
+# if request.method == 'POST':
+# form = ContactForm(request.POST)
+# if form.is_valid():
+# # Do form processing here...
+# return HttpResponseRedirect('/url/on_success/')
+# else:
+# form = ContactForm()
+# return
+
+
+
+#def _saveConfigFile(form):
+# """
+# Given a valid form, save eland config to file based on flowcell number.
+# """
+# assert form.is_valid()
+#
+# clean_data = form.cleaned_data
+# flowcell = clean_data['flow_cell_number'].replace('/','_').replace('..', '__')
+#
+# file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
+#
+# f = open(file_path, 'w')
+# cfg = generateElandConfig(form)
+# f.write(cfg)
+# f.close()
+#
+#
+#def _saveToDb(form):
+# """
+# Save info to the database.
+# """
+# clean_data = form.cleaned_data
+#
+# fc_id = clean_data['flow_cell_number']
+#
+# try:
+# fc = models.FlowCell.objects.get(flowcell_id=fc_id)
+# except models.FlowCell.DoesNotExist:
+# fc = models.FlowCell()
+#
+# fc.flowcell_id = fc_id
+# fc.run_date = clean_data['run_date']
+#
+# #LANE 1
+# fc.lane1_sample = clean_data['lane1_description']
+# species_name = clean_data['lane1_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane1_species = specie
+#
+# #LANE 2
+# fc.lane2_sample = clean_data['lane2_description']
+# species_name = clean_data['lane2_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane2_species = specie
+#
+# #LANE 3
+# fc.lane3_sample = clean_data['lane3_description']
+# species_name = clean_data['lane3_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane3_species = specie
+#
+# #LANE 4
+# fc.lane4_sample = clean_data['lane4_description']
+# species_name = clean_data['lane4_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane4_species = specie
+#
+# #LANE 5
+# fc.lane5_sample = clean_data['lane5_description']
+# species_name = clean_data['lane5_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane5_species = specie
+#
+# #LANE 6
+# fc.lane6_sample = clean_data['lane6_description']
+# species_name = clean_data['lane6_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane6_species = specie
+#
+# #LANE 7
+# fc.lane7_sample = clean_data['lane7_description']
+# species_name = clean_data['lane7_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane7_species = specie
+#
+# #LANE 8
+# fc.lane8_sample = clean_data['lane8_description']
+# species_name = clean_data['lane8_species']
+# try:
+# specie = models.Specie.objects.get(scientific_name=species_name)
+# except models.Specie.DoesNotExist:
+# specie = models.Specie(scientific_name=species_name)
+# specie.save()
+# fc.lane8_species = specie
+#
+# fc.notes = clean_data['notes']
+#
+# fc.save()
+#
+# return fc
+#
+#
+#def generateElandConfig(form):
+# data = []
+#
+# form = form.cleaned_data
+#
+# BASE_DIR = '/data-store01/compbio/genomes'
+#
+# data.append("# FLOWCELL: %s" % (form['flow_cell_number']))
+# data.append("#")
+#
+# notes = form['notes'].replace('\r\n', '\n').replace('\r', '\n')
+# notes = notes.replace('\n', '\n# ')
+# data.append("# NOTES:")
+# data.append("# %s\n#" % (notes))
+#
+# #Convert all newline conventions to unix style
+# l1d = form['lane1_description'].replace('\r\n', '\n').replace('\r', '\n')
+# l2d = form['lane2_description'].replace('\r\n', '\n').replace('\r', '\n')
+# l3d = form['lane3_description'].replace('\r\n', '\n').replace('\r', '\n')
+# l4d = form['lane4_description'].replace('\r\n', '\n').replace('\r', '\n')
+# l5d = form['lane5_description'].replace('\r\n', '\n').replace('\r', '\n')
+# l6d = form['lane6_description'].replace('\r\n', '\n').replace('\r', '\n')
+# l7d = form['lane7_description'].replace('\r\n', '\n').replace('\r', '\n')
+# l8d = form['lane8_description'].replace('\r\n', '\n').replace('\r', '\n')
+#
+# # Turn new lines into indented commented newlines
+# l1d = l1d.replace('\n', '\n# ')
+# l2d = l2d.replace('\n', '\n# ')
+# l3d = l3d.replace('\n', '\n# ')
+# l4d = l4d.replace('\n', '\n# ')
+# l5d = l5d.replace('\n', '\n# ')
+# l6d = l6d.replace('\n', '\n# ')
+# l7d = l7d.replace('\n', '\n# ')
+# l8d = l8d.replace('\n', '\n# ')
+#
+# data.append("# Lane1: %s" % (l1d))
+# data.append("# Lane2: %s" % (l2d))
+# data.append("# Lane3: %s" % (l3d))
+# data.append("# Lane4: %s" % (l4d))
+# data.append("# Lane5: %s" % (l5d))
+# data.append("# Lane6: %s" % (l6d))
+# data.append("# Lane7: %s" % (l7d))
+# data.append("# Lane8: %s" % (l8d))
+#
+# #data.append("GENOME_DIR %s" % (BASE_DIR))
+# #data.append("CONTAM_DIR %s" % (BASE_DIR))
+# read_length = form['read_length']
+# data.append("READ_LENGTH %d" % (read_length))
+# #data.append("ELAND_REPEAT")
+# data.append("ELAND_MULTIPLE_INSTANCES 8")
+#
+# #Construct genome dictionary to figure out what lanes to put
+# # in the config file.
+# genome_dict = {}
+# l1s = form['lane1_species']
+# genome_dict.setdefault(l1s, []).append('1')
+# l2s = form['lane2_species']
+# genome_dict.setdefault(l2s, []).append('2')
+# l3s = form['lane3_species']
+# genome_dict.setdefault(l3s, []).append('3')
+# l4s = form['lane4_species']
+# genome_dict.setdefault(l4s, []).append('4')
+# l5s = form['lane5_species']
+# genome_dict.setdefault(l5s, []).append('5')
+# l6s = form['lane6_species']
+# genome_dict.setdefault(l6s, []).append('6')
+# l7s = form['lane7_species']
+# genome_dict.setdefault(l7s, []).append('7')
+# l8s = form['lane8_species']
+# genome_dict.setdefault(l8s, []).append('8')
+#
+# genome_list = genome_dict.keys()
+# genome_list.sort()
+#
+# #Loop through and create entries for each species.
+# for genome in genome_list:
+# lanes = ''.join(genome_dict[genome])
+# data.append('%s:ANALYSIS eland' % (lanes))
+# data.append('%s:READ_LENGTH %s' % (lanes, read_length))
+# data.append('%s:ELAND_GENOME %s' % (lanes, os.path.join(BASE_DIR, genome)))
+# data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
+#
+# data.append('SEQUENCE_FORMAT --scarf')
+#
+# return '\n'.join(data)
+
+
+def getElandConfig(flowcell, regenerate=False):
+
+ file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell)
+
+ #If we are regenerating the config file, skip
+ # reading of existing file. If the file doesn't
+    # exist, try to generate it from the DB.
+ if not regenerate and os.path.isfile(file_path):
+ f = open(file_path, 'r')
+ data = f.read()
+ f.close()
+ return data
+
+ try:
+ fcObj = models.FlowCell.objects.get(flowcell_id__iexact=flowcell)
+ except ObjectDoesNotExist:
+ return None
+
+ data = []
+
+ #form = form.cleaned_data
+
+ BASE_DIR = '/data-store01/compbio/genomes'
+
+ data.append("# FLOWCELL: %s" % (fcObj.flowcell_id))
+ data.append("#")
+
+ notes = fcObj.notes.replace('\r\n', '\n').replace('\r', '\n')
+ notes = notes.replace('\n', '\n# ')
+ data.append("# NOTES:")
+ data.append("# %s\n#" % (notes))
+
+ #Convert all newline conventions to unix style
+ l1d = str(fcObj.lane_1_library.library_id) + '|' \
+ + fcObj.lane_1_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+ l2d = str(fcObj.lane_2_library.library_id) + '|' \
+ + fcObj.lane_2_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+ l3d = str(fcObj.lane_3_library.library_id) + '|' \
+ + fcObj.lane_3_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+ l4d = str(fcObj.lane_4_library.library_id) + '|' \
+ + fcObj.lane_4_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+
+ l5d = str(fcObj.lane_5_library.library_id) + '|' \
+ + fcObj.lane_5_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+ l6d = str(fcObj.lane_6_library.library_id) + '|' \
+ + fcObj.lane_6_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+ l7d = str(fcObj.lane_7_library.library_id) + '|' \
+ + fcObj.lane_7_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+ l8d = str(fcObj.lane_8_library.library_id) + '|' \
+ + fcObj.lane_8_library.library_name.replace('\r\n', '\n').replace('\r', '\n')
+
+ # Turn new lines into indented commented newlines
+ l1d = l1d.replace('\n', '\n# ')
+ l2d = l2d.replace('\n', '\n# ')
+ l3d = l3d.replace('\n', '\n# ')
+ l4d = l4d.replace('\n', '\n# ')
+ l5d = l5d.replace('\n', '\n# ')
+ l6d = l6d.replace('\n', '\n# ')
+ l7d = l7d.replace('\n', '\n# ')
+ l8d = l8d.replace('\n', '\n# ')
+
+ data.append("# Lane1: %s" % (l1d))
+ data.append("# Lane2: %s" % (l2d))
+ data.append("# Lane3: %s" % (l3d))
+ data.append("# Lane4: %s" % (l4d))
+ data.append("# Lane5: %s" % (l5d))
+ data.append("# Lane6: %s" % (l6d))
+ data.append("# Lane7: %s" % (l7d))
+ data.append("# Lane8: %s" % (l8d))
+
+ #data.append("GENOME_DIR %s" % (BASE_DIR))
+ #data.append("CONTAM_DIR %s" % (BASE_DIR))
+ read_length = fcObj.read_length
+ data.append("READ_LENGTH %d" % (read_length))
+ #data.append("ELAND_REPEAT")
+ data.append("ELAND_MULTIPLE_INSTANCES 8")
+
+ #Construct genome dictionary to figure out what lanes to put
+ # in the config file.
+ genome_dict = {}
+
+ #l1s = form['lane1_species']
+ l1s = fcObj.lane_1_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_1_library.library_species.use_genome_build
+ genome_dict.setdefault(l1s, []).append('1')
+ l2s = fcObj.lane_2_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_2_library.library_species.use_genome_build
+ genome_dict.setdefault(l2s, []).append('2')
+ l3s = fcObj.lane_3_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_3_library.library_species.use_genome_build
+ genome_dict.setdefault(l3s, []).append('3')
+ l4s = fcObj.lane_4_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_4_library.library_species.use_genome_build
+ genome_dict.setdefault(l4s, []).append('4')
+ l5s = fcObj.lane_5_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_5_library.library_species.use_genome_build
+ genome_dict.setdefault(l5s, []).append('5')
+ l6s = fcObj.lane_6_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_6_library.library_species.use_genome_build
+ genome_dict.setdefault(l6s, []).append('6')
+ l7s = fcObj.lane_7_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_7_library.library_species.use_genome_build
+ genome_dict.setdefault(l7s, []).append('7')
+ l8s = fcObj.lane_8_library.library_species.scientific_name #+ '|' + \
+ #fcObj.lane_8_library.library_species.use_genome_build
+ genome_dict.setdefault(l8s, []).append('8')
+
+ genome_list = genome_dict.keys()
+ genome_list.sort()
+
+ #Loop through and create entries for each species.
+ for genome in genome_list:
+ lanes = ''.join(genome_dict[genome])
+ data.append('%s:ANALYSIS eland' % (lanes))
+ data.append('%s:READ_LENGTH %s' % (lanes, read_length))
+ data.append('%s:ELAND_GENOME %s' % (lanes, '%%(%s)s' % (genome)))
+ data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length)))
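+    # For reference, each pass through the loop above emits a block like the
+    # following (lane digits, read length and species name are illustrative):
+    #   123:ANALYSIS eland
+    #   123:READ_LENGTH 32
+    #   123:ELAND_GENOME %(Mus musculus)s
+    #   123:USE_BASES YYYY...
+    # The %(...)s placeholder is resolved to a genome path later, on the
+    # pipeline side, via Python %-formatting against a species-to-path mapping.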
+
+ data.append('SEQUENCE_FORMAT --scarf')
+
+ data = '\n'.join(data)
+
+ f = open(file_path, 'w')
+ f.write(data)
+ f.close()
+
+ return data
+
+
+
+def config(request, flowcell=None):
+ """
+ Returns eland config file for a given flowcell number,
+ or returns a list of available flowcell numbers.
+ """
+
+ # Provide INDEX of available Flowcell config files.
+ if flowcell is None:
+ #Find all FC* config files and report an index html file
+ #fc_list = [ os.path.split(file_path)[1] for file_path in glob.glob(os.path.join(settings.UPLOADTO_CONFIG_FILE, 'FC*')) ]
+ fc_list = [ fc.flowcell_id for fc in models.FlowCell.objects.all() ]
+
+ #Convert FC* list to html links
+ fc_html = [ '<a href="/eland_config/%s/">%s</a>' % (fc_name, fc_name) for fc_name in fc_list ]
+
+ return HttpResponse('<br />'.join(fc_html))
+
+ #FIXME: Should validate flowcell input before using.
+ flowcell = _validate_input(flowcell)
+ cfg = getElandConfig(flowcell, regenerate=True)
+
+ if not cfg:
+ return HttpResponse("Hmm, config file for %s does not seem to exist." % (flowcell))
+
+
+ return HttpResponse(cfg, mimetype="text/plain")
+
+
+
+
+#def index(request):
+# """
+# Return a form for filling out information about the flowcell
+# """
+# if request.method == 'POST':
+# form = forms.ConfigForm(request.POST, error_class=forms.DivErrorList)
+# if form.is_valid():
+# #cfg = generateElandConfig(form)
+# _saveConfigFile(form)
+# _saveToDb(form)
+# return HttpResponse("Eland Config Saved!", mimetype="text/plain")
+# else:
+# return render_to_response('config_form.html', {'form': form })
+#
+# else:
+# fm = forms.ConfigForm(error_class=forms.DivErrorList)
+# return render_to_response('config_form.html', {'form': fm })
--- /dev/null
+from django.db import models
+from django.contrib.auth.models import User
+from htsworkflow.frontend import settings
+
+# Create your models here.
+
+class Antibody(models.Model):
+ antigene = models.CharField(max_length=500, db_index=True)
+ catalog = models.CharField(max_length=50, unique=True, db_index=True)
+ antibodies = models.CharField(max_length=500, db_index=True)
+ source = models.CharField(max_length=500, blank=True, db_index=True)
+ biology = models.TextField(blank=True)
+ notes = models.TextField(blank=True)
+ def __str__(self):
+ return '%s - %s (%s)' % (self.antigene, self.antibodies, self.catalog)
+ class Meta:
+ verbose_name_plural = "antibodies"
+ ordering = ["antigene"]
+ class Admin:
+ list_display = ('antigene','antibodies','catalog','source','biology','notes')
+ list_filter = ('antibodies','source')
+ fields = (
+ (None, {
+ 'fields': (('antigene','antibodies'),('catalog','source'),('biology'),('notes'))
+ }),
+ )
+
+class Cellline(models.Model):
+ cellline_name = models.CharField(max_length=100, unique=True, db_index=True)
+ notes = models.TextField(blank=True)
+ def __str__(self):
+ return '%s' % (self.cellline_name)
+
+ class Meta:
+ ordering = ["cellline_name"]
+
+ class Admin:
+ fields = (
+ (None, {
+ 'fields': (('cellline_name'),('notes'),)
+ }),
+ )
+
+class Condition(models.Model):
+ condition_name = models.CharField(max_length=2000, unique=True, db_index=True)
+ notes = models.TextField(blank=True)
+ def __str__(self):
+ return '%s' % (self.condition_name)
+
+ class Meta:
+ ordering = ["condition_name"]
+
+ class Admin:
+ fields = (
+ (None, {
+ 'fields': (('condition_name'),('notes'),)
+ }),
+ )
+
+class Species(models.Model):
+
+ scientific_name = models.CharField(max_length=256, unique=False, db_index=True, core=True)
+ common_name = models.CharField(max_length=256, blank=True)
+ use_genome_build = models.CharField(max_length=100, blank=False, null=False)
+
+ def __str__(self):
+ return '%s (%s)|%s' % (self.scientific_name, self.common_name, self.use_genome_build)
+
+ class Meta:
+ verbose_name_plural = "species"
+ ordering = ["scientific_name"]
+
+ class Admin:
+ fields = (
+ (None, {
+ 'fields': (('scientific_name', 'common_name'), ('use_genome_build'))
+ }),
+ )
+
+class Lab(models.Model):
+
+ name = models.CharField(max_length=100, blank=False, unique=True)
+
+ def __str__(self):
+ return self.name
+
+ class Admin:
+ pass
+
+class UserProfile(models.Model):
+
+ # This allows you to use user.get_profile() to get this object
+ user = models.ForeignKey(User, unique=True)
+
+ lab = models.ForeignKey(Lab)
+ #email = models.CharField(max_length=50, blank=True, null=True)
+
+ def __str__(self):
+ return '%s (%s lab)' % (self.user, self.lab)
+
+ class Meta:
+ #verbose_name_plural = "people"
+ #ordering = ["lab"]
+ pass
+
+ class Admin:
+ #fields = (
+ # (None, {
+ # 'fields': (('email', 'lab'), ('email'))
+ # }),
+ #)
+ pass
+
+
+class Library(models.Model):
+
+ library_id = models.CharField(max_length=30, primary_key=True, db_index=True, core=True)
+ library_name = models.CharField(max_length=100, unique=True, core=True)
+ library_species = models.ForeignKey(Species, core=True)
+ cell_line = models.ForeignKey(Cellline,core=True)
+ condition = models.ForeignKey(Condition,core=True)
+ antibody = models.ForeignKey(Antibody,blank=True,null=True,core=True)
+
+ EXPERIMENT_TYPES = (
+ ('INPUT_RXLCh','INPUT_RXLCh'),
+ ('ChIP-seq', 'ChIP-seq'),
+ ('Sheared', 'Sheared'),
+ ('RNA-seq', 'RNA-seq'),
+ ('Methyl-seq', 'Methyl-seq'),
+ ('DIP-seq', 'DIP-seq'),
+ )
+ experiment_type = models.CharField(max_length=50, choices=EXPERIMENT_TYPES,
+ default='RNA-seq')
+
+ creation_date = models.DateField(blank=True, null=True)
+ made_for = models.ForeignKey(User)
+ made_by = models.CharField(max_length=50, blank=True, default="Lorian")
+
+ PROTOCOL_END_POINTS = (
+ ('?', 'Unknown'),
+ ('Sample', 'Raw sample'),
+ ('Progress', 'In progress'),
+ ('1A', 'Ligation, then gel'),
+ ('PCR', 'Ligation, then PCR'),
+ ('1Ab', 'Ligation, PCR, then gel'),
+ ('1Aa', 'Ligation, gel, then PCR'),
+ ('2A', 'Ligation, PCR, gel, PCR'),
+ ('Done', 'Completed'),
+ )
+ stopping_point = models.CharField(max_length=25, choices=PROTOCOL_END_POINTS, default='Done')
+ amplified_from_sample = models.ForeignKey('self', blank=True, null=True)
+
+ undiluted_concentration = models.DecimalField("Undiluted concentration (ng/ul)", max_digits=5, decimal_places=2, default=0, blank=True, null=True)
+ successful_pM = models.DecimalField(max_digits=5, decimal_places=2, blank=True, null=True)
+ ten_nM_dilution = models.BooleanField()
+ avg_lib_size = models.IntegerField(default=225, blank=True, null=True)
+ notes = models.TextField(blank=True)
+
+ def __str__(self):
+ return '#%s: %s' % (self.library_id, self.library_name)
+
+ class Meta:
+ verbose_name_plural = "libraries"
+ ordering = ["-library_id"]
+
+ class Admin:
+ date_hierarchy = "creation_date"
+ save_as = True
+ save_on_top = True
+ search_fields = ['library_name', 'library_id']
+ list_display = ('library_id', 'library_name', 'made_for', 'creation_date', 'stopping_point')
+ list_display_links = ('library_id', 'library_name')
+ list_filter = ('stopping_point', 'library_species', 'made_for', 'made_by', 'experiment_type')
+ fields = (
+ (None, {
+ 'fields': (('library_id', 'library_name'), ('library_species', 'experiment_type'),)
+ }),
+ ('Creation Information:', {
+            'fields' : (('made_for', 'made_by', 'creation_date'), ('stopping_point', 'amplified_from_sample'), ('undiluted_concentration', 'avg_lib_size'), 'notes',)
+ }),
+ ('Run Information:', {
+ 'fields' : (('ten_nM_dilution','successful_pM'),)
+ }),
+ )
+
+class FlowCell(models.Model):
+
+ flowcell_id = models.CharField(max_length=20, unique=True, db_index=True, core=True)
+ run_date = models.DateTimeField(core=True)
+ advanced_run = models.BooleanField(default=False)
+ read_length = models.IntegerField(default=32)
+
+
+ FLOWCELL_STATUSES = (
+ ('No', 'Not run'),
+ ('F', 'Failed'),
+ ('Del', 'Data deleted'),
+ ('A', 'Data available'),
+ ('In', 'In progress'),
+ )
+ flowcell_status = models.CharField(max_length=10, choices=FLOWCELL_STATUSES)
+
+ lane_1_library = models.ForeignKey(Library, related_name="lane_1_library")
+ lane_2_library = models.ForeignKey(Library, related_name="lane_2_library")
+ lane_3_library = models.ForeignKey(Library, related_name="lane_3_library")
+ lane_4_library = models.ForeignKey(Library, related_name="lane_4_library")
+ lane_5_library = models.ForeignKey(Library, related_name="lane_5_library")
+ lane_6_library = models.ForeignKey(Library, related_name="lane_6_library")
+ lane_7_library = models.ForeignKey(Library, related_name="lane_7_library")
+ lane_8_library = models.ForeignKey(Library, related_name="lane_8_library")
+
+ lane_1_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+ lane_2_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+ lane_3_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+ lane_4_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+ lane_5_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+ lane_6_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+ lane_7_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+ lane_8_pM = models.DecimalField(max_digits=5, decimal_places=2, default=4)
+
+ lane_1_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+ lane_2_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+ lane_3_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+ lane_4_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+ lane_5_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+ lane_6_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+ lane_7_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+ lane_8_cluster_estimate = models.CharField(max_length=25, blank=True, null=True)
+
+ kit_1000148 = models.IntegerField(blank=True, null=True)
+ kit_1000147 = models.IntegerField(blank=True, null=True)
+ kit_1000183 = models.IntegerField(blank=True, null=True)
+ kit_1001625 = models.IntegerField(blank=True, null=True)
+
+ cluster_station_id = models.CharField(max_length=50, blank=True, null=True)
+ sequencer_id = models.CharField(max_length=50, blank=True, null=True)
+
+ notes = models.TextField(blank=True)
+
+ def __str__(self):
+ return '%s (%s)' % (self.flowcell_id, self.run_date)
+
+ class Meta:
+ ordering = ["-run_date"]
+
+ class Admin:
+ date_hierarchy = "run_date"
+ save_as = True
+ save_on_top = True
+ search_fields = ['flowcell_id', 'lane_1_library__library_id', 'lane_1_library__library_name', 'lane_2_library__library_id', 'lane_2_library__library_name', 'lane_3_library__library_id', 'lane_3_library__library_name', 'lane_4_library__library_id', 'lane_4_library__library_name', 'lane_5_library__library_id', 'lane_5_library__library_name', 'lane_6_library__library_id', 'lane_6_library__library_name', 'lane_7_library__library_id', 'lane_7_library__library_name', 'lane_8_library__library_id', 'lane_8_library__library_name']
+ list_display = ('run_date', 'flowcell_status', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
+ list_display_links = ('run_date', 'flowcell_id', 'lane_1_library', 'lane_2_library', 'lane_3_library', 'lane_4_library', 'lane_5_library', 'lane_6_library', 'lane_7_library', 'lane_8_library')
+ fields = (
+ (None, {
+ 'fields': ('run_date', ('flowcell_id', 'flowcell_status'), ('read_length', 'advanced_run'),)
+ }),
+ ('Lanes:', {
+ 'fields' : (('lane_1_library', 'lane_1_pM'), ('lane_2_library', 'lane_2_pM'), ('lane_3_library', 'lane_3_pM'), ('lane_4_library', 'lane_4_pM'), ('lane_5_library', 'lane_5_pM'), ('lane_6_library', 'lane_6_pM'), ('lane_7_library', 'lane_7_pM'), ('lane_8_library', 'lane_8_pM'),)
+ }),
+ (None, {
+ 'fields' : ('notes',)
+ }),
+ ('Kits & Machines:', {
+ 'classes': 'collapse',
+ 'fields' : (('kit_1000148', 'kit_1000147', 'kit_1000183', 'kit_1001625'), ('cluster_station_id', 'sequencer_id'),)
+ }),
+ ('Cluster Estimates:', {
+ 'classes': 'collapse',
+ 'fields' : (('lane_1_cluster_estimate', 'lane_2_cluster_estimate'), ('lane_3_cluster_estimate', 'lane_4_cluster_estimate'), ('lane_5_cluster_estimate', 'lane_6_cluster_estimate'), ('lane_7_cluster_estimate', 'lane_8_cluster_estimate',),)
+ }),
+ )
+
+# Did not finish implementing, removing to avoid further confusion.
+#class ElandResult(models.Model):
+#
+# class Admin: pass
+#
+# flow_cell = models.ForeignKey(FlowCell)
+# config_file = models.FileField(upload_to=settings.UPLOADTO_CONFIG_FILE)
+# eland_result_pack = models.FileField(upload_to=settings.UPLOADTO_ELAND_RESULT_PACKS)
+# bed_file_pack = models.FileField(upload_to=settings.UPLOADTO_BED_PACKS)
+#
+# notes = models.TextField(blank=True)
--- /dev/null
+# Create your views here.
\ No newline at end of file
--- /dev/null
+#!/usr/bin/env python
+from django.core.management import execute_manager
+try:
+ import settings # Assumed to be in the same directory.
+except ImportError:
+ import sys
+ sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
+ sys.exit(1)
+
+if __name__ == "__main__":
+ execute_manager(settings)
--- /dev/null
+import os
+
+# Django settings for elandifier project.
+
+DEBUG = True
+TEMPLATE_DEBUG = DEBUG
+
+ADMINS = (
+ # ('Your Name', 'your_email@domain.com'),
+)
+
+MANAGERS = ADMINS
+
+DATABASE_ENGINE = 'sqlite3' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'.
+DATABASE_NAME = os.path.abspath('../../fctracker.db') # Or path to database file if using sqlite3.
+DATABASE_USER = '' # Not used with sqlite3.
+DATABASE_PASSWORD = '' # Not used with sqlite3.
+DATABASE_HOST = '' # Set to empty string for localhost. Not used with sqlite3.
+DATABASE_PORT = '' # Set to empty string for default. Not used with sqlite3.
+
+# Local time zone for this installation. Choices can be found here:
+# http://www.postgresql.org/docs/8.1/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE
+# although not all variations may be possible on all operating systems.
+# If running in a Windows environment this must be set to the same as your
+# system time zone.
+TIME_ZONE = 'America/Los_Angeles'
+
+# Language code for this installation. All choices can be found here:
+# http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
+# http://blogs.law.harvard.edu/tech/stories/storyReader$15
+LANGUAGE_CODE = 'en-us'
+
+SITE_ID = 1
+
+# If you set this to False, Django will make some optimizations so as not
+# to load the internationalization machinery.
+USE_I18N = True
+
+# Absolute path to the directory that holds media.
+# Example: "/home/media/media.lawrence.com/"
+MEDIA_ROOT = ''
+
+# URL that handles the media served from MEDIA_ROOT.
+# Example: "http://media.lawrence.com"
+MEDIA_URL = ''
+
+# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
+# trailing slash.
+# Examples: "http://foo.com/media/", "/media/".
+ADMIN_MEDIA_PREFIX = '/media/'
+
+# Make this unique, and don't share it with anybody.
+SECRET_KEY = '(ekv^=gf(j9f(x25@a7r+8)hqlz%&_1!tw^75l%^041#vi=@4n'
+
+# List of callables that know how to import templates from various sources.
+TEMPLATE_LOADERS = (
+ 'django.template.loaders.filesystem.load_template_source',
+ 'django.template.loaders.app_directories.load_template_source',
+# 'django.template.loaders.eggs.load_template_source',
+)
+
+MIDDLEWARE_CLASSES = (
+ 'django.middleware.common.CommonMiddleware',
+ 'django.contrib.sessions.middleware.SessionMiddleware',
+ 'django.contrib.auth.middleware.AuthenticationMiddleware',
+ 'django.middleware.doc.XViewMiddleware',
+)
+
+ROOT_URLCONF = 'htsworkflow.frontend.urls'
+
+TEMPLATE_DIRS = (
+ # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
+ # Always use forward slashes, even on Windows.
+ # Don't forget to use absolute paths, not relative paths.
+ os.path.abspath("../../templates"),
+)
+
+INSTALLED_APPS = (
+ 'django.contrib.admin',
+ 'django.contrib.auth',
+ 'django.contrib.contenttypes',
+ 'django.contrib.sessions',
+ 'django.contrib.sites',
+ 'htsworkflow.frontend.eland_config',
+ 'htsworkflow.frontend.fctracker',
+ 'django.contrib.databrowse',
+)
+
+# Project specific settings
+UPLOADTO_HOME = os.path.abspath('../../uploads')
+UPLOADTO_CONFIG_FILE = os.path.join(UPLOADTO_HOME, 'eland_config')
+UPLOADTO_ELAND_RESULT_PACKS = os.path.join(UPLOADTO_HOME, 'eland_results')
+UPLOADTO_BED_PACKS = os.path.join(UPLOADTO_HOME, 'bed_packs')
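+# UPLOADTO_CONFIG_FILE is where the eland_config views cache generated config
+# files (one per flowcell id); the other UPLOADTO_* paths are referenced only
+# by the commented-out ElandResult model at this point.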
+
--- /dev/null
+from django.conf.urls.defaults import *
+
+# Databrowser:
+from django.contrib import databrowse
+from fctracker.models import Library, FlowCell
+databrowse.site.register(Library)
+databrowse.site.register(FlowCell)
+
+urlpatterns = patterns('',
+ # Base:
+ (r'^eland_config/', include('htsworkflow.frontend.eland_config.urls')),
+ # Admin:
+ (r'^admin/', include('django.contrib.admin.urls')),
+ # Databrowser:
+ (r'^databrowse/(.*)', databrowse.site.root),
+)
--- /dev/null
+
+from datetime import date
+from glob import glob
+import logging
+import os
+import time
+import re
+
+from htsworkflow.pipeline.runfolder import \
+ ElementTree, \
+ VERSION_RE, \
+ EUROPEAN_STRPTIME
+
+class Phasing(object):
+ PHASING = 'Phasing'
+ PREPHASING = 'Prephasing'
+
+ def __init__(self, fromfile=None, xml=None):
+ self.lane = None
+ self.phasing = None
+ self.prephasing = None
+
+ if fromfile is not None:
+ self._initialize_from_file(fromfile)
+ elif xml is not None:
+ self.set_elements(xml)
+
+ def _initialize_from_file(self, pathname):
+ path, name = os.path.split(pathname)
+ basename, ext = os.path.splitext(name)
+ # the last character of the param base filename should be the
+ # lane number
+ tree = ElementTree.parse(pathname).getroot()
+ self.set_elements(tree)
+ self.lane = int(basename[-1])
+
+ def get_elements(self):
+ root = ElementTree.Element(Phasing.PHASING, {'lane': str(self.lane)})
+ phasing = ElementTree.SubElement(root, Phasing.PHASING)
+ phasing.text = str(self.phasing)
+ prephasing = ElementTree.SubElement(root, Phasing.PREPHASING)
+ prephasing.text = str(self.prephasing)
+ return root
+
+ def set_elements(self, tree):
+ if tree.tag not in ('Phasing', 'Parameters'):
+            raise ValueError('expected Phasing or Parameters')
+ lane = tree.attrib.get('lane', None)
+ if lane is not None:
+ self.lane = int(lane)
+ for element in list(tree):
+ if element.tag == Phasing.PHASING:
+ self.phasing = float(element.text)
+ elif element.tag == Phasing.PREPHASING:
+ self.prephasing = float(element.text)
+
+class Bustard(object):
+ XML_VERSION = 1
+
+ # Xml Tags
+ BUSTARD = 'Bustard'
+ SOFTWARE_VERSION = 'version'
+ DATE = 'run_time'
+ USER = 'user'
+ PARAMETERS = 'Parameters'
+
+ def __init__(self, xml=None):
+ self.version = None
+ self.date = date.today()
+ self.user = None
+ self.phasing = {}
+
+ if xml is not None:
+ self.set_elements(xml)
+
+ def _get_time(self):
+ return time.mktime(self.date.timetuple())
+ time = property(_get_time, doc='return run time as seconds since epoch')
+
+ def dump(self):
+ print "Bustard version:", self.version
+ print "Run date", self.date
+ print "user:", self.user
+ for lane, tree in self.phasing.items():
+ print lane
+ print tree
+
+ def get_elements(self):
+ root = ElementTree.Element('Bustard',
+ {'version': str(Bustard.XML_VERSION)})
+ version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION)
+ version.text = self.version
+ run_date = ElementTree.SubElement(root, Bustard.DATE)
+ run_date.text = str(self.time)
+ user = ElementTree.SubElement(root, Bustard.USER)
+ user.text = self.user
+ params = ElementTree.SubElement(root, Bustard.PARAMETERS)
+ for p in self.phasing.values():
+ params.append(p.get_elements())
+ return root
+
+ def set_elements(self, tree):
+ if tree.tag != Bustard.BUSTARD:
+ raise ValueError('Expected "Bustard" SubElements')
+ xml_version = int(tree.attrib.get('version', 0))
+ if xml_version > Bustard.XML_VERSION:
+ logging.warn('Bustard XML tree is a higher version than this class')
+ for element in list(tree):
+ if element.tag == Bustard.SOFTWARE_VERSION:
+ self.version = element.text
+ elif element.tag == Bustard.DATE:
+ self.date = date.fromtimestamp(float(element.text))
+ elif element.tag == Bustard.USER:
+ self.user = element.text
+ elif element.tag == Bustard.PARAMETERS:
+ for param in element:
+ p = Phasing(xml=param)
+ self.phasing[p.lane] = p
+ else:
+ raise ValueError("Unrecognized tag: %s" % (element.tag,))
+
+
+
+def bustard(pathname):
+ """
+ Construct a Bustard object from pathname
+ """
+ b = Bustard()
+ path, name = os.path.split(pathname)
+ groups = name.split("_")
+ version = re.search(VERSION_RE, groups[0])
+ b.version = version.group(1)
+ t = time.strptime(groups[1], EUROPEAN_STRPTIME)
+ b.date = date(*t[0:3])
+ b.user = groups[2]
+ paramfiles = glob(os.path.join(pathname, "params?.xml"))
+ for paramfile in paramfiles:
+ phasing = Phasing(paramfile)
+ assert (phasing.lane >= 1 and phasing.lane <= 8)
+ b.phasing[phasing.lane] = phasing
+ return b
+
+def fromxml(tree):
+ b = Bustard()
+ b.set_elements(tree)
+ return b
--- /dev/null
+#!/usr/bin/python
+import subprocess
+import logging
+import time
+import re
+import os
+
+from htsworkflow.pipeline.retrieve_config import getCombinedOptions, saveConfigFile
+from htsworkflow.pipeline.retrieve_config import FlowCellNotFound, WebError404
+from htsworkflow.pipeline.genome_mapper import DuplicateGenome, getAvailableGenomes, constructMapperDict
+from htsworkflow.pipeline.run_status import GARunStatus
+
+from pyinotify import WatchManager, ThreadedNotifier
+from pyinotify import EventsCodes, ProcessEvent
+
+class ConfigInfo:
+
+ def __init__(self):
+ #run_path = firecrest analysis directory to run analysis from
+ self.run_path = None
+ self.bustard_path = None
+ self.config_filepath = None
+ self.status = None
+
+ #top level directory where all analyses are placed
+ self.base_analysis_dir = None
+ #analysis_dir, top level analysis dir...
+ # base_analysis_dir + '/070924_USI-EAS44_0022_FC12150'
+ self.analysis_dir = None
+
+
+ def createStatusObject(self):
+ """
+ Creates a status object which can be queried for
+ status of running the pipeline
+
+ returns True if object created
+ returns False if object cannot be created
+ """
+ if self.config_filepath is None:
+ return False
+
+ self.status = GARunStatus(self.config_filepath)
+ return True
+
+
+
+####################################
+# inotify event processor
+
+s_firecrest_finished = re.compile('Firecrest[0-9\._\-A-Za-z]+/finished.txt')
+s_bustard_finished = re.compile('Bustard[0-9\._\-A-Za-z]+/finished.txt')
+s_gerald_finished = re.compile('GERALD[0-9\._\-A-Za-z]+/finished.txt')
+
+s_gerald_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/GERALD[0-9\._\-A-Za-z]+/')
+s_bustard_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/')
+s_firecrest_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/')
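+
+# These are searched against the full path of each created file; an
+# illustrative match (directory names are examples only) would be
+#   .../Firecrest1.9.1_14-11-2007_king/Bustard1.9.1_14-11-2007_king/GERALD<...>/finished.txt
+# The *_finished patterns fire on a stage's finished.txt marker, while the
+# *_all patterns match anything created beneath that stage's directory tree.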
+
+class RunEvent(ProcessEvent):
+
+ def __init__(self, conf_info):
+
+ self.run_status_dict = {'firecrest': False,
+ 'bustard': False,
+ 'gerald': False}
+
+ self._ci = conf_info
+
+ ProcessEvent.__init__(self)
+
+
+ def process_IN_CREATE(self, event):
+ fullpath = os.path.join(event.path, event.name)
+ if s_finished.search(fullpath):
+ logging.info("File Found: %s" % (fullpath))
+
+ if s_firecrest_finished.search(fullpath):
+ self.run_status_dict['firecrest'] = True
+ self._ci.status.updateFirecrest(event.name)
+ elif s_bustard_finished.search(fullpath):
+ self.run_status_dict['bustard'] = True
+ self._ci.status.updateBustard(event.name)
+ elif s_gerald_finished.search(fullpath):
+ self.run_status_dict['gerald'] = True
+ self._ci.status.updateGerald(event.name)
+
+ #WARNING: The following order is important!!
+ # Firecrest regex will catch all gerald, bustard, and firecrest
+ # Bustard regex will catch all gerald and bustard
+ # Gerald regex will catch all gerald
+ # So, order needs to be Gerald, Bustard, Firecrest, or this
+ # won't work properly.
+ elif s_gerald_all.search(fullpath):
+ self._ci.status.updateGerald(event.name)
+ elif s_bustard_all.search(fullpath):
+ self._ci.status.updateBustard(event.name)
+ elif s_firecrest_all.search(fullpath):
+ self._ci.status.updateFirecrest(event.name)
+
+ #print "Create: %s" % (os.path.join(event.path, event.name))
+
+ def process_IN_DELETE(self, event):
+ #print "Remove %s" % (os.path.join(event.path, event.name))
+ pass
+
+
+
+
+#FLAGS
+# Config Step Error
+RUN_ABORT = 'abort'
+# Run Step Error
+RUN_FAILED = 'failed'
+
+
+#####################################
+# Configure Step (goat_pipeline.py)
+#Info
+s_start = re.compile('Starting Genome Analyzer Pipeline')
+s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
+s_generating = re.compile('^Generating journals, Makefiles')
+s_seq_folder = re.compile('^Sequence folder: ')
+s_seq_folder_sub = re.compile('want to make ')
+s_stderr_taskcomplete = re.compile('^Task complete, exiting')
+
+#Errors
+s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
+s_species_dir_err = re.compile('Error: Lane [1-8]:')
+s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
+s_missing_cycles = re.compile('^Error: Tile s_[1-8]_[0-9]+: Different number of cycles: [0-9]+ instead of [0-9]+')
+
+SUPPRESS_MISSING_CYCLES = False
+
+
+##Ignore - Example output appears above each ignore regex.
+#NOTE: Commenting out an ignore will cause it to be
+# logged as DEBUG with the logging module.
+#CF_STDERR_IGNORE_LIST = []
+s_skip = re.compile('s_[0-8]_[0-9]+')
+
+
+##########################################
+# Pipeline Run Step (make -j8 recursive)
+
+##Info
+s_finished = re.compile('finished')
+
+##Errors
+s_make_error = re.compile('^make[\S\s]+Error')
+s_no_gnuplot = re.compile('gnuplot: command not found')
+s_no_convert = re.compile('^Can\'t exec "convert"')
+s_no_ghostscript = re.compile('gs: command not found')
+
+##Ignore - Example output appears above each ignore regex.
+#NOTE: Commenting out an ignore will cause it to be
+# logged as DEBUG with the logging module.
+#
+PL_STDERR_IGNORE_LIST = []
+# Info: PF 11802
+PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') )
+# About to analyse intensity file s_4_0101_sig2.txt
+PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') )
+# Will send output to standard output
+PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') )
+# Found 31877 clusters
+PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') )
+# Will use quality criterion ((CHASTITY>=0.6)
+PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') )
+# Quality criterion translated to (($F[5]>=0.6))
+PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') )
+# opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt
+# AND
+# opened s_4_0103_qhg.txt
+PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') )
+# 81129 sequences out of 157651 passed filter criteria
+PL_STDERR_IGNORE_LIST.append( re.compile('^[0-9]+ sequences out of [0-9]+ passed filter criteria') )
+
+
+def pl_stderr_ignore(line):
+ """
+ Searches lines for lines to ignore (i.e. not to log)
+
+ returns True if line should be ignored
+ returns False if line should NOT be ignored
+ """
+ for s in PL_STDERR_IGNORE_LIST:
+ if s.search(line):
+ return True
+ return False
+
+
+def config_stdout_handler(line, conf_info):
+ """
+ Processes each line of output from GOAT
+ and stores useful information using the logging module
+
+ Loads useful information into conf_info as well, for future
+ use outside the function.
+
+ returns True if found condition that signifies success.
+ """
+
+ # Skip irrelevant line (without logging)
+ if s_skip.search(line):
+ pass
+
+ # Detect invalid command-line arguments
+ elif s_invalid_cmdline.search(line):
+ logging.error("Invalid commandline options!")
+
+ # Detect starting of configuration
+ elif s_start.search(line):
+ logging.info('START: Configuring pipeline')
+
+ # Detect it made it past invalid arguments
+ elif s_gerald.search(line):
+ logging.info('Running make now')
+
+ # Detect that make files have been generated (based on output)
+ elif s_generating.search(line):
+    logging.info('Make files generated')
+ return True
+
+ # Capture run directory
+ elif s_seq_folder.search(line):
+ mo = s_seq_folder_sub.search(line)
+ #Output changed when using --tiles=<tiles>
+ # at least in pipeline v0.3.0b2
+ if mo:
+ firecrest_bustard_gerald_makefile = line[mo.end():]
+ firecrest_bustard_gerald, junk = \
+ os.path.split(firecrest_bustard_gerald_makefile)
+ firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald)
+ firecrest, junk = os.path.split(firecrest_bustard)
+
+ conf_info.bustard_path = firecrest_bustard
+ conf_info.run_path = firecrest
+
+ #Standard output handling
+ else:
+ print 'Sequence line:', line
+ mo = s_seq_folder.search(line)
+ conf_info.bustard_path = line[mo.end():]
+ conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
+
+ # Log all other output for debugging purposes
+ else:
+ logging.warning('CONF:?: %s' % (line))
+
+ return False
+
+
+
+def config_stderr_handler(line, conf_info):
+ """
+ Processes each line of output from GOAT
+ and stores useful information using the logging module
+
+ Loads useful information into conf_info as well, for future
+ use outside the function.
+
+ returns RUN_ABORT upon detecting failure;
+ True on success message;
+ False if neutral message
+ (i.e. doesn't signify failure or success)
+ """
+ global SUPPRESS_MISSING_CYCLES
+
+ # Detect invalid species directory error
+ if s_species_dir_err.search(line):
+ logging.error(line)
+ return RUN_ABORT
+ # Detect goat_pipeline.py traceback
+ elif s_goat_traceb.search(line):
+ logging.error("Goat config script died, traceback in debug output")
+ return RUN_ABORT
+ # Detect indication of successful configuration (from stderr; odd, but ok)
+ elif s_stderr_taskcomplete.search(line):
+ logging.info('Configure step successful (from: stderr)')
+ return True
+ # Detect missing cycles
+ elif s_missing_cycles.search(line):
+
+ # Only display error once
+ if not SUPPRESS_MISSING_CYCLES:
+ logging.error("Missing cycles detected; Not all cycles copied?")
+ logging.debug("CONF:STDERR:MISSING_CYCLES: %s" % (line))
+ SUPPRESS_MISSING_CYCLES = True
+ return RUN_ABORT
+
+ # Log all other output as debug output
+ else:
+ logging.debug('CONF:STDERR:?: %s' % (line))
+
+ # Neutral (not failure; nor success)
+ return False
+
+
+#def pipeline_stdout_handler(line, conf_info):
+# """
+# Processes each line of output from running the pipeline
+# and stores useful information using the logging module
+#
+# Loads useful information into conf_info as well, for future
+# use outside the function.
+#
+# returns True if found condition that signifies success.
+# """
+#
+# #f.write(line + '\n')
+#
+# return True
+
+
+
+def pipeline_stderr_handler(line, conf_info):
+ """
+  Processes each line of stderr from the pipeline run
+ and stores useful information using the logging module
+
+ ##FIXME: Future feature (doesn't actually do this yet)
+ #Loads useful information into conf_info as well, for future
+ #use outside the function.
+
+ returns RUN_FAILED upon detecting failure;
+ #True on success message; (no clear success state)
+ False if neutral message
+ (i.e. doesn't signify failure or success)
+ """
+
+ if pl_stderr_ignore(line):
+ pass
+ elif s_make_error.search(line):
+ logging.error("make error detected; run failed")
+ return RUN_FAILED
+ elif s_no_gnuplot.search(line):
+ logging.error("gnuplot not found")
+ return RUN_FAILED
+ elif s_no_convert.search(line):
+ logging.error("imagemagick's convert command not found")
+ return RUN_FAILED
+ elif s_no_ghostscript.search(line):
+ logging.error("ghostscript not found")
+ return RUN_FAILED
+ else:
+ logging.debug('PIPE:STDERR:?: %s' % (line))
+
+ return False
+
+
+def retrieve_config(conf_info, flowcell, cfg_filepath, genome_dir):
+ """
+ Gets the config file from server...
+ requires config file in:
+ /etc/ga_frontend/ga_frontend.conf
+ or
+ ~/.ga_frontend.conf
+
+ with:
+ [config_file_server]
+ base_host_url: http://host:port
+
+  returns True if successful, False on failure
+ """
+ options = getCombinedOptions()
+
+ if options.url is None:
+ logging.error("~/.ga_frontend.conf or /etc/ga_frontend/ga_frontend.conf" \
+ " missing base_host_url option")
+ return False
+
+ try:
+ saveConfigFile(flowcell, options.url, cfg_filepath)
+ conf_info.config_filepath = cfg_filepath
+ except FlowCellNotFound, e:
+ logging.error(e)
+ return False
+ except WebError404, e:
+ logging.error(e)
+ return False
+ except IOError, e:
+ logging.error(e)
+ return False
+ except Exception, e:
+ logging.error(e)
+ return False
+
+ f = open(cfg_filepath, 'r')
+ data = f.read()
+ f.close()
+
+ genome_dict = getAvailableGenomes(genome_dir)
+ mapper_dict = constructMapperDict(genome_dict)
+
+ logging.debug(data)
+
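+  # The config text fetched from the frontend contains %(species)s style
+  # placeholders (written by the frontend's getElandConfig); formatting it
+  # with mapper_dict below maps them onto local ELAND genome directories.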
+ f = open(cfg_filepath, 'w')
+ f.write(data % (mapper_dict))
+ f.close()
+
+ return True
+
+
+
+def configure(conf_info):
+ """
+ Attempts to configure the GA pipeline using goat.
+
+ Uses logging module to store information about status.
+
+ returns True if configuration successful, otherwise False.
+ """
+ #ERROR Test:
+ #pipe = subprocess.Popen(['goat_pipeline.py',
+ # '--GERALD=config32bk.txt',
+ # '--make .',],
+ # #'.'],
+ # stdout=subprocess.PIPE,
+ # stderr=subprocess.PIPE)
+
+ #ERROR Test (2), causes goat_pipeline.py traceback
+ #pipe = subprocess.Popen(['goat_pipeline.py',
+ # '--GERALD=%s' % (conf_info.config_filepath),
+ # '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
+ # '--make',
+ # '.'],
+ # stdout=subprocess.PIPE,
+ # stderr=subprocess.PIPE)
+
+ ##########################
+ # Run configuration step
+ # Not a test; actual configure attempt.
+ #pipe = subprocess.Popen(['goat_pipeline.py',
+ # '--GERALD=%s' % (conf_info.config_filepath),
+ # '--make',
+ # '.'],
+ # stdout=subprocess.PIPE,
+ # stderr=subprocess.PIPE)
+
+
+ stdout_filepath = os.path.join(conf_info.analysis_dir,
+ "pipeline_configure_stdout.txt")
+ stderr_filepath = os.path.join(conf_info.analysis_dir,
+ "pipeline_configure_stderr.txt")
+
+ fout = open(stdout_filepath, 'w')
+ ferr = open(stderr_filepath, 'w')
+
+ pipe = subprocess.Popen(['goat_pipeline.py',
+ '--GERALD=%s' % (conf_info.config_filepath),
+ #'--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_0103,s_4_0104',
+ '--make',
+ conf_info.analysis_dir],
+ stdout=fout,
+ stderr=ferr)
+
+ print "Configuring pipeline: %s" % (time.ctime())
+ error_code = pipe.wait()
+
+ # Clean up
+ fout.close()
+ ferr.close()
+
+
+ ##################
+ # Process stdout
+ fout = open(stdout_filepath, 'r')
+
+ stdout_line = fout.readline()
+
+ complete = False
+ while stdout_line != '':
+ # Handle stdout
+ if config_stdout_handler(stdout_line, conf_info):
+ complete = True
+ stdout_line = fout.readline()
+
+ fout.close()
+
+
+ #error_code = pipe.wait()
+ if error_code:
+    logging.error('Received error_code: %s' % (error_code))
+ else:
+ logging.info('We are go for launch!')
+
+ #Process stderr
+ ferr = open(stderr_filepath, 'r')
+ stderr_line = ferr.readline()
+
+ abort = 'NO!'
+ stderr_success = False
+ while stderr_line != '':
+ stderr_status = config_stderr_handler(stderr_line, conf_info)
+ if stderr_status == RUN_ABORT:
+ abort = RUN_ABORT
+ elif stderr_status is True:
+ stderr_success = True
+ stderr_line = ferr.readline()
+
+ ferr.close()
+
+
+  #Success requirements:
+  # 1) The stdout handler reported the configure step complete
+  # 2) The program exited with status 0
+  # 3) No abort condition was detected in stderr
+  # 4) The success message was found in stderr
+ print '#Expect: True, False, True, True'
+ print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
+ status = complete is True and \
+ bool(error_code) is False and \
+ abort != RUN_ABORT and \
+ stderr_success is True
+
+ # If everything was successful, but for some reason
+ # we didn't retrieve the path info, log it.
+ if status is True:
+ if conf_info.bustard_path is None or conf_info.run_path is None:
+ logging.error("Failed to retrieve run_path")
+ return False
+
+ return status
+
+
+def run_pipeline(conf_info):
+ """
+ Run the pipeline and monitor status.
+ """
+ # Fail if the run_path doesn't actually exist
+ if not os.path.exists(conf_info.run_path):
+ logging.error('Run path does not exist: %s' \
+ % (conf_info.run_path))
+ return False
+
+ # Change cwd to run_path
+ stdout_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stdout.txt')
+ stderr_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stderr.txt')
+
+ # Create status object
+ conf_info.createStatusObject()
+
+ # Monitor file creation
+ wm = WatchManager()
+ mask = EventsCodes.IN_DELETE | EventsCodes.IN_CREATE
+ event = RunEvent(conf_info)
+ notifier = ThreadedNotifier(wm, event)
+ notifier.start()
+ wdd = wm.add_watch(conf_info.run_path, mask, rec=True)
+
+ # Log pipeline starting
+ logging.info('STARTING PIPELINE @ %s' % (time.ctime()))
+
+ # Start the pipeline (and hide!)
+ #pipe = subprocess.Popen(['make',
+ # '-j8',
+ # 'recursive'],
+ # stdout=subprocess.PIPE,
+ # stderr=subprocess.PIPE)
+
+ fout = open(stdout_filepath, 'w')
+ ferr = open(stderr_filepath, 'w')
+
+ pipe = subprocess.Popen(['make',
+ '--directory=%s' % (conf_info.run_path),
+ '-j8',
+ 'recursive'],
+ stdout=fout,
+ stderr=ferr)
+ #shell=True)
+ # Wait for run to finish
+ retcode = pipe.wait()
+
+
+ # Clean up
+ notifier.stop()
+ fout.close()
+ ferr.close()
+
+ # Process stderr
+ ferr = open(stderr_filepath, 'r')
+
+ run_failed_stderr = False
+ for line in ferr:
+ err_status = pipeline_stderr_handler(line, conf_info)
+ if err_status == RUN_FAILED:
+ run_failed_stderr = True
+
+ ferr.close()
+
+ # Finished file check!
+ print 'RUN SUCCESS CHECK:'
+ for key, value in event.run_status_dict.items():
+ print ' %s: %s' % (key, value)
+
+ dstatus = event.run_status_dict
+
+ # Success or failure check
+ status = (retcode == 0) and \
+ run_failed_stderr is False and \
+ dstatus['firecrest'] is True and \
+ dstatus['bustard'] is True and \
+ dstatus['gerald'] is True
+
+ return status
+
+
--- /dev/null
+"""
+Extract information about the Firecrest run
+
+Firecrest - class holding the properties we found
+firecrest - Firecrest factory function initialized from a directory name
+fromxml - Firecrest factory function initialized from an xml dump from
+ the Firecrest object.
+"""
+
+from datetime import date
+import logging
+import os
+import re
+import time
+
+from htsworkflow.pipeline.runfolder import \
+ ElementTree, \
+ VERSION_RE, \
+ EUROPEAN_STRPTIME
+
+class Firecrest(object):
+ XML_VERSION=1
+
+ # xml tag names
+ FIRECREST = 'Firecrest'
+ SOFTWARE_VERSION = 'version'
+ START = 'FirstCycle'
+ STOP = 'LastCycle'
+ DATE = 'run_time'
+ USER = 'user'
+ MATRIX = 'matrix'
+
+ def __init__(self, xml=None):
+ self.start = None
+ self.stop = None
+ self.version = None
+ self.date = date.today()
+ self.user = None
+ self.matrix = None
+
+ if xml is not None:
+ self.set_elements(xml)
+
+ def _get_time(self):
+ return time.mktime(self.date.timetuple())
+ time = property(_get_time, doc='return run time as seconds since epoch')
+
+ def dump(self):
+ print "Starting cycle:", self.start
+ print "Ending cycle:", self.stop
+ print "Firecrest version:", self.version
+ print "Run date:", self.date
+ print "user:", self.user
+
+ def get_elements(self):
+ attribs = {'version': str(Firecrest.XML_VERSION) }
+ root = ElementTree.Element(Firecrest.FIRECREST, attrib=attribs)
+ version = ElementTree.SubElement(root, Firecrest.SOFTWARE_VERSION)
+ version.text = self.version
+ start_cycle = ElementTree.SubElement(root, Firecrest.START)
+ start_cycle.text = str(self.start)
+ stop_cycle = ElementTree.SubElement(root, Firecrest.STOP)
+ stop_cycle.text = str(self.stop)
+ run_date = ElementTree.SubElement(root, Firecrest.DATE)
+ run_date.text = str(self.time)
+ user = ElementTree.SubElement(root, Firecrest.USER)
+ user.text = self.user
+ matrix = ElementTree.SubElement(root, Firecrest.MATRIX)
+ matrix.text = self.matrix
+ return root
+
+ def set_elements(self, tree):
+ if tree.tag != Firecrest.FIRECREST:
+ raise ValueError('Expected "Firecrest" SubElements')
+ xml_version = int(tree.attrib.get('version', 0))
+ if xml_version > Firecrest.XML_VERSION:
+ logging.warn('Firecrest XML tree is a higher version than this class')
+ for element in list(tree):
+ if element.tag == Firecrest.SOFTWARE_VERSION:
+ self.version = element.text
+ elif element.tag == Firecrest.START:
+ self.start = int(element.text)
+ elif element.tag == Firecrest.STOP:
+ self.stop = int(element.text)
+ elif element.tag == Firecrest.DATE:
+ self.date = date.fromtimestamp(float(element.text))
+ elif element.tag == Firecrest.USER:
+ self.user = element.text
+ elif element.tag == Firecrest.MATRIX:
+ self.matrix = element.text
+ else:
+ raise ValueError("Unrecognized tag: %s" % (element.tag,))
+
+def firecrest(pathname):
+ """
+    Examine the directory at pathname and initialize a Firecrest object
+ """
+ f = Firecrest()
+
+ # parse firecrest directory name
+ path, name = os.path.split(pathname)
+ groups = name.split('_')
+ # grab the start/stop cycle information
+ cycle = re.match("C([0-9]+)-([0-9]+)", groups[0])
+ f.start = int(cycle.group(1))
+ f.stop = int(cycle.group(2))
+ # firecrest version
+ version = re.search(VERSION_RE, groups[1])
+ f.version = (version.group(1))
+ # datetime
+ t = time.strptime(groups[2], EUROPEAN_STRPTIME)
+ f.date = date(*t[0:3])
+ # username
+ f.user = groups[3]
+
+ # should I parse this deeper than just stashing the
+ # contents of the matrix file?
+ matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
+ f.matrix = open(matrix_pathname, 'r').read()
+ return f
+
+def fromxml(tree):
+ """
+ Initialize a Firecrest object from an element tree node
+ """
+ f = Firecrest()
+ f.set_elements(tree)
+ return f
--- /dev/null
+#!/usr/bin/python
+import glob
+import sys
+import os
+import re
+
+import logging
+
+from htsworkflow.util.alphanum import alphanum
+
+class DuplicateGenome(Exception): pass
+
+
+def _has_metainfo(genome_dir):
+ metapath = os.path.join(genome_dir, '_metainfo_')
+ if os.path.isfile(metapath):
+ return True
+ else:
+ return False
+
+def getAvailableGenomes(genome_base_dir):
+ """
+ raises IOError (on genome_base_dir not found)
+ raises DuplicateGenome on duplicate genomes found.
+
+ returns a double dictionary (i.e. d[species][build] = path)
+ """
+
+ # Need valid directory
+ if not os.path.exists(genome_base_dir):
+ msg = "Directory does not exist: %s" % (genome_base_dir)
+ raise IOError, msg
+
+ # Find all subdirectories
+ filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
+ potential_genome_dirs = \
+ [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
+
+ # Get list of metadata files
+ genome_dir_list = \
+ [ dirpath \
+ for dirpath in potential_genome_dirs \
+ if _has_metainfo(dirpath) ]
+
+ # Genome double dictionary
+ d = {}
+
+ for genome_dir in genome_dir_list:
+    metafile = os.path.join(genome_dir, '_metainfo_')
+    line = open(metafile, 'r').readline().strip()
+
+    # Get species, build... log and skip on failure
+    try:
+      species, build = line.split('|')
+    except ValueError:
+      logging.warning('Skipping: Invalid metafile (%s) line: %s' \
+                      % (metafile, line))
+ continue
+
+ build_dict = d.setdefault(species, {})
+ if build in build_dict:
+ msg = "Duplicate genome for %s|%s" % (species, build)
+ raise DuplicateGenome, msg
+
+ build_dict[build] = genome_dir
+
+ return d
+
+
+class constructMapperDict(object):
+ """
+ Emulate a dictionary to map genome|build names to paths.
+
+ It uses the dictionary generated by getAvailableGenomes.
+ """
+ def __init__(self, genome_dict):
+ self.genome_dict = genome_dict
+
+ def __getitem__(self, key):
+ """
+ Return the best match for key
+ """
+ elements = re.split("\|", key)
+
+ if len(elements) == 1:
+      # we have just the species name
+ # get the set of builds
+ builds = self.genome_dict[elements[0]]
+
+ # sort build names the way humans would
+ keys = builds.keys()
+ keys.sort(cmp=alphanum)
+
+ # return the path from the 'last' build name
+ return builds[keys[-1]]
+
+ elif len(elements) == 2:
+ # we have species, and build name
+ return self.genome_dict[elements[0]][elements[1]]
+ else:
+ raise KeyError("Unrecognized key")
+
+ def keys(self):
+ keys = []
+ for species in self.genome_dict.keys():
+ for build in self.genome_dict[species]:
+        keys.append(species+'|'+build)
+ return keys
+
+ def values(self):
+ values = []
+ for species in self.genome_dict.keys():
+ for build in self.genome_dict[species]:
+ values.append(self.genome_dict[species][build])
+ return values
+
+ def items(self):
+ items = []
+ for species in self.genome_dict.keys():
+ for build in self.genome_dict[species]:
+        key = species+'|'+build
+ value = self.genome_dict[species][build]
+ items.append((key, value))
+ return items
+
+if __name__ == '__main__':
+
+ if len(sys.argv) != 2:
+    print 'usage: %s <base_genome_dir>' % (sys.argv[0])
+ sys.exit(1)
+
+ d = getAvailableGenomes(sys.argv[1])
+ d2 = constructMapperDict(d)
+
+ for k,v in d2.items():
+ print '%s: %s' % (k,v)
+
+
--- /dev/null
+"""
+Provide access to information stored in the GERALD directory.
+"""
+from datetime import datetime, date
+from glob import glob
+import logging
+import os
+import stat
+import time
+import types
+
+from htsworkflow.pipeline.runfolder import \
+ ElementTree, \
+ EUROPEAN_STRPTIME, \
+ LANES_PER_FLOWCELL, \
+ VERSION_RE
+from htsworkflow.util.ethelp import indent, flatten
+from htsworkflow.util.opener import autoopen
+
+class Gerald(object):
+ """
+ Capture meaning out of the GERALD directory
+ """
+ XML_VERSION = 1
+ GERALD='Gerald'
+ RUN_PARAMETERS='RunParameters'
+ SUMMARY='Summary'
+
+ class LaneParameters(object):
+ """
+ Make it easy to access elements of LaneSpecificRunParameters from python
+ """
+ def __init__(self, gerald, key):
+ self._gerald = gerald
+ self._key = key
+
+ def __get_attribute(self, xml_tag):
+ subtree = self._gerald.tree.find('LaneSpecificRunParameters')
+ container = subtree.find(xml_tag)
+ if container is None:
+ return None
+ if len(container.getchildren()) > LANES_PER_FLOWCELL:
+ raise RuntimeError('GERALD config.xml file changed')
+ lanes = [x.tag.split('_')[1] for x in container.getchildren()]
+ index = lanes.index(self._key)
+ element = container[index]
+ return element.text
+ def _get_analysis(self):
+ return self.__get_attribute('ANALYSIS')
+ analysis = property(_get_analysis)
+
+ def _get_eland_genome(self):
+ genome = self.__get_attribute('ELAND_GENOME')
+ # default to the chipwide parameters if there isn't an
+            # entry in the lane specific parameters
+ if genome is None:
+ subtree = self._gerald.tree.find('ChipWideRunParameters')
+ container = subtree.find('ELAND_GENOME')
+ genome = container.text
+ return genome
+ eland_genome = property(_get_eland_genome)
+
+ def _get_read_length(self):
+ return self.__get_attribute('READ_LENGTH')
+ read_length = property(_get_read_length)
+
+ def _get_use_bases(self):
+ return self.__get_attribute('USE_BASES')
+ use_bases = property(_get_use_bases)
+
+ class LaneSpecificRunParameters(object):
+ """
+ Provide access to LaneSpecificRunParameters
+ """
+ def __init__(self, gerald):
+ self._gerald = gerald
+ self._keys = None
+ def __getitem__(self, key):
+ return Gerald.LaneParameters(self._gerald, key)
+ def keys(self):
+ if self._keys is None:
+ tree = self._gerald.tree
+ analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
+ # according to the pipeline specs I think their fields
+ # are sampleName_laneID, with sampleName defaulting to s
+                # since laneIDs are constant let's just try using
+ # those consistently.
+ self._keys = [ x.tag.split('_')[1] for x in analysis]
+ return self._keys
+ def values(self):
+ return [ self[x] for x in self.keys() ]
+ def items(self):
+ return zip(self.keys(), self.values())
+ def __len__(self):
+ return len(self.keys())
+
+ def __init__(self, xml=None):
+ self.pathname = None
+ self.tree = None
+
+ # parse lane parameters out of the config.xml file
+ self.lanes = Gerald.LaneSpecificRunParameters(self)
+
+ self.summary = None
+ self.eland_results = None
+
+ if xml is not None:
+ self.set_elements(xml)
+
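+ # TIME_STAMP in ChipWideRunParameters is stored in asctime() form
+ # (e.g. 'Sat Apr 19 19:08:30 2008'), hence the '%c' strptime format below.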
+ def _get_date(self):
+ if self.tree is None:
+ return datetime.today()
+ timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
+ epochstamp = time.mktime(time.strptime(timestamp, '%c'))
+ return datetime.fromtimestamp(epochstamp)
+ date = property(_get_date)
+
+ def _get_time(self):
+ return time.mktime(self.date.timetuple())
+ time = property(_get_time, doc='return run time as seconds since epoch')
+
+ def _get_version(self):
+ if self.tree is None:
+ return None
+ return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION')
+ version = property(_get_version)
+
+ def dump(self):
+ """
+ Debugging function, report current object
+ """
+ print 'Gerald version:', self.version
+ print 'Gerald run date:', self.date
+ print 'Gerald config.xml:', self.tree
+ self.summary.dump()
+
+ def get_elements(self):
+ if self.tree is None or self.summary is None:
+ return None
+
+ gerald = ElementTree.Element(Gerald.GERALD,
+ {'version': unicode(Gerald.XML_VERSION)})
+ gerald.append(self.tree)
+ gerald.append(self.summary.get_elements())
+ if self.eland_results:
+ gerald.append(self.eland_results.get_elements())
+ return gerald
+
+ def set_elements(self, tree):
+ if tree.tag != Gerald.GERALD:
+ raise ValueError('expected GERALD')
+ xml_version = int(tree.attrib.get('version', 0))
+ if xml_version > Gerald.XML_VERSION:
+ logging.warn('XML tree is a higher version than this class')
+ for element in list(tree):
+ tag = element.tag.lower()
+ if tag == Gerald.RUN_PARAMETERS.lower():
+ self.tree = element
+ elif tag == Gerald.SUMMARY.lower():
+ self.summary = Summary(xml=element)
+ elif tag == ELAND.ELAND.lower():
+ self.eland_results = ELAND(xml=element)
+ else:
+ logging.warn("Unrecognized tag %s" % (element.tag,))
+
+
+def gerald(pathname):
+ g = Gerald()
+ g.pathname = pathname
+ path, name = os.path.split(pathname)
+ config_pathname = os.path.join(pathname, 'config.xml')
+ g.tree = ElementTree.parse(config_pathname).getroot()
+
+ # parse Summary.htm file
+ summary_pathname = os.path.join(pathname, 'Summary.htm')
+ g.summary = Summary(summary_pathname)
+ # parse eland files
+ g.eland_results = eland(g.pathname, g)
+ return g
+
+def tonumber(v):
+ """
+ Convert a value to an int if possible, otherwise to a float.
+ """
+ try:
+ v = int(v)
+ except ValueError, e:
+ v = float(v)
+ return v
+
+def parse_mean_range(value):
+ """
+ Parse values like 123 +/- 4.5
+ """
+ if value.strip() == 'unknown':
+ return 0, 0
+
+ average, pm, deviation = value.split()
+ if pm != '+/-':
+ raise RuntimeError("Summary.htm file format changed")
+ return tonumber(average), tonumber(deviation)
+
+def make_mean_range_element(parent, name, mean, deviation):
+ """
+ Make an ElementTree subelement <Name mean='mean', deviation='deviation'/>
+ """
+ element = ElementTree.SubElement(parent, name,
+ { 'mean': unicode(mean),
+ 'deviation': unicode(deviation)})
+ return element
+
+def parse_mean_range_element(element):
+ """
+ Grab mean/deviation out of element
+ """
+ return (tonumber(element.attrib['mean']),
+ tonumber(element.attrib['deviation']))
+
+def parse_summary_element(element):
+ """
+ Determine if we have a simple element or a mean/deviation element
+ """
+ if len(element.attrib) > 0:
+ return parse_mean_range_element(element)
+ else:
+ return element.text
+
+class Summary(object):
+ """
+ Extract some useful information from the Summary.htm file
+ """
+ XML_VERSION = 2
+ SUMMARY = 'Summary'
+
+ class LaneResultSummary(object):
+ """
+ Parse the LaneResultSummary table out of Summary.htm
+ Mostly for the cluster number
+ """
+ LANE_RESULT_SUMMARY = 'LaneResultSummary'
+ TAGS = {
+ 'LaneYield': 'lane_yield',
+ 'Cluster': 'cluster', # Raw
+ 'ClusterPF': 'cluster_pass_filter',
+ 'AverageFirstCycleIntensity': 'average_first_cycle_intensity',
+ 'PercentIntensityAfter20Cycles': 'percent_intensity_after_20_cycles',
+ 'PercentPassFilterClusters': 'percent_pass_filter_clusters',
+ 'PercentPassFilterAlign': 'percent_pass_filter_align',
+ 'AverageAlignmentScore': 'average_alignment_score',
+ 'PercentErrorRate': 'percent_error_rate'
+ }
+
+ def __init__(self, html=None, xml=None):
+ self.lane = None
+ self.lane_yield = None
+ self.cluster = None
+ self.cluster_pass_filter = None
+ self.average_first_cycle_intensity = None
+ self.percent_intensity_after_20_cycles = None
+ self.percent_pass_filter_clusters = None
+ self.percent_pass_filter_align = None
+ self.average_alignment_score = None
+ self.percent_error_rate = None
+
+ if html is not None:
+ self.set_elements_from_html(html)
+ if xml is not None:
+ self.set_elements(xml)
+
+ def set_elements_from_html(self, data):
+ if len(data) not in (8, 10):
+ raise RuntimeError("Summary.htm file format changed")
+
+ # same in pre-0.3.0 Summary file and 0.3 summary file
+ self.lane = data[0]
+
+ if len(data) == 8:
+ parsed_data = [ parse_mean_range(x) for x in data[1:] ]
+ # this is the < 0.3 Pipeline version
+ self.cluster = parsed_data[0]
+ self.average_first_cycle_intensity = parsed_data[1]
+ self.percent_intensity_after_20_cycles = parsed_data[2]
+ self.percent_pass_filter_clusters = parsed_data[3]
+ self.percent_pass_filter_align = parsed_data[4]
+ self.average_alignment_score = parsed_data[5]
+ self.percent_error_rate = parsed_data[6]
+ elif len(data) == 10:
+ parsed_data = [ parse_mean_range(x) for x in data[2:] ]
+ # this is the >= 0.3 summary file
+ self.lane_yield = data[1]
+ self.cluster = parsed_data[0]
+ self.cluster_pass_filter = parsed_data[1]
+ self.average_first_cycle_intensity = parsed_data[2]
+ self.percent_intensity_after_20_cycles = parsed_data[3]
+ self.percent_pass_filter_clusters = parsed_data[4]
+ self.percent_pass_filter_align = parsed_data[5]
+ self.average_alignment_score = parsed_data[6]
+ self.percent_error_rate = parsed_data[7]
+
+ def get_elements(self):
+ lane_result = ElementTree.Element(
+ Summary.LaneResultSummary.LANE_RESULT_SUMMARY,
+ {'lane': self.lane})
+ for tag, variable_name in Summary.LaneResultSummary.TAGS.items():
+ value = getattr(self, variable_name)
+ if value is None:
+ continue
+ # it looks like a sequence
+ elif type(value) in (types.TupleType, types.ListType):
+ element = make_mean_range_element(
+ lane_result,
+ tag,
+ *value
+ )
+ else:
+ element = ElementTree.SubElement(lane_result, tag)
+ element.text = value
+ return lane_result
+
+ def set_elements(self, tree):
+ if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY:
+ raise ValueError('Expected %s' % (
+ Summary.LaneResultSummary.LANE_RESULT_SUMMARY))
+ self.lane = tree.attrib['lane']
+ tags = Summary.LaneResultSummary.TAGS
+ for element in list(tree):
+ try:
+ variable_name = tags[element.tag]
+ setattr(self, variable_name,
+ parse_summary_element(element))
+ except KeyError, e:
+ logging.warn('Unrecognized tag %s' % (element.tag,))
+
+ def __init__(self, filename=None, xml=None):
+ self.lane_results = {}
+
+ if filename is not None:
+ self._extract_lane_results(filename)
+ if xml is not None:
+ self.set_elements(xml)
+
+ def __getitem__(self, key):
+ return self.lane_results[key]
+
+ def __len__(self):
+ return len(self.lane_results)
+
+ def keys(self):
+ return self.lane_results.keys()
+
+ def values(self):
+ return self.lane_results.values()
+
+ def items(self):
+ return self.lane_results.items()
+
+ def _flattened_row(self, row):
+ """
+ flatten the children of a <tr>...</tr>
+ """
+ return [flatten(x) for x in row.getchildren() ]
+
+ def _parse_table(self, table):
+ """
+ assumes the first line is the header of a table,
+ and that the remaining rows are data
+ """
+ rows = table.getchildren()
+ data = []
+ for r in rows:
+ data.append(self._flattened_row(r))
+ return data
+
+ def _extract_named_tables(self, pathname):
+ """
+ extract all the 'named' tables from a Summary.htm file
+ and return as a dictionary
+
+ Named tables are <h2>...</h2><table>...</table> pairs
+ The contents of the h2 tag are considered to be the name
+ of the table.
+ """
+ tree = ElementTree.parse(pathname).getroot()
+ body = tree.find('body')
+ tables = {}
+ for i in range(len(body) - 1):
+ if body[i].tag == 'h2' and body[i+1].tag == 'table':
+ # we have an interesting table
+ name = flatten(body[i])
+ table = body[i+1]
+ data = self._parse_table(table)
+ tables[name] = data
+ return tables
+
+ def _extract_lane_results(self, pathname):
+ """
+ extract the Lane Results Summary table
+ """
+
+ tables = self._extract_named_tables(pathname)
+
+ # parse lane result summary
+ lane_summary = tables['Lane Results Summary']
+ # this is version 1 of the summary file
+ if len(lane_summary[-1]) == 8:
+ # strip header
+ headers = lane_summary[0]
+ # grab the lane by lane data
+ lane_summary = lane_summary[1:]
+
+ # this is version 2 of the summary file
+ if len(lane_summary[-1]) == 10:
+ # lane_summary[0] is a different, less specific header row
+ headers = lane_summary[1]
+ lane_summary = lane_summary[2:10]
+ # after the last lane, there's a set of chip wide averages
+
+ for r in lane_summary:
+ lrs = Summary.LaneResultSummary(html=r)
+ self.lane_results[lrs.lane] = lrs
+
+ def get_elements(self):
+ summary = ElementTree.Element(Summary.SUMMARY,
+ {'version': unicode(Summary.XML_VERSION)})
+ for lane in self.lane_results.values():
+ summary.append(lane.get_elements())
+ return summary
+
+ def set_elements(self, tree):
+ if tree.tag != Summary.SUMMARY:
+ raise ValueError("Expected %s" % (Summary.SUMMARY,))
+ xml_version = int(tree.attrib.get('version', 0))
+ if xml_version > Summary.XML_VERSION:
+ logging.warn('Summary XML tree is a higher version than this class')
+ for element in list(tree):
+ lrs = Summary.LaneResultSummary()
+ lrs.set_elements(element)
+ self.lane_results[lrs.lane] = lrs
+
+ def dump(self):
+ """
+ Debugging function, report current object
+ """
+ pass
+
+
+def build_genome_fasta_map(genome_dir):
+ # build fasta to fasta file map
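+ # Every *.vld file in the genome directory is taken to stand for one
+ # reference fasta. Regular files map to '<genome>/<name>' while symlinked
+ # entries (e.g. spike-in or contamination controls) keep their bare name,
+ # which is how summarize_mapped_reads in runfolder.py tells them apart.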
+ genome = genome_dir.split(os.path.sep)[-1]
+ fasta_map = {}
+ for vld_file in glob(os.path.join(genome_dir, '*.vld')):
+ is_link = False
+ if os.path.islink(vld_file):
+ is_link = True
+ vld_file = os.path.realpath(vld_file)
+ path, vld_name = os.path.split(vld_file)
+ name, ext = os.path.splitext(vld_name)
+ if is_link:
+ fasta_map[name] = name
+ else:
+ fasta_map[name] = os.path.join(genome, name)
+ return fasta_map
+
+class ElandLane(object):
+ """
+ Process an eland result file
+ """
+ XML_VERSION = 1
+ LANE = 'ElandLane'
+ SAMPLE_NAME = 'SampleName'
+ LANE_ID = 'LaneID'
+ GENOME_MAP = 'GenomeMap'
+ GENOME_ITEM = 'GenomeItem'
+ MAPPED_READS = 'MappedReads'
+ MAPPED_ITEM = 'MappedItem'
+ MATCH_CODES = 'MatchCodes'
+ MATCH_ITEM = 'Code'
+ READS = 'Reads'
+
+ def __init__(self, pathname=None, genome_map=None, xml=None):
+ self.pathname = pathname
+ self._sample_name = None
+ self._lane_id = None
+ self._reads = None
+ self._mapped_reads = None
+ self._match_codes = None
+ if genome_map is None:
+ genome_map = {}
+ self.genome_map = genome_map
+
+ if xml is not None:
+ self.set_elements(xml)
+
+ def _update(self):
+ """
+ Actually read the file and count the reads
+ """
+ # can't do anything if we don't have a file to process
+ if self.pathname is None:
+ return
+
+ if os.stat(self.pathname)[stat.ST_SIZE] == 0:
+ raise RuntimeError("Eland isn't done, try again later.")
+
+ reads = 0
+ mapped_reads = {}
+
+ match_codes = {'NM':0, 'QC':0, 'RM':0,
+ 'U0':0, 'U1':0, 'U2':0,
+ 'R0':0, 'R1':0, 'R2':0,
+ }
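+ # Codes as reported by runfolder.summary_report: NM = no match,
+ # QC = failed quality filtering, U0-U2 = unique with 0-2 mismatches,
+ # R0-R2 = repeat with 0-2 mismatches; RM is counted but not summarized.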
+ for line in autoopen(self.pathname,'r'):
+ reads += 1
+ fields = line.split()
+ # code = fields[2]
+ # match_codes[code] = match_codes.setdefault(code, 0) + 1
+ # the QC/NM etc codes are in the 3rd field and always present
+ match_codes[fields[2]] += 1
+ # ignore lines that don't have a fasta filename
+ if len(fields) < 7:
+ continue
+ fasta = self.genome_map.get(fields[6], fields[6])
+ mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
+ self._match_codes = match_codes
+ self._mapped_reads = mapped_reads
+ self._reads = reads
+
+ def _update_name(self):
+ # extract the sample name
+ if self.pathname is None:
+ return
+
+ path, name = os.path.split(self.pathname)
+ split_name = name.split('_')
+ self._sample_name = split_name[0]
+ self._lane_id = split_name[1]
+
+ def _get_sample_name(self):
+ if self._sample_name is None:
+ self._update_name()
+ return self._sample_name
+ sample_name = property(_get_sample_name)
+
+ def _get_lane_id(self):
+ if self._lane_id is None:
+ self._update_name()
+ return self._lane_id
+ lane_id = property(_get_lane_id)
+
+ def _get_reads(self):
+ if self._reads is None:
+ self._update()
+ return self._reads
+ reads = property(_get_reads)
+
+ def _get_mapped_reads(self):
+ if self._mapped_reads is None:
+ self._update()
+ return self._mapped_reads
+ mapped_reads = property(_get_mapped_reads)
+
+ def _get_match_codes(self):
+ if self._match_codes is None:
+ self._update()
+ return self._match_codes
+ match_codes = property(_get_match_codes)
+
+ def get_elements(self):
+ lane = ElementTree.Element(ElandLane.LANE,
+ {'version':
+ unicode(ElandLane.XML_VERSION)})
+ sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME)
+ sample_tag.text = self.sample_name
+ lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
+ lane_tag.text = self.lane_id
+ genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
+ for k, v in self.genome_map.items():
+ item = ElementTree.SubElement(
+ genome_map, ElandLane.GENOME_ITEM,
+ {'name':k, 'value':unicode(v)})
+ mapped_reads = ElementTree.SubElement(lane, ElandLane.MAPPED_READS)
+ for k, v in self.mapped_reads.items():
+ item = ElementTree.SubElement(
+ mapped_reads, ElandLane.MAPPED_ITEM,
+ {'name':k, 'value':unicode(v)})
+ match_codes = ElementTree.SubElement(lane, ElandLane.MATCH_CODES)
+ for k, v in self.match_codes.items():
+ item = ElementTree.SubElement(
+ match_codes, ElandLane.MATCH_ITEM,
+ {'name':k, 'value':unicode(v)})
+ reads = ElementTree.SubElement(lane, ElandLane.READS)
+ reads.text = unicode(self.reads)
+
+ return lane
+
+ def set_elements(self, tree):
+ if tree.tag != ElandLane.LANE:
+ raise ValueError('Expecting %s' % (ElandLane.LANE,))
+
+ # reset dictionaries
+ self._mapped_reads = {}
+ self._match_codes = {}
+
+ for element in tree:
+ tag = element.tag.lower()
+ if tag == ElandLane.SAMPLE_NAME.lower():
+ self._sample_name = element.text
+ elif tag == ElandLane.LANE_ID.lower():
+ self._lane_id = element.text
+ elif tag == ElandLane.GENOME_MAP.lower():
+ for child in element:
+ name = child.attrib['name']
+ value = child.attrib['value']
+ self.genome_map[name] = value
+ elif tag == ElandLane.MAPPED_READS.lower():
+ for child in element:
+ name = child.attrib['name']
+ value = child.attrib['value']
+ self._mapped_reads[name] = int(value)
+ elif tag == ElandLane.MATCH_CODES.lower():
+ for child in element:
+ name = child.attrib['name']
+ value = int(child.attrib['value'])
+ self._match_codes[name] = value
+ elif tag == ElandLane.READS.lower():
+ self._reads = int(element.text)
+ else:
+ logging.warn("ElandLane unrecognized tag %s" % (element.tag,))
+
+def extract_eland_sequence(instream, outstream, start, end):
+ """
+ Extract a chunk of sequence out of an eland file
+ """
+ for line in instream:
+ record = line.split()
+ if len(record) > 1:
+ result = [record[0], record[1][start:end]]
+ else:
+ result = [record[0][start:end]]
+ outstream.write("\t".join(result))
+ outstream.write(os.linesep)
+
+class ELAND(object):
+ """
+ Summarize information from eland files
+ """
+ XML_VERSION = 1
+
+ ELAND = 'ElandCollection'
+ LANE = 'Lane'
+ LANE_ID = 'id'
+
+ def __init__(self, xml=None):
+ # we need information from the gerald config.xml
+ self.results = {}
+
+ if xml is not None:
+ self.set_elements(xml)
+
+ def __len__(self):
+ return len(self.results)
+
+ def keys(self):
+ return self.results.keys()
+
+ def values(self):
+ return self.results.values()
+
+ def items(self):
+ return self.results.items()
+
+ def __getitem__(self, key):
+ return self.results[key]
+
+ def get_elements(self):
+ root = ElementTree.Element(ELAND.ELAND,
+ {'version': unicode(ELAND.XML_VERSION)})
+ for lane_id, lane in self.results.items():
+ eland_lane = lane.get_elements()
+ eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
+ root.append(eland_lane)
+ return root
+
+ def set_elements(self, tree):
+ if tree.tag.lower() != ELAND.ELAND.lower():
+ raise ValueError('Expecting %s' % (ELAND.ELAND,))
+ for element in list(tree):
+ lane_id = element.attrib[ELAND.LANE_ID]
+ lane = ElandLane(xml=element)
+ self.results[lane_id] = lane
+
+def eland(basedir, gerald=None, genome_maps=None):
+ e = ELAND()
+
+ file_list = glob(os.path.join(basedir, "*_eland_result.txt"))
+ if len(file_list) == 0:
+ # lets handle compressed eland files too
+ file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
+
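+ # Eland result files are named like s_<lane>_eland_result.txt (see the
+ # runfolder tests), so the lane id is the second '_' separated field.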
+ for pathname in file_list:
+ # yes the lane_id is also being computed in ElandLane._update
+ # I didn't want to clutter up my constructor
+ # but I needed to persist the sample_name/lane_id for
+ # runfolder summary_report
+ path, name = os.path.split(pathname)
+ split_name = name.split('_')
+ lane_id = split_name[1]
+
+ if genome_maps is not None:
+ genome_map = genome_maps[lane_id]
+ elif gerald is not None:
+ genome_dir = gerald.lanes[lane_id].eland_genome
+ genome_map = build_genome_fasta_map(genome_dir)
+ else:
+ genome_map = {}
+
+ eland_result = ElandLane(pathname, genome_map)
+ e.results[lane_id] = eland_result
+ return e
--- /dev/null
+from xml import sax
+
+
+def get_cycles(recipe_xml_filepath):
+ """
+ returns the number of cycles found in Recipe*.xml
+ """
+ handler = CycleXmlHandler()
+ sax.parse(recipe_xml_filepath, handler)
+ return handler.cycle_count
+
+
+
+class CycleXmlHandler(sax.ContentHandler):
+
+ def __init__(self):
+ self.cycle_count = 0
+ self.in_protocol = False
+ sax.ContentHandler.__init__(self)
+
+
+ def startDocument(self):
+ self.cycle_count = 0
+ self.in_protocol = False
+
+
+ def startElement(self, name, attrs):
+
+ #Only count Incorporations as cycles if within
+ # the protocol section of the xml document.
+ if name == "Incorporation" and self.in_protocol:
+ #print 'Found a cycle!'
+ self.cycle_count += 1
+ return
+
+ elif name == 'Protocol':
+ #print 'In protocol'
+ self.in_protocol = True
+ return
+
+ #print 'Skipping: %s' % (name)
+
+
+ def endElement(self, name):
+
+ if name == 'Protocol':
+ #print 'End protocol'
+ self.in_protocol = False
--- /dev/null
+#!/usr/bin/env python
+
+from optparse import OptionParser, IndentedHelpFormatter
+from ConfigParser import SafeConfigParser
+
+import logging
+import os
+import sys
+import urllib2
+
+CONFIG_SYSTEM = '/etc/ga_frontend/ga_frontend.conf'
+CONFIG_USER = os.path.expanduser('~/.ga_frontend.conf')
+
+#Disable or enable commandline arg parsing; disabled by default.
+DISABLE_CMDLINE = True
+
+class FlowCellNotFound(Exception): pass
+class WebError404(Exception): pass
+
+class DummyOptions:
+ """
+ Used when command line parsing is disabled (the default).
+ """
+ def __init__(self):
+ self.url = None
+ self.output_filepath = None
+ self.flowcell = None
+ self.genome_dir = None
+
+class PreformattedDescriptionFormatter(IndentedHelpFormatter):
+
+ #def format_description(self, description):
+ #
+ # if description:
+ # return description + "\n"
+ # else:
+ # return ""
+
+ def format_epilog(self, epilog):
+ """
+ It was removing my preformatted epilog, so this should override
+ that behavior! Muhahaha!
+ """
+ if epilog:
+ return "\n" + epilog + "\n"
+ else:
+ return ""
+
+
+def constructOptionParser():
+ """
+ returns a pre-setup optparser
+ """
+ global DISABLE_CMDLINE
+
+ if DISABLE_CMDLINE:
+ return None
+
+ parser = OptionParser(formatter=PreformattedDescriptionFormatter())
+
+ parser.set_description('Retrieves eland config file from ga_frontend web frontend.')
+
+ parser.epilog = """
+Config File:
+ * %s (System wide)
+ * %s (User specific; overrides system)
+ * command line overrides all config file options
+
+ Example Config File:
+
+ [config_file_server]
+ base_host_url=http://somewhere.domain:port
+""" % (CONFIG_SYSTEM, CONFIG_USER)
+
+ #Special formatter for allowing preformatted description.
+ ##parser.format_epilog(PreformattedDescriptionFormatter())
+
+ parser.add_option("-u", "--url",
+ action="store", type="string", dest="url")
+
+ parser.add_option("-o", "--output",
+ action="store", type="string", dest="output_filepath")
+
+ parser.add_option("-f", "--flowcell",
+ action="store", type="string", dest="flowcell")
+
+ parser.add_option("-g", "--genome_dir",
+ action="store", type="string", dest="genome_dir")
+
+ #parser.set_default("url", "default")
+
+ return parser
+
+def constructConfigParser():
+ """
+ returns a pre-setup config parser
+ """
+ parser = SafeConfigParser()
+ parser.read([CONFIG_SYSTEM, CONFIG_USER])
+ if not parser.has_section('config_file_server'):
+ parser.add_section('config_file_server')
+ if not parser.has_section('local_setup'):
+ parser.add_section('local_setup')
+
+ return parser
+
+
+def getCombinedOptions():
+ """
+ Returns optparse options after they have been updated with ConfigParser
+ config files and merged with parsed commandline options.
+ """
+ cl_parser = constructOptionParser()
+ conf_parser = constructConfigParser()
+
+ if cl_parser is None:
+ options = DummyOptions()
+ else:
+ options, args = cl_parser.parse_args()
+
+ if options.url is None:
+ if conf_parser.has_option('config_file_server', 'base_host_url'):
+ options.url = conf_parser.get('config_file_server', 'base_host_url')
+
+ if options.genome_dir is None:
+ if conf_parser.has_option('local_setup', 'genome_dir'):
+ options.genome_dir = conf_parser.get('local_setup', 'genome_dir')
+
+ print 'USING OPTIONS:'
+ print ' URL:', options.url
+ print ' OUT:', options.output_filepath
+ print ' FC:', options.flowcell
+ print 'GDIR:', options.genome_dir
+ print ''
+
+ return options
+
+
+def saveConfigFile(flowcell, base_host_url, output_filepath):
+ """
+ retrieves the flowcell eland config file, given the base_host_url
+ (i.e. http://sub.domain.edu:port)
+ """
+ url = base_host_url + '/eland_config/%s/' % (flowcell)
+
+ f = open(output_filepath, 'w')
+ #try:
+ try:
+ web = urllib2.urlopen(url)
+ except urllib2.URLError, e:
+ errmsg = 'URLError: %d' % (e.code,)
+ logging.error(errmsg)
+ logging.error('opened %s' % (url,))
+ logging.error('%s' % ( e.read(),))
+ raise IOError(errmsg)
+
+ #except IOError, msg:
+ # if str(msg).find("Connection refused") >= 0:
+ # print 'Error: Connection refused for: %s' % (url)
+ # f.close()
+ # sys.exit(1)
+ # elif str(msg).find("Name or service not known") >= 0:
+ # print 'Error: Invalid domain or ip address for: %s' % (url)
+ # f.close()
+ # sys.exit(2)
+ # else:
+ # raise IOError, msg
+
+ data = web.read()
+
+ if data.find('Hmm, config file for') >= 0:
+ msg = "Flowcell (%s) not found in DB; full url(%s)" % (flowcell, url)
+ raise FlowCellNotFound, msg
+
+ if data.find('404 - Not Found') >= 0:
+ msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
+ "Did you get right port #?" % (flowcell, base_host_url, url)
+ raise FlowCellNotFound, msg
+
+ f.write(data)
+ web.close()
+ f.close()
+ logging.info('Wrote config file to %s' % (output_filepath,))
+
+
--- /dev/null
+import glob
+import re
+import os
+import sys
+import time
+import threading
+
+s_comment = re.compile('^#')
+s_general_read_len = re.compile('^READ_LENGTH ')
+s_read_len = re.compile('^[1-8]+:READ_LENGTH ')
+
+s_firecrest = None
+
+def _four_digit_num_in_string(num):
+ if num < 0:
+ pass
+ elif num < 10:
+ return '000' + str(num)
+ elif num < 100:
+ return '00' + str(num)
+ elif num < 1000:
+ return '0' + str(num)
+ elif num < 10000:
+ return str(num)
+
+ msg = 'Invalid number: %s' % (num)
+ raise ValueError, msg
+
+def _two_digit_num_in_string(num):
+ if num < 0:
+ pass
+ elif num < 10:
+ return '0' + str(num)
+ elif num < 100:
+ return str(num)
+
+ msg = 'Invalid number: %s' % (num)
+ raise ValueError, msg
+
+
+# FIRECREST PATTERNS
+# _p2f(<pattern>, lane, tile, cycle)
+PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'
+
+# _p2f(<pattern>, lane, tile)
+PATTERN_FIRECREST_INT = 's_%s_%s_02_int.txt'
+PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
+PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
+PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'
+PATTERN_FIRECREST_CLU1 = 's_%s_%s_01_1_clu.txt'
+PATTERN_FIRECREST_CLU2 = 's_%s_%s_01_2_clu.txt'
+PATTERN_FIRECREST_CLU3 = 's_%s_%s_01_3_clu.txt'
+PATTERN_FIRECREST_CLU4 = 's_%s_%s_01_4_clu.txt'
+
+
+# BUSTARD PATTERNS
+# _p2f(<pattern>, lane, tile)
+PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
+PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'
+
+
+
+# GERALD PATTERNS
+# _p2f(<pattern>, lane, tile)
+PATTERN_GERALD_ALLTMP = 's_%s_%s_all.txt.tmp'
+PATTERN_GERALD_QRAWTMP = 's_%s_%s_qraw.txt.tmp'
+PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
+PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
+PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
+PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
+PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
+PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
+PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
+PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
+PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
+PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
+PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'
+
+# _p2f(<pattern>, lane)
+PATTERN_GERALD_SEQPRETMP = 's_%s_seqpre.txt.tmp'
+PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
+PATTERN_GERALD_SIGMEANSTMP = 's_%s_Signal_Means.txt.tmp'
+PATTERN_GERALD_CALLPNG = 's_%s_call.png'
+PATTERN_GERALD_ALLPNG = 's_%s_all.png'
+PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
+PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
+PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
+PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
+PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
+PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
+PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
+PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
+PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
+PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'
+
+
+
+def _p2f(pattern, lane, tile=None, cycle=None):
+ """
+ Converts a pattern plus info into file names
+ """
+
+ # lane, and cycle provided (INVALID)
+ if tile is None and cycle is not None:
+ msg = "Handling of cycle without tile is not currently implemented."
+ raise ValueError, msg
+
+ # lane, tile, cycle provided
+ elif cycle:
+ return pattern % (lane,
+ _four_digit_num_in_string(tile),
+ _two_digit_num_in_string(cycle))
+
+ # lane, tile provided
+ elif tile:
+ return pattern % (lane, _four_digit_num_in_string(tile))
+
+ # lane provided
+ else:
+ return pattern % (lane)
+
+
+class GARunStatus(object):
+
+ def __init__(self, conf_filepath):
+ """
+ Given an eland config file in the top level directory
+ of a run, predicts the files that will be generated
+ during a run and provides methods for retrieving
+ (completed, total) for each step or entire run.
+ """
+ #print 'self._conf_filepath = %s' % (conf_filepath)
+ self._conf_filepath = conf_filepath
+ self._base_dir, junk = os.path.split(conf_filepath)
+ self._image_dir = os.path.join(self._base_dir, 'Images')
+
+ self.lanes = []
+ self.lane_read_length = {}
+ self.tiles = None
+ self.cycles = None
+
+ self.status = {}
+ self.status['firecrest'] = {}
+ self.status['bustard'] = {}
+ self.status['gerald'] = {}
+
+ self._process_config()
+ self._count_tiles()
+ self._count_cycles()
+ self._generate_expected()
+
+
+ def _process_config(self):
+ """
+ Grabs info from self._conf_filepath
+ """
+ f = open(self._conf_filepath, 'r')
+
+ for line in f:
+
+ #Skip comment lines for now.
+ if s_comment.search(line):
+ continue
+
+ mo = s_general_read_len.search(line)
+ if mo:
+ read_length = int(line[mo.end():])
+ #Handle general READ_LENGTH
+ for i in range(1,9):
+ self.lane_read_length[i] = read_length
+
+ mo = s_read_len.search(line)
+ if mo:
+ read_length = int(line[mo.end():])
+ lanes, junk = line.split(':')
+
+ #Convert lanes from string of lanes to list of lane #s.
+ lanes = [ int(i) for i in lanes ]
+
+
+ for lane in lanes:
+
+ #Keep track of which lanes are being run.
+ if lane not in self.lanes:
+ self.lanes.append(lane)
+
+ #Update with lane specific read lengths
+ self.lane_read_length[lane] = read_length
+
+ self.lanes.sort()
+
+
+ def _count_tiles(self):
+ """
+ Count the number of tiles being used
+ """
+ self.tiles = len(glob.glob(os.path.join(self._image_dir,
+ 'L001',
+ 'C1.1',
+ 's_1_*_a.tif')))
+
+ def _count_cycles(self):
+ """
+ Figures out the number of cycles that are available
+ """
+ #print 'self._image_dir = %s' % (self._image_dir)
+ cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
+ #print 'cycle_dirs = %s' % (cycle_dirs)
+ cycle_list = []
+ for cycle_dir in cycle_dirs:
+ junk, c = os.path.split(cycle_dir)
+ cycle_list.append(int(c[1:c.find('.')]))
+
+ self.cycles = max(cycle_list)
+
+
+
+
+ def _generate_expected(self):
+ """
+ generates a list of files we expect to find.
+ """
+
+ firecrest = self.status['firecrest']
+ bustard = self.status['bustard']
+ gerald = self.status['gerald']
+
+
+ for lane in self.lanes:
+ for tile in range(1,self.tiles+1):
+ for cycle in range(1, self.cycles+1):
+
+ ##########################
+ # LANE, TILE, CYCLE LAYER
+
+ # FIRECREST
+ firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False
+
+
+ ###################
+ # LANE, TILE LAYER
+
+ # FIRECREST
+ firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile)] = False
+ firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False
+ firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False
+ firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False
+ firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile)] = False
+ firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile)] = False
+ firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile)] = False
+ firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile)] = False
+
+
+ # BUSTARD
+ bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False
+ bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False
+
+
+ # GERALD
+ #gerald[_p2f(PATTERN_GERALD_ALLTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_QRAWTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False
+ gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False
+ #gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False
+
+ ###################
+ # LANE LAYER
+
+ # GERALD
+ #gerald[_p2f(PATTERN_GERALD_SEQPRETMP, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_SIGMEANSTMP, lane)] = False
+ gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False
+ gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False
+ gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False
+ gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False
+ gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False
+ #gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False
+ gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False
+
+
+
+ #################
+ # LOOPS FINISHED
+
+ # FIRECREST
+ firecrest['offsets_finished.txt'] = False
+ firecrest['finished.txt'] = False
+
+ # BUSTARD
+ bustard['finished.txt'] = False
+
+ # GERALD
+ gerald['tiles.txt'] = False
+ gerald['FullAll.htm'] = False
+ #gerald['All.htm.tmp'] = False
+ #gerald['Signal_Means.txt.tmp'] = False
+ #gerald['plotIntensity_for_IVC'] = False
+ #gerald['IVC.htm.tmp'] = False
+ gerald['FullError.htm'] = False
+ gerald['FullPerfect.htm'] = False
+ #gerald['Error.htm.tmp'] = False
+ #gerald['Perfect.htm.tmp'] = False
+ #gerald['Summary.htm.tmp'] = False
+ #gerald['Tile.htm.tmp'] = False
+ gerald['finished.txt'] = False
+
+ def statusFirecrest(self):
+ """
+ returns (<completed>, <total>)
+ """
+ firecrest = self.status['firecrest']
+ total = len(firecrest)
+ completed = firecrest.values().count(True)
+
+ return (completed, total)
+
+
+ def statusBustard(self):
+ """
+ returns (<completed>, <total>)
+ """
+ bustard = self.status['bustard']
+ total = len(bustard)
+ completed = bustard.values().count(True)
+
+ return (completed, total)
+
+
+ def statusGerald(self):
+ """
+ returns (<completed>, <total>)
+ """
+ gerald = self.status['gerald']
+ total = len(gerald)
+ completed = gerald.values().count(True)
+
+ return (completed, total)
+
+
+ def statusTotal(self):
+ """
+ returns (<completed>, <total>)
+ """
+ #f = firecrest c = completed
+ #b = bustard t = total
+ #g = gerald
+ fc, ft = self.statusFirecrest()
+ bc, bt = self.statusBustard()
+ gc, gt = self.statusGerald()
+
+ return (fc+bc+gc, ft+bt+gt)
+
+
+ def statusReport(self):
+ """
+ Generate the basic percent complete report
+ """
+ def _percentCompleted(completed, total):
+ """
+ Returns percent completed as a float
+ """
+ return (completed / float(total)) * 100
+
+ fc, ft = self.statusFirecrest()
+ bc, bt = self.statusBustard()
+ gc, gt = self.statusGerald()
+ tc, tt = self.statusTotal()
+
+ fp = _percentCompleted(fc, ft)
+ bp = _percentCompleted(bc, bt)
+ gp = _percentCompleted(gc, gt)
+ tp = _percentCompleted(tc, tt)
+
+ report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
+ ' Bustard: %s%% (%s/%s)' % (bp, bc, bt),
+ ' Gerald: %s%% (%s/%s)' % (gp, gc, gt),
+ '-----------------------',
+ ' Total: %s%% (%s/%s)' % (tp, tc, tt),
+ ]
+ return report
+
+ def updateFirecrest(self, filename):
+ """
+ Marks firecrest filename as being completed.
+ """
+ self.status['firecrest'][filename] = True
+
+
+ def updateBustard(self, filename):
+ """
+ Marks bustard filename as being completed.
+ """
+ self.status['bustard'][filename] = True
+
+
+ def updateGerald(self, filename):
+ """
+ Marks gerald filename as being completed.
+ """
+ self.status['gerald'][filename] = True
+
+
+
+##################################################
+# Functions to be called by Thread(target=<func>)
+def _cmdLineStatusMonitorFunc(conf_info):
+ """
+ Given a ConfigInfo object, provides status to stdout.
+
+ You should probably use startCmdLineStatusMonitor()
+ instead of this function.
+
+ Use with:
+ t = threading.Thread(target=_cmdLineStatusMonitorFunc,
+ args=[conf_info])
+ t.setDaemon(True)
+ t.start()
+ """
+ SLEEP_AMOUNT = 30
+
+ while 1:
+ if conf_info.status is None:
+ print "No status object yet."
+ time.sleep(SLEEP_AMOUNT)
+ continue
+
+ report = conf_info.status.statusReport()
+ print os.linesep.join(report)
+ print
+
+ time.sleep(SLEEP_AMOUNT)
+
+
+#############################################
+# Start monitor thread convenience functions
+def startCmdLineStatusMonitor(conf_info):
+ """
+ Starts a command line status monitor given a conf_info object.
+ """
+ t = threading.Thread(target=_cmdLineStatusMonitorFunc, args=[conf_info])
+ t.setDaemon(True)
+ t.start()
+
+from optparse import OptionParser
+def make_parser():
+ usage = "%prog: config file"
+
+ parser = OptionParser(usage)
+ return parser
+
+def main(cmdline=None):
+ parser = make_parser()
+ opt, args = parser.parse_args(cmdline)
+
+ if len(args) != 1:
+ parser.error("need name of configuration file")
+
+ status = GARunStatus(args[0])
+ print os.linesep.join(status.statusReport())
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+"""
+Core information needed to inspect a runfolder.
+"""
+from glob import glob
+import logging
+import os
+import re
+import shutil
+import stat
+import subprocess
+import sys
+import time
+
+try:
+ from xml.etree import ElementTree
+except ImportError, e:
+ from elementtree import ElementTree
+
+EUROPEAN_STRPTIME = "%d-%m-%Y"
+EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})"
+VERSION_RE = "([0-9\.]+)"
+USER_RE = "([a-zA-Z0-9]+)"
+LANES_PER_FLOWCELL = 8
+
+from htsworkflow.util.alphanum import alphanum
+from htsworkflow.util.ethelp import indent, flatten
+
+
+class PipelineRun(object):
+ """
+ Capture "interesting" information about a pipeline run
+ """
+ XML_VERSION = 1
+ PIPELINE_RUN = 'PipelineRun'
+ FLOWCELL_ID = 'FlowcellID'
+
+ def __init__(self, pathname=None, firecrest=None, bustard=None, gerald=None, xml=None):
+ if pathname is not None:
+ self.pathname = os.path.normpath(pathname)
+ else:
+ self.pathname = None
+ self._name = None
+ self._flowcell_id = None
+ self.firecrest = firecrest
+ self.bustard = bustard
+ self.gerald = gerald
+
+ if xml is not None:
+ self.set_elements(xml)
+
+ def _get_flowcell_id(self):
+ # extract flowcell ID
+ if self._flowcell_id is None:
+ config_dir = os.path.join(self.pathname, 'Config')
+ flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
+ if os.path.exists(flowcell_id_path):
+ flowcell_id_tree = ElementTree.parse(flowcell_id_path)
+ self._flowcell_id = flowcell_id_tree.findtext('Text')
+ else:
+ path_fields = self.pathname.split('_')
+ if len(path_fields) > 0:
+ # guessing last element of filename
+ flowcell_id = path_fields[-1]
+ else:
+ flowcell_id = 'unknown'
+
+ logging.warning(
+ "Flowcell id was not found, guessing %s" % (
+ flowcell_id))
+ self._flowcell_id = flowcell_id
+ return self._flowcell_id
+ flowcell_id = property(_get_flowcell_id)
+
+ def get_elements(self):
+ """
+ make one master xml file from all of our sub-components.
+ """
+ root = ElementTree.Element(PipelineRun.PIPELINE_RUN)
+ flowcell = ElementTree.SubElement(root, PipelineRun.FLOWCELL_ID)
+ flowcell.text = self.flowcell_id
+ root.append(self.firecrest.get_elements())
+ root.append(self.bustard.get_elements())
+ root.append(self.gerald.get_elements())
+ return root
+
+ def set_elements(self, tree):
+ # this file gets imported by all the others,
+ # so we need to hide the imports to avoid cyclic imports
+ from htsworkflow.pipeline import firecrest
+ from htsworkflow.pipeline import bustard
+ from htsworkflow.pipeline import gerald
+
+ tag = tree.tag.lower()
+ if tag != PipelineRun.PIPELINE_RUN.lower():
+ raise ValueError('Pipeline Run Expecting %s got %s' % (
+ PipelineRun.PIPELINE_RUN, tag))
+ for element in tree:
+ tag = element.tag.lower()
+ if tag == PipelineRun.FLOWCELL_ID.lower():
+ self._flowcell_id = element.text
+ #ok the xword.Xword.XWORD pattern for module.class.constant is lame
+ elif tag == firecrest.Firecrest.FIRECREST.lower():
+ self.firecrest = firecrest.Firecrest(xml=element)
+ elif tag == bustard.Bustard.BUSTARD.lower():
+ self.bustard = bustard.Bustard(xml=element)
+ elif tag == gerald.Gerald.GERALD.lower():
+ self.gerald = gerald.Gerald(xml=element)
+ else:
+ logging.warn('PipelineRun unrecognized tag %s' % (tag,))
+
+ def _get_run_name(self):
+ """
+ Given a run tuple, find the latest date and use that as our name
+ """
+ if self._name is None:
+ tmax = max(self.firecrest.time, self.bustard.time, self.gerald.time)
+ timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax))
+ self._name = 'run_'+self.flowcell_id+"_"+timestamp+'.xml'
+ return self._name
+ name = property(_get_run_name)
+
+ def save(self, destdir=None):
+ if destdir is None:
+ destdir = ''
+ logging.info("Saving run report "+ self.name)
+ xml = self.get_elements()
+ indent(xml)
+ dest_pathname = os.path.join(destdir, self.name)
+ ElementTree.ElementTree(xml).write(dest_pathname)
+
+ def load(self, filename):
+ logging.info("Loading run report from " + filename)
+ tree = ElementTree.parse(filename).getroot()
+ self.set_elements(tree)
+
+def get_runs(runfolder):
+ """
+ Search through a run folder for all the various sub component runs
+ and then return a PipelineRun for each different combination.
+
+ For example if there are two different GERALD runs, this will
+ generate two different PipelineRun objects, that differ
+ in their gerald component.
+ """
+ from htsworkflow.pipeline import firecrest
+ from htsworkflow.pipeline import bustard
+ from htsworkflow.pipeline import gerald
+
+ datadir = os.path.join(runfolder, 'Data')
+
+ logging.info('Searching for runs in ' + datadir)
+ runs = []
+ for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
+ f = firecrest.firecrest(firecrest_pathname)
+ bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
+ for bustard_pathname in glob(bustard_glob):
+ b = bustard.bustard(bustard_pathname)
+ gerald_glob = os.path.join(bustard_pathname, 'GERALD*')
+ for gerald_pathname in glob(gerald_glob):
+ try:
+ g = gerald.gerald(gerald_pathname)
+ runs.append(PipelineRun(runfolder, f, b, g))
+ except IOError, e:
+ print "Ignoring", str(e)
+ return runs
+
+
+def extract_run_parameters(runs):
+ """
+ Search through runfolder_path for various runs and grab their parameters
+ """
+ for run in runs:
+ run.save()
+
+def summarize_mapped_reads(mapped_reads):
+ """
+ Summarize per chromosome reads into a genome count
+ But handle spike-in/contamination symlinks separately.
+ """
+ summarized_reads = {}
+ genome_reads = 0
+ genome = 'unknown'
+ for k, v in mapped_reads.items():
+ path, k = os.path.split(k)
+ if len(path) > 0:
+ genome = path
+ genome_reads += v
+ else:
+ summarized_reads[k] = summarized_reads.setdefault(k, 0) + v
+ summarized_reads[genome] = genome_reads
+ return summarized_reads
+
+def summary_report(runs):
+ """
+ Summarize cluster numbers and mapped read counts for a runfolder
+ """
+ report = []
+ for run in runs:
+ # print a run name?
+ report.append('Summary for %s' % (run.name,))
+ # sort the report
+ eland_keys = run.gerald.eland_results.results.keys()
+ eland_keys.sort(alphanum)
+
+ lane_results = run.gerald.summary.lane_results
+ for lane_id in eland_keys:
+ result = run.gerald.eland_results.results[lane_id]
+ report.append("Sample name %s" % (result.sample_name))
+ report.append("Lane id %s" % (result.lane_id,))
+ cluster = lane_results[result.lane_id].cluster
+ report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
+ report.append("Total Reads: %d" % (result.reads))
+ mc = result._match_codes
+ nm = mc['NM']
+ nm_percent = float(nm)/result.reads * 100
+ qc = mc['QC']
+ qc_percent = float(qc)/result.reads * 100
+
+ report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
+ report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
+ report.append('Unique (0,1,2 mismatches) %d %d %d' % \
+ (mc['U0'], mc['U1'], mc['U2']))
+ report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
+ (mc['R0'], mc['R1'], mc['R2']))
+ report.append("Mapped Reads")
+ mapped_reads = summarize_mapped_reads(result.mapped_reads)
+ for name, counts in mapped_reads.items():
+ report.append(" %s: %d" % (name, counts))
+ report.append('---')
+ report.append('')
+ return os.linesep.join(report)
+
+def extract_results(runs, output_base_dir=None):
+ if output_base_dir is None:
+ output_base_dir = os.getcwd()
+
+ for r in runs:
+ result_dir = os.path.join(output_base_dir, r.flowcell_id)
+ logging.info("Using %s as result directory" % (result_dir,))
+ if not os.path.exists(result_dir):
+ os.mkdir(result_dir)
+
+ # create cycle_dir
+ cycle = "C%d-%d" % (r.firecrest.start, r.firecrest.stop)
+ logging.info("Filling in %s" % (cycle,))
+ cycle_dir = os.path.join(result_dir, cycle)
+ if os.path.exists(cycle_dir):
+ logging.error("%s already exists, not overwriting" % (cycle_dir,))
+ continue
+ else:
+ os.mkdir(cycle_dir)
+
+ # copy stuff out of the main run
+ g = r.gerald
+
+ # save run file
+ r.save(cycle_dir)
+
+ # Copy Summary.htm
+ summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
+ if os.path.exists(summary_path):
+ logging.info('Copying %s to %s' % (summary_path, cycle_dir))
+ shutil.copy(summary_path, cycle_dir)
+ else:
+ logging.info('Summary file %s was not found' % (summary_path,))
+
+ # tar score files
+ score_files = []
+ for f in os.listdir(g.pathname):
+ if re.match('.*_score.txt', f):
+ score_files.append(f)
+
+ tar_cmd = ['/bin/tar', 'c'] + score_files
+ bzip_cmd = [ 'bzip2', '-9', '-c' ]
+ tar_dest_name = os.path.join(cycle_dir, 'scores.tar.bz2')
+ tar_dest = open(tar_dest_name, 'w')
+ logging.info("Compressing score files in %s" % (g.pathname,))
+ logging.info("Running tar: " + " ".join(tar_cmd[:10]))
+ logging.info("Running bzip2: " + " ".join(bzip_cmd))
+ logging.info("Writing to %s" %(tar_dest_name))
+
+ tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, cwd=g.pathname)
+ bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
+ tar.wait()
+
+ # copy & bzip eland files
+ for eland_lane in g.eland_results.values():
+ source_name = eland_lane.pathname
+ path, name = os.path.split(eland_lane.pathname)
+ dest_name = os.path.join(cycle_dir, name+'.bz2')
+
+ args = ['bzip2', '-9', '-c', source_name]
+ logging.info('Running: %s' % ( " ".join(args) ))
+ bzip_dest = open(dest_name, 'w')
+ bzip = subprocess.Popen(args, stdout=bzip_dest)
+ logging.info('Saving to %s' % (dest_name, ))
+ bzip.wait()
+
+def clean_runs(runs):
+ """
+ Clean up run folders to optimize for compression.
+ """
+ # TODO: implement this.
+ # rm RunLog*.xml
+ # rm pipeline_*.txt
+ # rm gclog.txt
+ # rm NetCopy.log
+ # rm nfn.log
+ # rm Images/L*
+ # cd Data/C1-*_Firecrest*
+ # make clean_intermediate
+
+ pass
--- /dev/null
+import unittest
+
+from StringIO import StringIO
+from htsworkflow.pipeline import genome_mapper
+
+class testGenomeMapper(unittest.TestCase):
+ def test_construct_mapper(self):
+ genomes = {
+ 'Arabidopsis thaliana': {'v01212004': '/arabidopsis'},
+ 'Homo sapiens': {'hg18': '/hg18'},
+ 'Mus musculus': {'mm8': '/mm8',
+ 'mm9': '/mm9',
+ 'mm10': '/mm10'},
+ 'Phage': {'174': '/phi'},
+ }
+ genome_map = genome_mapper.constructMapperDict(genomes)
+
+ self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
+ self.failUnlessEqual("%(Phage|174)s" % (genome_map), "/phi")
+ self.failUnlessEqual("%(Mus musculus)s" % (genome_map), "/mm10")
+ self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8")
+ self.failUnlessEqual("%(Mus musculus|mm10)s" % (genome_map), "/mm10")
+
+ self.failUnlessEqual(len(genome_map.keys()), 6)
+ self.failUnlessEqual(len(genome_map.values()), 6)
+ self.failUnlessEqual(len(genome_map.items()), 6)
+
+
+def suite():
+ return unittest.makeSuite(testGenomeMapper,'test')
+
+if __name__ == "__main__":
+ unittest.main(defaultTest="suite")
--- /dev/null
+#!/usr/bin/env python
+
+from datetime import datetime, date
+import os
+import tempfile
+import shutil
+import unittest
+
+from htsworkflow.pipeline import firecrest
+from htsworkflow.pipeline import bustard
+from htsworkflow.pipeline import gerald
+from htsworkflow.pipeline import runfolder
+from htsworkflow.pipeline.runfolder import ElementTree
+
+
+def make_flowcell_id(runfolder_dir, flowcell_id=None):
+ if flowcell_id is None:
+ flowcell_id = '207BTAAXY'
+
+ config = """<?xml version="1.0"?>
+<FlowcellId>
+ <Text>%s</Text>
+</FlowcellId>""" % (flowcell_id,)
+ config_dir = os.path.join(runfolder_dir, 'Config')
+
+ if not os.path.exists(config_dir):
+ os.mkdir(config_dir)
+ pathname = os.path.join(config_dir, 'FlowcellId.xml')
+ f = open(pathname,'w')
+ f.write(config)
+ f.close()
+
+def make_matrix(matrix_dir):
+ contents = """# Auto-generated frequency response matrix
+> A
+> C
+> G
+> T
+0.77 0.15 -0.04 -0.04
+0.76 1.02 -0.05 -0.06
+-0.10 -0.10 1.17 -0.03
+-0.13 -0.12 0.80 1.27
+"""
+ s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
+ f = open(s_matrix, 'w')
+ f.write(contents)
+ f.close()
+
+def make_phasing_params(bustard_dir):
+ for lane in range(1,9):
+ pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
+ f = open(pathname, 'w')
+ f.write("""<Parameters>
+ <Phasing>0.009900</Phasing>
+ <Prephasing>0.003500</Prephasing>
+</Parameters>
+""")
+ f.close()
+
+def make_gerald_config(gerald_dir):
+ config_xml = """<RunParameters>
+<ChipWideRunParameters>
+ <ANALYSIS>default</ANALYSIS>
+ <BAD_LANES></BAD_LANES>
+ <BAD_TILES></BAD_TILES>
+ <CONTAM_DIR></CONTAM_DIR>
+ <CONTAM_FILE></CONTAM_FILE>
+ <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
+ <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
+ <ELAND_REPEAT></ELAND_REPEAT>
+ <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
+ <EMAIL_LIST>diane</EMAIL_LIST>
+ <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
+ <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
+ <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
+ <FORCE>1</FORCE>
+ <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
+ <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
+ <HAMSTER_FLAG>genome</HAMSTER_FLAG>
+ <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
+ <POST_RUN_COMMAND></POST_RUN_COMMAND>
+ <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
+ <PURE_BASES>12</PURE_BASES>
+ <QF_PARAMS>'((CHASTITY>=0.6))'</QF_PARAMS>
+ <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
+ <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
+ <READ_LENGTH>32</READ_LENGTH>
+ <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
+ <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
+ <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
+ <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
+ <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
+ <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
+ <TILE_ROOT>s</TILE_ROOT>
+ <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
+ <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
+ <USE_BASES>all</USE_BASES>
+ <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
+</ChipWideRunParameters>
+<LaneSpecificRunParameters>
+ <ANALYSIS>
+ <s_1>eland</s_1>
+ <s_2>eland</s_2>
+ <s_3>eland</s_3>
+ <s_4>eland</s_4>
+ <s_5>eland</s_5>
+ <s_6>eland</s_6>
+ <s_7>eland</s_7>
+ <s_8>eland</s_8>
+ </ANALYSIS>
+ <ELAND_GENOME>
+ <s_1>/g/dm3</s_1>
+ <s_2>/g/equcab1</s_2>
+ <s_3>/g/equcab1</s_3>
+ <s_4>/g/canfam2</s_4>
+ <s_5>/g/hg18</s_5>
+ <s_6>/g/hg18</s_6>
+ <s_7>/g/hg18</s_7>
+ <s_8>/g/hg18</s_8>
+ </ELAND_GENOME>
+ <READ_LENGTH>
+ <s_1>32</s_1>
+ <s_2>32</s_2>
+ <s_3>32</s_3>
+ <s_4>32</s_4>
+ <s_5>32</s_5>
+ <s_6>32</s_6>
+ <s_7>32</s_7>
+ <s_8>32</s_8>
+ </READ_LENGTH>
+ <USE_BASES>
+ <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
+ <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
+ <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
+ <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
+ <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
+ <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
+ <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
+ <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
+ </USE_BASES>
+</LaneSpecificRunParameters>
+</RunParameters>
+"""
+ pathname = os.path.join(gerald_dir, 'config.xml')
+ f = open(pathname,'w')
+ f.write(config_xml)
+ f.close()
+
+
+def make_summary_htm(gerald_dir):
+ summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
+<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
+<html>
+<body>
+
+<a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
+<h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
+<h2><br></br>Chip Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr><td>Machine</td><td>HWI-EAS229</td></tr>
+<tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
+<tr><td>Chip ID</td><td>unknown</td></tr>
+</table>
+<h2><br></br>Lane Parameter Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane</td>
+<td>Sample ID</td>
+<td>Sample Target</td>
+<td>Sample Type</td>
+<td>Length</td>
+<td>Filter</td>
+<td>Tiles</td>
+</tr>
+<tr>
+<td>1</td>
+<td>unknown</td>
+<td>dm3</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane1">Lane 1</a></td>
+</tr>
+<tr>
+<td>2</td>
+<td>unknown</td>
+<td>equcab1</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane2">Lane 2</a></td>
+</tr>
+<tr>
+<td>3</td>
+<td>unknown</td>
+<td>equcab1</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane3">Lane 3</a></td>
+</tr>
+<tr>
+<td>4</td>
+<td>unknown</td>
+<td>canfam2</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane4">Lane 4</a></td>
+</tr>
+<tr>
+<td>5</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane5">Lane 5</a></td>
+</tr>
+<tr>
+<td>6</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane6">Lane 6</a></td>
+</tr>
+<tr>
+<td>7</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane7">Lane 7</a></td>
+</tr>
+<tr>
+<td>8</td>
+<td>unknown</td>
+<td>hg18</td>
+<td>ELAND</td>
+<td>32</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td><a href="#Lane8">Lane 8</a></td>
+</tr>
+</table>
+<h2><br></br>Lane Results Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+
+<td>Lane </td>
+<td>Clusters </td>
+<td>Av 1st Cycle Int </td>
+<td>% intensity after 20 cycles </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td> % Error Rate (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>17421 +/- 2139</td>
+<td>7230 +/- 801</td>
+<td>23.73 +/- 10.79</td>
+<td>13.00 +/- 22.91</td>
+<td>32.03 +/- 18.45</td>
+<td>6703.57 +/- 3753.85</td>
+<td>4.55 +/- 4.81</td>
+</tr>
+<tr>
+<td>2</td>
+<td>20311 +/- 2402</td>
+<td>7660 +/- 678</td>
+<td>17.03 +/- 4.40</td>
+<td>40.74 +/- 30.33</td>
+<td>29.54 +/- 9.03</td>
+<td>5184.02 +/- 1631.54</td>
+<td>3.27 +/- 3.94</td>
+</tr>
+<tr>
+<td>3</td>
+<td>20193 +/- 2399</td>
+<td>7700 +/- 797</td>
+<td>15.75 +/- 3.30</td>
+<td>56.56 +/- 17.16</td>
+<td>27.33 +/- 7.48</td>
+<td>4803.49 +/- 1313.31</td>
+<td>3.07 +/- 2.86</td>
+</tr>
+<tr>
+<td>4</td>
+<td>15537 +/- 2531</td>
+<td>7620 +/- 1392</td>
+<td>15.37 +/- 3.79</td>
+<td>63.05 +/- 18.30</td>
+<td>15.88 +/- 4.99</td>
+<td>3162.13 +/- 962.59</td>
+<td>3.11 +/- 2.22</td>
+</tr>
+<tr>
+<td>5</td>
+<td>32047 +/- 3356</td>
+<td>8093 +/- 831</td>
+<td>23.79 +/- 6.18</td>
+<td>53.36 +/- 18.06</td>
+<td>48.04 +/- 13.77</td>
+<td>9866.23 +/- 2877.30</td>
+<td>2.26 +/- 1.16</td>
+</tr>
+<tr>
+<td>6</td>
+<td>32946 +/- 4753</td>
+<td>8227 +/- 736</td>
+<td>24.07 +/- 4.69</td>
+<td>54.65 +/- 12.57</td>
+<td>50.98 +/- 10.54</td>
+<td>10468.86 +/- 2228.53</td>
+<td>2.21 +/- 2.33</td>
+</tr>
+<tr>
+<td>7</td>
+<td>39504 +/- 4171</td>
+<td>8401 +/- 785</td>
+<td>22.55 +/- 4.56</td>
+<td>45.22 +/- 10.34</td>
+<td>48.41 +/- 9.67</td>
+<td>9829.40 +/- 1993.20</td>
+<td>2.26 +/- 1.11</td>
+</tr>
+<tr>
+<td>8</td>
+<td>37998 +/- 3792</td>
+<td>8443 +/- 1211</td>
+<td>39.03 +/- 7.52</td>
+<td>42.16 +/- 12.35</td>
+<td>40.98 +/- 14.89</td>
+<td>8128.87 +/- 3055.34</td>
+<td>3.57 +/- 2.77</td>
+</tr>
+</table>
+</body>
+</html>
+"""
+ pathname = os.path.join(gerald_dir, 'Summary.htm')
+ f = open(pathname, 'w')
+ f.write(summary_htm)
+ f.close()
+
+def make_eland_results(gerald_dir):
+ eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
+>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
+>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
+>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
+"""
+ for i in range(1,9):
+ pathname = os.path.join(gerald_dir,
+ 's_%d_eland_result.txt' % (i,))
+ f = open(pathname, 'w')
+ f.write(eland_result)
+ f.close()
+
+class RunfolderTests(unittest.TestCase):
+ """
+ Test components of the runfolder processing code
+ which includes firecrest, bustard, and gerald
+ """
+ def setUp(self):
+ # make a fake runfolder directory
+ self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
+
+ self.runfolder_dir = os.path.join(self.temp_dir,
+ '080102_HWI-EAS229_0010_207BTAAXX')
+ os.mkdir(self.runfolder_dir)
+
+ self.data_dir = os.path.join(self.runfolder_dir, 'Data')
+ os.mkdir(self.data_dir)
+
+ self.firecrest_dir = os.path.join(self.data_dir,
+ 'C1-33_Firecrest1.8.28_12-04-2008_diane'
+ )
+ os.mkdir(self.firecrest_dir)
+ self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
+ os.mkdir(self.matrix_dir)
+ make_matrix(self.matrix_dir)
+
+ self.bustard_dir = os.path.join(self.firecrest_dir,
+ 'Bustard1.8.28_12-04-2008_diane')
+ os.mkdir(self.bustard_dir)
+ make_phasing_params(self.bustard_dir)
+
+ self.gerald_dir = os.path.join(self.bustard_dir,
+ 'GERALD_12-04-2008_diane')
+ os.mkdir(self.gerald_dir)
+ make_gerald_config(self.gerald_dir)
+ make_summary_htm(self.gerald_dir)
+ make_eland_results(self.gerald_dir)
+
+ def tearDown(self):
+ shutil.rmtree(self.temp_dir)
+
+ def test_firecrest(self):
+ """
+ Construct a firecrest object
+ """
+ f = firecrest.firecrest(self.firecrest_dir)
+ self.failUnlessEqual(f.version, '1.8.28')
+ self.failUnlessEqual(f.start, 1)
+ self.failUnlessEqual(f.stop, 33)
+ self.failUnlessEqual(f.user, 'diane')
+ self.failUnlessEqual(f.date, date(2008,4,12))
+
+ xml = f.get_elements()
+ # just make sure that element tree can serialize the tree
+ xml_str = ElementTree.tostring(xml)
+
+ f2 = firecrest.Firecrest(xml=xml)
+ self.failUnlessEqual(f.version, f2.version)
+ self.failUnlessEqual(f.start, f2.start)
+ self.failUnlessEqual(f.stop, f2.stop)
+ self.failUnlessEqual(f.user, f2.user)
+ self.failUnlessEqual(f.date, f2.date)
+
+ def test_bustard(self):
+ """
+ construct a bustard object
+ """
+ b = bustard.bustard(self.bustard_dir)
+ self.failUnlessEqual(b.version, '1.8.28')
+ self.failUnlessEqual(b.date, date(2008,4,12))
+ self.failUnlessEqual(b.user, 'diane')
+ self.failUnlessEqual(len(b.phasing), 8)
+ self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
+
+ xml = b.get_elements()
+ b2 = bustard.Bustard(xml=xml)
+ self.failUnlessEqual(b.version, b2.version)
+ self.failUnlessEqual(b.date, b2.date )
+ self.failUnlessEqual(b.user, b2.user)
+ self.failUnlessEqual(len(b.phasing), len(b2.phasing))
+ for key in b.phasing.keys():
+ self.failUnlessEqual(b.phasing[key].lane,
+ b2.phasing[key].lane)
+ self.failUnlessEqual(b.phasing[key].phasing,
+ b2.phasing[key].phasing)
+ self.failUnlessEqual(b.phasing[key].prephasing,
+ b2.phasing[key].prephasing)
+
+ def test_gerald(self):
+ # need to update gerald and make tests for it
+ g = gerald.gerald(self.gerald_dir)
+
+ self.failUnlessEqual(g.version,
+ '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
+ self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
+ self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
+ self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
+
+
+ # list of genomes, matches what was defined up in
+ # make_gerald_config.
+ # the leading None pads the list so that lanes 1..8 map to
+ # indices 1..8 instead of python's default 0..7
+ genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
+ '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
+
+ # test lane specific parameters from gerald config file
+ for i in range(1,9):
+ cur_lane = g.lanes[str(i)]
+ self.failUnlessEqual(cur_lane.analysis, 'eland')
+ self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
+ self.failUnlessEqual(cur_lane.read_length, '32')
+ self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
+
+ # test data extracted from summary file
+ clusters = [None,
+ (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
+ (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
+
+ for i in range(1,9):
+ summary_lane = g.summary[str(i)]
+ self.failUnlessEqual(summary_lane.cluster, clusters[i])
+ self.failUnlessEqual(summary_lane.lane, str(i))
+
+ xml = g.get_elements()
+ # just make sure that element tree can serialize the tree
+ xml_str = ElementTree.tostring(xml)
+ g2 = gerald.Gerald(xml=xml)
+
+ # do it all again after extracting from the xml file
+ self.failUnlessEqual(g.version, g2.version)
+ self.failUnlessEqual(g.date, g2.date)
+ self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
+ self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
+
+ # test lane specific parameters from gerald config file
+ for i in range(1,9):
+ g_lane = g.lanes[str(i)]
+ g2_lane = g2.lanes[str(i)]
+ self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
+ self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
+ self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
+ self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
+
+ # test (some) summary elements
+ for i in range(1,9):
+ g_summary = g.summary[str(i)]
+ g2_summary = g2.summary[str(i)]
+ self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
+ self.failUnlessEqual(g_summary.lane, g2_summary.lane)
+
+ g_eland = g.eland_results
+ g2_eland = g2.eland_results
+ for lane in g_eland.keys():
+ self.failUnlessEqual(g_eland[lane].reads,
+ g2_eland[lane].reads)
+ self.failUnlessEqual(len(g_eland[lane].mapped_reads),
+ len(g2_eland[lane].mapped_reads))
+ for k in g_eland[lane].mapped_reads.keys():
+ self.failUnlessEqual(g_eland[lane].mapped_reads[k],
+ g2_eland[lane].mapped_reads[k])
+
+ self.failUnlessEqual(len(g_eland[lane].match_codes),
+ len(g2_eland[lane].match_codes))
+ for k in g_eland[lane].match_codes.keys():
+ self.failUnlessEqual(g_eland[lane].match_codes[k],
+ g2_eland[lane].match_codes[k])
+
+
+ def test_eland(self):
+ dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
+ 'chr2L.fa': 'dm3/chr2L.fa',
+ 'Lambda.fa': 'Lambda.fa'}
+ genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
+ '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+ eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
+
+ for i in range(1,9):
+ lane = eland[str(i)]
+ self.failUnlessEqual(lane.reads, 4)
+ self.failUnlessEqual(lane.sample_name, "s")
+ self.failUnlessEqual(lane.lane_id, unicode(i))
+ self.failUnlessEqual(len(lane.mapped_reads), 3)
+ self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
+ self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
+ self.failUnlessEqual(lane.match_codes['U1'], 2)
+ self.failUnlessEqual(lane.match_codes['NM'], 1)
+
+ xml = eland.get_elements()
+ # just make sure that element tree can serialize the tree
+ xml_str = ElementTree.tostring(xml)
+ e2 = gerald.ELAND(xml=xml)
+
+ for i in range(1,9):
+ l1 = eland[str(i)]
+ l2 = e2[str(i)]
+ self.failUnlessEqual(l1.reads, l2.reads)
+ self.failUnlessEqual(l1.sample_name, l2.sample_name)
+ self.failUnlessEqual(l1.lane_id, l2.lane_id)
+ self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
+ self.failUnlessEqual(len(l1.mapped_reads), 3)
+ for k in l1.mapped_reads.keys():
+ self.failUnlessEqual(l1.mapped_reads[k],
+ l2.mapped_reads[k])
+
+ self.failUnlessEqual(len(l1.match_codes), 9)
+ self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
+ for k in l1.match_codes.keys():
+ self.failUnlessEqual(l1.match_codes[k],
+ l2.match_codes[k])
+
+ def test_runfolder(self):
+ runs = runfolder.get_runs(self.runfolder_dir)
+
+ # do we get the flowcell id from the filename?
+ self.failUnlessEqual(len(runs), 1)
+ self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
+
+ # do we get the flowcell id from the FlowcellId.xml file
+ make_flowcell_id(self.runfolder_dir, '207BTAAXY')
+ runs = runfolder.get_runs(self.runfolder_dir)
+ self.failUnlessEqual(len(runs), 1)
+ self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
+
+ r1 = runs[0]
+ xml = r1.get_elements()
+ xml_str = ElementTree.tostring(xml)
+
+ r2 = runfolder.PipelineRun(xml=xml)
+ self.failUnlessEqual(r1.name, r2.name)
+ self.failIfEqual(r2.firecrest, None)
+ self.failIfEqual(r2.bustard, None)
+ self.failIfEqual(r2.gerald, None)
+
+
+def suite():
+ return unittest.makeSuite(RunfolderTests,'test')
+
+if __name__ == "__main__":
+ unittest.main(defaultTest="suite")
+
--- /dev/null
+#!/usr/bin/env python
+
+from datetime import datetime, date
+import os
+import tempfile
+import shutil
+import unittest
+
+from htsworkflow.pipeline import firecrest
+from htsworkflow.pipeline import bustard
+from htsworkflow.pipeline import gerald
+from htsworkflow.pipeline import runfolder
+from htsworkflow.pipeline.runfolder import ElementTree
+
+
+def make_flowcell_id(runfolder_dir, flowcell_id=None):
+ if flowcell_id is None:
+ flowcell_id = '207BTAAXY'
+
+ config = """<?xml version="1.0"?>
+<FlowcellId>
+ <Text>%s</Text>
+</FlowcellId>""" % (flowcell_id,)
+ config_dir = os.path.join(runfolder_dir, 'Config')
+
+ if not os.path.exists(config_dir):
+ os.mkdir(config_dir)
+ pathname = os.path.join(config_dir, 'FlowcellId.xml')
+ f = open(pathname,'w')
+ f.write(config)
+ f.close()
+
+def make_matrix(matrix_dir):
+ contents = """# Auto-generated frequency response matrix
+> A
+> C
+> G
+> T
+0.77 0.15 -0.04 -0.04
+0.76 1.02 -0.05 -0.06
+-0.10 -0.10 1.17 -0.03
+-0.13 -0.12 0.80 1.27
+"""
+ s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
+ f = open(s_matrix, 'w')
+ f.write(contents)
+ f.close()
+
+def make_phasing_params(bustard_dir):
+ for lane in range(1,9):
+ pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
+ f = open(pathname, 'w')
+ f.write("""<Parameters>
+ <Phasing>0.009900</Phasing>
+ <Prephasing>0.003500</Prephasing>
+</Parameters>
+""")
+ f.close()
+
+def make_gerald_config(gerald_dir):
+ config_xml = """<RunParameters>
+<ChipWideRunParameters>
+ <ANALYSIS>default</ANALYSIS>
+ <BAD_LANES></BAD_LANES>
+ <BAD_TILES></BAD_TILES>
+ <CONTAM_DIR></CONTAM_DIR>
+ <CONTAM_FILE></CONTAM_FILE>
+ <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
+ <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
+ <ELAND_REPEAT></ELAND_REPEAT>
+ <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
+ <EMAIL_LIST>diane</EMAIL_LIST>
+ <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
+ <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
+ <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
+ <FORCE>1</FORCE>
+ <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
+ <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
+ <HAMSTER_FLAG>genome</HAMSTER_FLAG>
+ <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
+ <POST_RUN_COMMAND></POST_RUN_COMMAND>
+ <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
+ <PURE_BASES>12</PURE_BASES>
+ <QF_PARAMS>'((CHASTITY>=0.6))'</QF_PARAMS>
+ <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
+ <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
+ <READ_LENGTH>32</READ_LENGTH>
+ <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
+ <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
+ <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
+ <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
+ <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
+ <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
+ <TILE_ROOT>s</TILE_ROOT>
+ <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
+ <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
+ <USE_BASES>all</USE_BASES>
+ <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
+</ChipWideRunParameters>
+<LaneSpecificRunParameters>
+ <ANALYSIS>
+ <s_1>eland</s_1>
+ <s_2>eland</s_2>
+ <s_3>eland</s_3>
+ <s_4>eland</s_4>
+ <s_5>eland</s_5>
+ <s_6>eland</s_6>
+ <s_7>eland</s_7>
+ <s_8>eland</s_8>
+ </ANALYSIS>
+ <ELAND_GENOME>
+ <s_1>/g/dm3</s_1>
+ <s_2>/g/equcab1</s_2>
+ <s_3>/g/equcab1</s_3>
+ <s_4>/g/canfam2</s_4>
+ <s_5>/g/hg18</s_5>
+ <s_6>/g/hg18</s_6>
+ <s_7>/g/hg18</s_7>
+ <s_8>/g/hg18</s_8>
+ </ELAND_GENOME>
+ <READ_LENGTH>
+ <s_1>32</s_1>
+ <s_2>32</s_2>
+ <s_3>32</s_3>
+ <s_4>32</s_4>
+ <s_5>32</s_5>
+ <s_6>32</s_6>
+ <s_7>32</s_7>
+ <s_8>32</s_8>
+ </READ_LENGTH>
+ <USE_BASES>
+ <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
+ <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
+ <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
+ <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
+ <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
+ <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
+ <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
+ <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
+ </USE_BASES>
+</LaneSpecificRunParameters>
+</RunParameters>
+"""
+ pathname = os.path.join(gerald_dir, 'config.xml')
+ f = open(pathname,'w')
+ f.write(config_xml)
+ f.close()
+
+def make_summary_htm(gerald_dir):
+ summary_htm="""<!--RUN_TIME Wed Jul 2 06:47:44 2008 -->
+<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
+<html>
+<body>
+
+<a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
+<h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
+<h2><br></br>Chip Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr><td>Machine</td><td>HWI-EAS229</td></tr>
+<tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
+<tr><td>Chip ID</td><td>unknown</td></tr>
+</table>
+<h2><br></br>Chip Results Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td>Clusters</td>
+<td>Clusters (PF)</td>
+<td>Yield (kbases)</td>
+</tr>
+<tr><td>80933224</td>
+<td>43577803</td>
+<td>1133022</td>
+</tr>
+</table>
+<h2><br></br>Lane Parameter Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane</td>
+<td>Sample ID</td>
+<td>Sample Target</td>
+<td>Sample Type</td>
+<td>Length</td>
+<td>Filter</td>
+<td>Num Tiles</td>
+<td>Tiles</td>
+</tr>
+<tr>
+<td>1</td>
+<td>unknown</td>
+<td>mm9</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane1">Lane 1</a></td>
+</tr>
+<tr>
+<td>2</td>
+<td>unknown</td>
+<td>mm9</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane2">Lane 2</a></td>
+</tr>
+<tr>
+<td>3</td>
+<td>unknown</td>
+<td>mm9</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane3">Lane 3</a></td>
+</tr>
+<tr>
+<td>4</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane4">Lane 4</a></td>
+</tr>
+<tr>
+<td>5</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane5">Lane 5</a></td>
+</tr>
+<tr>
+<td>6</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane6">Lane 6</a></td>
+</tr>
+<tr>
+<td>7</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane7">Lane 7</a></td>
+</tr>
+<tr>
+<td>8</td>
+<td>unknown</td>
+<td>elegans170</td>
+<td>ELAND</td>
+<td>26</td>
+<td>'((CHASTITY>=0.6))'</td>
+<td>100</td>
+<td><a href="#Lane8">Lane 8</a></td>
+</tr>
+</table>
+<h2><br></br>Lane Results Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+<td colspan="2">Lane Info</td>
+<td colspan="8">Tile Mean +/- SD for Lane</td>
+</tr>
+<tr>
+<td>Lane </td>
+<td>Lane Yield (kbases) </td>
+<td>Clusters (raw)</td>
+<td>Clusters (PF) </td>
+<td>1st Cycle Int (PF) </td>
+<td>% intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Alignment Score (PF) </td>
+<td> % Error Rate (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>158046</td>
+<td>96483 +/- 9074</td>
+<td>60787 +/- 4240</td>
+<td>329 +/- 35</td>
+<td>101.88 +/- 6.03</td>
+<td>63.21 +/- 3.29</td>
+<td>70.33 +/- 0.24</td>
+<td>9054.08 +/- 59.16</td>
+<td>0.46 +/- 0.18</td>
+</tr>
+<tr>
+<td>2</td>
+<td>156564</td>
+<td>133738 +/- 7938</td>
+<td>60217 +/- 1926</td>
+<td>444 +/- 39</td>
+<td>92.62 +/- 7.58</td>
+<td>45.20 +/- 3.31</td>
+<td>51.98 +/- 0.74</td>
+<td>6692.04 +/- 92.49</td>
+<td>0.46 +/- 0.09</td>
+</tr>
+<tr>
+<td>3</td>
+<td>185818</td>
+<td>152142 +/- 10002</td>
+<td>71468 +/- 2827</td>
+<td>366 +/- 36</td>
+<td>91.53 +/- 8.66</td>
+<td>47.19 +/- 3.80</td>
+<td>82.24 +/- 0.44</td>
+<td>10598.68 +/- 64.13</td>
+<td>0.41 +/- 0.04</td>
+</tr>
+<tr>
+<td>4</td>
+<td>34953</td>
+<td>15784 +/- 2162</td>
+<td>13443 +/- 1728</td>
+<td>328 +/- 40</td>
+<td>97.53 +/- 9.87</td>
+<td>85.29 +/- 1.91</td>
+<td>80.02 +/- 0.53</td>
+<td>10368.82 +/- 71.08</td>
+<td>0.15 +/- 0.05</td>
+</tr>
+<tr>
+<td>5</td>
+<td>167936</td>
+<td>119735 +/- 8465</td>
+<td>64590 +/- 2529</td>
+<td>417 +/- 37</td>
+<td>88.69 +/- 14.79</td>
+<td>54.10 +/- 2.59</td>
+<td>76.95 +/- 0.32</td>
+<td>9936.47 +/- 65.75</td>
+<td>0.28 +/- 0.02</td>
+</tr>
+<tr>
+<td>6</td>
+<td>173463</td>
+<td>152177 +/- 8146</td>
+<td>66716 +/- 2493</td>
+<td>372 +/- 39</td>
+<td>87.06 +/- 9.86</td>
+<td>43.98 +/- 3.12</td>
+<td>78.80 +/- 0.43</td>
+<td>10162.28 +/- 49.65</td>
+<td>0.38 +/- 0.03</td>
+</tr>
+<tr>
+<td>7</td>
+<td>149287</td>
+<td>84649 +/- 7325</td>
+<td>57418 +/- 3617</td>
+<td>295 +/- 28</td>
+<td>89.40 +/- 8.23</td>
+<td>67.97 +/- 1.82</td>
+<td>33.38 +/- 0.25</td>
+<td>4247.92 +/- 32.37</td>
+<td>1.00 +/- 0.03</td>
+</tr>
+<tr>
+<td>8</td>
+<td>106953</td>
+<td>54622 +/- 4812</td>
+<td>41136 +/- 3309</td>
+<td>284 +/- 37</td>
+<td>90.21 +/- 9.10</td>
+<td>75.39 +/- 2.27</td>
+<td>48.33 +/- 0.29</td>
+<td>6169.21 +/- 169.50</td>
+<td>0.86 +/- 1.22</td>
+</tr>
+<tr><td colspan="13">Tile mean across chip</td></tr>
+<tr>
+<td>Av.</td>
+<td></td>
+<td>101166</td>
+<td>54472</td>
+<td>354</td>
+<td>92.36</td>
+<td>60.29</td>
+<td>65.25</td>
+<td>8403.69</td>
+<td>0.50</td>
+</tr>
+</table>
+<h2><br></br>Expanded Lane Summary<br></br></h2>
+<table border="1" cellpadding="5">
+<tr>
+
+<tr><td colspan="2">Lane Info</td>
+<td colspan="2">Phasing Info</td>
+<td colspan="2">Raw Data (tile mean)</td>
+<td colspan="7">Filtered Data (tile mean)</td></tr>
+<td>Lane </td>
+<td>Clusters (tile mean) (raw)</td>
+<td>% Phasing </td>
+<td>% Prephasing </td>
+<td>% Error Rate (raw) </td>
+<td> Equiv Perfect Clusters (raw) </td>
+<td>% retained </td>
+<td>Cycle 2-4 Av Int (PF) </td>
+<td>Cycle 2-10 Av % Loss (PF) </td>
+<td>Cycle 10-20 Av % Loss (PF) </td>
+<td>% Align (PF) </td>
+<td>% Error Rate (PF) </td>
+<td> Equiv Perfect Clusters (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>96483</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.00</td>
+<td>49676</td>
+<td>63.21</td>
+<td>317 +/- 32</td>
+<td>0.13 +/- 0.44</td>
+<td>-1.14 +/- 0.34</td>
+<td>70.33</td>
+<td>0.46</td>
+<td>41758</td>
+</tr>
+<tr>
+<td>2</td>
+<td>133738</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.22</td>
+<td>40467</td>
+<td>45.20</td>
+<td>415 +/- 33</td>
+<td>0.29 +/- 0.40</td>
+<td>-0.79 +/- 0.35</td>
+<td>51.98</td>
+<td>0.46</td>
+<td>30615</td>
+</tr>
+<tr>
+<td>3</td>
+<td>152142</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.30</td>
+<td>78588</td>
+<td>47.19</td>
+<td>344 +/- 26</td>
+<td>0.68 +/- 0.51</td>
+<td>-0.77 +/- 0.42</td>
+<td>82.24</td>
+<td>0.41</td>
+<td>57552</td>
+</tr>
+<tr>
+<td>4</td>
+<td>15784</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>0.29</td>
+<td>11095</td>
+<td>85.29</td>
+<td>306 +/- 34</td>
+<td>0.20 +/- 0.69</td>
+<td>-1.28 +/- 0.66</td>
+<td>80.02</td>
+<td>0.15</td>
+<td>10671</td>
+</tr>
+<tr>
+<td>5</td>
+<td>119735</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>0.85</td>
+<td>60335</td>
+<td>54.10</td>
+<td>380 +/- 32</td>
+<td>0.34 +/- 0.49</td>
+<td>-1.55 +/- 4.69</td>
+<td>76.95</td>
+<td>0.28</td>
+<td>49015</td>
+</tr>
+<tr>
+<td>6</td>
+<td>152177</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.21</td>
+<td>70905</td>
+<td>43.98</td>
+<td>333 +/- 27</td>
+<td>0.57 +/- 0.50</td>
+<td>-0.91 +/- 0.39</td>
+<td>78.80</td>
+<td>0.38</td>
+<td>51663</td>
+</tr>
+<tr>
+<td>7</td>
+<td>84649</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.38</td>
+<td>21069</td>
+<td>67.97</td>
+<td>272 +/- 20</td>
+<td>1.15 +/- 0.52</td>
+<td>-0.84 +/- 0.58</td>
+<td>33.38</td>
+<td>1.00</td>
+<td>18265</td>
+</tr>
+<tr>
+<td>8</td>
+<td>54622</td>
+<td>0.7700</td>
+<td>0.3100</td>
+<td>1.17</td>
+<td>21335</td>
+<td>75.39</td>
+<td>262 +/- 31</td>
+<td>1.10 +/- 0.59</td>
+<td>-1.01 +/- 0.47</td>
+<td>48.33</td>
+<td>0.86</td>
+<td>19104</td>
+</tr>
+</table>
+<b><br></br>IVC Plots</b>
+<p> <a href='IVC.htm' target="_blank"> IVC.htm
+ </a></p>
+<b><br></br>All Intensity Plots</b>
+<p> <a href='All.htm' target="_blank"> All.htm
+ </a></p>
+<b><br></br>Error graphs: </b>
+<p> <a href='Error.htm' target="_blank"> Error.htm
+ </a></p>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>1</td>
+<td>0001</td>
+<td>114972</td>
+<td>326.48</td>
+<td>94.39</td>
+<td>57.44</td>
+<td>70.2</td>
+<td>9038.6</td>
+<td>0.44</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>2</td>
+<td>0001</td>
+<td>147793</td>
+<td>448.12</td>
+<td>83.68</td>
+<td>38.57</td>
+<td>53.7</td>
+<td>6905.4</td>
+<td>0.54</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>3</td>
+<td>0001</td>
+<td>167904</td>
+<td>374.05</td>
+<td>86.91</td>
+<td>40.36</td>
+<td>81.3</td>
+<td>10465.0</td>
+<td>0.47</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>4</td>
+<td>0001</td>
+<td>20308</td>
+<td>276.85</td>
+<td>92.87</td>
+<td>84.26</td>
+<td>80.4</td>
+<td>10413.8</td>
+<td>0.16</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>6</td>
+<td>0001</td>
+<td>166844</td>
+<td>348.12</td>
+<td>77.59</td>
+<td>38.13</td>
+<td>79.7</td>
+<td>10264.4</td>
+<td>0.44</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>7</td>
+<td>0001</td>
+<td>98913</td>
+<td>269.90</td>
+<td>86.66</td>
+<td>64.55</td>
+<td>33.2</td>
+<td>4217.5</td>
+<td>1.02</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+<a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
+<table border="1" cellpadding="5">
+<tr>
+<td>Lane </td>
+<td>Tile </td>
+<td>Clusters (raw)</td>
+<td>Av 1st Cycle Int (PF) </td>
+<td>Av % intensity after 20 cycles (PF) </td>
+<td>% PF Clusters </td>
+<td>% Align (PF) </td>
+<td>Av Alignment Score (PF) </td>
+<td>% Error Rate (PF) </td>
+</tr>
+<tr>
+<td>8</td>
+<td>0001</td>
+<td>64972</td>
+<td>243.60</td>
+<td>89.40</td>
+<td>73.17</td>
+<td>48.3</td>
+<td>6182.8</td>
+<td>0.71</td>
+</tr>
+</table>
+<td><a href="#Top">Back to top</a></td>
+</body>
+</html>
+"""
+ pathname = os.path.join(gerald_dir, 'Summary.htm')
+ f = open(pathname, 'w')
+ f.write(summary_htm)
+ f.close()
+
+def make_eland_results(gerald_dir):
+ eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
+>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
+>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
+>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
+"""
+ for i in range(1,9):
+ pathname = os.path.join(gerald_dir,
+ 's_%d_eland_result.txt' % (i,))
+ f = open(pathname, 'w')
+ f.write(eland_result)
+ f.close()
+
+def make_runfolder(obj=None):
+ """
+ Make a fake runfolder, attach all the directories to obj if defined
+ """
+ # make a fake runfolder directory
+ temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
+
+ runfolder_dir = os.path.join(temp_dir,
+ '080102_HWI-EAS229_0010_207BTAAXX')
+ os.mkdir(runfolder_dir)
+
+ data_dir = os.path.join(runfolder_dir, 'Data')
+ os.mkdir(data_dir)
+
+ firecrest_dir = os.path.join(data_dir,
+ 'C1-33_Firecrest1.8.28_12-04-2008_diane'
+ )
+ os.mkdir(firecrest_dir)
+ matrix_dir = os.path.join(firecrest_dir, 'Matrix')
+ os.mkdir(matrix_dir)
+ make_matrix(matrix_dir)
+
+ bustard_dir = os.path.join(firecrest_dir,
+ 'Bustard1.8.28_12-04-2008_diane')
+ os.mkdir(bustard_dir)
+ make_phasing_params(bustard_dir)
+
+ gerald_dir = os.path.join(bustard_dir,
+ 'GERALD_12-04-2008_diane')
+ os.mkdir(gerald_dir)
+ make_gerald_config(gerald_dir)
+ make_summary_htm(gerald_dir)
+ make_eland_results(gerald_dir)
+
+ if obj is not None:
+ obj.temp_dir = temp_dir
+ obj.runfolder_dir = runfolder_dir
+ obj.data_dir = data_dir
+ obj.firecrest_dir = firecrest_dir
+ obj.matrix_dir = matrix_dir
+ obj.bustard_dir = bustard_dir
+ obj.gerald_dir = gerald_dir
+
+
+class RunfolderTests(unittest.TestCase):
+ """
+ Test components of the runfolder processing code
+ which includes firecrest, bustard, and gerald
+ """
+ def setUp(self):
+ # attaches all the directories to the object passed in
+ make_runfolder(self)
+
+ def tearDown(self):
+ shutil.rmtree(self.temp_dir)
+
+ def test_firecrest(self):
+ """
+ Construct a firecrest object
+ """
+ f = firecrest.firecrest(self.firecrest_dir)
+ self.failUnlessEqual(f.version, '1.8.28')
+ self.failUnlessEqual(f.start, 1)
+ self.failUnlessEqual(f.stop, 33)
+ self.failUnlessEqual(f.user, 'diane')
+ self.failUnlessEqual(f.date, date(2008,4,12))
+
+ xml = f.get_elements()
+ # just make sure that element tree can serialize the tree
+ xml_str = ElementTree.tostring(xml)
+
+ f2 = firecrest.Firecrest(xml=xml)
+ self.failUnlessEqual(f.version, f2.version)
+ self.failUnlessEqual(f.start, f2.start)
+ self.failUnlessEqual(f.stop, f2.stop)
+ self.failUnlessEqual(f.user, f2.user)
+ self.failUnlessEqual(f.date, f2.date)
+
+ def test_bustard(self):
+ """
+ construct a bustard object
+ """
+ b = bustard.bustard(self.bustard_dir)
+ self.failUnlessEqual(b.version, '1.8.28')
+ self.failUnlessEqual(b.date, date(2008,4,12))
+ self.failUnlessEqual(b.user, 'diane')
+ self.failUnlessEqual(len(b.phasing), 8)
+ self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
+
+ xml = b.get_elements()
+ b2 = bustard.Bustard(xml=xml)
+ self.failUnlessEqual(b.version, b2.version)
+ self.failUnlessEqual(b.date, b2.date )
+ self.failUnlessEqual(b.user, b2.user)
+ self.failUnlessEqual(len(b.phasing), len(b2.phasing))
+ for key in b.phasing.keys():
+ self.failUnlessEqual(b.phasing[key].lane,
+ b2.phasing[key].lane)
+ self.failUnlessEqual(b.phasing[key].phasing,
+ b2.phasing[key].phasing)
+ self.failUnlessEqual(b.phasing[key].prephasing,
+ b2.phasing[key].prephasing)
+
+ def test_gerald(self):
+ # need to update gerald and make tests for it
+ g = gerald.gerald(self.gerald_dir)
+
+ self.failUnlessEqual(g.version,
+ '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
+ self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
+ self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
+ self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
+
+
+ # list of genomes, matches what was defined up in
+ # make_gerald_config.
+ # the leading None pads the list so that lanes 1..8 map to
+ # indices 1..8 instead of python's default 0..7
+ genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
+ '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
+
+ # test lane specific parameters from gerald config file
+ for i in range(1,9):
+ cur_lane = g.lanes[str(i)]
+ self.failUnlessEqual(cur_lane.analysis, 'eland')
+ self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
+ self.failUnlessEqual(cur_lane.read_length, '32')
+ self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
+
+ # test data extracted from summary file
+ clusters = [None,
+ (96483, 9074), (133738, 7938),
+ (152142, 10002), (15784, 2162),
+ (119735, 8465), (152177, 8146),
+ (84649, 7325), (54622, 4812),]
+
+ for i in range(1,9):
+ summary_lane = g.summary[str(i)]
+ self.failUnlessEqual(summary_lane.cluster, clusters[i])
+ self.failUnlessEqual(summary_lane.lane, str(i))
+
+ xml = g.get_elements()
+ # just make sure that element tree can serialize the tree
+ xml_str = ElementTree.tostring(xml)
+ g2 = gerald.Gerald(xml=xml)
+
+ # do it all again after extracting from the xml file
+ self.failUnlessEqual(g.version, g2.version)
+ self.failUnlessEqual(g.date, g2.date)
+ self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
+ self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
+
+ # test lane specific parameters from gerald config file
+ for i in range(1,9):
+ g_lane = g.lanes[str(i)]
+ g2_lane = g2.lanes[str(i)]
+ self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
+ self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
+ self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
+ self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
+
+ # test (some) summary elements
+ for i in range(1,9):
+ g_summary = g.summary[str(i)]
+ g2_summary = g2.summary[str(i)]
+ self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
+ self.failUnlessEqual(g_summary.lane, g2_summary.lane)
+
+ g_eland = g.eland_results
+ g2_eland = g2.eland_results
+ for lane in g_eland.keys():
+ self.failUnlessEqual(g_eland[lane].reads,
+ g2_eland[lane].reads)
+ self.failUnlessEqual(len(g_eland[lane].mapped_reads),
+ len(g2_eland[lane].mapped_reads))
+ for k in g_eland[lane].mapped_reads.keys():
+ self.failUnlessEqual(g_eland[lane].mapped_reads[k],
+ g2_eland[lane].mapped_reads[k])
+
+ self.failUnlessEqual(len(g_eland[lane].match_codes),
+ len(g2_eland[lane].match_codes))
+ for k in g_eland[lane].match_codes.keys():
+ self.failUnlessEqual(g_eland[lane].match_codes[k],
+ g2_eland[lane].match_codes[k])
+
+
+ def test_eland(self):
+ dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
+ 'chr2L.fa': 'dm3/chr2L.fa',
+ 'Lambda.fa': 'Lambda.fa'}
+ genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
+ '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+ eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
+
+ for i in range(1,9):
+ lane = eland[str(i)]
+ self.failUnlessEqual(lane.reads, 4)
+ self.failUnlessEqual(lane.sample_name, "s")
+ self.failUnlessEqual(lane.lane_id, unicode(i))
+ self.failUnlessEqual(len(lane.mapped_reads), 3)
+ self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
+ self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
+ self.failUnlessEqual(lane.match_codes['U1'], 2)
+ self.failUnlessEqual(lane.match_codes['NM'], 1)
+
+ xml = eland.get_elements()
+ # just make sure that element tree can serialize the tree
+ xml_str = ElementTree.tostring(xml)
+ e2 = gerald.ELAND(xml=xml)
+
+ for i in range(1,9):
+ l1 = eland[str(i)]
+ l2 = e2[str(i)]
+ self.failUnlessEqual(l1.reads, l2.reads)
+ self.failUnlessEqual(l1.sample_name, l2.sample_name)
+ self.failUnlessEqual(l1.lane_id, l2.lane_id)
+ self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
+ self.failUnlessEqual(len(l1.mapped_reads), 3)
+ for k in l1.mapped_reads.keys():
+ self.failUnlessEqual(l1.mapped_reads[k],
+ l2.mapped_reads[k])
+
+ self.failUnlessEqual(len(l1.match_codes), 9)
+ self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
+ for k in l1.match_codes.keys():
+ self.failUnlessEqual(l1.match_codes[k],
+ l2.match_codes[k])
+
+ def test_runfolder(self):
+ runs = runfolder.get_runs(self.runfolder_dir)
+
+ # do we get the flowcell id from the filename?
+ self.failUnlessEqual(len(runs), 1)
+ self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
+
+ # do we get the flowcell id from the FlowcellId.xml file
+ make_flowcell_id(self.runfolder_dir, '207BTAAXY')
+ runs = runfolder.get_runs(self.runfolder_dir)
+ self.failUnlessEqual(len(runs), 1)
+ self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
+
+ r1 = runs[0]
+ xml = r1.get_elements()
+ xml_str = ElementTree.tostring(xml)
+
+ r2 = runfolder.PipelineRun(xml=xml)
+ self.failUnlessEqual(r1.name, r2.name)
+ self.failIfEqual(r2.firecrest, None)
+ self.failIfEqual(r2.bustard, None)
+ self.failIfEqual(r2.gerald, None)
+
+
+def suite():
+ return unittest.makeSuite(RunfolderTests,'test')
+
+if __name__ == "__main__":
+ unittest.main(defaultTest="suite")
+
--- /dev/null
+#\r
+# The Alphanum Algorithm is an improved sorting algorithm for strings\r
+# containing numbers. Instead of sorting numbers in ASCII order like\r
+# a standard sort, this algorithm sorts numbers in numeric order.\r
+#\r
+# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com\r
+#\r
+#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com)\r
+#* Distributed under same license as original\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+#\r
+\r
+import re\r
+\r
+#\r
+# TODO: Make decimal points be considered in the same class as digits\r
+#\r
+\r
+def chunkify(str):\r
+ """return a list of numbers and non-numeric substrings of +str+\r
+\r
+ the numeric substrings are converted to integer, non-numeric are left as is\r
+ """\r
+ chunks = re.findall("(\d+|\D+)",str)\r
+ chunks = [re.match('\d',x) and int(x) or x for x in chunks] #convert numeric strings to numbers\r
+ return chunks\r
+\r
+def alphanum(a,b):\r
+ """breaks +a+ and +b+ into pieces and returns left-to-right comparison of the pieces\r
+\r
+ +a+ and +b+ are expected to be strings (for example file names) with numbers and non-numeric characters\r
+ Split the values into list of numbers and non numeric sub-strings and so comparison of numbers gives\r
+ Numeric sorting, comparison of non-numeric gives Lexicographic order\r
+ """\r
+ # split strings into chunks\r
+ aChunks = chunkify(a)\r
+ bChunks = chunkify(b)\r
+\r
+ return cmp(aChunks,bChunks) #built in comparison works once data is prepared\r
+\r
+\r
+\r
+if __name__ == "__main__":\r
+ unsorted = ["1000X Radonius Maximus","10X Radonius","200X Radonius","20X Radonius","20X Radonius Prime","30X Radonius","40X Radonius","Allegia 50 Clasteron","Allegia 500 Clasteron","Allegia 51 Clasteron","Allegia 51B Clasteron","Allegia 52 Clasteron","Allegia 60 Clasteron","Alpha 100","Alpha 2","Alpha 200","Alpha 2A","Alpha 2A-8000","Alpha 2A-900","Callisto Morphamax","Callisto Morphamax 500","Callisto Morphamax 5000","Callisto Morphamax 600","Callisto Morphamax 700","Callisto Morphamax 7000","Callisto Morphamax 7000 SE","Callisto Morphamax 7000 SE2","QRS-60 Intrinsia Machine","QRS-60F Intrinsia Machine","QRS-62 Intrinsia Machine","QRS-62F Intrinsia Machine","Xiph Xlater 10000","Xiph Xlater 2000","Xiph Xlater 300","Xiph Xlater 40","Xiph Xlater 5","Xiph Xlater 50","Xiph Xlater 500","Xiph Xlater 5000","Xiph Xlater 58"]\r
+ sorted = unsorted[:]\r
+ sorted.sort(alphanum)\r
+ print '+++++Sorted...++++'\r
+ print '\n'.join(sorted)\r
--- /dev/null
+"""
+ElementTree helper functions
+"""
+def indent(elem, level=0):
+ """
+ reformat an element tree to be 'pretty' (indented)
+ """
+ i = "\n" + level*" "
+ if len(elem):
+ if not elem.text or not elem.text.strip():
+ elem.text = i + " "
+ for child in elem:
+ indent(child, level+1)
+ # we don't want the closing tag indented too far
+ child.tail = i
+ if not elem.tail or not elem.tail.strip():
+ elem.tail = i
+ else:
+ if level and (not elem.tail or not elem.tail.strip()):
+ elem.tail = i
+
+def flatten(elem, include_tail=0):
+ """
+ Extract the text from an element tree
+ (AKA extract the text that is not part of XML tags)
+ """
+ text = elem.text or ""
+ for e in elem:
+ text += flatten(e, 1)
+ if include_tail and elem.tail: text += elem.tail
+ return text
+
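+
+# A small illustrative sketch (not part of the original module): flatten()
+# collects just the text content, while indent() mutates a tree in place so
+# that tostring() output is readable.
+#
+#   from xml.etree import ElementTree
+#   tree = ElementTree.fromstring('<foo><bar>asdf</bar></foo>')
+#   print flatten(tree)                # -> 'asdf'
+#   indent(tree)
+#   print ElementTree.tostring(tree)   # now spread over several lines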
--- /dev/null
+"""
+Provide some quick and dirty access and reporting for the fctracker database.
+
+The advantage to this code is that it doesn't depend on django being
+installed, so it can run on machines other than the webserver.
+"""
+import datetime
+import os
+import re
+import sys
+import time
+
+if sys.version_info[0] + sys.version_info[1] * 0.1 >= 2.5:
+ # we're python 2.5
+ import sqlite3
+else:
+ import pysqlite2.dbapi2 as sqlite3
+
+
+class fctracker:
+ """
+ provide a simple way to interact with the flowcell data in fctracker.db
+ """
+ def __init__(self, database):
+ # default to the current directory
+ if database is None:
+ self.database = self._guess_fctracker_path()
+ else:
+ self.database = database
+ self.conn = sqlite3.connect(self.database)
+ self._get_library()
+ self._get_species()
+
+ def _guess_fctracker_path(self):
+ """
+ Guess a few obvious places for the database
+ """
+ fctracker = 'fctracker.db'
+ name = fctracker
+ # is it in the current dir?
+ if os.path.exists(name):
+ return name
+ name = os.path.expanduser(os.path.join('~', fctracker))
+ if os.path.exists(name):
+ return name
+ raise RuntimeError("Can't find fctracker")
+
+ def _make_dict_from_table(self, table_name, pkey_name):
+ """
+ Convert a django table into a dictionary indexed by the primary key.
+ Yes, it really does just load everything into memory; hopefully
+ we stay under a few tens of thousands of runs for a while.
+ """
+ table = {}
+ c = self.conn.cursor()
+ c.execute('select * from %s;' % (table_name))
+ # extract just the field name
+ description = [ f[0] for f in c.description]
+ for row in c:
+ row_dict = dict(zip(description, row))
+ table[row_dict[pkey_name]] = row_dict
+ c.close()
+ return table
+
+ def _add_lanes_to_libraries(self):
+ """
+ add flowcell/lane ids to new attribute 'lanes' in the library dictionary
+ """
+ library_id_re = re.compile('lane_\d_library_id')
+
+ for fc_id, fc in self.flowcells.items():
+ lane_library = [ (x[0][5], x[1]) for x in fc.items()
+ if library_id_re.match(x[0]) ]
+ for lane, library_id in lane_library:
+ if not self.library[library_id].has_key('lanes'):
+ self.library[library_id]['lanes'] = []
+ self.library[library_id]['lanes'].append((fc_id, lane))
+
+ def _get_library(self):
+ """
+ attach the library dictionary to the instance
+ """
+ self.library = self._make_dict_from_table(
+ 'fctracker_library',
+ 'library_id')
+
+
+ def _get_species(self):
+ """
+ attach the species dictionary to the instance
+ """
+ self.species = self._make_dict_from_table(
+ 'fctracker_species',
+ 'id'
+ )
+
+ def _get_flowcells(self, where=None):
+ """
+ attach the flowcell dictionary to the instance
+
+ where is a sql where clause (eg "where run_date > '2008-1-1'")
+ that can be used to limit which flowcells we select
+ FIXME: please add sanitization code
+ """
+ if where is None:
+ where = ""
+ self.flowcells = {}
+ c = self.conn.cursor()
+ c.execute('select * from fctracker_flowcell %s;' % (where))
+ # extract just the field name
+ description = [ f[0] for f in c.description ]
+ for row in c:
+ row_dict = dict(zip(description, row))
+ fcid, status = self._parse_flowcell_id(row_dict)
+ row_dict['flowcell_id'] = fcid
+ row_dict['flowcell_status'] = status
+
+ for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
+ lane_library = self.library[row_dict[lane+"_id"]]
+ species_id = lane_library['library_species_id']
+ lane_library['library_species'] = self.species[species_id]
+ row_dict[lane] = lane_library
+ # some useful parsing
+ run_date = time.strptime(row_dict['run_date'], '%Y-%m-%d %H:%M:%S')
+ run_date = datetime.datetime(*run_date[:6])
+ row_dict['run_date'] = run_date
+ self.flowcells[row_dict['flowcell_id']] = row_dict
+
+ self._add_lanes_to_libraries()
+ return self.flowcells
+
+ def _parse_flowcell_id(self, flowcell_row):
+ """
+ Return flowcell id and status
+
+ We stored the status information in the flowcell id name.
+ This was dumb, but database schemas are hard to update.
+ """
+ fields = flowcell_row['flowcell_id'].split()
+ fcid = None
+ status = None
+ if len(fields) > 0:
+ fcid = fields[0]
+ if len(fields) > 1:
+ status = fields[1]
+ return fcid, status
+
+
+def flowcell_gone(cell):
+ """
+ Use a variety of heuristics to determine if the flowcell drive
+ has been deleted.
+ """
+ status = cell['flowcell_status']
+ if status is None:
+ return False
+ failures = ['failed', 'deleted', 'not run']
+ for f in failures:
+ if re.search(f, status):
+ return True
+ else:
+ return False
+
+def recoverable_drive_report(flowcells):
+ """
+ Attempt to report what flowcells are still on a hard drive
+ """
+ def format_status(status):
+ if status is None:
+ return ""
+ else:
+ return status+" "
+
+ # sort flowcells by run date
+ flowcell_list = []
+ for key, cell in flowcells.items():
+ flowcell_list.append( (cell['run_date'], key) )
+ flowcell_list.sort()
+
+ report = []
+ line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) "
+ line += "%(species)s"
+ for run_date, flowcell_id in flowcell_list:
+ cell = flowcells[flowcell_id]
+ if flowcell_gone(cell):
+ continue
+ for l in range(1,9):
+ lane = 'lane_%d' % (l)
+ cell_library = cell['%s_library'%(lane)]
+ fields = {
+ 'date': cell['run_date'].strftime('%y-%b-%d'),
+ 'id': cell['flowcell_id'],
+ 'lane': l,
+ 'library_name': cell_library['library_name'],
+ 'library_id': cell['%s_library_id'%(lane)],
+ 'species': cell_library['library_species']['scientific_name'],
+ 'status': format_status(cell['flowcell_status']),
+ }
+ report.append(line % (fields))
+ return os.linesep.join(report)
+
--- /dev/null
+"""
+Utility functions to make bedfiles.
+"""
+import os
+import re
+
+# map eland_result.txt sense
+sense_map = { 'F': '+', 'R': '-'}
+sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
+
+def write_bed_header(outstream, name, description):
+ """
+ Produce the headerline for a bedfile
+ """
+ # provide default track names
+ if name is None: name = "track"
+ if description is None: description = "eland result file"
+ bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
+ bed_header += os.linesep
+ outstream.write(bed_header % (name, description))
+
+def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'):
+ """
+ read an eland result file from instream and write a bedfile to outstream
+ """
+ # indexes into fields in eland_result.txt file
+ SEQ = 1
+ CHR = 6
+ START = 7
+ SENSE = 8
+
+ write_bed_header(outstream, name, description)
+
+ for line in instream:
+ fields = line.split()
+ # we need more than the CHR field, and it needs to match a chromosome
+ if len(fields) <= CHR or \
+ (chromosome_prefix is not None and \
+ not fields[CHR].startswith(chromosome_prefix)):
+ continue
+ start = fields[START]
+ stop = int(start) + len(fields[SEQ])
+ chromosome, extension = fields[CHR].split('.')
+ assert extension == "fa"
+ outstream.write('%s %s %d read 0 %s - - %s%s' % (
+ chromosome,
+ start,
+ stop,
+ sense_map[fields[SENSE]],
+ sense_color[fields[SENSE]],
+ os.linesep
+ ))
+
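+# For reference (a sketch based on the eland fixture used in the tests), a
+# single-match eland_result.txt line such as
+#   >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
+# becomes the bed line
+#   chr2L 8796855 8796887 read 0 - - - 255,255,0
+# (stop = start + read length, strand and color taken from the F/R sense field).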
+
+def make_bed_from_multi_eland_stream(
+ instream,
+ outstream,
+ name,
+ description,
+ chr_prefix='chr',
+ max_reads=255
+ ):
+ """
+ read a multi eland stream and write a bedfile
+ """
+ write_bed_header(outstream, name, description)
+ parse_multi_eland(instream, outstream, chr_prefix, max_reads)
+
+def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
+
+ loc_pattern = '(?P<fullloc>(?P<start>[0-9]+)(?P<dir>[FR])(?P<count>[0-9]+))'
+ other_pattern = '(?P<chr>[^:,]+)'
+ split_re = re.compile('(%s|%s)' % (loc_pattern, other_pattern))
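+ # The compressed-read field (rec[3]) looks like, for example,
+ #   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0
+ # i.e. chromosome file names each followed by start/direction/mismatch-count
+ # locations; the regex above splits it into 'chr' and 'fullloc' tokens.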
+
+ for line in instream:
+ rec = line.split()
+ if len(rec) > 3:
+ # colony_id = rec[0]
+ seq = rec[1]
+ # number of matches for 0, 1, and 2 mismatches
+ # m0, m1, m2 = [int(x) for x in rec[2].split(':')]
+ compressed_reads = rec[3]
+ cur_chr = ""
+ reads = {0: [], 1: [], 2:[]}
+
+ for token in split_re.finditer(compressed_reads):
+ if token.group('chr') is not None:
+ cur_chr = token.group('chr')[:-3] # strip off .fa
+ elif token.group('fullloc') is not None:
+ matches = int(token.group('count'))
+ # only emit a bed line if
+ # our current chromosome starts with chromosome pattern
+ if chr_prefix is None or cur_chr.startswith(chr_prefix):
+ start = int(token.group('start'))
+ stop = start + len(seq)
+ orientation = token.group('dir')
+ strand = sense_map[orientation]
+ color = sense_color[orientation]
+ # build up list of reads for this record
+ reads[matches].append((cur_chr, start, stop, strand, color))
+
+ # report up to our max_read threshold reporting the fewer-mismatch
+ # matches first
+ reported_reads = 0
+ keys = [0,1,2]
+ for mismatch, read_list in ((k, reads[k]) for k in keys):
+ reported_reads += len(read_list)
+ if reported_reads <= max_reads:
+ for cur_chr, start, stop, strand, color in read_list:
+ reported_reads += 1
+ outstream.write('%s %d %d read 0 %s - - %s%s' % (
+ cur_chr,
+ start,
+ stop,
+ strand,
+ color,
+ os.linesep
+ ))
+
+def make_description(database, flowcell_id, lane):
+ """
+ compute a bedfile name and description from the fctracker database
+ """
+ from htsworkflow.util.fctracker import fctracker
+
+ fc = fctracker(database)
+ cells = fc._get_flowcells("where flowcell_id='%s'" % (flowcell_id))
+ if len(cells) != 1:
+ raise RuntimeError("couldn't find flowcell id %s" % (flowcell_id))
+ lane = int(lane)
+ if lane < 1 or lane > 8:
+ raise RuntimeError("flowcells only have lanes 1-8")
+
+ name = "%s-%s" % (flowcell_id, lane)
+
+ cell_id, cell = cells.items()[0]
+ assert cell_id == flowcell_id
+
+ cell_library_id = cell['lane_%d_library_id' %(lane,)]
+ cell_library = cell['lane_%d_library' %(lane,)]
+ description = "%s-%s" % (cell_library['library_name'], cell_library_id)
+ return name, description
--- /dev/null
+"""
+Utilities for working with unix-style mounts.
+"""
+import os
+import subprocess
+
+def list_mount_points():
+ """
+ Return list of current mount points
+
+ Note: unix-like OS specific
+ """
+ mount_points = []
+ likely_locations = ['/sbin/mount', '/bin/mount']
+ for mount in likely_locations:
+ if os.path.exists(mount):
+ p = subprocess.Popen(mount, stdout=subprocess.PIPE)
+ p.wait()
+ for l in p.stdout.readlines():
+ rec = l.split()
+ device = rec[0]
+ mount_point = rec[2]
+ assert rec[1] == 'on'
+ # looking at the output of mount on linux, osx, and
+ # sunos, the first 3 elements are always the same
+ # devicename on path
+ # everything after that displays the attributes
+ # of the mount points in wildly differing formats
+ mount_points.append(mount_point)
+ return mount_points
+ else:
+ raise RuntimeError("Couldn't find a mount executable")
+
+def is_mounted(point_to_check):
+ """
+ Return true if argument exactly matches a current mount point.
+ """
+ for mount_point in list_mount_points():
+ if point_to_check == mount_point:
+ return True
+ else:
+ return False
+
+def find_mount_point_for(pathname):
+ """
+ Find the deepest mount point that pathname is located on
+ """
+ realpath = os.path.realpath(pathname)
+ mount_points = list_mount_points()
+
+ prefixes = set()
+ for current_mount in mount_points:
+ cp = os.path.commonprefix([current_mount, realpath])
+ prefixes.add((len(cp), cp))
+
+ prefixes = list(prefixes)
+ prefixes.sort()
+ if len(prefixes) == 0:
+ return None
+ else:
+ # return longest common prefix
+ return prefixes[-1][1]
+
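+# Illustrative sketch (results depend on the local system's mount table):
+#
+#   >>> is_mounted('/proc')
+#   True                     # on a typical linux machine
+#   >>> find_mount_point_for('/tmp/somefile')
+#   '/'                      # or '/tmp' if /tmp is a separate mount point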
+
--- /dev/null
+"""
+Helpful utilities for turning random names/objects into streams.
+"""
+import os
+import gzip
+import bz2
+import types
+import urllib2
+
+def isfilelike(file_ref, mode):
+ """Does file_ref have the core file operations?
+ """
+ # if mode is w or a, check to make sure we have the write operations
+ # but always check to see if we can read
+ read_operations = ['read', 'readline', 'readlines']
+ write_operations = [ 'write', 'writelines' ]
+ #random_operations = [ 'seek', 'tell' ]
+ if mode[0] in ('w', 'a'):
+ for o in write_operations:
+ if not hasattr(file_ref, o):
+ return False
+ for o in read_operations:
+ if not hasattr(file_ref, o):
+ return False
+
+ return True
+
+def isurllike(file_ref, mode):
+ """
+ does file_ref look like a url?
+ (AKA does it start with protocol:// ?)
+ """
+ #what if mode is 'w'?
+ parsed = urllib2.urlparse.urlparse(file_ref)
+ schema, netloc, path, params, query, fragment = parsed
+
+ return len(schema) > 0
+
+def autoopen(file_ref, mode='r'):
+ """
+ Attempt to intelligently turn file_ref into a readable stream
+ """
+ # catch being passed a file
+ if type(file_ref) is types.FileType:
+ return file_ref
+ # does it look like a file?
+ elif isfilelike(file_ref, mode):
+ return file_ref
+ elif isurllike(file_ref, mode):
+ return urllib2.urlopen(file_ref)
+ elif os.path.splitext(file_ref)[1] == ".gz":
+ return gzip.open(file_ref, mode)
+ elif os.path.splitext(file_ref)[1] == '.bz2':
+ return bz2.BZ2File(file_ref, mode)
+ else:
+ return open(file_ref,mode)
+
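+# Illustrative usage (file names here are hypothetical): autoopen dispatches on
+# what it is handed, so each of the following returns something readable:
+#
+#   autoopen(open('data.txt'))             # already a file, returned as-is
+#   autoopen('data.txt.gz')                # gzip.open('data.txt.gz', 'r')
+#   autoopen('data.txt.bz2')               # bz2.BZ2File('data.txt.bz2', 'r')
+#   autoopen('http://example.com/x.txt')   # urllib2.urlopen(...)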
--- /dev/null
+"""
+Run up to N simultaneous jobs from a provided list of commands
+"""
+
+import logging
+from subprocess import PIPE
+import subprocess
+import select
+import sys
+import time
+
+class QueueCommands(object):
+ """
+ Queue up N commands from cmd_list, launching more jobs as the
+ earlier ones finish.
+ """
+
+ def __init__(self, cmd_list, N=0, cwd=None):
+ """
+ cmd_list is a list of elements suitable for subprocess
+ N is the number of simultaneous processes to run.
+ 0 is all of them.
+
+ WARNING: this will not work on windows
+ (It depends on being able to pass local file descriptors to the
+ select call, which isn't supported by the Win32 API)
+ """
+ self.to_run = cmd_list[:]
+ self.running = {}
+ self.N = N
+ self.cwd = cwd
+
+ def under_process_limit(self):
+ """
+ are we still under the total number of allowable jobs?
+ """
+ if self.N == 0:
+ return True
+
+ if len(self.running) < self.N:
+ return True
+
+ return False
+
+ def start_jobs(self):
+ """
+ Launch jobs until we have the maximum allowable running
+ (or have run out of jobs)
+ """
+ queue_log = logging.getLogger('queue')
+ queue_log.info('using %s as cwd' % (self.cwd,))
+
+ while (len(self.to_run) > 0) and self.under_process_limit():
+ queue_log.info('%d left to run', len(self.to_run))
+ cmd = self.to_run.pop(0)
+ p = subprocess.Popen(cmd, stdout=PIPE, cwd=self.cwd, shell=True)
+ self.running[p.stdout] = p
+ queue_log.info("Created process %d from %s" % (p.pid, str(cmd)))
+
+ def run(self):
+ """
+ run up to N jobs until we run out of jobs
+ """
+ queue_log = logging.getLogger('queue')
+
+ # to_run slowly gets consumed by start_jobs
+ while len(self.to_run) > 0 or len(self.running) > 0:
+ # fill any empty spots in our job queue
+ self.start_jobs()
+
+ # build a list of file descriptors
+ # fds = file descriptors
+ fds = [ x.stdout for x in self.running.values()]
+
+ # wait for something to finish
+ # wl = write list, xl = exception list (not used, hence the throwaway names)
+ read_list, wl, xl = select.select(fds, [], fds)
+
+ # for everything that might have finished...
+ for pending_fd in read_list:
+ pending = self.running[pending_fd]
+ # if it really did finish, remove it from running jobs
+ if pending.poll() is not None:
+ queue_log.info("Process %d finished [%d]",
+ pending.pid, pending.returncode)
+ del self.running[pending_fd]
+ time.sleep(1)
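+
+# Usage sketch (the unit tests for this module run the same pattern):
+#   q = QueueCommands(['/bin/sleep 1', '/bin/sleep 2', '/bin/sleep 3'], 2)
+#   q.run()   # blocks until everything finishes, at most 2 jobs at a time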
--- /dev/null
+import os
+import unittest
+
+try:
+ from xml.etree import ElementTree
+except ImportError, e:
+ from elementtree import ElementTree
+
+from htsworkflow.util.ethelp import indent, flatten
+
+class testETHelper(unittest.TestCase):
+ def setUp(self):
+ self.foo = '<foo><bar>asdf</bar><br/></foo>'
+ self.foo_tree = ElementTree.fromstring(self.foo)
+
+ def test_indent(self):
+ flat_foo = ElementTree.tostring(self.foo_tree)
+ self.failUnlessEqual(len(flat_foo.split('\n')), 1)
+
+ indent(self.foo_tree)
+ pretty_foo = ElementTree.tostring(self.foo_tree)
+ self.failUnlessEqual(len(pretty_foo.split('\n')), 5)
+
+ def test_flatten(self):
+ self.failUnlessEqual(flatten(self.foo_tree), 'asdf')
+
+def suite():
+ return unittest.makeSuite(testETHelper, 'test')
+
+if __name__ == "__main__":
+ unittest.main(defaultTest='suite')
+
+
+
+
--- /dev/null
+import os
+from StringIO import StringIO
+import unittest
+
+from htsworkflow.util import makebed
+
+class testMakeBed(unittest.TestCase):
+ def test_multi_1_0_0_limit_1(self):
+ instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383 TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0 mm9_chr13_random.fa:1240R0')
+ out = StringIO()
+
+ makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
+ self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+
+ def test_multi_1_0_0_limit_255(self):
+ instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383 TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0 mm9_chr13_random.fa:1240R0')
+ out = StringIO()
+
+ makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
+ self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+
+
+ def test_multi_2_0_0_limit_1(self):
+ instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586 GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0 mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
+ out = StringIO()
+
+ makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
+ self.failUnlessEqual(out.len, 0)
+
+ def test_multi_2_0_0_limit_255(self):
+ instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586 GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0 mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
+ out = StringIO()
+
+ makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
+ self.failUnlessEqual(out.len, 98)
+
+ def test_multi_0_2_0_limit_1(self):
+ instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:115:495 TCTCCCTGAAAAATANAAGTGNTGTTGGTGAG 0:2:1 mm9_chr14.fa:104434729F2,mm9_chr16.fa:63263818R1,mm9_chr2.fa:52265438R1')
+ out = StringIO()
+
+ makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
+ print out.getvalue()
+ self.failUnlessEqual(out.len, 0)
+
+def suite():
+ return unittest.makeSuite(testMakeBed, 'test')
+
+if __name__ == "__main__":
+ unittest.main(defaultTest='suite')
+
+
--- /dev/null
+import os
+import logging
+import time
+import unittest
+
+
+from htsworkflow.util.queuecommands import QueueCommands
+
+class testQueueCommands(unittest.TestCase):
+ def setUp(self):
+ logging.basicConfig(level=logging.DEBUG,
+ format='%(asctime)s %(name)-8s %(message)s')
+
+
+
+ def test_unlimited_run(self):
+ """
+ Run everything at once
+ """
+ cmds = ['/bin/sleep 0',
+ '/bin/sleep 1',
+ '/bin/sleep 2',]
+
+ q = QueueCommands(cmds)
+ start = time.time()
+ q.run()
+ end = time.time()-start
+        # all three sleeps run in parallel, so the wall time should be the
+        # longest sleep (2s) plus the queue's one second polling delay
+        self.failUnless( end > 2.9 and end < 3.1,
+                         "took %s seconds, expected ~3" % (end,))
+
+ def test_limited_run(self):
+ """
+ Run a limited number of jobs
+ """
+ cmds = ['/bin/sleep 1',
+ '/bin/sleep 2',
+ '/bin/sleep 3',]
+
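+        # with the limit set to two, /bin/sleep 3 has to wait for one of the
+        # first two jobs to finish before it can start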
+ q = QueueCommands(cmds, 2)
+
+ start = time.time()
+ q.run()
+ end = time.time()-start
+        # the queue's one second polling delay pushes the total to roughly six seconds
+ self.failUnless( end > 5.9 and end < 6.1,
+ "took %s seconds, expected ~6" % (end,))
+
+def suite():
+ return unittest.makeSuite(testQueueCommands, 'test')
+
+if __name__ == "__main__":
+ unittest.main(defaultTest='suite')
+
+
+
+
import os
import sys
import re
-from gaworkflow.pipeline.configure_run import *
-from gaworkflow.pipeline import retrieve_config as _rc
-from gaworkflow.pipeline.run_status import startCmdLineStatusMonitor
+from htsworkflow.pipeline.configure_run import *
+from htsworkflow.pipeline import retrieve_config as _rc
+from htsworkflow.pipeline.run_status import startCmdLineStatusMonitor
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)-8s %(message)s',
#!/usr/bin/env python
import sys
-from gaworkflow.automation.copier import main
+from htsworkflow.automation.copier import main
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
import os
import sys
-from gaworkflow.pipeline.gerald import extract_eland_sequence
+from htsworkflow.pipeline.gerald import extract_eland_sequence
def make_parser():
usage = "usage: %prog [options] infile [outfile]"
import sys
import os
-from gaworkflow.util.makebed import make_bed_from_eland_stream, make_description
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
"""
from optparse import OptionParser
import sys
-from gaworkflow.util import fctracker
+from htsworkflow.util import fctracker
def make_parser():
"""
import sys
import os
-from gaworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
+from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
def make_parser():
parser = optparse.OptionParser()
import subprocess
import sys
-from gaworkflow.pipeline import gerald
-from gaworkflow.pipeline import runfolder
+from htsworkflow.pipeline import gerald
+from htsworkflow.pipeline import runfolder
def make_query_filename(eland_obj, output_dir):
query_name = '%s_%s_eland_query.txt'
#!/usr/bin/env python
import sys
-from gaworkflow.pipeline.retrieve_config import *
-from gaworkflow.pipeline import retrieve_config
-from gaworkflow.pipeline.genome_mapper import getAvailableGenomes
-from gaworkflow.pipeline.genome_mapper import constructMapperDict
+from htsworkflow.pipeline.retrieve_config import *
+from htsworkflow.pipeline import retrieve_config
+from htsworkflow.pipeline.genome_mapper import getAvailableGenomes
+from htsworkflow.pipeline.genome_mapper import constructMapperDict
#Turn on built-in command-line parsing.
retrieve_config.DISABLE_CMDLINE = False
import optparse
import sys
-from gaworkflow.pipeline import runfolder
-from gaworkflow.pipeline.runfolder import ElementTree
+from htsworkflow.pipeline import runfolder
+from htsworkflow.pipeline.runfolder import ElementTree
def make_parser():
usage = 'usage: %prog [options] runfolder_root_dir'
#!/usr/bin/env python
import sys
-from gaworkflow.automation.runner import main
+from htsworkflow.automation.runner import main
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
#!/usr/bin/env python
import sys
-from gaworkflow.automation.spoolwatcher import main
+from htsworkflow.automation.spoolwatcher import main
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
import subprocess
import sys
-from gaworkflow.util import queuecommands
+from htsworkflow.util import queuecommands
def make_commands(run_name, lanes, site_name, destdir):
"""
from setuptools import setup
setup(
- name="gaworkflow",
+ name="htsworkflow",
description="some bots and other utilities to help deal with data from an illumina sequencer",
author="Diane Trout & Brandon King",
author_email="diane@caltech.edu",
- packages=["gaworkflow",
- "gaworkflow.pipeline",
- "gaworkflow.frontend",
- "gaworkflow.frontend.fctracker",
- "gaworkflow.frontend.eland_config"
+ packages=["htsworkflow",
+ "htsworkflow.pipeline",
+ "htsworkflow.frontend",
+ "htsworkflow.frontend.fctracker",
+ "htsworkflow.frontend.eland_config"
],
scripts=[
'scripts/configure_pipeline',
import unittest
from StringIO import StringIO
-from gaworkflow.automation import copier
+from htsworkflow.automation import copier
class testCopier(unittest.TestCase):
def test_runfolder_validate(self):