+++ /dev/null
-import ConfigParser
-import copy
-import logging
-import logging.handlers
-import os
-import re
-import subprocess
-import sys
-import time
-import traceback
-
-from benderjab import bot, rpc
-
-def runfolder_validate(fname):
-    """
-    Return True if fname looks like a runfolder name
-    """
-    return re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname) is not None
-
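-# Quick examples, mirroring the unit tests in test_copier:
-#   runfolder_validate("123456_USI-EAS44")   -> True
-#   runfolder_validate("1345_23")            -> False  (date field too short)
-#   runfolder_validate("123456_USI-EAS44 ")  -> False  (trailing space)
-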
-class rsync(object):
- def __init__(self, source, dest, pwfile):
- self.pwfile = os.path.expanduser(pwfile)
- self.cmd = ['/usr/bin/rsync', ]
- self.cmd.append('--password-file=%s' % (self.pwfile))
- self.source_base = source
- self.dest_base = dest
- self.processes = {}
- self.exit_code = None
-
- def list(self):
- """Get a directory listing"""
- args = copy.copy(self.cmd)
- args.append(self.source_base)
-
- logging.debug("Rsync cmd:" + " ".join(args))
- short_process = subprocess.Popen(args, stdout=subprocess.PIPE)
- return self.list_filter(short_process.stdout)
-
- def list_filter(self, lines):
- """
- parse rsync directory listing
- """
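-        # Example: rsync's listing is fixed-width with the file name
-        # starting at column 43, so a line like (from the unit tests)
-        #   'drwxrwxr-x           0 2007/12/29 12:34:56 071229_USI-EAS229_001_FC1234'
-        # splits into permissions/size/date/time plus the name, and the
-        # name is kept because it is a directory starting with a date.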
- dirs_to_copy = []
- direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ]
- for permissions, size, filedate, filetime, filename in direntries:
- if permissions[0] == 'd':
- # hey its a directory, the first step to being something we want to
- # copy
- if re.match("[0-9]{6}", filename):
- # it starts with something that looks like a 6 digit date
- # aka good enough for me
- dirs_to_copy.append(filename)
- return dirs_to_copy
-
- def create_copy_process(self, dirname):
- args = copy.copy(self.cmd)
-        # -r recursive, -l copy symlinks as symlinks, -t preserve times
-        args.append('-rlt')
- # from here
- args.append(os.path.join(self.source_base, dirname))
- # to here
- args.append(self.dest_base)
- logging.debug("Rsync cmd:" + " ".join(args))
- return subprocess.Popen(args)
-
- def copy(self):
- """
- copy any interesting looking directories over
- return list of items that we started copying.
- """
- # clean up any lingering non-running processes
- self.poll()
-
- # what's available to copy?
- dirs_to_copy = self.list()
-
- # lets start copying
- started = []
- for d in dirs_to_copy:
- process = self.processes.get(d, None)
-
- if process is None:
- # we don't have a process, so make one
- logging.info("rsyncing %s" % (d))
- self.processes[d] = self.create_copy_process(d)
- started.append(d)
- return started
-
-    def poll(self):
-        """
-        check currently running processes to see if they're done
-
-        return path roots that have finished.
-        """
-        finished = []
-        for dir_key, proc_value in self.processes.items():
-            retcode = proc_value.poll()
-            if retcode is None:
-                # process hasn't finished yet
-                pass
-            elif retcode == 0:
-                logging.info("finished rsyncing %s, exitcode %d" % (dir_key, retcode))
-                del self.processes[dir_key]
-                finished.append(dir_key)
-            else:
-                # leave the failed process in self.processes so we don't
-                # restart it and update() doesn't report it as finished
-                logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode))
-        return finished
-
- def __len__(self):
- """
- Return how many active rsync processes we currently have
-
- Call poll first to close finished processes.
- """
- return len(self.processes)
-
- def keys(self):
- """
- Return list of current run folder names
- """
- return self.processes.keys()
-
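-# A minimal sketch of driving the rsync class by hand, outside the bot
-# (the source/destination/password paths here are hypothetical):
-#
-#   r = rsync('rsync://sequencer/runs/', '/archive/runs/', '~/.rsync_pw')
-#   r.copy()           # start one rsync per runfolder-looking directory
-#   while len(r) > 0:  # len() counts still-running rsync processes
-#       time.sleep(60)
-#       r.poll()       # reaps finished processes
-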
-class CopierBot(rpc.XmlRpcBot):
- def __init__(self, section=None, configfile=None):
- #if configfile is None:
- # configfile = '~/.htsworkflow'
-
- super(CopierBot, self).__init__(section, configfile)
-
- # options for rsync command
- self.cfg['rsync_password_file'] = None
- self.cfg['rsync_source'] = None
- self.cfg['rsync_destination'] = None
-
- # options for reporting we're done
- self.cfg['notify_users'] = None
- self.cfg['notify_runner'] = None
-
- self.pending = []
- self.rsync = None
- self.notify_users = None
- self.notify_runner = None
-
- self.register_function(self.startCopy)
- self.register_function(self.sequencingFinished)
- self.eventTasks.append(self.update)
-
- def read_config(self, section=None, configfile=None):
- """
- read the config file
- """
- super(CopierBot, self).read_config(section, configfile)
-
- password = self._check_required_option('rsync_password_file')
- source = self._check_required_option('rsync_source')
- destination = self._check_required_option('rsync_destination')
- self.rsync = rsync(source, destination, password)
-
- self.notify_users = self._parse_user_list(self.cfg['notify_users'])
- try:
- self.notify_runner = \
- self._parse_user_list(self.cfg['notify_runner'],
- require_resource=True)
- except bot.JIDMissingResource:
- msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.critical(msg)
- raise bot.JIDMissingResource(msg)
-
- def startCopy(self, *args):
- """
- start our copy
- """
- logging.info("starting copy scan")
- started = self.rsync.copy()
-        logging.info("copying: " + " ".join(started) + ".")
- return started
-
- def sequencingFinished(self, runDir, *args):
- """
- The run was finished, if we're done copying, pass the message on
- """
- # close any open processes
- self.rsync.poll()
-
- # see if we're still copying
- if runfolder_validate(runDir):
-            logging.info("received sequencing finished for %s" % (runDir))
- self.pending.append(runDir)
- self.startCopy()
- return "PENDING"
- else:
- errmsg = "received bad runfolder name (%s)" % (runDir)
- logging.warning(errmsg)
- # maybe I should use a different error message
- raise RuntimeError(errmsg)
-
- def reportSequencingFinished(self, runDir):
- """
- Send the sequencingFinished message to the interested parties
- """
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, 'Sequencing run %s finished' % (runDir))
- if self.notify_runner is not None:
- for r in self.notify_runner:
- self.rpc_send(r, (runDir,), 'sequencingFinished')
-        logging.info("forwarding sequencingFinished message for %s" % (runDir))
-
- def update(self, *args):
- """
- Update our current status.
- Report if we've finished copying files.
- """
- self.rsync.poll()
-        # iterate over a copy, since we remove items from the list
-        for p in self.pending[:]:
-            if p not in self.rsync.keys():
-                self.reportSequencingFinished(p)
-                self.pending.remove(p)
-
- def _parser(self, msg, who):
- """
- Parse xmpp chat messages
- """
- help = u"I can [copy], or report current [status]"
- if re.match(u"help", msg):
- reply = help
- elif re.match("copy", msg):
- started = self.startCopy()
- reply = u"started copying " + ", ".join(started)
- elif re.match(u"status", msg):
- msg = [u"Currently %d rsync processes are running." % (len(self.rsync))]
- for d in self.rsync.keys():
- msg.append(u" " + d)
- reply = os.linesep.join(msg)
- else:
- reply = u"I didn't understand '%s'" % (unicode(msg))
- return reply
-
-def main(args=None):
-    bot = CopierBot()
-    return bot.main(args)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/env python
-from glob import glob
-import logging
-import os
-import re
-import sys
-import time
-import threading
-
-from benderjab import rpc
-
-from htsworkflow.pipelines.configure_run import *
-
-#s_fc = re.compile('FC[0-9]+')
-s_fc = re.compile('_[0-9a-zA-Z]*$')
-
-
-def _get_flowcell_from_rundir(run_dir):
- """
- Returns flowcell string based on run_dir.
- Returns None and logs error if flowcell can't be found.
- """
- junk, dirname = os.path.split(run_dir)
- mo = s_fc.search(dirname)
- if not mo:
-        logging.error('Could not find a flowcell id in run dir %s' % (run_dir))
- return None
-
- return dirname[mo.start()+1:]
-
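-# For example (hypothetical path, runfolder name borrowed from the tests):
-#   _get_flowcell_from_rundir('/tmp/080909_HWI-EAS229_0052_1234ABCD')
-# matches the trailing '_1234ABCD' group and returns '1234ABCD'.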
-
-
-class Runner(rpc.XmlRpcBot):
- """
- Manage running pipeline jobs.
- """
- def __init__(self, section=None, configfile=None):
- #if configfile is None:
- # self.configfile = "~/.htsworkflow"
- super(Runner, self).__init__(section, configfile)
-
- self.cfg['genome_dir'] = None
- self.cfg['base_analysis_dir'] = None
-
- self.cfg['notify_users'] = None
- self.cfg['notify_postanalysis'] = None
-
- self.conf_info_dict = {}
-
- self.register_function(self.sequencingFinished)
- #self.eventTasks.append(self.update)
-
-
- def read_config(self, section=None, configfile=None):
- super(Runner, self).read_config(section, configfile)
-
- self.genome_dir = self._check_required_option('genome_dir')
- self.base_analysis_dir = self._check_required_option('base_analysis_dir')
-
- self.notify_users = self._parse_user_list(self.cfg['notify_users'])
- #FIXME: process notify_postpipeline cfg
-
-
- def _parser(self, msg, who):
- """
- Parse xmpp chat messages
- """
-        help = u"I can [start] a run, or report [status]"
- if re.match(u"help", msg):
- reply = help
- elif re.match("status", msg):
- words = msg.split()
- if len(words) == 2:
- reply = self.getStatusReport(words[1])
- else:
-                reply = u"Status available for: %s" \
-                        % (', '.join(self.conf_info_dict.keys()))
- elif re.match(u"start", msg):
- words = msg.split()
- if len(words) == 2:
- self.sequencingFinished(words[1])
- reply = u"starting run for %s" % (words[1])
- else:
- reply = u"need runfolder name"
- elif re.match(u"path", msg):
- reply = u"My path is: " + unicode(os.environ['PATH'])
- else:
- reply = u"I didn't understand '%s'" %(msg)
-
- logging.debug("reply: " + str(reply))
- return reply
-
-
- def getStatusReport(self, fc_num):
- """
- Returns text status report for flow cell number
- """
- if fc_num not in self.conf_info_dict:
- return "No record of a %s run." % (fc_num)
-
- status = self.conf_info_dict[fc_num].status
-
- if status is None:
- return "No status information for %s yet." \
- " Probably still in configure step. Try again later." % (fc_num)
-
- output = status.statusReport()
-
- return '\n'.join(output)
-
-
- def sequencingFinished(self, run_dir):
- """
-        Sequencing (and copying) is finished, time to start pipeline
- """
- logging.debug("received sequencing finished message")
-
- # Setup config info object
- ci = ConfigInfo()
- ci.base_analysis_dir = self.base_analysis_dir
- ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir)
-
- # get flowcell from run_dir name
- flowcell = _get_flowcell_from_rundir(run_dir)
-
- # Store ci object in dictionary
- self.conf_info_dict[flowcell] = ci
-
-
-        # Launch the job in its own thread and return.
- self.launchJob(run_dir, flowcell, ci)
- return "started"
-
-
- def pipelineFinished(self, run_dir):
- # need to strip off self.watch_dir from rundir I suspect.
-        logging.info("pipeline finished in " + str(run_dir))
- #pattern = self.watch_dir
- #if pattern[-1] != os.path.sep:
- # pattern += os.path.sep
- #stripped_run_dir = re.sub(pattern, "", run_dir)
- #logging.debug("stripped to " + stripped_run_dir)
-
- # Notify each user that the run has finished.
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, 'Pipeline run %s finished' % (run_dir))
-
- #if self.notify_runner is not None:
- # for r in self.notify_runner:
- # self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-
- def reportMsg(self, msg):
-
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, msg)
-
-
- def _runner(self, run_dir, flowcell, conf_info):
-
- # retrieve config step
- cfg_filepath = os.path.join(conf_info.analysis_dir,
- 'config-auto.txt')
- status_retrieve_cfg = retrieve_config(conf_info,
- flowcell,
- cfg_filepath,
- self.genome_dir)
- if status_retrieve_cfg:
- logging.info("Runner: Retrieve config: success")
- self.reportMsg("Retrieve config (%s): success" % (run_dir))
- else:
- logging.error("Runner: Retrieve config: failed")
- self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
-
-
- # configure step
- if status_retrieve_cfg:
- status = configure(conf_info)
- if status:
- logging.info("Runner: Configure: success")
- self.reportMsg("Configure (%s): success" % (run_dir))
- self.reportMsg(
- os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
- )
- else:
- logging.error("Runner: Configure: failed")
- self.reportMsg("Configure (%s): FAILED" % (run_dir))
-
-        # if the configure step succeeded, continue
-        # (status is only bound when status_retrieve_cfg was True)
-        if status_retrieve_cfg and status:
- # Setup status cmdline status monitor
- #startCmdLineStatusMonitor(ci)
-
- # running step
-            logging.info('Running pipeline now!')
- run_status = run_pipeline(conf_info)
- if run_status is True:
- logging.info('Runner: Pipeline: success')
- self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
- else:
-                logging.error('Runner: Pipeline: failed')
- self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))
-
-
- def launchJob(self, run_dir, flowcell, conf_info):
- """
- Starts up a thread for running the pipeline
- """
- t = threading.Thread(target=self._runner,
- args=[run_dir, flowcell, conf_info])
- t.setDaemon(True)
- t.start()
-
-
-
-def main(args=None):
- bot = Runner()
- return bot.main(args)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/env python
-import logging
-import os
-import re
-import sys
-import time
-#import glob
-
-from htsworkflow.util import mount
-
-# this uses pyinotify
-import pyinotify
-from pyinotify import EventsCodes
-
-from benderjab import bot, rpc
-
-
-class WatcherEvents(object):
- # two events need to be tracked
- # one to send startCopy
- # one to send OMG its broken
- # OMG its broken needs to stop when we've seen enough
- # cycles
- # this should be per runfolder.
- # read the xml files
- def __init__(self):
- pass
-
-
-class Handler(pyinotify.ProcessEvent):
- def __init__(self, watchmanager, bot):
- self.last_event_time = None
- self.watchmanager = watchmanager
- self.bot = bot
-
- def process_IN_CREATE(self, event):
- self.last_event_time = time.time()
- msg = "Create: %s" % os.path.join(event.path, event.name)
- if event.name.lower() == "run.completed":
- try:
- self.bot.sequencingFinished(event.path)
-            except IOError, e:
-                logging.error("Couldn't send sequencingFinished: %s" % (e,))
- logging.debug(msg)
-
- def process_IN_DELETE(self, event):
- logging.debug("Remove: %s" % os.path.join(event.path, event.name))
-
- def process_IN_UNMOUNT(self, event):
- pathname = os.path.join(event.path, event.name)
- logging.debug("IN_UNMOUNT: %s" % (pathname,))
- self.bot.unmount_watch()
-
-class SpoolWatcher(rpc.XmlRpcBot):
- """
- Watch a directory and send a message when another process is done writing.
-
- This monitors a directory tree using inotify (linux specific) and
-    after some files have been written will send a message after <timeout>
- seconds of no file writing.
-
-    (Basically, when the solexa machine finishes dumping a round of data,
-    this will hopefully send out a message saying "hey, look, there's data
-    available".)
-
- """
- # these params need to be in the config file
- # I wonder where I should put the documentation
- #:Parameters:
- # `watchdir` - which directory tree to monitor for modifications
- # `profile` - specify which .htsworkflow profile to use
- # `write_timeout` - how many seconds to wait for writes to finish to
- # the spool
- # `notify_timeout` - how often to timeout from notify
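-    #
-    # A hypothetical ~/.htsworkflow section wiring these together
-    # (section name and JIDs are made up; keys match self.cfg below):
-    #   [spoolwatcher]
-    #   jid: watcher@example.fake
-    #   password: badpassword
-    #   watchdir: /sequencer/spool
-    #   write_timeout: 10
-    #   notify_users: user1@example.fake
-    #   notify_runner: runner@example.fake/benderjab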
-
- def __init__(self, section=None, configfile=None):
- #if configfile is None:
- # self.configfile = "~/.htsworkflow"
- super(SpoolWatcher, self).__init__(section, configfile)
-
- self.cfg['watchdir'] = None
- self.cfg['write_timeout'] = 10
- self.cfg['notify_users'] = None
- self.cfg['notify_runner'] = None
-
- self.notify_timeout = 0.001
- self.wm = pyinotify.WatchManager()
- self.handler = Handler(self.wm, self)
- self.notifier = pyinotify.Notifier(self.wm, self.handler)
- self.wdd = None
- self.mount_point = None
- self.mounted = True
-
- self.notify_users = None
- self.notify_runner = None
-
- self.eventTasks.append(self.process_notify)
-
- def read_config(self, section=None, configfile=None):
- super(SpoolWatcher, self).read_config(section, configfile)
-
- self.watch_dir = self._check_required_option('watchdir')
- self.write_timeout = int(self.cfg['write_timeout'])
-
- self.notify_users = self._parse_user_list(self.cfg['notify_users'])
- try:
- self.notify_runner = \
- self._parse_user_list(self.cfg['notify_runner'],
- require_resource=True)
- except bot.JIDMissingResource:
- msg = 'need a full jabber ID + resource for xml-rpc destinations'
-            logging.critical(msg)
- raise bot.JIDMissingResource(msg)
-
- def add_watch(self, watchdir=None):
- """
- start watching watchdir or self.watch_dir
- we're currently limited to watching one directory tree.
- """
- # the one tree limit is mostly because self.wdd is a single item
- # but managing it as a list might be a bit more annoying
- if watchdir is None:
- watchdir = self.watch_dir
-        logging.info("Watching: " + str(watchdir))
-
- self.mount_point = mount.find_mount_point_for(watchdir)
-
- mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
- # rec traverses the tree and adds all the directories that are there
- # at the start.
- # auto_add will add in new directories as they are created
- self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
-
- def unmount_watch(self):
- if self.wdd is not None:
- self.wm.rm_watch(self.wdd.values())
- self.wdd = None
- self.mounted = False
-
- def process_notify(self, *args):
-        # process any events that have already been queued
-        self.notifier.process_events()
-        # check_events blocks for up to notify_timeout seconds
-        if self.notifier.check_events(self.notify_timeout):
-            # read the new events and enqueue them
-            self.notifier.read_events()
-        # has enough quiet time passed since the last write?
- last_event_time = self.handler.last_event_time
- if last_event_time is not None:
- time_delta = time.time() - last_event_time
- if time_delta > self.write_timeout:
- self.startCopy()
- self.handler.last_event_time = None
- # handle unmounted filesystems
- if not self.mounted:
- if mount.is_mounted(self.mount_point):
- # we've been remounted. Huzzah!
- # restart the watch
- self.add_watch()
- self.mounted = True
- logging.info(
- "%s was remounted, restarting watch" % \
- (self.mount_point)
- )
-
- def _parser(self, msg, who):
- """
- Parse xmpp chat messages
- """
-        help = u"I can send a [copy] message, or a sequencer [finished] message"
- if re.match(u"help", msg):
- reply = help
- elif re.match("copy", msg):
- self.startCopy()
- reply = u"sent copy message"
- elif re.match(u"finished", msg):
- words = msg.split()
- if len(words) == 2:
- self.sequencingFinished(words[1])
- reply = u"sending sequencing finished for %s" % (words[1])
- else:
- reply = u"need runfolder name"
- else:
- reply = u"I didn't understand '%s'" %(msg)
- return reply
-
- def start(self, daemonize):
- """
- Start application
- """
- self.add_watch()
- super(SpoolWatcher, self).start(daemonize)
-
- def stop(self):
- """
- shutdown application
- """
- # destroy the inotify's instance on this interrupt (stop monitoring)
- self.notifier.stop()
- super(SpoolWatcher, self).stop()
-
- def startCopy(self):
- logging.debug("writes seem to have stopped")
- if self.notify_runner is not None:
- for r in self.notify_runner:
- self.rpc_send(r, tuple(), 'startCopy')
-
- def sequencingFinished(self, run_dir):
- # need to strip off self.watch_dir from rundir I suspect.
- logging.info("run.completed in " + str(run_dir))
-        prefix = self.watch_dir
-        if prefix[-1] != os.path.sep:
-            prefix += os.path.sep
-        # strip as a literal prefix; re.sub would treat the path as a regex
-        stripped_run_dir = run_dir.replace(prefix, "", 1)
- logging.debug("stripped to " + stripped_run_dir)
- if self.notify_users is not None:
- for u in self.notify_users:
- self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
- if self.notify_runner is not None:
- for r in self.notify_runner:
- self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-
-def main(args=None):
- bot = SpoolWatcher()
- return bot.main(args)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-import os
-import unittest
-
-from htsworkflow.automation.copier import runfolder_validate
-
-def extract_runfolder_path(watchdir, event):
- runfolder_path = watchdir
- path = event.path
- if not path.startswith(watchdir):
- return None
-
- fragments = path[len(watchdir):].split(os.path.sep)
- for f in fragments:
- runfolder_path = os.path.join(runfolder_path, f)
- if runfolder_validate(f):
- return runfolder_path
- return None
-
-class Event(object):
- def __init__(self, path=None, name=None):
- self.path = path
- self.name = name
-
-class testRunner(unittest.TestCase):
-
- def test_extract_runfolder(self):
- watchdir = os.path.join('root', 'server', 'mount')
- runfolder = os.path.join(watchdir, '080909_HWI-EAS229_0052_1234ABCD')
- ipar = os.path.join(runfolder, 'Data', 'IPAR_1.01')
- other = os.path.join(watchdir, 'other')
-
- event = Event( path=runfolder )
- self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder)
-
- event = Event( path=ipar )
- self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder)
-
- event = Event( path=other)
- self.failUnlessEqual(extract_runfolder_path(watchdir, event), None )
-
-def suite():
- return unittest.makeSuite(testRunner,'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest="suite")
+++ /dev/null
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-import datetime
-import os
-import re
-import sys
-import time
-
-if sys.version_info >= (2, 5):
-    # python 2.5 and later ship sqlite3 in the standard library
-    import sqlite3
-else:
- import pysqlite2.dbapi2 as sqlite3
-
-
-class fctracker:
- """
- provide a simple way to interact with the flowcell data in fctracker.db
- """
- def __init__(self, database):
- # default to the current directory
- if database is None:
- self.database = self._guess_fctracker_path()
- else:
- self.database = database
- self.conn = sqlite3.connect(self.database)
- self._get_library()
- self._get_species()
-
- def _guess_fctracker_path(self):
- """
- Guess a few obvious places for the database
- """
- fctracker = 'fctracker.db'
- name = fctracker
- # is it in the current dir?
- if os.path.exists(name):
- return name
- name = os.path.expanduser(os.path.join('~', fctracker))
- if os.path.exists(name):
- return name
- raise RuntimeError("Can't find fctracker")
-
- def _make_dict_from_table(self, table_name, pkey_name):
- """
- Convert a django table into a dictionary indexed by the primary key.
- Yes, it really does just load everything into memory, hopefully
- we stay under a few tens of thousands of runs for a while.
- """
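-        # e.g. _make_dict_from_table('fctracker_library', 'library_id')
-        # returns {library_id: {column_name: value, ...}, ...}, one inner
-        # dict per row, keyed by whatever columns the table defines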
- table = {}
- c = self.conn.cursor()
- c.execute('select * from %s;' % (table_name))
- # extract just the field name
- description = [ f[0] for f in c.description]
- for row in c:
- row_dict = dict(zip(description, row))
- table[row_dict[pkey_name]] = row_dict
- c.close()
- return table
-
- def _add_lanes_to_libraries(self):
- """
- add flowcell/lane ids to new attribute 'lanes' in the library dictionary
- """
-        library_id_re = re.compile(r'lane_\d_library_id')
-
-        for fc_id, fc in self.flowcells.items():
-            # x[0][5] picks the lane digit out of a 'lane_N_library_id' key
-            lane_library = [ (x[0][5], x[1]) for x in fc.items()
-                             if library_id_re.match(x[0]) ]
-            for lane, library_id in lane_library:
-                lanes = self.library[library_id].setdefault('lanes', [])
-                lanes.append((fc_id, lane))
-
- def _get_library(self):
- """
- attach the library dictionary to the instance
- """
- self.library = self._make_dict_from_table(
- 'fctracker_library',
- 'library_id')
-
-
- def _get_species(self):
- """
- attach the species dictionary to the instance
- """
- self.species = self._make_dict_from_table(
- 'fctracker_species',
- 'id'
- )
-
- def _get_flowcells(self, where=None):
- """
- attach the flowcell dictionary to the instance
-
- where is a sql where clause. (eg "where run_date > '2008-1-1'")
- that can be used to limit what flowcells we select
- FIXME: please add sanitization code
- """
- if where is None:
- where = ""
- self.flowcells = {}
- c = self.conn.cursor()
- c.execute('select * from fctracker_flowcell %s;' % (where))
- # extract just the field name
- description = [ f[0] for f in c.description ]
- for row in c:
- row_dict = dict(zip(description, row))
- fcid, status = self._parse_flowcell_id(row_dict)
- row_dict['flowcell_id'] = fcid
- row_dict['flowcell_status'] = status
-
- for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
- lane_library = self.library[row_dict[lane+"_id"]]
- species_id = lane_library['library_species_id']
- lane_library['library_species'] = self.species[species_id]
- row_dict[lane] = lane_library
- # some useful parsing
- run_date = time.strptime(row_dict['run_date'], '%Y-%m-%d %H:%M:%S')
- run_date = datetime.datetime(*run_date[:6])
- row_dict['run_date'] = run_date
- self.flowcells[row_dict['flowcell_id']] = row_dict
-
- self._add_lanes_to_libraries()
- return self.flowcells
-
- def _parse_flowcell_id(self, flowcell_row):
- """
- Return flowcell id and status
-
- We stored the status information in the flowcell id name.
- this was dumb, but database schemas are hard to update.
- """
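-        # e.g. a flowcell_id column holding '1234ABCD failed' parses to
-        # ('1234ABCD', 'failed'); a bare '1234ABCD' gives ('1234ABCD', None)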
- fields = flowcell_row['flowcell_id'].split()
- fcid = None
- status = None
- if len(fields) > 0:
- fcid = fields[0]
- if len(fields) > 1:
- status = fields[1]
- return fcid, status
-
-
-def flowcell_gone(cell):
- """
- Use a variety of heuristics to determine if the flowcell drive
- has been deleted.
- """
- status = cell['flowcell_status']
- if status is None:
- return False
- failures = ['failed', 'deleted', 'not run']
-    for f in failures:
-        if re.search(f, status):
-            return True
-    return False
-
-def recoverable_drive_report(flowcells):
- """
- Attempt to report what flowcells are still on a hard drive
- """
- def format_status(status):
- if status is None:
- return ""
- else:
- return status+" "
-
- # sort flowcells by run date
- flowcell_list = []
- for key, cell in flowcells.items():
- flowcell_list.append( (cell['run_date'], key) )
- flowcell_list.sort()
-
- report = []
- line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) "
- line += "%(species)s"
- for run_date, flowcell_id in flowcell_list:
- cell = flowcells[flowcell_id]
- if flowcell_gone(cell):
- continue
- for l in range(1,9):
- lane = 'lane_%d' % (l)
- cell_library = cell['%s_library'%(lane)]
- fields = {
- 'date': cell['run_date'].strftime('%y-%b-%d'),
- 'id': cell['flowcell_id'],
- 'lane': l,
- 'library_name': cell_library['library_name'],
- 'library_id': cell['%s_library_id'%(lane)],
- 'species': cell_library['library_species']['scientific_name'],
- 'status': format_status(cell['flowcell_status']),
- }
- report.append(line % (fields))
- return os.linesep.join(report)
-
+++ /dev/null
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.copier import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import optparse
-import os
-import sys
-
-from htsworkflow.pipelines.eland import extract_eland_sequence
-
-def make_parser():
- usage = "usage: %prog [options] infile [outfile]"
-
- parser = optparse.OptionParser(usage)
- parser.add_option("-e", "--extract", dest="slice",
- default=":",
- help="provide a python slice operator to select a portion of an eland file")
- return parser
-
-def main(argv):
- parser = make_parser()
-
- (opt, args) = parser.parse_args(argv)
-
- if len(args) not in (0, 1, 2):
- parser.error('incorrect number of arguments')
-
-    # get our slice coordinates
-    try:
-        start, end = opt.slice.split(':')
-    except ValueError:
-        parser.error('slice must look like [start]:[end], e.g. 0:25')
-    if len(start) > 0:
-        start = int(start)
-    else:
-        start = None
-    if len(end) > 0:
-        end = int(end)
-    else:
-        end = None
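-    # e.g. --extract 0:25 gives start=0, end=25; the default ':' leaves
-    # both ends as None (an open python slice)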
-
- # open infile
- if len(args) > 0:
- instream = open(args[0],'r')
- else:
- instream = sys.stdin
-
- if len(args) > 1:
- outstream = open(args[1],'w')
- else:
- outstream = sys.stdout
-
- extract_eland_sequence(instream, outstream, start, end)
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/python
-"""
-Convert a group of eland_result files from a sequencer run to bed files.
-"""
-from glob import glob
-import logging
-import optparse
-import sys
-import os
-
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
-
-def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
-    """
-    convert s_[1-8]_eland_result.txt to corresponding bed files
-    """
-    if output_dir is None:
-        output_dir = eland_dir
-
-    eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
-    out_files = glob(os.path.join(output_dir, 's_[1-8]_eland_result.bed'))
-    if len(out_files) > 0:
-        raise RuntimeError("please move old bedfiles")
-
-    logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell))
-    for pathname in eland_files:
-        path, name = os.path.split(pathname)
-        lane = int(name[2])
-        outname = 's_%d_eland_result.bed' % (lane,)
-        logging.info('Converting lane %d to %s' % (lane, outname))
-
-        outpathname = os.path.join(output_dir, outname)
-        # look up descriptions
-        bed_name, description = make_description(database, flowcell, lane)
-
-        # open files
-        instream = open(pathname, 'r')
-        outstream = open(outpathname, 'w')
-
-        make_bed_from_eland_stream(
-            instream, outstream, bed_name, description, prefix
-        )
-
-def make_parser():
- usage = """%prog: --flowcell <flowcell id> directory_name
-
-directory should contain a set of 8 eland result files named like
-s_[12345678]_eland_result.txt"""
-
-
- parser = optparse.OptionParser(usage)
-
- parser.add_option('-o', '--output', dest='output',
-                      help="destination directory for our bed files, "
-                           "defaults to eland directory",
- default=None)
- parser.add_option('--chromosome', dest='prefix',
- help='Set the chromosome prefix name. defaults to "chr"',
- default='chr')
- parser.add_option("--database", dest='database',
- help="specify location of fctracker database",
- default=None)
- parser.add_option("--flowcell", dest='flowcell',
- help="specify the flowcell id for this run",
- default=None)
- parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
- help='increase verbosity',
- default=False)
- return parser
-
-def main(command_line=None):
- logging.basicConfig(level=logging.WARNING)
- if command_line is None:
- command_line = sys.argv[1:]
-
- parser = make_parser()
- (opts, args) = parser.parse_args(command_line)
-
- if len(args) != 1:
- parser.error('Directory name required')
-
- eland_dir = args[0]
- if not os.path.isdir(eland_dir):
- parser.error('%s must be a directory' % (eland_dir,))
-
- if opts.flowcell is None:
- parser.error('Flowcell ID required')
-
- if opts.verbose:
- logger = logging.getLogger()
- logger.setLevel(logging.INFO)
-
- make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell)
-
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-"""
-Provide some quick and dirty access and reporting for the fctracker database.
-
-The advantage to this code is that it doesn't depend on django being
-installed, so it can run on machines other than the webserver.
-"""
-from optparse import OptionParser
-import sys
-
-from htsworkflow.util import fctracker
-
-def make_parser():
- """
- Make parser
- """
- parser = OptionParser()
- parser.add_option("-d", "--database", dest="database",
- help="path to the fctracker.db",
- default=None)
- parser.add_option("-w", "--where", dest="where",
- help="add a where clause",
- default=None)
- return parser
-
-def main(argv=None):
- if argv is None:
- argv = []
- parser = make_parser()
-
- opt, args = parser.parse_args(argv)
-
- fc = fctracker.fctracker(opt.database)
- cells = fc._get_flowcells(opt.where)
-
- print fctracker.recoverable_drive_report(cells)
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-"""
-Make a tree of symlinks organized by library id.
-"""
-from ConfigParser import SafeConfigParser
-from glob import glob
-import logging
-from optparse import OptionParser
-import os
-import stat
-import sys
-
-from htsworkflow.util import fctracker
-
-
-def find_lanes(flowcell_dir, flowcell_id, lane):
- lane_name = "s_%s_eland_result*" %(lane)
- pattern = os.path.join(flowcell_dir, flowcell_id, "*", lane_name)
- lanes = glob(pattern)
- return lanes
-
-def make_long_lane_name(flowcell_dir, lane_pathname):
- """
- make a name from the eland result file name
- """
-    if lane_pathname.startswith(flowcell_dir):
-        subpath = lane_pathname[len(flowcell_dir):]
-        return subpath.replace(os.path.sep, "_")
-    else:
-        return None
-
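-# For example, with flowcell_dir '/archive/flowcells/' (hypothetical), the
-# lane path '/archive/flowcells/FC1234/GERALD/s_1_eland_result.txt' becomes
-# 'FC1234_GERALD_s_1_eland_result.txt'.
-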
-def parse_srf_directory(srf_dir):
- """
- search srf_dir for *.srf files
-
- builds a dictionary indexed by flowcell name.
- """
- flowcells = {}
- srfs = glob(os.path.join(srf_dir,'*.srf'))
- for pathname in srfs:
- path, filename = os.path.split(pathname)
- basename, ext = os.path.splitext(filename)
- record = basename.split('_')
- assert len(record) == 6
-
- site = record[0]
- date = record[1]
- machine = record[2]
- runid = record[3]
- flowcellid = record[4]
- laneid = record[5]
-
- desc = "_".join([site,date,machine,runid,flowcellid])
- flowcells[flowcellid] = desc
- return flowcells
-
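-# e.g. a (hypothetical) file 'site_080909_HWI-EAS229_0052_1234ABCD_1.srf'
-# yields flowcells['1234ABCD'] = 'site_080909_HWI-EAS229_0052_1234ABCD'.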
-
-def carefully_make_hardlink(source, destination, dry_run=False):
- """
- Make a hard link, failing if a different link already exists
-
-    If the destination already exists and is the same file as the
-    source, quietly do nothing; if it exists and is a different
-    file, raise an IOError.
- """
- logging.debug("%s -> %s", source, destination)
-
- if not os.path.exists(source):
- logging.warning("%s doesn't exist", source)
- return
-
- if os.path.exists(destination):
- if os.path.samefile(source, destination):
- return
- else:
- raise IOError('%s and %s are different files' % \
- (source, destination))
-
- if dry_run: return
-
- os.link(source, destination)
- os.chmod(destination,
- stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
-
-def link_all_eland_lanes(library_path, flowcell_dir, flowcell_id, lane, dry_run):
- """
- find eland files at different alignment lengths
- and put each of those in the file
- """
- lanes = find_lanes(flowcell_dir, flowcell_id, lane)
- for lane_pathname in lanes:
- long_name = make_long_lane_name(flowcell_dir,
- lane_pathname)
- long_pathname = os.path.join(library_path, long_name)
- carefully_make_hardlink(lane_pathname,
- long_pathname,
- dry_run)
-
-def link_srf_lanes(srf_names, library_path, srf_dir, flowcell_id, lane, dry_run):
- """
- Link srf files into our library directories.
-
- the srf files must be named:
- <site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf
- """
- srf_basename = srf_names.get(flowcell_id, None)
- if srf_basename is None:
- logging.info("srf file for %s was not found", flowcell_id)
- else:
- srf_filename = "%s_%s.srf" % (srf_basename, lane)
- source = os.path.join(srf_dir, srf_filename)
- destination = os.path.join(library_path, srf_filename)
- carefully_make_hardlink(source, destination, dry_run)
-
-
-def make_library_tree(fcdb, library_dir, flowcell_dir, srfs_dir,
- dry_run=False):
- """
-    Iterate over the libraries in fcdb, building a per-library directory
-    of hard links to that library's eland and srf files.
- """
- library_dir = os.path.normpath(library_dir) + os.path.sep
- flowcell_dir = os.path.normpath(flowcell_dir) + os.path.sep
- srfs_dir = os.path.normpath(srfs_dir) + os.path.sep
-
- srf_names = parse_srf_directory(srfs_dir)
-
- for lib_id, lib in fcdb.library.items():
- library_path = os.path.join(library_dir, str(lib_id))
- if not os.path.exists(library_path):
- os.mkdir(library_path)
-
- for flowcell_id, lane in lib.get('lanes', []):
- link_all_eland_lanes(library_path,
- flowcell_dir,
- flowcell_id,
- lane,
- dry_run)
-
- link_srf_lanes(srf_names,
- library_path,
- srfs_dir,
- flowcell_id,
- lane,
- dry_run)
-
-def make_parser():
- """
- Make parser
- """
- parser = OptionParser()
- parser.add_option('-c', '--config', default=None,
- help='path to a configuration file containing a '
- 'sequence archive section')
-
- parser.add_option("-d", "--database", dest="database",
- help="path to the fctracker.db",
- default=None)
- parser.add_option('-a', '--sequence-archive', default=None,
- help='path to where the sequence archive lives')
- parser.add_option("-w", "--where", dest="where",
- help="add a where clause",
- default=None)
-
- parser.add_option("--dry-run", dest="dry_run", action="store_true",
- default=False,
- help="Don't modify the filesystem")
- return parser
-
-def main(argv=None):
- logging.basicConfig(level=logging.INFO)
-
- FRONTEND_NAME = 'frontend'
- SECTION_NAME = 'sequence_archive'
- DATABASE_OPT = 'database_name'
- ARCHIVE_OPT = 'archive_path'
-
- if argv is None:
- argv = []
- parser = make_parser()
-
- # parse command line arguments
- opt, args = parser.parse_args(argv)
-
- # figure out what config file to read
- config_path = [os.path.expanduser('~/.htsworkflow.ini'),
- '/etc/htsworkflow.ini']
- if opt.config is not None:
- config_path = [opt.config]
-
- # parse options from config file
- config_file = SafeConfigParser()
- config_file.read(config_path)
-
-    # load defaults from config file if not overridden by the command line
- if opt.database is None and \
- config_file.has_option(FRONTEND_NAME, DATABASE_OPT):
- opt.database = config_file.get(FRONTEND_NAME, DATABASE_OPT)
-
- if opt.sequence_archive is None and \
- config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
- opt.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
-
- # complain if critical things are missing
- if opt.database is None:
- parser.error('Need location of htsworkflow frontend database')
-
- if opt.sequence_archive is None:
- parser.error('Need the root path for the sequence archive')
-
- fcdb = fctracker.fctracker(opt.database)
- cells = fcdb._get_flowcells(opt.where)
-
- library_dir = os.path.join(opt.sequence_archive, 'libraries')
- flowcell_dir = os.path.join(opt.sequence_archive, 'flowcells')
- srfs_dir = os.path.join(opt.sequence_archive, 'srfs')
- make_library_tree(fcdb,
- library_dir, flowcell_dir, srfs_dir,
- opt.dry_run)
-
- return 0
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/python
-import optparse
-import sys
-import os
-
-from htsworkflow.util.opener import autoopen
-from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
-
-def make_parser():
- parser = optparse.OptionParser()
- parser.add_option('-e', '--eland', dest='inname',
- help='specify input eland filename')
- parser.add_option('-b', '--bed', dest='outname',
-                      help='specify output bed filename')
- parser.add_option('-n', '--name', dest='name',
- help='specify the track (short) name.',
- default=None)
- parser.add_option('-d', '--description', dest='description',
- help='specify the track description',
- default=None)
- parser.add_option('--chromosome', dest='prefix',
- help='Set the chromosome prefix name. defaults to "chr"',
- default='chr')
- parser.add_option("--database", dest='database',
- help="specify location of fctracker database",
- default=None)
- parser.add_option("--flowcell", dest='flowcell',
- help="compute name and description from database using flowcell id",
- default=None)
- parser.add_option("--lane", dest='lane',
- help='specify which lane to use when retrieving description from database',
- default=None)
-
- multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')
-
- multi.add_option('-m', '--multi', action='store_true',
- help='Enable parsing multi-read eland files',
- default=False)
- multi.add_option('--reads', type='int',
-                     help='limit reporting multi reads to this many reads '
-                     '(most usefully --reads=1 will turn a multi-read '
- 'file into a single read file)',
- default=255)
- parser.add_option_group(multi)
-
- return parser
-
-def main(command_line=None):
- instream = None
- outstream = None
-
- if command_line is None:
- command_line = sys.argv[1:]
-
- parser = make_parser()
- (options, args) = parser.parse_args(command_line)
-
- if options.inname is None:
- parser.error("Need eland input file name")
- return 1
-
- if options.inname == '-':
- instream = sys.stdin
- elif os.path.exists(options.inname):
- instream = autoopen(options.inname, 'r')
- else:
- parser.error('%s was not found' % (options.inname))
- return 1
-
- # figure out name for output file
- if options.outname is None:
-        # if outname wasn't defined, and we're reading from stdin
- if instream is sys.stdin:
- # write to stdout
- outstream = sys.stdout
- else:
- # if there's a name write to name.bed
- options.outname = os.path.splitext(options.inname)[0]+'.bed'
- print >>sys.stderr, "defaulting to outputname", options.outname
- elif options.outname == '-':
- outstream = sys.stdout
-
- if outstream is None:
- if os.path.exists(options.outname):
- parser.error("not overwriting %s" % (options.outname))
- return 1
- else:
- outstream = open(options.outname, 'w')
-
- if options.flowcell is not None and options.lane is not None:
- # get our name/description out of the database
- name, description = make_description(
- options.database, options.flowcell, options.lane
- )
- else:
- name = options.name
- description = options.description
-
- if options.multi:
- make_bed_from_multi_eland_stream(instream, outstream,
- name, description,
- options.prefix,
- options.reads)
-
- else:
- make_bed_from_eland_stream(instream, outstream,
- name, description,
- options.prefix)
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
-
+++ /dev/null
-#!/usr/bin/env python
-
-import logging
-from optparse import OptionParser
-import os
-import subprocess
-import sys
-
-from htsworkflow.pipelines import gerald
-from htsworkflow.pipelines.eland import extract_eland_sequence
-from htsworkflow.pipelines import runfolder
-
-def make_query_filename(eland_obj, output_dir):
- query_name = '%s_%s_eland_query.txt'
- query_name %= (eland_obj.sample_name, eland_obj.lane_id)
-
- query_pathname = os.path.join(output_dir, query_name)
-
- if os.path.exists(query_pathname):
- logging.warn("overwriting %s" % (query_pathname,))
-
- return query_pathname
-
-def make_result_filename(eland_obj, output_dir):
- result_name = '%s_%s_eland_result.txt'
- result_name %= (eland_obj.sample_name, eland_obj.lane_id)
-
- result_pathname = os.path.join(output_dir, result_name)
-
- if os.path.exists(result_pathname):
- logging.warn("overwriting %s" % (result_pathname,))
-
- return result_pathname
-
-def extract_sequence(inpathname, query_pathname, length, dry_run=False):
- logging.info('extracting %d bases' %(length,))
- logging.info('extracting from %s' %(inpathname,))
- logging.info('extracting to %s' %(query_pathname,))
-
-    if not dry_run:
-        # nest the try/finally blocks so a failure opening either file
-        # doesn't leave the other stream open (or unbound) in the finally
-        instream = open(inpathname, 'r')
-        try:
-            outstream = open(query_pathname, 'w')
-            try:
-                extract_eland_sequence(instream, outstream, 0, length)
-            finally:
-                outstream.close()
-        finally:
-            instream.close()
-
-def run_eland(length, query_name, genome, result_name, multi=False, dry_run=False):
- cmdline = ['eland_%d' % (length,), query_name, genome, result_name]
- if multi:
- cmdline += ['--multi']
-
- logging.info('running eland: ' + " ".join(cmdline))
- if not dry_run:
- return subprocess.Popen(cmdline)
- else:
- return None
-
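-# For instance (hypothetical file names), run_eland(25, 'q.txt', genome,
-# 'r.txt') launches the external command: eland_25 q.txt <genome> r.txt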
-
-def rerun(gerald_dir, output_dir, length=25, dry_run=False):
- """
- look for eland files in gerald_dir and write a subset to output_dir
- """
- logging.info("Extracting %d bp from files in %s" % (length, gerald_dir))
- g = gerald.gerald(gerald_dir)
-
- # this will only work if we're only missing the last dir in output_dir
- if not os.path.exists(output_dir):
- logging.info("Making %s" %(output_dir,))
- if not dry_run: os.mkdir(output_dir)
-
- processes = []
- for lane_id, lane_param in g.lanes.items():
- eland = g.eland_results[lane_id]
-
- inpathname = eland.pathname
- query_pathname = make_query_filename(eland, output_dir)
- result_pathname = make_result_filename(eland, output_dir)
-
- extract_sequence(inpathname, query_pathname, length, dry_run=dry_run)
-
- p = run_eland(length,
- query_pathname,
- lane_param.eland_genome,
- result_pathname,
- dry_run=dry_run)
- if p is not None:
- processes.append(p)
-
- for p in processes:
- p.wait()
-
-def make_parser():
- usage = '%prog: [options] runfolder'
-
- parser = OptionParser(usage)
-
- parser.add_option('--gerald',
- help='specify location of GERALD directory',
- default=None)
- parser.add_option('-o', '--output',
- help='specify output location of files',
- default=None)
- parser.add_option('-l', '--read-length', type='int',
- help='specify new eland length',
- dest='length',
- default=25)
- parser.add_option('--dry-run', action='store_true',
- help='only pretend to run',
- default=False)
- parser.add_option('-v', '--verbose', action='store_true',
- help='increase verbosity',
- default=False)
-
- return parser
-
-
-def main(cmdline=None):
- logging.basicConfig(level=logging.WARNING)
-
- parser = make_parser()
- opts, args = parser.parse_args(cmdline)
-
- if opts.length < 16 or opts.length > 32:
- parser.error("eland can only process reads in the range 16-32")
-
- if len(args) > 1:
- parser.error("Can only process one runfolder directory")
- elif len(args) == 1:
- runs = runfolder.get_runs(args[0])
- if len(runs) != 1:
- parser.error("Not a runfolder")
- opts.gerald = runs[0].gerald.pathname
- if opts.output is None:
- opts.output = os.path.join(
- runs[0].pathname,
- 'Data',
- # pythons 0..n ==> elands 1..n+1
- 'C1-%d' % (opts.length+1,)
- )
-
- elif opts.gerald is None:
- parser.error("need gerald directory")
-
- if opts.output is None:
- parser.error("specify location for the new eland files")
-
- if opts.verbose:
- root_logger = logging.getLogger()
- root_logger.setLevel(logging.INFO)
-
- rerun(opts.gerald, opts.output, opts.length, dry_run=opts.dry_run)
-
- return 0
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.runner import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-#!/usr/bin/env python
-import sys
-from htsworkflow.automation.spoolwatcher import main
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-import unittest
-
-from StringIO import StringIO
-from htsworkflow.automation import copier
-
-class testCopier(unittest.TestCase):
- def test_runfolder_validate(self):
- self.failUnlessEqual(copier.runfolder_validate(""), False)
- self.failUnlessEqual(copier.runfolder_validate("1345_23"), False)
- self.failUnlessEqual(copier.runfolder_validate("123456_asdf-$23'"), False)
- self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44"), True)
- self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44 "), False)
-
- def test_empty_config(self):
- cfg = StringIO("""[fake]
-something: unrelated
-""")
- bot = copier.CopierBot('fake', configfile=cfg)
- self.failUnlessRaises(RuntimeError, bot.read_config)
-
- def test_full_config(self):
- cfg = StringIO("""[copier]
-jid: copier@example.fake
-password: badpassword
-authorized_users: user1@example.fake user2@example.fake
-rsync_password_file: ~/.sequencer
-rsync_source: /tmp/sequencer_source
-rsync_destination: /tmp/sequencer_destination
-notify_users: user3@example.fake
-# who to run to
-#runner:
-""")
- c = copier.CopierBot("copier", configfile=cfg)
- c.read_config()
- self.failUnlessEqual(c.jid, 'copier@example.fake')
- self.failUnlessEqual(c.cfg['password'], 'badpassword')
- self.failUnlessEqual(len(c.authorized_users), 2)
- self.failUnlessEqual(c.authorized_users[0], 'user1@example.fake')
- self.failUnlessEqual(c.authorized_users[1], 'user2@example.fake')
- self.failUnlessEqual(c.rsync.source_base, '/tmp/sequencer_source')
- self.failUnlessEqual(c.rsync.dest_base, '/tmp/sequencer_destination')
- self.failUnlessEqual(len(c.notify_users), 1)
- self.failUnlessEqual(c.notify_users[0], 'user3@example.fake')
-
- def test_dirlist_filter(self):
- """
- test our dir listing parser
- """
- # everyone should have a root dir, and since we're not
- # currently writing files... it should all be good
- r = copier.rsync('/', '/', '/')
-
- listing = [
- 'drwxrwxr-x 0 2007/12/29 12:34:56 071229_USI-EAS229_001_FC1234\n',
- '-rwxrw-r-- 123268 2007/12/29 17:39:31 2038EAAXX.rtf\n',
- '-rwxrw-r-- 6 2007/12/29 15:10:29 New Text Document.txt\n',
- ]
-
- result = r.list_filter(listing)
- self.failUnlessEqual(len(result), 1)
- self.failUnlessEqual(result[0][-1], '4')
-
-def suite():
- return unittest.makeSuite(testCopier,'test')
-
-if __name__ == "__main__":
- unittest.main(defaultTest="suite")
+++ /dev/null
-#!/usr/bin/env python
-
-"""
-Build a fake directory tree for testing rsync management code.
-"""
-
-import os
-import random
-
-def make_random_string(length=8):
- """Make a random string, length characters long
- """
-    symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
- name = []
- for i in xrange(length):
- name.append(random.choice(symbols))
- return "".join(name)
-
-def make_file(pathname):
- """Make a file with some random stuff in it
- """
- stream = open(pathname,'w')
- stream.write(make_random_string(16))
- stream.close()
-
-def make_tree(root, depth=3, directories=5, files=10):
- """
- Make a tree of random directories and files
-
- depth is how many levels of subdirectories
- directories is how many directories each subdirectory should have
- files is how many files to create in each directory
- """
- if not os.path.exists(root):
- os.mkdir(root)
-
- paths = []
- # make files
- for i in range(files):
- name = make_random_string()
- paths.append(name)
- pathname = os.path.join(root, name)
- make_file(pathname)
-
- # make subdirectories if we still have some depth to go
- if depth > 0:
- for i in range(directories):
- name = make_random_string()
- # paths.append(name)
- pathname = os.path.join(root, name)
- subpaths = make_tree(pathname, depth-1, directories, files)
- paths.extend([ os.path.join(name, x) for x in subpaths ])
-
- return paths
-
-def generate_paths(root):
- """Make a list of relative paths like generated by make_tree
- """
- paths = []
- for curdir, subdirs, files in os.walk(root):
- paths.extend([ os.path.join(curdir, f) for f in files ])
-
- # an inefficient way of getting the correct common prefix
- # (e.g. root might not have a trailing /)
- common_root = os.path.commonprefix(paths)
- common_len = len(common_root)
- return [ p[common_len:] for p in paths ]
-
-def compare_tree(root, paths, verbose=False):
- """Make sure the tree matches our relative list of paths
- """
- # what we find when we look
- experimental_set = set(generate_paths(root))
- # what we expect
- theoretical_set = set(paths)
-    # the trees match when the symmetric difference is empty; using '^'
-    # also catches expected files that are missing from the tree
-    difference = experimental_set ^ theoretical_set
-    issame = (len(difference) == 0)
- if verbose and not issame:
- print difference
- return issame
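-
-# A round-trip sanity check (assuming a scratch directory is acceptable):
-#   paths = make_tree('/tmp/faketree', depth=1, directories=2, files=3)
-#   assert compare_tree('/tmp/faketree', paths, verbose=True)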