From 2c6dd00e6b5faee38653392a0411c57658a223c4 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Sat, 14 Feb 2009 00:33:23 +0000 Subject: [PATCH] remove all caltech pipeline specific code --- htsworkflow/automation/__init__.py | 0 htsworkflow/automation/copier.py | 245 --------------------- htsworkflow/automation/runner.py | 224 ------------------- htsworkflow/automation/spoolwatcher.py | 229 ------------------- htsworkflow/automation/test/test_runner.py | 46 ---- htsworkflow/util/fctracker.py | 201 ----------------- scripts/copier | 6 - scripts/elandseq | 51 ----- scripts/gerald2bed.py | 96 -------- scripts/library.py | 39 ---- scripts/make-library-tree | 225 ------------------- scripts/makebed | 113 ---------- scripts/rerun_eland.py | 158 ------------- scripts/runner | 6 - scripts/spoolwatcher | 6 - test/test_copier.py | 67 ------ test/tree.py | 81 ------- 17 files changed, 1793 deletions(-) delete mode 100644 htsworkflow/automation/__init__.py delete mode 100644 htsworkflow/automation/copier.py delete mode 100644 htsworkflow/automation/runner.py delete mode 100644 htsworkflow/automation/spoolwatcher.py delete mode 100644 htsworkflow/automation/test/test_runner.py delete mode 100644 htsworkflow/util/fctracker.py delete mode 100644 scripts/copier delete mode 100755 scripts/elandseq delete mode 100644 scripts/gerald2bed.py delete mode 100644 scripts/library.py delete mode 100644 scripts/make-library-tree delete mode 100755 scripts/makebed delete mode 100644 scripts/rerun_eland.py delete mode 100644 scripts/runner delete mode 100644 scripts/spoolwatcher delete mode 100644 test/test_copier.py delete mode 100644 test/tree.py diff --git a/htsworkflow/automation/__init__.py b/htsworkflow/automation/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/htsworkflow/automation/copier.py b/htsworkflow/automation/copier.py deleted file mode 100644 index 572cfb3..0000000 --- a/htsworkflow/automation/copier.py +++ /dev/null @@ -1,245 +0,0 @@ -import ConfigParser -import copy -import logging -import logging.handlers -import os -import re -import subprocess -import sys -import time -import traceback - -from benderjab import rpc - -def runfolder_validate(fname): - """ - Return True if fname looks like a runfolder name - """ - if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname): - return True - else: - return False - -class rsync(object): - def __init__(self, source, dest, pwfile): - self.pwfile = os.path.expanduser(pwfile) - self.cmd = ['/usr/bin/rsync', ] - self.cmd.append('--password-file=%s' % (self.pwfile)) - self.source_base = source - self.dest_base = dest - self.processes = {} - self.exit_code = None - - def list(self): - """Get a directory listing""" - args = copy.copy(self.cmd) - args.append(self.source_base) - - logging.debug("Rsync cmd:" + " ".join(args)) - short_process = subprocess.Popen(args, stdout=subprocess.PIPE) - return self.list_filter(short_process.stdout) - - def list_filter(self, lines): - """ - parse rsync directory listing - """ - dirs_to_copy = [] - direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ] - for permissions, size, filedate, filetime, filename in direntries: - if permissions[0] == 'd': - # hey its a directory, the first step to being something we want to - # copy - if re.match("[0-9]{6}", filename): - # it starts with something that looks like a 6 digit date - # aka good enough for me - dirs_to_copy.append(filename) - return dirs_to_copy - - def create_copy_process(self, dirname): - args = copy.copy(self.cmd) - # we want to copy everything - args.append('-rlt') - # from here - args.append(os.path.join(self.source_base, dirname)) - # to here - args.append(self.dest_base) - logging.debug("Rsync cmd:" + " ".join(args)) - return subprocess.Popen(args) - - def copy(self): - """ - copy any interesting looking directories over - return list of items that we started copying. - """ - # clean up any lingering non-running processes - self.poll() - - # what's available to copy? - dirs_to_copy = self.list() - - # lets start copying - started = [] - for d in dirs_to_copy: - process = self.processes.get(d, None) - - if process is None: - # we don't have a process, so make one - logging.info("rsyncing %s" % (d)) - self.processes[d] = self.create_copy_process(d) - started.append(d) - return started - - def poll(self): - """ - check currently running processes to see if they're done - - return path roots that have finished. - """ - for dir_key, proc_value in self.processes.items(): - retcode = proc_value.poll() - if retcode is None: - # process hasn't finished yet - pass - elif retcode == 0: - logging.info("finished rsyncing %s, exitcode %d" %( dir_key, retcode)) - del self.processes[dir_key] - else: - logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode)) - - def __len__(self): - """ - Return how many active rsync processes we currently have - - Call poll first to close finished processes. - """ - return len(self.processes) - - def keys(self): - """ - Return list of current run folder names - """ - return self.processes.keys() - -class CopierBot(rpc.XmlRpcBot): - def __init__(self, section=None, configfile=None): - #if configfile is None: - # configfile = '~/.htsworkflow' - - super(CopierBot, self).__init__(section, configfile) - - # options for rsync command - self.cfg['rsync_password_file'] = None - self.cfg['rsync_source'] = None - self.cfg['rsync_destination'] = None - - # options for reporting we're done - self.cfg['notify_users'] = None - self.cfg['notify_runner'] = None - - self.pending = [] - self.rsync = None - self.notify_users = None - self.notify_runner = None - - self.register_function(self.startCopy) - self.register_function(self.sequencingFinished) - self.eventTasks.append(self.update) - - def read_config(self, section=None, configfile=None): - """ - read the config file - """ - super(CopierBot, self).read_config(section, configfile) - - password = self._check_required_option('rsync_password_file') - source = self._check_required_option('rsync_source') - destination = self._check_required_option('rsync_destination') - self.rsync = rsync(source, destination, password) - - self.notify_users = self._parse_user_list(self.cfg['notify_users']) - try: - self.notify_runner = \ - self._parse_user_list(self.cfg['notify_runner'], - require_resource=True) - except bot.JIDMissingResource: - msg = 'need a full jabber ID + resource for xml-rpc destinations' - logging.FATAL(msg) - raise bot.JIDMissingResource(msg) - - def startCopy(self, *args): - """ - start our copy - """ - logging.info("starting copy scan") - started = self.rsync.copy() - logging.info("copying:" + " ".join(started)+".") - return started - - def sequencingFinished(self, runDir, *args): - """ - The run was finished, if we're done copying, pass the message on - """ - # close any open processes - self.rsync.poll() - - # see if we're still copying - if runfolder_validate(runDir): - logging.info("recevied sequencing finshed for %s" % (runDir)) - self.pending.append(runDir) - self.startCopy() - return "PENDING" - else: - errmsg = "received bad runfolder name (%s)" % (runDir) - logging.warning(errmsg) - # maybe I should use a different error message - raise RuntimeError(errmsg) - - def reportSequencingFinished(self, runDir): - """ - Send the sequencingFinished message to the interested parties - """ - if self.notify_users is not None: - for u in self.notify_users: - self.send(u, 'Sequencing run %s finished' % (runDir)) - if self.notify_runner is not None: - for r in self.notify_runner: - self.rpc_send(r, (runDir,), 'sequencingFinished') - logging.info("forwarding sequencingFinshed message for %s" % (runDir)) - - def update(self, *args): - """ - Update our current status. - Report if we've finished copying files. - """ - self.rsync.poll() - for p in self.pending: - if p not in self.rsync.keys(): - self.reportSequencingFinished(p) - self.pending.remove(p) - - def _parser(self, msg, who): - """ - Parse xmpp chat messages - """ - help = u"I can [copy], or report current [status]" - if re.match(u"help", msg): - reply = help - elif re.match("copy", msg): - started = self.startCopy() - reply = u"started copying " + ", ".join(started) - elif re.match(u"status", msg): - msg = [u"Currently %d rsync processes are running." % (len(self.rsync))] - for d in self.rsync.keys(): - msg.append(u" " + d) - reply = os.linesep.join(msg) - else: - reply = u"I didn't understand '%s'" % (unicode(msg)) - return reply - -def main(args=None): - bot = CopierBot() - bot.main(args) - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) - diff --git a/htsworkflow/automation/runner.py b/htsworkflow/automation/runner.py deleted file mode 100644 index 45d4ffc..0000000 --- a/htsworkflow/automation/runner.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python -from glob import glob -import logging -import os -import re -import sys -import time -import threading - -from benderjab import rpc - -from htsworkflow.pipelines.configure_run import * - -#s_fc = re.compile('FC[0-9]+') -s_fc = re.compile('_[0-9a-zA-Z]*$') - - -def _get_flowcell_from_rundir(run_dir): - """ - Returns flowcell string based on run_dir. - Returns None and logs error if flowcell can't be found. - """ - junk, dirname = os.path.split(run_dir) - mo = s_fc.search(dirname) - if not mo: - logging.error('RunDir 2 FlowCell error: %s' % (run_dir)) - return None - - return dirname[mo.start()+1:] - - - -class Runner(rpc.XmlRpcBot): - """ - Manage running pipeline jobs. - """ - def __init__(self, section=None, configfile=None): - #if configfile is None: - # self.configfile = "~/.htsworkflow" - super(Runner, self).__init__(section, configfile) - - self.cfg['notify_users'] = None - self.cfg['genome_dir'] = None - self.cfg['base_analysis_dir'] = None - - self.cfg['notify_users'] = None - self.cfg['notify_postanalysis'] = None - - self.conf_info_dict = {} - - self.register_function(self.sequencingFinished) - #self.eventTasks.append(self.update) - - - def read_config(self, section=None, configfile=None): - super(Runner, self).read_config(section, configfile) - - self.genome_dir = self._check_required_option('genome_dir') - self.base_analysis_dir = self._check_required_option('base_analysis_dir') - - self.notify_users = self._parse_user_list(self.cfg['notify_users']) - #FIXME: process notify_postpipeline cfg - - - def _parser(self, msg, who): - """ - Parse xmpp chat messages - """ - help = u"I can send [start] a run, or report [status]" - if re.match(u"help", msg): - reply = help - elif re.match("status", msg): - words = msg.split() - if len(words) == 2: - reply = self.getStatusReport(words[1]) - else: - reply = u"Status available for: %s" \ - % (', '.join([k for k in self.conf_info_dict.keys()])) - elif re.match(u"start", msg): - words = msg.split() - if len(words) == 2: - self.sequencingFinished(words[1]) - reply = u"starting run for %s" % (words[1]) - else: - reply = u"need runfolder name" - elif re.match(u"path", msg): - reply = u"My path is: " + unicode(os.environ['PATH']) - else: - reply = u"I didn't understand '%s'" %(msg) - - logging.debug("reply: " + str(reply)) - return reply - - - def getStatusReport(self, fc_num): - """ - Returns text status report for flow cell number - """ - if fc_num not in self.conf_info_dict: - return "No record of a %s run." % (fc_num) - - status = self.conf_info_dict[fc_num].status - - if status is None: - return "No status information for %s yet." \ - " Probably still in configure step. Try again later." % (fc_num) - - output = status.statusReport() - - return '\n'.join(output) - - - def sequencingFinished(self, run_dir): - """ - Sequenceing (and copying) is finished, time to start pipeline - """ - logging.debug("received sequencing finished message") - - # Setup config info object - ci = ConfigInfo() - ci.base_analysis_dir = self.base_analysis_dir - ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir) - - # get flowcell from run_dir name - flowcell = _get_flowcell_from_rundir(run_dir) - - # Store ci object in dictionary - self.conf_info_dict[flowcell] = ci - - - # Launch the job in it's own thread and turn. - self.launchJob(run_dir, flowcell, ci) - return "started" - - - def pipelineFinished(self, run_dir): - # need to strip off self.watch_dir from rundir I suspect. - logging.info("pipeline finished in" + str(run_dir)) - #pattern = self.watch_dir - #if pattern[-1] != os.path.sep: - # pattern += os.path.sep - #stripped_run_dir = re.sub(pattern, "", run_dir) - #logging.debug("stripped to " + stripped_run_dir) - - # Notify each user that the run has finished. - if self.notify_users is not None: - for u in self.notify_users: - self.send(u, 'Pipeline run %s finished' % (run_dir)) - - #if self.notify_runner is not None: - # for r in self.notify_runner: - # self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished') - - def reportMsg(self, msg): - - if self.notify_users is not None: - for u in self.notify_users: - self.send(u, msg) - - - def _runner(self, run_dir, flowcell, conf_info): - - # retrieve config step - cfg_filepath = os.path.join(conf_info.analysis_dir, - 'config-auto.txt') - status_retrieve_cfg = retrieve_config(conf_info, - flowcell, - cfg_filepath, - self.genome_dir) - if status_retrieve_cfg: - logging.info("Runner: Retrieve config: success") - self.reportMsg("Retrieve config (%s): success" % (run_dir)) - else: - logging.error("Runner: Retrieve config: failed") - self.reportMsg("Retrieve config (%s): FAILED" % (run_dir)) - - - # configure step - if status_retrieve_cfg: - status = configure(conf_info) - if status: - logging.info("Runner: Configure: success") - self.reportMsg("Configure (%s): success" % (run_dir)) - self.reportMsg( - os.linesep.join(glob(os.path.join(run_dir,'Data','C*'))) - ) - else: - logging.error("Runner: Configure: failed") - self.reportMsg("Configure (%s): FAILED" % (run_dir)) - - #if successful, continue - if status: - # Setup status cmdline status monitor - #startCmdLineStatusMonitor(ci) - - # running step - print 'Running pipeline now!' - run_status = run_pipeline(conf_info) - if run_status is True: - logging.info('Runner: Pipeline: success') - self.reportMsg("Pipeline run (%s): Finished" % (run_dir,)) - else: - logging.info('Runner: Pipeline: failed') - self.reportMsg("Pipeline run (%s): FAILED" % (run_dir)) - - - def launchJob(self, run_dir, flowcell, conf_info): - """ - Starts up a thread for running the pipeline - """ - t = threading.Thread(target=self._runner, - args=[run_dir, flowcell, conf_info]) - t.setDaemon(True) - t.start() - - - -def main(args=None): - bot = Runner() - return bot.main(args) - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) - diff --git a/htsworkflow/automation/spoolwatcher.py b/htsworkflow/automation/spoolwatcher.py deleted file mode 100644 index 2a57535..0000000 --- a/htsworkflow/automation/spoolwatcher.py +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env python -import logging -import os -import re -import sys -import time -#import glob - -from htsworkflow.util import mount - -# this uses pyinotify -import pyinotify -from pyinotify import EventsCodes - -from benderjab import rpc - - -class WatcherEvents(object): - # two events need to be tracked - # one to send startCopy - # one to send OMG its broken - # OMG its broken needs to stop when we've seen enough - # cycles - # this should be per runfolder. - # read the xml files - def __init__(self): - pass - - -class Handler(pyinotify.ProcessEvent): - def __init__(self, watchmanager, bot): - self.last_event_time = None - self.watchmanager = watchmanager - self.bot = bot - - def process_IN_CREATE(self, event): - self.last_event_time = time.time() - msg = "Create: %s" % os.path.join(event.path, event.name) - if event.name.lower() == "run.completed": - try: - self.bot.sequencingFinished(event.path) - except IOError, e: - logging.error("Couldn't send sequencingFinished") - logging.debug(msg) - - def process_IN_DELETE(self, event): - logging.debug("Remove: %s" % os.path.join(event.path, event.name)) - - def process_IN_UNMOUNT(self, event): - pathname = os.path.join(event.path, event.name) - logging.debug("IN_UNMOUNT: %s" % (pathname,)) - self.bot.unmount_watch() - -class SpoolWatcher(rpc.XmlRpcBot): - """ - Watch a directory and send a message when another process is done writing. - - This monitors a directory tree using inotify (linux specific) and - after some files having been written will send a message after - seconds of no file writing. - - (Basically when the solexa machine finishes dumping a round of data - this'll hopefully send out a message saying hey look theres data available - - """ - # these params need to be in the config file - # I wonder where I should put the documentation - #:Parameters: - # `watchdir` - which directory tree to monitor for modifications - # `profile` - specify which .htsworkflow profile to use - # `write_timeout` - how many seconds to wait for writes to finish to - # the spool - # `notify_timeout` - how often to timeout from notify - - def __init__(self, section=None, configfile=None): - #if configfile is None: - # self.configfile = "~/.htsworkflow" - super(SpoolWatcher, self).__init__(section, configfile) - - self.cfg['watchdir'] = None - self.cfg['write_timeout'] = 10 - self.cfg['notify_users'] = None - self.cfg['notify_runner'] = None - - self.notify_timeout = 0.001 - self.wm = pyinotify.WatchManager() - self.handler = Handler(self.wm, self) - self.notifier = pyinotify.Notifier(self.wm, self.handler) - self.wdd = None - self.mount_point = None - self.mounted = True - - self.notify_users = None - self.notify_runner = None - - self.eventTasks.append(self.process_notify) - - def read_config(self, section=None, configfile=None): - super(SpoolWatcher, self).read_config(section, configfile) - - self.watch_dir = self._check_required_option('watchdir') - self.write_timeout = int(self.cfg['write_timeout']) - - self.notify_users = self._parse_user_list(self.cfg['notify_users']) - try: - self.notify_runner = \ - self._parse_user_list(self.cfg['notify_runner'], - require_resource=True) - except bot.JIDMissingResource: - msg = 'need a full jabber ID + resource for xml-rpc destinations' - logging.FATAL(msg) - raise bot.JIDMissingResource(msg) - - def add_watch(self, watchdir=None): - """ - start watching watchdir or self.watch_dir - we're currently limited to watching one directory tree. - """ - # the one tree limit is mostly because self.wdd is a single item - # but managing it as a list might be a bit more annoying - if watchdir is None: - watchdir = self.watch_dir - logging.info("Watching:"+str(watchdir)) - - self.mount_point = mount.find_mount_point_for(watchdir) - - mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT - # rec traverses the tree and adds all the directories that are there - # at the start. - # auto_add will add in new directories as they are created - self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True) - - def unmount_watch(self): - if self.wdd is not None: - self.wm.rm_watch(self.wdd.values()) - self.wdd = None - self.mounted = False - - def process_notify(self, *args): - # process the queue of events as explained above - self.notifier.process_events() - #check events waits timeout - if self.notifier.check_events(self.notify_timeout): - # read notified events and enqeue them - self.notifier.read_events() - # should we do something? - # has something happened? - last_event_time = self.handler.last_event_time - if last_event_time is not None: - time_delta = time.time() - last_event_time - if time_delta > self.write_timeout: - self.startCopy() - self.handler.last_event_time = None - # handle unmounted filesystems - if not self.mounted: - if mount.is_mounted(self.mount_point): - # we've been remounted. Huzzah! - # restart the watch - self.add_watch() - self.mounted = True - logging.info( - "%s was remounted, restarting watch" % \ - (self.mount_point) - ) - - def _parser(self, msg, who): - """ - Parse xmpp chat messages - """ - help = u"I can send [copy] message, or squencer [finished]" - if re.match(u"help", msg): - reply = help - elif re.match("copy", msg): - self.startCopy() - reply = u"sent copy message" - elif re.match(u"finished", msg): - words = msg.split() - if len(words) == 2: - self.sequencingFinished(words[1]) - reply = u"sending sequencing finished for %s" % (words[1]) - else: - reply = u"need runfolder name" - else: - reply = u"I didn't understand '%s'" %(msg) - return reply - - def start(self, daemonize): - """ - Start application - """ - self.add_watch() - super(SpoolWatcher, self).start(daemonize) - - def stop(self): - """ - shutdown application - """ - # destroy the inotify's instance on this interrupt (stop monitoring) - self.notifier.stop() - super(SpoolWatcher, self).stop() - - def startCopy(self): - logging.debug("writes seem to have stopped") - if self.notify_runner is not None: - for r in self.notify_runner: - self.rpc_send(r, tuple(), 'startCopy') - - def sequencingFinished(self, run_dir): - # need to strip off self.watch_dir from rundir I suspect. - logging.info("run.completed in " + str(run_dir)) - pattern = self.watch_dir - if pattern[-1] != os.path.sep: - pattern += os.path.sep - stripped_run_dir = re.sub(pattern, "", run_dir) - logging.debug("stripped to " + stripped_run_dir) - if self.notify_users is not None: - for u in self.notify_users: - self.send(u, 'Sequencing run %s finished' % (stripped_run_dir)) - if self.notify_runner is not None: - for r in self.notify_runner: - self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished') - -def main(args=None): - bot = SpoolWatcher() - return bot.main(args) - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) - diff --git a/htsworkflow/automation/test/test_runner.py b/htsworkflow/automation/test/test_runner.py deleted file mode 100644 index 6c3b9df..0000000 --- a/htsworkflow/automation/test/test_runner.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest - - -import os -from htsworkflow.automation.copier import runfolder_validate - -def extract_runfolder_path(watchdir, event): - runfolder_path = watchdir - path = event.path - if not path.startswith(watchdir): - return None - - fragments = path[len(watchdir):].split(os.path.sep) - for f in fragments: - runfolder_path = os.path.join(runfolder_path, f) - if runfolder_validate(f): - return runfolder_path - return None - -class Event(object): - def __init__(self, path=None, name=None): - self.path = path - self.name = name - -class testRunner(unittest.TestCase): - - def test_extract_runfolder(self): - watchdir = os.path.join('root', 'server', 'mount') - runfolder = os.path.join(watchdir, '080909_HWI-EAS229_0052_1234ABCD') - ipar = os.path.join(runfolder, 'Data', 'IPAR_1.01') - other = os.path.join(watchdir, 'other') - - event = Event( path=runfolder ) - self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder) - - event = Event( path=ipar ) - self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder) - - event = Event( path=other) - self.failUnlessEqual(extract_runfolder_path(watchdir, event), None ) - -def suite(): - return unittest.makeSuite(testRunner,'test') - -if __name__ == "__main__": - unittest.main(defaultTest="suite") diff --git a/htsworkflow/util/fctracker.py b/htsworkflow/util/fctracker.py deleted file mode 100644 index 57b5dcf..0000000 --- a/htsworkflow/util/fctracker.py +++ /dev/null @@ -1,201 +0,0 @@ -""" -Provide some quick and dirty access and reporting for the fctracker database. - -The advantage to this code is that it doesn't depend on django being -installed, so it can run on machines other than the webserver. -""" -import datetime -import os -import re -import sys -import time - -if sys.version_info[0] + sys.version_info[1] * 0.1 >= 2.5: - # we're python 2.5 - import sqlite3 -else: - import pysqlite2.dbapi2 as sqlite3 - - -class fctracker: - """ - provide a simple way to interact with the flowcell data in fctracker.db - """ - def __init__(self, database): - # default to the current directory - if database is None: - self.database = self._guess_fctracker_path() - else: - self.database = database - self.conn = sqlite3.connect(self.database) - self._get_library() - self._get_species() - - def _guess_fctracker_path(self): - """ - Guess a few obvious places for the database - """ - fctracker = 'fctracker.db' - name = fctracker - # is it in the current dir? - if os.path.exists(name): - return name - name = os.path.expanduser(os.path.join('~', fctracker)) - if os.path.exists(name): - return name - raise RuntimeError("Can't find fctracker") - - def _make_dict_from_table(self, table_name, pkey_name): - """ - Convert a django table into a dictionary indexed by the primary key. - Yes, it really does just load everything into memory, hopefully - we stay under a few tens of thousands of runs for a while. - """ - table = {} - c = self.conn.cursor() - c.execute('select * from %s;' % (table_name)) - # extract just the field name - description = [ f[0] for f in c.description] - for row in c: - row_dict = dict(zip(description, row)) - table[row_dict[pkey_name]] = row_dict - c.close() - return table - - def _add_lanes_to_libraries(self): - """ - add flowcell/lane ids to new attribute 'lanes' in the library dictionary - """ - library_id_re = re.compile('lane_\d_library_id') - - for fc_id, fc in self.flowcells.items(): - lane_library = [ (x[0][5], x[1]) for x in fc.items() - if library_id_re.match(x[0]) ] - for lane, library_id in lane_library: - if not self.library[library_id].has_key('lanes'): - self.library[library_id]['lanes'] = [] - self.library[library_id]['lanes'].append((fc_id, lane)) - - def _get_library(self): - """ - attach the library dictionary to the instance - """ - self.library = self._make_dict_from_table( - 'fctracker_library', - 'library_id') - - - def _get_species(self): - """ - attach the species dictionary to the instance - """ - self.species = self._make_dict_from_table( - 'fctracker_species', - 'id' - ) - - def _get_flowcells(self, where=None): - """ - attach the flowcell dictionary to the instance - - where is a sql where clause. (eg "where run_date > '2008-1-1'") - that can be used to limit what flowcells we select - FIXME: please add sanitization code - """ - if where is None: - where = "" - self.flowcells = {} - c = self.conn.cursor() - c.execute('select * from fctracker_flowcell %s;' % (where)) - # extract just the field name - description = [ f[0] for f in c.description ] - for row in c: - row_dict = dict(zip(description, row)) - fcid, status = self._parse_flowcell_id(row_dict) - row_dict['flowcell_id'] = fcid - row_dict['flowcell_status'] = status - - for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]: - lane_library = self.library[row_dict[lane+"_id"]] - species_id = lane_library['library_species_id'] - lane_library['library_species'] = self.species[species_id] - row_dict[lane] = lane_library - # some useful parsing - run_date = time.strptime(row_dict['run_date'], '%Y-%m-%d %H:%M:%S') - run_date = datetime.datetime(*run_date[:6]) - row_dict['run_date'] = run_date - self.flowcells[row_dict['flowcell_id']] = row_dict - - self._add_lanes_to_libraries() - return self.flowcells - - def _parse_flowcell_id(self, flowcell_row): - """ - Return flowcell id and status - - We stored the status information in the flowcell id name. - this was dumb, but database schemas are hard to update. - """ - fields = flowcell_row['flowcell_id'].split() - fcid = None - status = None - if len(fields) > 0: - fcid = fields[0] - if len(fields) > 1: - status = fields[1] - return fcid, status - - -def flowcell_gone(cell): - """ - Use a variety of heuristics to determine if the flowcell drive - has been deleted. - """ - status = cell['flowcell_status'] - if status is None: - return False - failures = ['failed', 'deleted', 'not run'] - for f in failures: - if re.search(f, status): - return True - else: - return False - -def recoverable_drive_report(flowcells): - """ - Attempt to report what flowcells are still on a hard drive - """ - def format_status(status): - if status is None: - return "" - else: - return status+" " - - # sort flowcells by run date - flowcell_list = [] - for key, cell in flowcells.items(): - flowcell_list.append( (cell['run_date'], key) ) - flowcell_list.sort() - - report = [] - line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) " - line += "%(species)s" - for run_date, flowcell_id in flowcell_list: - cell = flowcells[flowcell_id] - if flowcell_gone(cell): - continue - for l in range(1,9): - lane = 'lane_%d' % (l) - cell_library = cell['%s_library'%(lane)] - fields = { - 'date': cell['run_date'].strftime('%y-%b-%d'), - 'id': cell['flowcell_id'], - 'lane': l, - 'library_name': cell_library['library_name'], - 'library_id': cell['%s_library_id'%(lane)], - 'species': cell_library['library_species']['scientific_name'], - 'status': format_status(cell['flowcell_status']), - } - report.append(line % (fields)) - return os.linesep.join(report) - diff --git a/scripts/copier b/scripts/copier deleted file mode 100644 index 9338b07..0000000 --- a/scripts/copier +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -import sys -from htsworkflow.automation.copier import main - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/scripts/elandseq b/scripts/elandseq deleted file mode 100755 index 6a5178c..0000000 --- a/scripts/elandseq +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -import optparse -import os -import sys - -from htsworkflow.pipelines.eland import extract_eland_sequence - -def make_parser(): - usage = "usage: %prog [options] infile [outfile]" - - parser = optparse.OptionParser(usage) - parser.add_option("-e", "--extract", dest="slice", - default=":", - help="provide a python slice operator to select a portion of an eland file") - return parser - -def main(argv): - parser = make_parser() - - (opt, args) = parser.parse_args(argv) - - if len(args) not in (0, 1, 2): - parser.error('incorrect number of arguments') - - # get our slice coordinates - start, end = opt.slice.split(':') - if len(start) > 0: - start = int(start) - else: - start = None - if len(end) > 0: - end = int(end) - else: - end = None - - # open infile - if len(args) > 0: - instream = open(args[0],'r') - else: - instream = sys.stdin - - if len(args) > 1: - outstream = open(args[1],'w') - else: - outstream = sys.stdout - - extract_eland_sequence(instream, outstream, start, end) - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) - diff --git a/scripts/gerald2bed.py b/scripts/gerald2bed.py deleted file mode 100644 index 7a726e7..0000000 --- a/scripts/gerald2bed.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/python -""" -Convert a group of eland_result files from a sequencer run to bed files. -""" -from glob import glob -import logging -import optparse -import sys -import os - -from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description - -def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell): - """ - convert s_[1-8]_eland_result.txt to corresponding bed files - """ - eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt')) - out_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.bed')) - if len(out_files) > 0: - raise RuntimeError("please move old bedfiles") - - logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell)) - for pathname in eland_files: - path, name = os.path.split(pathname) - lane = int(name[2]) - outname = 's_%d_eland_result.bed' %(lane,) - logging.info('Converting lane %d to %s' % (lane, outname)) - - outpathname = os.path.join(eland_dir, outname) - # look up descriptions - bed_name, description = make_description(database, flowcell, lane) - - # open files - instream = open(pathname,'r') - outstream = open(outpathname,'w') - - make_bed_from_eland_stream( - instream, outstream, name, description, prefix - ) - -def make_parser(): - usage = """%prog: --flowcell directory_name - -directory should contain a set of 8 eland result files named like -s_[12345678]_eland_result.txt""" - - - parser = optparse.OptionParser(usage) - - parser.add_option('-o', '--output', dest='output', - help="destination directory for our bed files" \ - "defaults to eland directory", - default=None) - parser.add_option('--chromosome', dest='prefix', - help='Set the chromosome prefix name. defaults to "chr"', - default='chr') - parser.add_option("--database", dest='database', - help="specify location of fctracker database", - default=None) - parser.add_option("--flowcell", dest='flowcell', - help="specify the flowcell id for this run", - default=None) - parser.add_option('-v', '--verbose', dest='verbose', action='store_true', - help='increase verbosity', - default=False) - return parser - -def main(command_line=None): - logging.basicConfig(level=logging.WARNING) - if command_line is None: - command_line = sys.argv[1:] - - parser = make_parser() - (opts, args) = parser.parse_args(command_line) - - if len(args) != 1: - parser.error('Directory name required') - - eland_dir = args[0] - if not os.path.isdir(eland_dir): - parser.error('%s must be a directory' % (eland_dir,)) - - if opts.flowcell is None: - parser.error('Flowcell ID required') - - if opts.verbose: - logger = logging.getLogger() - logger.setLevel(logging.INFO) - - make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell) - - return 0 - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) - diff --git a/scripts/library.py b/scripts/library.py deleted file mode 100644 index 35532f4..0000000 --- a/scripts/library.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Provide some quick and dirty access and reporting for the fctracker database. - -The advantage to this code is that it doesn't depend on django being -installed, so it can run on machines other than the webserver. -""" -from optparse import OptionParser -import sys - -from htsworkflow.util import fctracker - -def make_parser(): - """ - Make parser - """ - parser = OptionParser() - parser.add_option("-d", "--database", dest="database", - help="path to the fctracker.db", - default=None) - parser.add_option("-w", "--where", dest="where", - help="add a where clause", - default=None) - return parser - -def main(argv=None): - if argv is None: - argv = [] - parser = make_parser() - - opt, args = parser.parse_args(argv) - - fc = fctracker.fctracker(opt.database) - cells = fc._get_flowcells(opt.where) - - print fctracker.recoverable_drive_report(cells) - return 0 - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/scripts/make-library-tree b/scripts/make-library-tree deleted file mode 100644 index 67a9282..0000000 --- a/scripts/make-library-tree +++ /dev/null @@ -1,225 +0,0 @@ -""" -Make a tree of symlinks organized by library id. -""" -from ConfigParser import SafeConfigParser -from glob import glob -import logging -from optparse import OptionParser -import os -import stat -import sys - -from htsworkflow.util import fctracker - - -def find_lanes(flowcell_dir, flowcell_id, lane): - lane_name = "s_%s_eland_result*" %(lane) - pattern = os.path.join(flowcell_dir, flowcell_id, "*", lane_name) - lanes = glob(pattern) - return lanes - -def make_long_lane_name(flowcell_dir, lane_pathname): - """ - make a name from the eland result file name - """ - if flowcell_dir == lane_pathname[0:len(flowcell_dir)]: - subpath = lane_pathname[len(flowcell_dir):] - long_name = subpath.replace(os.path.sep, "_") - return long_name - else: - return None - -def parse_srf_directory(srf_dir): - """ - search srf_dir for *.srf files - - builds a dictionary indexed by flowcell name. - """ - flowcells = {} - srfs = glob(os.path.join(srf_dir,'*.srf')) - for pathname in srfs: - path, filename = os.path.split(pathname) - basename, ext = os.path.splitext(filename) - record = basename.split('_') - assert len(record) == 6 - - site = record[0] - date = record[1] - machine = record[2] - runid = record[3] - flowcellid = record[4] - laneid = record[5] - - desc = "_".join([site,date,machine,runid,flowcellid]) - flowcells[flowcellid] = desc - return flowcells - - -def carefully_make_hardlink(source, destination, dry_run=False): - """ - Make a hard link, failing if a different link already exists - - Checking to see if the link already exists and is - the same as the link we want to make. - If the link already exists and is different, throw an error. - """ - logging.debug("%s -> %s", source, destination) - - if not os.path.exists(source): - logging.warning("%s doesn't exist", source) - return - - if os.path.exists(destination): - if os.path.samefile(source, destination): - return - else: - raise IOError('%s and %s are different files' % \ - (source, destination)) - - if dry_run: return - - os.link(source, destination) - os.chmod(destination, - stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH ) - -def link_all_eland_lanes(library_path, flowcell_dir, flowcell_id, lane, dry_run): - """ - find eland files at different alignment lengths - and put each of those in the file - """ - lanes = find_lanes(flowcell_dir, flowcell_id, lane) - for lane_pathname in lanes: - long_name = make_long_lane_name(flowcell_dir, - lane_pathname) - long_pathname = os.path.join(library_path, long_name) - carefully_make_hardlink(lane_pathname, - long_pathname, - dry_run) - -def link_srf_lanes(srf_names, library_path, srf_dir, flowcell_id, lane, dry_run): - """ - Link srf files into our library directories. - - the srf files must be named: - _____.srf - """ - srf_basename = srf_names.get(flowcell_id, None) - if srf_basename is None: - logging.info("srf file for %s was not found", flowcell_id) - else: - srf_filename = "%s_%s.srf" % (srf_basename, lane) - source = os.path.join(srf_dir, srf_filename) - destination = os.path.join(library_path, srf_filename) - carefully_make_hardlink(source, destination, dry_run) - - -def make_library_tree(fcdb, library_dir, flowcell_dir, srfs_dir, - dry_run=False): - """ - Iterate over the library - """ - library_dir = os.path.normpath(library_dir) + os.path.sep - flowcell_dir = os.path.normpath(flowcell_dir) + os.path.sep - srfs_dir = os.path.normpath(srfs_dir) + os.path.sep - - srf_names = parse_srf_directory(srfs_dir) - - for lib_id, lib in fcdb.library.items(): - library_path = os.path.join(library_dir, str(lib_id)) - if not os.path.exists(library_path): - os.mkdir(library_path) - - for flowcell_id, lane in lib.get('lanes', []): - link_all_eland_lanes(library_path, - flowcell_dir, - flowcell_id, - lane, - dry_run) - - link_srf_lanes(srf_names, - library_path, - srfs_dir, - flowcell_id, - lane, - dry_run) - -def make_parser(): - """ - Make parser - """ - parser = OptionParser() - parser.add_option('-c', '--config', default=None, - help='path to a configuration file containing a ' - 'sequence archive section') - - parser.add_option("-d", "--database", dest="database", - help="path to the fctracker.db", - default=None) - parser.add_option('-a', '--sequence-archive', default=None, - help='path to where the sequence archive lives') - parser.add_option("-w", "--where", dest="where", - help="add a where clause", - default=None) - - parser.add_option("--dry-run", dest="dry_run", action="store_true", - default=False, - help="Don't modify the filesystem") - return parser - -def main(argv=None): - logging.basicConfig(level=logging.INFO) - - FRONTEND_NAME = 'frontend' - SECTION_NAME = 'sequence_archive' - DATABASE_OPT = 'database_name' - ARCHIVE_OPT = 'archive_path' - - if argv is None: - argv = [] - parser = make_parser() - - # parse command line arguments - opt, args = parser.parse_args(argv) - - # figure out what config file to read - config_path = [os.path.expanduser('~/.htsworkflow.ini'), - '/etc/htsworkflow.ini'] - if opt.config is not None: - config_path = [opt.config] - - # parse options from config file - config_file = SafeConfigParser() - config_file.read(config_path) - - # load defaults from config file if not overriden by the command line - print opt.database - if opt.database is None and \ - config_file.has_option(FRONTEND_NAME, DATABASE_OPT): - opt.database = config_file.get(FRONTEND_NAME, DATABASE_OPT) - - if opt.sequence_archive is None and \ - config_file.has_option(SECTION_NAME, ARCHIVE_OPT): - opt.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT) - - # complain if critical things are missing - if opt.database is None: - parser.error('Need location of htsworkflow frontend database') - - if opt.sequence_archive is None: - parser.error('Need the root path for the sequence archive') - - fcdb = fctracker.fctracker(opt.database) - cells = fcdb._get_flowcells(opt.where) - - library_dir = os.path.join(opt.sequence_archive, 'libraries') - flowcell_dir = os.path.join(opt.sequence_archive, 'flowcells') - srfs_dir = os.path.join(opt.sequence_archive, 'srfs') - make_library_tree(fcdb, - library_dir, flowcell_dir, srfs_dir, - opt.dry_run) - - return 0 - -if __name__ == "__main__": - rv = main(sys.argv[1:]) - # sys.exit(rv) diff --git a/scripts/makebed b/scripts/makebed deleted file mode 100755 index 577b868..0000000 --- a/scripts/makebed +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python -import optparse -import sys -import os - -from htsworkflow.util.opener import autoopen -from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description - -def make_parser(): - parser = optparse.OptionParser() - parser.add_option('-e', '--eland', dest='inname', - help='specify input eland filename') - parser.add_option('-b', '--bed', dest='outname', - help='specify output befilename') - parser.add_option('-n', '--name', dest='name', - help='specify the track (short) name.', - default=None) - parser.add_option('-d', '--description', dest='description', - help='specify the track description', - default=None) - parser.add_option('--chromosome', dest='prefix', - help='Set the chromosome prefix name. defaults to "chr"', - default='chr') - parser.add_option("--database", dest='database', - help="specify location of fctracker database", - default=None) - parser.add_option("--flowcell", dest='flowcell', - help="compute name and description from database using flowcell id", - default=None) - parser.add_option("--lane", dest='lane', - help='specify which lane to use when retrieving description from database', - default=None) - - multi = optparse.OptionGroup(parser, 'Multi-read ELAND support') - - multi.add_option('-m', '--multi', action='store_true', - help='Enable parsing multi-read eland files', - default=False) - multi.add_option('--reads', type='int', - help='limit reporting multi reads to this many reads' - '(most usefully --reads=1 will turn a multi-read ' - 'file into a single read file)', - default=255) - parser.add_option_group(multi) - - return parser - -def main(command_line=None): - instream = None - outstream = None - - if command_line is None: - command_line = sys.argv[1:] - - parser = make_parser() - (options, args) = parser.parse_args(command_line) - - if options.inname is None: - parser.error("Need eland input file name") - return 1 - - if options.inname == '-': - instream = sys.stdin - elif os.path.exists(options.inname): - instream = autoopen(options.inname, 'r') - else: - parser.error('%s was not found' % (options.inname)) - return 1 - - # figure out name for output file - if options.outname is None: - # if outname wasn't defined, and we're reading from stdout - if instream is sys.stdin: - # write to stdout - outstream = sys.stdout - else: - # if there's a name write to name.bed - options.outname = os.path.splitext(options.inname)[0]+'.bed' - print >>sys.stderr, "defaulting to outputname", options.outname - elif options.outname == '-': - outstream = sys.stdout - - if outstream is None: - if os.path.exists(options.outname): - parser.error("not overwriting %s" % (options.outname)) - return 1 - else: - outstream = open(options.outname, 'w') - - if options.flowcell is not None and options.lane is not None: - # get our name/description out of the database - name, description = make_description( - options.database, options.flowcell, options.lane - ) - else: - name = options.name - description = options.description - - if options.multi: - make_bed_from_multi_eland_stream(instream, outstream, - name, description, - options.prefix, - options.reads) - - else: - make_bed_from_eland_stream(instream, outstream, - name, description, - options.prefix) - return 0 - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) - diff --git a/scripts/rerun_eland.py b/scripts/rerun_eland.py deleted file mode 100644 index af06cdd..0000000 --- a/scripts/rerun_eland.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python - -import logging -from optparse import OptionParser -import os -import subprocess -import sys - -from htsworkflow.pipelines import gerald -from htsworkflow.pipelines.eland import extract_eland_sequence -from htsworkflow.pipelines import runfolder - -def make_query_filename(eland_obj, output_dir): - query_name = '%s_%s_eland_query.txt' - query_name %= (eland_obj.sample_name, eland_obj.lane_id) - - query_pathname = os.path.join(output_dir, query_name) - - if os.path.exists(query_pathname): - logging.warn("overwriting %s" % (query_pathname,)) - - return query_pathname - -def make_result_filename(eland_obj, output_dir): - result_name = '%s_%s_eland_result.txt' - result_name %= (eland_obj.sample_name, eland_obj.lane_id) - - result_pathname = os.path.join(output_dir, result_name) - - if os.path.exists(result_pathname): - logging.warn("overwriting %s" % (result_pathname,)) - - return result_pathname - -def extract_sequence(inpathname, query_pathname, length, dry_run=False): - logging.info('extracting %d bases' %(length,)) - logging.info('extracting from %s' %(inpathname,)) - logging.info('extracting to %s' %(query_pathname,)) - - if not dry_run: - try: - instream = open(inpathname, 'r') - outstream = open(query_pathname, 'w') - extract_eland_sequence(instream, outstream, 0, length) - finally: - outstream.close() - instream.close() - -def run_eland(length, query_name, genome, result_name, multi=False, dry_run=False): - cmdline = ['eland_%d' % (length,), query_name, genome, result_name] - if multi: - cmdline += ['--multi'] - - logging.info('running eland: ' + " ".join(cmdline)) - if not dry_run: - return subprocess.Popen(cmdline) - else: - return None - - -def rerun(gerald_dir, output_dir, length=25, dry_run=False): - """ - look for eland files in gerald_dir and write a subset to output_dir - """ - logging.info("Extracting %d bp from files in %s" % (length, gerald_dir)) - g = gerald.gerald(gerald_dir) - - # this will only work if we're only missing the last dir in output_dir - if not os.path.exists(output_dir): - logging.info("Making %s" %(output_dir,)) - if not dry_run: os.mkdir(output_dir) - - processes = [] - for lane_id, lane_param in g.lanes.items(): - eland = g.eland_results[lane_id] - - inpathname = eland.pathname - query_pathname = make_query_filename(eland, output_dir) - result_pathname = make_result_filename(eland, output_dir) - - extract_sequence(inpathname, query_pathname, length, dry_run=dry_run) - - p = run_eland(length, - query_pathname, - lane_param.eland_genome, - result_pathname, - dry_run=dry_run) - if p is not None: - processes.append(p) - - for p in processes: - p.wait() - -def make_parser(): - usage = '%prog: [options] runfolder' - - parser = OptionParser(usage) - - parser.add_option('--gerald', - help='specify location of GERALD directory', - default=None) - parser.add_option('-o', '--output', - help='specify output location of files', - default=None) - parser.add_option('-l', '--read-length', type='int', - help='specify new eland length', - dest='length', - default=25) - parser.add_option('--dry-run', action='store_true', - help='only pretend to run', - default=False) - parser.add_option('-v', '--verbose', action='store_true', - help='increase verbosity', - default=False) - - return parser - - -def main(cmdline=None): - logging.basicConfig(level=logging.WARNING) - - parser = make_parser() - opts, args = parser.parse_args(cmdline) - - if opts.length < 16 or opts.length > 32: - parser.error("eland can only process reads in the range 16-32") - - if len(args) > 1: - parser.error("Can only process one runfolder directory") - elif len(args) == 1: - runs = runfolder.get_runs(args[0]) - if len(runs) != 1: - parser.error("Not a runfolder") - opts.gerald = runs[0].gerald.pathname - if opts.output is None: - opts.output = os.path.join( - runs[0].pathname, - 'Data', - # pythons 0..n ==> elands 1..n+1 - 'C1-%d' % (opts.length+1,) - ) - - elif opts.gerald is None: - parser.error("need gerald directory") - - if opts.output is None: - parser.error("specify location for the new eland files") - - if opts.verbose: - root_logger = logging.getLogger() - root_logger.setLevel(logging.INFO) - - rerun(opts.gerald, opts.output, opts.length, dry_run=opts.dry_run) - - return 0 - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/scripts/runner b/scripts/runner deleted file mode 100644 index 560299f..0000000 --- a/scripts/runner +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -import sys -from htsworkflow.automation.runner import main - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/scripts/spoolwatcher b/scripts/spoolwatcher deleted file mode 100644 index b2f833e..0000000 --- a/scripts/spoolwatcher +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -import sys -from htsworkflow.automation.spoolwatcher import main - -if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) diff --git a/test/test_copier.py b/test/test_copier.py deleted file mode 100644 index 9a1b738..0000000 --- a/test/test_copier.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest - -from StringIO import StringIO -from htsworkflow.automation import copier - -class testCopier(unittest.TestCase): - def test_runfolder_validate(self): - self.failUnlessEqual(copier.runfolder_validate(""), False) - self.failUnlessEqual(copier.runfolder_validate("1345_23"), False) - self.failUnlessEqual(copier.runfolder_validate("123456_asdf-$23'"), False) - self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44"), True) - self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44 "), False) - - def test_empty_config(self): - cfg = StringIO("""[fake] -something: unrelated -""") - bot = copier.CopierBot('fake', configfile=cfg) - self.failUnlessRaises(RuntimeError, bot.read_config) - - def test_full_config(self): - cfg = StringIO("""[copier] -jid: copier@example.fake -password: badpassword -authorized_users: user1@example.fake user2@example.fake -rsync_password_file: ~/.sequencer -rsync_source: /tmp/sequencer_source -rsync_destination: /tmp/sequencer_destination -notify_users: user3@example.fake -# who to run to -#runner: -""") - c = copier.CopierBot("copier", configfile=cfg) - c.read_config() - self.failUnlessEqual(c.jid, 'copier@example.fake') - self.failUnlessEqual(c.cfg['password'], 'badpassword') - self.failUnlessEqual(len(c.authorized_users), 2) - self.failUnlessEqual(c.authorized_users[0], 'user1@example.fake') - self.failUnlessEqual(c.authorized_users[1], 'user2@example.fake') - self.failUnlessEqual(c.rsync.source_base, '/tmp/sequencer_source') - self.failUnlessEqual(c.rsync.dest_base, '/tmp/sequencer_destination') - self.failUnlessEqual(len(c.notify_users), 1) - self.failUnlessEqual(c.notify_users[0], 'user3@example.fake') - - def test_dirlist_filter(self): - """ - test our dir listing parser - """ - # everyone should have a root dir, and since we're not - # currently writing files... it should all be good - r = copier.rsync('/', '/', '/') - - listing = [ - 'drwxrwxr-x 0 2007/12/29 12:34:56 071229_USI-EAS229_001_FC1234\n', - '-rwxrw-r-- 123268 2007/12/29 17:39:31 2038EAAXX.rtf\n', - '-rwxrw-r-- 6 2007/12/29 15:10:29 New Text Document.txt\n', - ] - - result = r.list_filter(listing) - self.failUnlessEqual(len(result), 1) - self.failUnlessEqual(result[0][-1], '4') - -def suite(): - return unittest.makeSuite(testCopier,'test') - -if __name__ == "__main__": - unittest.main(defaultTest="suite") diff --git a/test/tree.py b/test/tree.py deleted file mode 100644 index 4f666cc..0000000 --- a/test/tree.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python - -""" -Build a fake directory tree for testing rsync management code. -""" - -import os -import random - -def make_random_string(length=8): - """Make a random string, length characters long - """ - symbols = "abcdefhijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" - name = [] - for i in xrange(length): - name.append(random.choice(symbols)) - return "".join(name) - -def make_file(pathname): - """Make a file with some random stuff in it - """ - stream = open(pathname,'w') - stream.write(make_random_string(16)) - stream.close() - -def make_tree(root, depth=3, directories=5, files=10): - """ - Make a tree of random directories and files - - depth is how many levels of subdirectories - directories is how many directories each subdirectory should have - files is how many files to create in each directory - """ - if not os.path.exists(root): - os.mkdir(root) - - paths = [] - # make files - for i in range(files): - name = make_random_string() - paths.append(name) - pathname = os.path.join(root, name) - make_file(pathname) - - # make subdirectories if we still have some depth to go - if depth > 0: - for i in range(directories): - name = make_random_string() - # paths.append(name) - pathname = os.path.join(root, name) - subpaths = make_tree(pathname, depth-1, directories, files) - paths.extend([ os.path.join(name, x) for x in subpaths ]) - - return paths - -def generate_paths(root): - """Make a list of relative paths like generated by make_tree - """ - paths = [] - for curdir, subdirs, files in os.walk(root): - paths.extend([ os.path.join(curdir, f) for f in files ]) - - # an inefficient way of getting the correct common prefix - # (e.g. root might not have a trailing /) - common_root = os.path.commonprefix(paths) - common_len = len(common_root) - return [ p[common_len:] for p in paths ] - -def compare_tree(root, paths, verbose=False): - """Make sure the tree matches our relative list of paths - """ - # what we find when we look - experimental_set = set(generate_paths(root)) - # what we expect - theoretical_set = set(paths) - # true if the difference of the two sets is the empty set - difference = experimental_set - theoretical_set - issame = (len(difference) == 0) - if verbose and not issame: - print difference - return issame -- 2.30.2