Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow
[htsworkflow.git] / scripts / make-library-tree
diff --git a/scripts/make-library-tree b/scripts/make-library-tree
deleted file mode 100644 (file)
index 2ccbec6..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-#!/usr/bin/env python
-
-from ConfigParser import SafeConfigParser
-
-import logging
-import os
-from optparse import OptionParser
-import stat
-import shelve
-
-from htsworkflow.util import api
-from htsworkflow.pipelines.sequences import scan_for_sequences
-
-def build_flowcell_db(fcdb_filename, sequences, baseurl, apiid, apikey):
-    """
-    Bring the flowcell cache up to date and group sequences by library
-
-    For every sequence the flowcell description is taken from the shelf
-    stored at fcdb_filename.  When the flowcell is not cached yet it is
-    requested with api.retrieve_info() and, if something other than None
-    came back, saved in the shelf.  Sequences for which no flowcell
-    description is available are left out of the library dictionary.
-
-    fcdb_filename -- path handed to shelve.open()
-    sequences -- iterable of objects providing .flowcell and .lane
-    baseurl -- base url passed to api.flowcell_url()
-    apiid, apikey -- credentials passed along to api.retrieve_info()
-
-    Returns (fcdb, libdb): the shelf, synced but still open, and a
-    dictionary mapping library id to the list of its sequences.
-    A KeyError is raised if a flowcell description has no entry for
-    the lane of a sequence.
-    """
-    fcdb = shelve.open(fcdb_filename)
-    libdb = {}
-    apidata = {'apiid': apiid, 'apikey': apikey}
-    for seq in sequences:
-        flowcell = seq.flowcell
-
-        # get info about flowcell from shelf or server
-        # ("in" instead of has_key(), which newer shelves no longer offer)
-        if flowcell in fcdb:
-            flowcell_info = fcdb[flowcell]
-        else:
-            url = api.flowcell_url(baseurl, flowcell)
-            flowcell_info = api.retrieve_info(url, apidata)
-            if flowcell_info is not None:
-                fcdb[flowcell] = flowcell_info
-
-        # without flowcell info there is no library to file this under
-        if flowcell_info is None:
-            continue
-
-        # make library id db; lanes are keyed by their unicode name
-        lane_info = flowcell_info['lane_set'][unicode(seq.lane)]
-        libdb.setdefault(lane_info['library_id'], []).append(seq)
-
-    fcdb.sync()
-    return fcdb, libdb
-
-def carefully_make_hardlink(source, destination, dry_run=False):
-    """
-    Hard link source to destination unless something is already there
-
-    A missing source is reported with a warning.  An existing
-    destination is never replaced: when it is the same file as source
-    there is nothing left to do, otherwise an error is logged and the
-    destination is skipped.
-
-    A new link is made read-only for user, group and others.
-
-    With dry_run set the filesystem is not modified, but the return
-    value still tells whether a link would have been made.
-
-    Returns 1 if a link was (or would have been) made, 0 otherwise.
-    """
-    logging.debug("CHECKING: %s -> %s", source, destination)
-
-    if not os.path.exists(source):
-        logging.warning("%s doesn't exist", source)
-        return 0
-
-    if os.path.exists(destination):
-        # whatever is there stays untouched, we only report what we found
-        if os.path.samefile(source, destination):
-            logging.debug('SAME: %s -> %s', source, destination)
-        else:
-            logging.error('%s and %s are different files, skipping',
-                          source, destination)
-        return 0
-
-    logging.debug('Linking: %s -> %s', source, destination)
-
-    # everything below this point changes the filesystem
-    if dry_run:
-        return 1
-
-    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
-    os.link(source, destination)
-    os.chmod(destination, read_only)
-    return 1
-    
-def make_library_links(root, library_db, dry_run=False):
-    """
-    Make a tree of sequencer roots organized by library id
-
-    root -- root of the library tree; it is created on demand together
-            with the per library directories
-    library_db -- dictionary of SequenceFiles organized by library id
-    dry_run -- when true, only log what would be done and leave the
-               filesystem alone
-
-    Returns the sum of the carefully_make_hardlink() results, that is
-    the number of links made (or that a dry run would have made).
-    """
-    count = 0
-    root = os.path.abspath(root)
-    for lib_id, sequences in library_db.items():
-        target_dir = os.path.join(root, lib_id)
-        if not os.path.exists(target_dir):
-            logging.info("mkdir %s", target_dir)
-            if not dry_run:
-                # makedirs rather than mkdir: root itself may be missing
-                # the first time a library tree is built
-                os.makedirs(target_dir)
-
-        for s in sequences:
-            count += carefully_make_hardlink(s.path,
-                                             s.make_target_name(target_dir),
-                                             dry_run=dry_run)
-    return count
-
-def configure_logging(opts):
-    """
-    Pick the logging level from the command line flags
-
-    opts.debug selects DEBUG, otherwise opts.verbose selects INFO, and
-    without either flag the level is WARN.
-    """
-    if opts.debug:
-        level = logging.DEBUG
-    elif opts.verbose:
-        level = logging.INFO
-    else:
-        level = logging.WARN
-    logging.basicConfig(level=level)
-    
-
-def configure_opts(opts):
-    """
-    Load in options from config file
-
-    Values given on the command line win.  Options that are still None
-    are looked up in the [sequence_archive] section of the file named
-    by opts.config, or of ~/.htsworkflow.ini and /etc/htsworkflow.ini
-    when no config file was named.  A cache location that is not
-    configured anywhere defaults to ~/.flowcelldb.shelve.
-
-    opts.library_tree, opts.flowcells and opts.srfs are derived from
-    the sequence archive path.  If no sequence archive is known they,
-    like opts.sequence_archive, are left as None instead of failing
-    here, so the caller can check for it.
-
-    Returns the updated opts object.
-    """
-    SECTION_NAME = 'sequence_archive'
-    ARCHIVE_OPT = 'sequence_archive'
-    CACHE_OPT = 'cache'
-    HOST_OPT = 'host'
-    APIID_OPT = 'apiid'
-    APIKEY_OPT = 'apikey'
-
-    # figure out what config file to read
-    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
-                   '/etc/htsworkflow.ini']
-    if opts.config is not None:
-        config_path = [opts.config]
-    # parse options from config file
-    config_file = SafeConfigParser()
-    config_file.read(config_path)
-
-    # load defaults from config file if not overridden by the command line
-    if opts.cache is None:
-        if config_file.has_option(SECTION_NAME, CACHE_OPT):
-            # this used to read from FRONTEND_NAME, a name that is not
-            # defined anywhere, so a configured cache raised NameError
-            opts.cache = config_file.get(SECTION_NAME, CACHE_OPT)
-        else:
-            opts.cache = os.path.expanduser('~/.flowcelldb.shelve')
-
-    if opts.sequence_archive is None and \
-       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
-        opts.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
-        opts.sequence_archive = os.path.expanduser(opts.sequence_archive)
-
-    if opts.sequence_archive is None:
-        # os.path.abspath(None) raises, which would hide the real
-        # problem (no archive configured) from whoever called us
-        opts.library_tree = None
-        opts.flowcells = None
-        opts.srfs = None
-    else:
-        opts.sequence_archive = os.path.abspath(opts.sequence_archive)
-        opts.library_tree = os.path.join(opts.sequence_archive, 'libraries')
-        opts.flowcells = os.path.join(opts.sequence_archive, 'flowcells')
-        opts.srfs = os.path.join(opts.sequence_archive, 'srfs')
-
-    if opts.host is None and config_file.has_option(SECTION_NAME, HOST_OPT):
-        opts.host = config_file.get(SECTION_NAME, HOST_OPT)
-
-    if opts.apiid is None and config_file.has_option(SECTION_NAME, APIID_OPT):
-        opts.apiid = config_file.get(SECTION_NAME, APIID_OPT)
-
-    if opts.apikey is None and config_file.has_option(SECTION_NAME, APIKEY_OPT):
-        opts.apikey = config_file.get(SECTION_NAME, APIKEY_OPT)
-
-    return opts
-
-def make_parser():
-    """
-    Build the command line parser
-
-    Every option defaults to None (or False for the flags), so the
-    caller can tell what was given on the command line.
-    """
-    # (option strings, add_option keywords) in the order they show up
-    # in the help output
-    option_table = [
-        (('-c', '--config'),
-         dict(default=None,
-              help='path to a configuration file containing a '
-                   'sequence archive section')),
-        (('--cache',),
-         dict(default=None,
-              help="default flowcell cache")),
-
-        (('--host',),
-         dict(default=None,
-              help="specify http://host for quering flowcell information")),
-        (('--apiid',),
-         dict(default=None,
-              help="API ID to use when retriving information")),
-        (('--apikey',),
-         dict(default=None,
-              help="API Key for when retriving information")),
-
-        (('-a', '--sequence-archive'),
-         dict(default=None,
-              help='path to where the sequence archive lives')),
-
-        (('-v', '--verbose'),
-         dict(action='store_true', default=False,
-              help='be more verbose')),
-        (('-d', '--debug'),
-         dict(action='store_true', default=False,
-              help='report everything')),
-
-        (('--dry-run',),
-         dict(dest='dry_run', action='store_true', default=False,
-              help="Don't modify the filesystem")),
-    ]
-
-    parser = OptionParser()
-    for flags, settings in option_table:
-        parser.add_option(*flags, **settings)
-    return parser
-
-def main(cmdline=None):
-    """
-    Scan for sequence files and hard link them into the library tree
-
-    cmdline -- list of command line arguments, handed to
-               parser.parse_args(); optparse falls back to
-               sys.argv[1:] when it is None
-
-    Positional arguments name the directories to scan; without any the
-    flowcells and srfs directories of the sequence archive are scanned.
-
-    Returns 0.  Missing required settings are reported through
-    parser.error().
-    """
-    parser = make_parser()
-    opts, args = parser.parse_args(cmdline)
-
-    configure_logging(opts)
-    opts = configure_opts(opts)
-
-    # complain if critical things are missing
-    if opts.cache is None:
-        parser.error('Need location of htsworkflow frontend database')
-
-    if opts.sequence_archive is None:
-        parser.error('Need the root path for the sequence archive')
-
-    if args:
-        seq_dirs = [os.path.abspath(f) for f in args]
-    else:
-        seq_dirs = [opts.flowcells, opts.srfs]
-
-    seqs = scan_for_sequences(seq_dirs)
-    fcdb, libdb = build_flowcell_db(opts.cache, seqs, opts.host, opts.apiid, opts.apikey)
-    try:
-        updates = make_library_links(opts.library_tree, libdb,
-                                     dry_run=opts.dry_run)
-        flowcell_count = len(fcdb)
-    finally:
-        # the shelf comes back open; close it even if linking failed
-        fcdb.close()
-
-    # the summary goes out at warning level, which is the level
-    # configure_logging() uses when neither --verbose nor --debug is given
-    logging.warning("%s flowcells in database", flowcell_count)
-    logging.warning("found %s sequence files", len(seqs))
-    logging.warning("%s libraries being checked", len(libdb))
-    logging.warning("%s sequence files were linked", updates)
-
-    return 0
-    
-if __name__ == "__main__":
-    # hand the status returned by main() to the shell instead of dropping it
-    import sys
-    sys.exit(main())