Fix typo in srf command
[htsworkflow.git] / scripts / make-library-tree
index 57414c466b0b3853ed2272b4d221c7b10884a96f..50ce7a932ef263300c70d2e9483f18431ced6f5d 100644 (file)
@@ -1,18 +1,20 @@
+#!/usr/bin/python
 """
 Make a tree of symlinks organized by library id.
 """
+from ConfigParser import SafeConfigParser
 from glob import glob
 import logging
 from optparse import OptionParser
+import logging
 import os
 import stat
 import sys
 
 from htsworkflow.util import fctracker
 
-
 def find_lanes(flowcell_dir, flowcell_id, lane):
-    lane_name = "s_%s_eland_result*" %(lane)
+    lane_name = "s_%s_eland_*" %(lane)
     pattern = os.path.join(flowcell_dir, flowcell_id, "*", lane_name)
     lanes = glob(pattern)
     return lanes
@@ -40,7 +42,9 @@ def parse_srf_directory(srf_dir):
         path, filename = os.path.split(pathname)
         basename, ext = os.path.splitext(filename)
         record = basename.split('_')
-        assert len(record) == 6
+        if len(record) != 6:
+            logging.error("Unrecognized srf file: %s expected 6 fields got %d" % (pathname,len(record)))
+            continue
 
         site = record[0]
         date = record[1]
@@ -62,7 +66,7 @@ def carefully_make_hardlink(source, destination, dry_run=False):
     the same as the link we want to make.
     If the link already exists and is different, throw an error.
     """
-    logging.debug("%s -> %s", source, destination)
+    logging.debug("CHECKING: %s -> %s", source, destination)
 
     if not os.path.exists(source):
         logging.warning("%s doesn't exist", source)
@@ -70,10 +74,12 @@ def carefully_make_hardlink(source, destination, dry_run=False):
 
     if os.path.exists(destination):
         if os.path.samefile(source, destination):
+            logging.debug('SAME: %s -> %s' % (source, destination))
             return
         else:
             raise IOError('%s and %s are different files' % \
                            (source, destination))
+    logging.info('Linking: %s -> %s' % (source, destination))
 
     if dry_run: return 
 
@@ -147,33 +153,83 @@ def make_parser():
     Make parser
     """
     parser = OptionParser()
-    parser.add_option("-d", "--database", dest="database",
+    parser.add_option('-c', '--config', default=None,
+                      help='path to a configuration file containing a '
+                           'sequence archive section')
+                      
+    parser.add_option("--database", dest="database",
                       help="path to the fctracker.db",
                       default=None)
+    parser.add_option('-a', '--sequence-archive', default=None,
+                      help='path to where the sequence archive lives')
     parser.add_option("-w", "--where", dest="where",
                       help="add a where clause",
                       default=None)
+
+    parser.add_option('-v', '--verbose', action='store_true', default=False,
+                      help='be more verbose')
+    parser.add_option('-d', '--debug', action='store_true', default=False,
+                      help='report everything')
+             
     parser.add_option("--dry-run", dest="dry_run", action="store_true",
                       default=False,
                       help="Don't modify the filesystem")
     return parser
 
 def main(argv=None):
-    logging.basicConfig(level=logging.INFO)
+    FRONTEND_NAME = 'frontend'
+    SECTION_NAME = 'sequence_archive'
+    DATABASE_OPT = 'database_name'
+    ARCHIVE_OPT = 'archive_path'
 
     if argv is None:
         argv = []
     parser = make_parser()
 
+    # parse command line arguments
     opt, args = parser.parse_args(argv)
+
+    # setup logging
+    level = logging.WARN
+    if opt.verbose:
+        level = logging.INFO
+    if opt.debug:
+        level = logging.DEBUG
+    logging.basicConfig(level=level)
+
+    # figure out what config file to read
+    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
+                   '/etc/htsworkflow.ini']
+    if opt.config is not None:
+        config_path = [opt.config]
     
+    # parse options from config file
+    config_file = SafeConfigParser()
+    config_file.read(config_path)
+
+    # load defaults from config file if not overridden by the command line
+    print opt.database
+    if opt.database is None and \
+       config_file.has_option(FRONTEND_NAME, DATABASE_OPT):
+        opt.database = config_file.get(FRONTEND_NAME, DATABASE_OPT)
+
+    if opt.sequence_archive is None and \
+       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
+        opt.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
+  
+    # complain if critical things are missing
+    if opt.database is None:
+       parser.error('Need location of htsworkflow frontend database')
+
+    if opt.sequence_archive is None:
+       parser.error('Need the root path for the sequence archive')
+
     fcdb = fctracker.fctracker(opt.database)
     cells = fcdb._get_flowcells(opt.where)
 
-    root_dir = '/woldlab/loxcyc/data00/solexa-sequence'
-    library_dir = os.path.join(root_dir, 'libraries')
-    flowcell_dir = os.path.join(root_dir, 'flowcells')
-    srfs_dir = os.path.join(root_dir, 'srfs')
+    library_dir = os.path.join(opt.sequence_archive, 'libraries')
+    flowcell_dir = os.path.join(opt.sequence_archive, 'flowcells')
+    srfs_dir = os.path.join(opt.sequence_archive, 'srfs')
     make_library_tree(fcdb, 
                       library_dir, flowcell_dir, srfs_dir, 
                       opt.dry_run)