+++ /dev/null
-"""
-Make a tree of hard links organized by library id.
-"""
-from ConfigParser import SafeConfigParser
-from glob import glob
-import logging
-from optparse import OptionParser
-import os
-import stat
-import sys
-
-from htsworkflow.util import fctracker
-
-
def find_lanes(flowcell_dir, flowcell_id, lane):
    """
    Return the eland result files found for one lane of a flowcell.

    Globs for <flowcell_dir>/<flowcell_id>/*/s_<lane>_eland_result* and
    returns the list of matching pathnames (empty when nothing matches).
    """
    return glob(os.path.join(flowcell_dir,
                             flowcell_id,
                             "*",
                             "s_%s_eland_result*" % lane))
-
def make_long_lane_name(flowcell_dir, lane_pathname):
    """
    Build a flat name for an eland result file from its pathname.

    When lane_pathname begins with flowcell_dir, that prefix is dropped
    and every path separator in the remainder becomes an underscore.
    Returns None if lane_pathname does not begin with flowcell_dir.
    """
    prefix_length = len(flowcell_dir)
    if lane_pathname[:prefix_length] != flowcell_dir:
        return None
    return lane_pathname[prefix_length:].replace(os.path.sep, "_")
-
def parse_srf_directory(srf_dir):
    """
    Search srf_dir for *.srf files.

    Builds a dictionary indexed by flowcell id. Each value is the srf
    file name with its trailing "_<lane>.srf" removed, i.e.
    <site>_<date>_<machine>_<run>_<flowcellid>.

    File names that do not consist of exactly six underscore-separated
    fields are reported with a warning and skipped.
    """
    flowcells = {}
    for pathname in glob(os.path.join(srf_dir, '*.srf')):
        basename = os.path.splitext(os.path.basename(pathname))[0]
        record = basename.split('_')
        # Explicit check rather than an assert: asserts are removed under
        # "python -O", and one stray file should not abort the whole run.
        if len(record) != 6:
            logging.warning(
                "%s is not named "
                "<site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf,"
                " skipping",
                pathname)
            continue

        # record[4] is the flowcell id. The lane id (record[5]) is left out
        # of the description, so every lane of a flowcell maps to one entry.
        flowcells[record[4]] = "_".join(record[:5])
    return flowcells
-
-
def carefully_make_hardlink(source, destination, dry_run=False):
    """
    Hard link source to destination unless something else is already there.

    Nothing is done (beyond a warning) when source does not exist, and
    nothing is done when destination already refers to the same file as
    source. With dry_run set, all checks run but no link is created.

    After linking, destination is made read-only for user, group and
    others.
    NOTE(review): a hard link presumably shares its permissions with
    source, so the chmod would affect source as well -- confirm.

    Raises IOError if destination exists and is a different file.
    """
    logging.debug("%s -> %s", source, destination)

    if not os.path.exists(source):
        logging.warning("%s doesn't exist", source)
        return

    if os.path.exists(destination):
        if not os.path.samefile(source, destination):
            raise IOError('%s and %s are different files' %
                          (source, destination))
        # the link we want is already in place
        return

    if not dry_run:
        read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
        os.link(source, destination)
        os.chmod(destination, read_only)
-
def link_all_eland_lanes(library_path, flowcell_dir, flowcell_id, lane, dry_run):
    """
    Link every eland result file of one lane into a library directory.

    A lane may have eland files at different alignment lengths; each file
    found by find_lanes is hard linked into library_path under the flat
    name produced by make_long_lane_name.

    make_long_lane_name returns None for a pathname that does not start
    with flowcell_dir. Such files are reported with a warning and skipped
    rather than handing None to os.path.join.
    """
    for lane_pathname in find_lanes(flowcell_dir, flowcell_id, lane):
        long_name = make_long_lane_name(flowcell_dir, lane_pathname)
        if long_name is None:
            logging.warning("%s is not below %s, skipping",
                            lane_pathname, flowcell_dir)
            continue

        carefully_make_hardlink(lane_pathname,
                                os.path.join(library_path, long_name),
                                dry_run)
-
def link_srf_lanes(srf_names, library_path, srf_dir, flowcell_id, lane, dry_run):
    """
    Hard link the srf file of one flowcell lane into a library directory.

    srf_names maps a flowcell id to the common prefix of its srf files;
    the file linked is <prefix>_<lane>.srf, taken from srf_dir and placed
    under the same name in library_path. The srf files must be named:
      <site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf

    When srf_names has no entry for flowcell_id this is logged and
    nothing is linked.
    """
    flowcell_prefix = srf_names.get(flowcell_id, None)
    if flowcell_prefix is None:
        logging.info("srf file for %s was not found", flowcell_id)
        return

    lane_filename = "%s_%s.srf" % (flowcell_prefix, lane)
    carefully_make_hardlink(os.path.join(srf_dir, lane_filename),
                            os.path.join(library_path, lane_filename),
                            dry_run)
-
-
def make_library_tree(fcdb, library_dir, flowcell_dir, srfs_dir,
                      dry_run=False):
    """
    Create one directory per library and link its lane files into it.

    fcdb must provide a `library` mapping of library id to a dict-like
    record; the record's optional 'lanes' entry is iterated as
    (flowcell_id, lane) pairs. For every pair the eland result files
    below flowcell_dir and the matching srf file in srfs_dir are hard
    linked into <library_dir>/<library id>.

    With dry_run set the filesystem is left untouched: no library
    directory is created and no link is made.
    """
    # Normalise and force a trailing separator so that stripping
    # flowcell_dir from a pathname leaves no leading separator behind.
    library_dir = os.path.normpath(library_dir) + os.path.sep
    flowcell_dir = os.path.normpath(flowcell_dir) + os.path.sep
    srfs_dir = os.path.normpath(srfs_dir) + os.path.sep

    srf_names = parse_srf_directory(srfs_dir)

    for lib_id, lib in fcdb.library.items():
        library_path = os.path.join(library_dir, str(lib_id))
        if not os.path.exists(library_path):
            if dry_run:
                # a dry run must not modify the filesystem
                logging.debug("would create %s", library_path)
            else:
                os.mkdir(library_path)

        for flowcell_id, lane in lib.get('lanes', []):
            link_all_eland_lanes(library_path,
                                 flowcell_dir,
                                 flowcell_id,
                                 lane,
                                 dry_run)

            link_srf_lanes(srf_names,
                           library_path,
                           srfs_dir,
                           flowcell_id,
                           lane,
                           dry_run)
-
def make_parser():
    """
    Build the OptionParser for the command line interface.

    Options: -c/--config, -d/--database, -a/--sequence-archive,
    -w/--where (all default to None) and the --dry-run flag
    (defaults to False).
    """
    # (flags, keyword settings) in the order they appear in --help
    option_specs = [
        (('-c', '--config'),
         dict(default=None,
              help='path to a configuration file containing a '
                   'sequence archive section')),
        (("-d", "--database"),
         dict(dest="database",
              help="path to the fctracker.db",
              default=None)),
        (('-a', '--sequence-archive'),
         dict(default=None,
              help='path to where the sequence archive lives')),
        (("-w", "--where"),
         dict(dest="where",
              help="add a where clause",
              default=None)),
        (("--dry-run",),
         dict(dest="dry_run", action="store_true",
              default=False,
              help="Don't modify the filesystem")),
    ]

    parser = OptionParser()
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
    return parser
-
def main(argv=None):
    """
    Command line entry point: build the library tree of a sequence archive.

    argv is the list of command line arguments without the program name;
    None is treated as an empty list.

    The database path and the archive root are taken from the command
    line or, when not given there, from the [frontend] database_name and
    [sequence_archive] archive_path options of the configuration file
    (the file named by --config, otherwise ~/.htsworkflow.ini and
    /etc/htsworkflow.ini). If either value is still missing it is
    reported through parser.error().

    Returns 0 once the tree has been processed.
    """
    logging.basicConfig(level=logging.INFO)

    FRONTEND_NAME = 'frontend'
    SECTION_NAME = 'sequence_archive'
    DATABASE_OPT = 'database_name'
    ARCHIVE_OPT = 'archive_path'

    if argv is None:
        argv = []
    parser = make_parser()

    # parse command line arguments (positional arguments are not used)
    opt, _ = parser.parse_args(argv)

    # figure out what config file to read
    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
                   '/etc/htsworkflow.ini']
    if opt.config is not None:
        config_path = [opt.config]

    # parse options from config file
    config_file = SafeConfigParser()
    config_file.read(config_path)

    # load defaults from config file if not overridden by the command line
    logging.debug("database from command line: %s", opt.database)
    if opt.database is None and \
       config_file.has_option(FRONTEND_NAME, DATABASE_OPT):
        opt.database = config_file.get(FRONTEND_NAME, DATABASE_OPT)

    if opt.sequence_archive is None and \
       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
        opt.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)

    # complain if critical things are missing
    if opt.database is None:
        parser.error('Need location of htsworkflow frontend database')

    if opt.sequence_archive is None:
        parser.error('Need the root path for the sequence archive')

    fcdb = fctracker.fctracker(opt.database)
    # The return value is not needed here.
    # NOTE(review): the call is presumably kept for its effect on fcdb
    # (applying the --where clause) -- confirm against fctracker.
    fcdb._get_flowcells(opt.where)

    library_dir = os.path.join(opt.sequence_archive, 'libraries')
    flowcell_dir = os.path.join(opt.sequence_archive, 'flowcells')
    srfs_dir = os.path.join(opt.sequence_archive, 'srfs')
    make_library_tree(fcdb,
                      library_dir, flowcell_dir, srfs_dir,
                      opt.dry_run)

    return 0
-
if __name__ == "__main__":
    # hand main()'s return value to the shell as the exit status
    sys.exit(main(sys.argv[1:]))