"""Make a tree of hard links organized by library id."""
5 from ConfigParser import SafeConfigParser
8 from optparse import OptionParser
14 from htsworkflow.util import fctracker
def find_lanes(flowcell_dir, flowcell_id, lane):
    """
    Find the eland result files for one lane of a flowcell.

    Globs for <flowcell_dir>/<flowcell_id>/*/s_<lane>_eland_* and
    returns the matching pathnames as a sorted list.  The list is
    empty when nothing matches.
    """
    # Imported locally so this function does not depend on how the
    # module-level imports are spelled.
    from glob import glob

    # (lane,) keeps the format well defined even if lane is a tuple.
    lane_name = "s_%s_eland_*" % (lane,)
    pattern = os.path.join(flowcell_dir, flowcell_id, "*", lane_name)
    # glob order depends on the filesystem; sort for repeatable runs.
    return sorted(glob(pattern))
def make_long_lane_name(flowcell_dir, lane_pathname):
    """
    make a name from the eland result file name

    Strips the leading flowcell_dir from lane_pathname and replaces
    every remaining path separator with an underscore.

    Returns None when lane_pathname does not start with flowcell_dir.
    """
    if not lane_pathname.startswith(flowcell_dir):
        return None
    subpath = lane_pathname[len(flowcell_dir):]
    return subpath.replace(os.path.sep, "_")
def parse_srf_directory(srf_dir):
    """
    search srf_dir for *.srf files

    builds a dictionary indexed by flowcell name.

    Each file name (without the extension) must consist of six
    underscore separated fields:
    <site>_<date>_<machine>_<run>_<flowcellid>_<lane>
    The key is the flowcell id and the value is the first five
    fields joined back together with underscores.  The lane field is
    not stored.

    Raises ValueError if a file name has a different number of fields.
    """
    flowcells = {}
    for pathname in glob(os.path.join(srf_dir, '*.srf')):
        basename = os.path.splitext(os.path.basename(pathname))[0]
        record = basename.split('_')
        # Explicit check rather than assert, so it still runs under -O
        # and reports which file is misnamed.
        if len(record) != 6:
            raise ValueError(
                "%s: expected 6 '_' separated fields, found %d"
                % (pathname, len(record)))
        flowcellid = record[4]
        flowcells[flowcellid] = "_".join(record[:5])
    return flowcells
def carefully_make_hardlink(source, destination, dry_run=False):
    """
    Make a hard link, failing if a different link already exists

    Checking to see if the link already exists and is
    the same as the link we want to make.
    If the link already exists and is different, throw an error.

    A missing source is logged as a warning and otherwise ignored.
    With dry_run set the link is only logged, never created.

    Raises IOError if destination exists and is not the same file
    as source.
    """
    logging.debug("%s -> %s", source, destination)

    if not os.path.exists(source):
        logging.warning("%s doesn't exist", source)
        return

    if os.path.exists(destination):
        if os.path.samefile(source, destination):
            # already linked the way we want it; nothing to do
            return
        raise IOError('%s and %s are different files' %
                      (source, destination))

    logging.info('Linking: %s -> %s', source, destination)
    if dry_run:
        return

    os.link(source, destination)
    # Make the link read-only for everyone.  A hard link shares its
    # permission bits with source, so this changes source as well.
    os.chmod(destination,
             stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
def link_all_eland_lanes(library_path, flowcell_dir, flowcell_id, lane, dry_run):
    """
    find eland files at different alignment lengths
    and put each of those in the library directory

    Every file returned by find_lanes() for this flowcell and lane is
    hard linked into library_path under the name produced by
    make_long_lane_name().  dry_run is passed through to
    carefully_make_hardlink().
    """
    for lane_pathname in find_lanes(flowcell_dir, flowcell_id, lane):
        long_name = make_long_lane_name(flowcell_dir, lane_pathname)
        if long_name is None:
            # make_long_lane_name() could not strip flowcell_dir;
            # joining None onto a path would raise, so skip the file.
            logging.warning("%s is not under %s, skipping",
                            lane_pathname, flowcell_dir)
            continue
        long_pathname = os.path.join(library_path, long_name)
        carefully_make_hardlink(lane_pathname, long_pathname, dry_run)
def link_srf_lanes(srf_names, library_path, srf_dir, flowcell_id, lane, dry_run):
    """
    Link srf files into our library directories.

    the srf files must be named:
    <site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf

    srf_names maps a flowcell id to the file name prefix up to and
    including the flowcell id.  When flowcell_id is not in srf_names
    the miss is logged and nothing is linked.  dry_run is passed
    through to carefully_make_hardlink().
    """
    srf_basename = srf_names.get(flowcell_id)
    if srf_basename is None:
        logging.info("srf file for %s was not found", flowcell_id)
        # Without a prefix there is no file name to build.
        return

    srf_filename = "%s_%s.srf" % (srf_basename, lane)
    source = os.path.join(srf_dir, srf_filename)
    destination = os.path.join(library_path, srf_filename)
    carefully_make_hardlink(source, destination, dry_run)
def make_library_tree(fcdb, library_dir, flowcell_dir, srfs_dir,
                      dry_run=False):
    """
    Iterate over the library

    For every entry of fcdb.library a directory named after the
    library id is created under library_dir.  Each (flowcell_id, lane)
    pair in the entry's 'lanes' list then has its eland files and its
    srf file hard linked into that directory.

    With dry_run set nothing is created; the actions are only logged.
    """
    # The trailing separator matters for flowcell_dir:
    # make_long_lane_name() strips it as a plain string prefix.
    library_dir = os.path.normpath(library_dir) + os.path.sep
    flowcell_dir = os.path.normpath(flowcell_dir) + os.path.sep
    srfs_dir = os.path.normpath(srfs_dir) + os.path.sep

    srf_names = parse_srf_directory(srfs_dir)

    for lib_id, lib in fcdb.library.items():
        library_path = os.path.join(library_dir, str(lib_id))
        if not os.path.exists(library_path):
            if dry_run:
                # --dry-run promises not to modify the filesystem
                logging.info("Would create %s", library_path)
            else:
                os.mkdir(library_path)

        for flowcell_id, lane in lib.get('lanes', []):
            link_all_eland_lanes(library_path,
                                 flowcell_dir,
                                 flowcell_id,
                                 lane,
                                 dry_run)
            link_srf_lanes(srf_names,
                           library_path,
                           srfs_dir,
                           flowcell_id,
                           lane,
                           dry_run)
def make_parser():
    """
    Build the command line option parser for this script.

    Every option defaults to None or False, so main() can tell which
    values still need to be filled in from the configuration file.
    """
    parser = OptionParser()
    parser.add_option('-c', '--config', default=None,
                      help='path to a configuration file containing a '
                           'sequence archive section')

    parser.add_option("--database", dest="database",
                      help="path to the fctracker.db",
                      default=None)
    parser.add_option('-a', '--sequence-archive', default=None,
                      help='path to where the sequence archive lives')
    parser.add_option("-w", "--where", dest="where",
                      help="add a where clause",
                      default=None)

    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='be more verbose')
    parser.add_option('-d', '--debug', action='store_true', default=False,
                      help='report everything')

    parser.add_option("--dry-run", dest="dry_run", action="store_true",
                      default=False,
                      help="Don't modify the filesystem")
    return parser
def main(argv=None):
    """
    Command line entry point.

    argv is the list of command line arguments without the program
    name; None lets optparse read sys.argv[1:].  The database path
    and the archive root come from the command line, or failing that
    from the configuration file.  If either is still missing the
    parser reports the error and exits.
    """
    FRONTEND_NAME = 'frontend'
    SECTION_NAME = 'sequence_archive'
    DATABASE_OPT = 'database_name'
    ARCHIVE_OPT = 'archive_path'

    parser = make_parser()

    # parse command line arguments
    opt, args = parser.parse_args(argv)

    # pick the log level; --debug wins over --verbose
    level = logging.WARNING
    if opt.verbose:
        level = logging.INFO
    if opt.debug:
        level = logging.DEBUG
    logging.basicConfig(level=level)

    # figure out what config file to read
    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
                   '/etc/htsworkflow.ini']
    if opt.config is not None:
        config_path = [opt.config]

    # parse options from config file
    config_file = SafeConfigParser()
    config_file.read(config_path)

    # load defaults from config file if not overridden by the command line
    if opt.database is None and \
       config_file.has_option(FRONTEND_NAME, DATABASE_OPT):
        opt.database = config_file.get(FRONTEND_NAME, DATABASE_OPT)

    if opt.sequence_archive is None and \
       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
        opt.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)

    # complain if critical things are missing
    if opt.database is None:
        parser.error('Need location of htsworkflow frontend database')

    if opt.sequence_archive is None:
        parser.error('Need the root path for the sequence archive')

    fcdb = fctracker.fctracker(opt.database)
    # NOTE(review): the return value is not used here.  Presumably the
    # call is kept for its effect on fcdb (applying the --where
    # filter) -- confirm against fctracker before removing it.
    fcdb._get_flowcells(opt.where)

    library_dir = os.path.join(opt.sequence_archive, 'libraries')
    flowcell_dir = os.path.join(opt.sequence_archive, 'flowcells')
    srfs_dir = os.path.join(opt.sequence_archive, 'srfs')
    make_library_tree(fcdb,
                      library_dir, flowcell_dir, srfs_dir,
                      opt.dry_run)
if __name__ == "__main__":
    # Pass main()'s return value to the shell as the exit status
    # (None counts as success) instead of dropping it.
    sys.exit(main(sys.argv[1:]))