Make a tree of hard links organized by library id.
6 from optparse import OptionParser
11 from htsworkflow.util import fctracker
def find_lanes(flowcell_dir, flowcell_id, lane):
    """
    Find the eland result files for one lane of a flowcell.

    Globs for <flowcell_dir>/<flowcell_id>/*/s_<lane>_eland_result*

    Returns the matching pathnames as a sorted list; the list is
    empty when nothing matches.
    """
    # format with a one-element tuple so a tuple-valued lane cannot be
    # mistaken for the whole argument list of the % operator
    lane_name = "s_%s_eland_result*" % (lane,)
    pattern = os.path.join(flowcell_dir, flowcell_id, "*", lane_name)
    # glob returns names in arbitrary order; sort for reproducible runs
    return sorted(glob(pattern))
def make_long_lane_name(flowcell_dir, lane_pathname):
    """
    Make a flat file name from an eland result pathname.

    The flowcell_dir prefix is removed from lane_pathname and every
    path separator in the remainder is replaced by "_".

    Returns the flattened name, or None when lane_pathname does not
    start with flowcell_dir.
    """
    if not lane_pathname.startswith(flowcell_dir):
        return None

    subpath = lane_pathname[len(flowcell_dir):]
    return subpath.replace(os.path.sep, "_")
def parse_srf_directory(srf_dir):
    """
    Search srf_dir for *.srf files.

    Each file name (without extension) must consist of six fields
    separated by "_":
      <site>_<date>_<machine>_<run>_<flowcellid>_<lane>

    Returns a dictionary indexed by flowcell id whose values are the
    first five fields joined by "_" (the file name without the lane).

    Raises ValueError if a .srf file name does not have six fields.
    """
    flowcells = {}
    for pathname in glob(os.path.join(srf_dir, '*.srf')):
        filename = os.path.basename(pathname)
        basename, ext = os.path.splitext(filename)
        record = basename.split('_')
        # explicit check instead of assert: asserts vanish under python -O
        if len(record) != 6:
            raise ValueError(
                "%s is not named "
                "<site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf"
                % (pathname,))

        site, date, machine, runid, flowcellid, lane = record

        # several lanes of one flowcell map to the same description
        desc = "_".join([site, date, machine, runid, flowcellid])
        flowcells[flowcellid] = desc
    return flowcells
def carefully_make_hardlink(source, destination, dry_run=False):
    """
    Make a hard link, failing if a different link already exists

    Checking to see if the link already exists and is
    the same as the link we want to make.
    If the link already exists and is different, throw an error.

    Nothing is done (beyond a logged warning) when source is missing.
    When dry_run is true all checks still run but nothing is created.
    After linking, destination is made read-only for user, group and
    other.

    Raises IOError if destination exists and is not the same file
    as source.
    """
    logging.debug("%s -> %s", source, destination)

    if not os.path.exists(source):
        logging.warning("%s doesn't exist", source)
        return

    if os.path.exists(destination):
        if os.path.samefile(source, destination):
            # the link we want is already in place
            return
        raise IOError('%s and %s are different files' %
                      (source, destination))

    if dry_run:
        return

    os.link(source, destination)
    os.chmod(destination,
             stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
def link_all_eland_lanes(library_path, flowcell_dir, flowcell_id, lane, dry_run):
    """
    Hard link every eland result file of one lane into library_path.

    The files are located with find_lanes; each one is linked under
    the flattened name produced by make_long_lane_name so results from
    different runs of the same flowcell do not collide.

    dry_run is passed on to carefully_make_hardlink.
    """
    for lane_pathname in find_lanes(flowcell_dir, flowcell_id, lane):
        long_name = make_long_lane_name(flowcell_dir, lane_pathname)
        if long_name is None:
            # make_long_lane_name could not strip the flowcell_dir prefix;
            # os.path.join would fail on None, so skip this file instead
            logging.warning("%s is not under %s", lane_pathname, flowcell_dir)
            continue
        long_pathname = os.path.join(library_path, long_name)
        carefully_make_hardlink(lane_pathname, long_pathname, dry_run)
def link_srf_lanes(srf_names, library_path, srf_dir, flowcell_id, lane, dry_run):
    """
    Link srf files into our library directories.

    the srf files must be named:
    <site>_<date>_<machine>_<run>_<flowcellid>_<lane>.srf

    srf_names maps a flowcell id to the file name without the lane
    field. When flowcell_id has no entry this is logged and nothing
    is linked. dry_run is passed on to carefully_make_hardlink.
    """
    srf_basename = srf_names.get(flowcell_id)
    if srf_basename is None:
        logging.info("srf file for %s was not found", flowcell_id)
        return

    srf_filename = "%s_%s.srf" % (srf_basename, lane)
    source = os.path.join(srf_dir, srf_filename)
    destination = os.path.join(library_path, srf_filename)
    carefully_make_hardlink(source, destination, dry_run)
def make_library_tree(fcdb, library_dir, flowcell_dir, srfs_dir,
                      dry_run=False):
    """
    Iterate over the library

    For every (lib_id, lib) in fcdb.library a directory named after
    lib_id is created under library_dir, and for each
    (flowcell_id, lane) pair in lib['lanes'] the eland result files
    and the srf file are hard linked into that directory.

    When dry_run is true no directory is created and nothing is linked.
    """
    # normalize and end with a separator so the prefix stripping done by
    # make_long_lane_name leaves no leading separator in the name
    library_dir = os.path.normpath(library_dir) + os.path.sep
    flowcell_dir = os.path.normpath(flowcell_dir) + os.path.sep
    srfs_dir = os.path.normpath(srfs_dir) + os.path.sep

    srf_names = parse_srf_directory(srfs_dir)

    for lib_id, lib in fcdb.library.items():
        library_path = os.path.join(library_dir, str(lib_id))
        # a dry run must not touch the filesystem, so only create the
        # library directory on a real run
        if not dry_run and not os.path.exists(library_path):
            os.mkdir(library_path)

        for flowcell_id, lane in lib.get('lanes', []):
            link_all_eland_lanes(library_path,
                                 flowcell_dir,
                                 flowcell_id,
                                 lane,
                                 dry_run)

            link_srf_lanes(srf_names,
                           library_path,
                           srfs_dir,
                           flowcell_id,
                           lane,
                           dry_run)
149 parser = OptionParser()
150 parser.add_option("-d", "--database", dest="database",
151 help="path to the fctracker.db",
153 parser.add_option("-w", "--where", dest="where",
154 help="add a where clause",
156 parser.add_option("--dry-run", dest="dry_run", action="store_true",
158 help="Don't modify the filesystem")
162 logging.basicConfig(level=logging.INFO)
166 parser = make_parser()
168 opt, args = parser.parse_args(argv)
170 fcdb = fctracker.fctracker(opt.database)
171 cells = fcdb._get_flowcells(opt.where)
173 root_dir = '/woldlab/loxcyc/data00/solexa-sequence'
174 library_dir = os.path.join(root_dir, 'libraries')
175 flowcell_dir = os.path.join(root_dir, 'flowcells')
176 srfs_dir = os.path.join(root_dir, 'srfs')
177 make_library_tree(fcdb,
178 library_dir, flowcell_dir, srfs_dir,
183 if __name__ == "__main__":
184 rv = main(sys.argv[1:])