3 from ConfigParser import SafeConfigParser
7 from optparse import OptionParser
11 from htsworkflow.util import api
12 from htsworkflow.pipelines.sequences import scan_for_sequences
14 LOGGER = logging.getLogger(__name__)
def build_flowcell_db(fcdb_filename, sequences, baseurl, apiid, apikey):
    """
    Compare our flowcell database with our list of sequences and return
    a fully populated database.

    fcdb_filename is the path of the shelve file used to cache flowcell
    information between runs.
    sequences is an iterable of objects exposing ``flowcell`` and ``lane``
    attributes.
    baseurl, apiid and apikey are handed to htsworkflow.util.api to look
    up any flowcell that is not in the cache yet.

    Returns a tuple (fcdb, libdb): the still-open shelf, and a dictionary
    mapping each library id to the list of sequences that belong to it.
    The caller owns the shelf and is responsible for closing it.
    """
    fcdb = shelve.open(fcdb_filename)
    libdb = {}
    apidata = {'apiid': apiid, 'apikey': apikey}

    for seq in sequences:
        flowcell = seq.flowcell

        # get info about flowcell from server or shelf
        if flowcell in fcdb:
            flowcell_info = fcdb[flowcell]
        else:
            url = api.flowcell_url(baseurl, flowcell)
            flowcell_info = api.retrieve_info(url, apidata)
            # only cache real answers, so a failed lookup is retried
            # on the next run
            if flowcell_info is not None:
                fcdb[flowcell] = flowcell_info

        if flowcell_info is None:
            continue

        # lane_set is indexed by the lane converted to text; the u'%s'
        # form gives the same key as unicode(seq.lane) and also works on
        # interpreters that have no unicode() builtin.
        lane_collection = flowcell_info['lane_set'][u'%s' % (seq.lane,)]
        # a lane may hold one sample record or a list of them
        if not isinstance(lane_collection, list):
            lane_collection = [lane_collection]
        for sample in lane_collection:
            libdb.setdefault(sample['library_id'], []).append(seq)

    # flush newly fetched flowcells to disk even if the caller never
    # closes the shelf
    fcdb.sync()
    return fcdb, libdb
def carefully_make_hardlink(source, destination, dry_run=False):
    """
    Make a hard link, refusing to replace a different existing file

    Checks whether the destination already exists and is the same file
    as the link we want to make.
    If the destination already exists and is a different file, an error
    is logged and the file is skipped; nothing is raised.

    When dry_run is true the filesystem is left untouched, but a link
    that would have been created is still counted.

    If we didn't update anything return 0, if we did update
    (or would have, in a dry run) return 1.
    """
    LOGGER.debug("CHECKING: %s -> %s", source, destination)

    if not os.path.exists(source):
        LOGGER.warning("%s doesn't exist", source)
        return 0

    if os.path.exists(destination):
        if os.path.samefile(source, destination):
            LOGGER.debug('SAME: %s -> %s', source, destination)
        else:
            LOGGER.error('%s and %s are different files, skipping',
                         source, destination)
        # either way there is nothing for us to create
        return 0

    LOGGER.debug('Linking: %s -> %s', source, destination)

    # we would do something by this part
    if dry_run:
        return 1

    os.link(source, destination)
    # archived sequence files are made read-only for everyone
    os.chmod(destination,
             stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
    return 1
def make_library_links(root, library_db, dry_run=False):
    """
    Make a tree of sequencer roots organized by library id

    Root is the root of the library tree
    library_db is a dictionary of SequenceFiles organized by library id
    dry_run, when true, reports what would be done without creating
    directories or links

    Returns the number of links reported as created by
    carefully_make_hardlink.
    """
    count = 0
    root = os.path.abspath(root)
    for lib_id, sequences in library_db.items():
        target_dir = os.path.join(root, lib_id)
        if not os.path.exists(target_dir):
            LOGGER.info("mkdir %s", target_dir)
            if not dry_run:
                os.mkdir(target_dir)

        for sequence in sequences:
            count += carefully_make_hardlink(
                sequence.path,
                sequence.make_target_name(target_dir),
                dry_run=dry_run)
    return count
def configure_logging(opts):
    """
    Set up the root logger from the command line verbosity flags

    opts needs ``verbose`` and ``debug`` attributes.  The default level
    is WARNING, ``verbose`` lowers it to INFO and ``debug`` lowers it to
    DEBUG; ``debug`` wins when both are set.
    """
    level = logging.WARNING
    if opts.verbose:
        level = logging.INFO
    if opts.debug:
        level = logging.DEBUG
    logging.basicConfig(level=level)
def configure_opts(opts):
    """
    Load in options from config file

    Values given on the command line always win; the config file only
    fills in options that are still None.  The file is either the one
    named by opts.config or the first readable of ~/.htsworkflow.ini and
    /etc/htsworkflow.ini, and all options live in its [sequence_archive]
    section.

    Besides filling in cache, sequence_archive, host, apiid and apikey
    this derives opts.library_tree, opts.flowcells and opts.srfs from
    the sequence archive root.  When no archive root is known those
    three are set to None so the caller can report the problem.

    Returns the updated opts object.
    """
    SECTION_NAME = 'sequence_archive'
    ARCHIVE_OPT = 'sequence_archive'
    CACHE_OPT = 'cache'
    HOST_OPT = 'host'
    APIID_OPT = 'apiid'
    APIKEY_OPT = 'apikey'

    # figure out what config file to read
    config_path = [os.path.expanduser('~/.htsworkflow.ini'),
                   '/etc/htsworkflow.ini']
    if opts.config is not None:
        config_path = [opts.config]
    # parse options from config file
    config_file = SafeConfigParser()
    config_file.read(config_path)

    # load defaults from config file if not overridden by the command line
    if opts.cache is None:
        if config_file.has_option(SECTION_NAME, CACHE_OPT):
            # read from the same section that was just tested
            opts.cache = config_file.get(SECTION_NAME, CACHE_OPT)
        else:
            opts.cache = os.path.expanduser('~/.flowcelldb.shelve')

    if opts.sequence_archive is None and \
       config_file.has_option(SECTION_NAME, ARCHIVE_OPT):
        opts.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT)
        opts.sequence_archive = os.path.expanduser(opts.sequence_archive)

    if opts.sequence_archive is None:
        # nothing to derive from; leave it to the caller to complain
        # instead of crashing inside os.path.abspath(None)
        opts.library_tree = None
        opts.flowcells = None
        opts.srfs = None
    else:
        opts.sequence_archive = os.path.abspath(opts.sequence_archive)
        opts.library_tree = os.path.join(opts.sequence_archive, 'libraries')
        opts.flowcells = os.path.join(opts.sequence_archive, 'flowcells')
        opts.srfs = os.path.join(opts.sequence_archive, 'srfs')

    if opts.host is None and config_file.has_option(SECTION_NAME, HOST_OPT):
        opts.host = config_file.get(SECTION_NAME, HOST_OPT)

    if opts.apiid is None and config_file.has_option(SECTION_NAME, APIID_OPT):
        opts.apiid = config_file.get(SECTION_NAME, APIID_OPT)

    if opts.apikey is None and config_file.has_option(SECTION_NAME, APIKEY_OPT):
        opts.apikey = config_file.get(SECTION_NAME, APIKEY_OPT)

    return opts
def make_parser():
    """
    Build the command line parser

    Every path and API option defaults to None so configure_opts can
    tell "not given on the command line" apart from a real value and
    fall back to the config file.  The boolean flags default to False.
    """
    parser = OptionParser()
    parser.add_option('-c', '--config', default=None,
                      help='path to a configuration file containing a '
                           'sequence archive section')
    parser.add_option('--cache', default=None,
                      help="default flowcell cache")

    parser.add_option('--host', default=None,
                      help="specify http://host for querying flowcell information")
    parser.add_option('--apiid', default=None,
                      help="API ID to use when retrieving information")
    parser.add_option("--apikey", default=None,
                      help="API Key for when retrieving information")

    parser.add_option('-a', '--sequence-archive', default=None,
                      help='path to where the sequence archive lives')

    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='be more verbose')
    parser.add_option('-d', '--debug', action='store_true', default=False,
                      help='report everything')

    parser.add_option("--dry-run", dest="dry_run", action="store_true",
                      default=False,
                      help="Don't modify the filesystem")
    return parser
def main(cmdline=None):
    """
    Link every known sequence file into a per-library directory tree

    cmdline is the list of command line arguments; None lets optparse
    fall back to sys.argv[1:].  Positional arguments name the
    directories to scan; without any, the archive's flowcells and srfs
    directories are scanned.

    Returns 0 on success.  Missing required settings are reported
    through parser.error.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    configure_logging(opts)
    opts = configure_opts(opts)

    # complain if critical things are missing
    if opts.cache is None:
        parser.error('Need location of htsworkflow frontend database')

    if opts.sequence_archive is None:
        parser.error('Need the root path for the sequence archive')

    if args:
        seq_dirs = [os.path.abspath(f) for f in args]
    else:
        seq_dirs = [opts.flowcells, opts.srfs]

    seqs = scan_for_sequences(seq_dirs)
    fcdb, libdb = build_flowcell_db(opts.cache, seqs, opts.host,
                                    opts.apiid, opts.apikey)
    try:
        updates = make_library_links(opts.library_tree, libdb,
                                     dry_run=opts.dry_run)

        # reported at warning level so the summary shows up with the
        # default logging configuration
        LOGGER.warning("%s flowcells in database", len(fcdb))
        LOGGER.warning("found %s sequence files", len(seqs))
        LOGGER.warning("%s libraries being checked", len(libdb))
        LOGGER.warning("%s sequence files were linked", updates)
    finally:
        # the shelf is only needed for the summary above
        fcdb.close()

    return 0
224 if __name__ == "__main__":