2 from ConfigParser import SafeConfigParser
8 from optparse import OptionParser, OptionGroup
10 from pprint import pprint, pformat
12 from StringIO import StringIO
20 from zipfile import ZipFile
24 from htsworkflow.util import api
25 from htsworkflow.util.rdfhelp import \
33 from htsworkflow.submission.daf import \
35 MetadataLookupException, \
37 from htsworkflow.submission.results import ResultMap
38 from htsworkflow.submission.condorfastq import CondorFastqExtract
# Module-level logger for this UCSC submission-gathering script.
40 logger = logging.getLogger('ucsc_gather')
42 def main(cmdline=None):
# Entry point: parse options, build the RDF model and the submission mapper,
# then dispatch on the requested command flags (load rdf, make ddfs, zip,
# sparql, print model).
# NOTE(review): this listing is garbled -- original line numbers are fused
# onto each line and several lines are elided (gaps in the numbering), so
# the conditionals guarding some statements below are missing from this view.
43 parser = make_parser()
44 opts, args = parser.parse_args(cmdline)
# One of three logging levels is configured; the selecting conditionals
# (presumably keyed on opts.debug / opts.verbose) are elided -- TODO confirm.
48 logging.basicConfig(level = logging.DEBUG )
50 logging.basicConfig(level = logging.INFO )
52 logging.basicConfig(level = logging.WARNING )
# Build authentication data for the htsworkflow API from the parsed options.
54 apidata = api.make_auth_from_opts(opts, parser)
# Open (or create) the RDF model backing store.
56 model = get_model(opts.model, opts.db_path)
# NOTE(review): "UCSCSubmssion" looks misspelled (missing an 'i'); verify it
# matches the name actually imported from htsworkflow.submission.daf.
59 mapper = UCSCSubmssion(opts.name, opts.daf, model)
60 if opts.library_url is not None:
61 mapper.library_url = opts.library_url
62 submission_uri = get_submission_uri(opts.name)
# Load extra RDF statements (turtle syntax) into the model, scoped to this
# submission's URI; requires --name to have been given.
65 if opts.load_rdf is not None:
66 if submission_uri is None:
67 parser.error("Please specify the submission name")
68 load_into_model(model, 'turtle', opts.load_rdf, submission_uri)
70 if opts.make_ddf and opts.daf is None:
71 parser.error("Please specify your daf when making ddf files")
# Positional arguments appear to be result-map files; the loop header and
# the ResultMap construction are elided from this view.
75 results.add_results_from_file(a)
77 if opts.make_tree_from is not None:
78 results.make_tree_from(opts.make_tree_from)
# The conditions guarding these errors are elided in this view.
82 parser.error("Specify a submission model")
83 if mapper.daf is None:
84 parser.error("Please load a daf first")
85 mapper.link_daf(results)
# Generate condor scripts that extract fastq files; the guard (presumably
# opts.fastq) and the trailing constructor arguments are elided.
88 extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
90 extractor.create_scripts(results)
92 if opts.scan_submission:
93 mapper.scan_submission_dirs(results)
# Command dispatch: build the ddf files, zip just the metadata, run a
# sparql query, or dump the model (the guards for these are elided).
96 make_all_ddfs(mapper, results, opts.daf, force=opts.force)
99 zip_ddfs(mapper, results, opts.daf)
102 sparql_query(model, opts.sparql)
105 writer = get_serializer()
# Python 2 print statement: emit the entire model as serialized RDF.
106 print writer.serialize_model_to_string(model)
# NOTE(review): the enclosing "def make_parser():" line and the final
# "return parser" are elided from this view; what follows is the body that
# builds the optparse OptionParser with grouped options.
110 parser = OptionParser()
# "model" option group: options controlling the RDF model / database.
112 model = OptionGroup(parser, 'model')
113 model.add_option('--name', help="Set submission name")
114 model.add_option('--db-path', default=None,
115 help="set rdf database path")
116 model.add_option('--model', default=None,
117 help="Load model database")
118 model.add_option('--load-rdf', default=None,
119 help="load rdf statements into model")
120 model.add_option('--sparql', default=None, help="execute sparql query")
121 model.add_option('--print-rdf', action="store_true", default=False,
122 help="print ending model state")
123 parser.add_option_group(model)
# "commands" option group: the actions this script can perform.
125 commands = OptionGroup(parser, 'commands')
126 commands.add_option('--make-tree-from',
127 help="create directories & link data files",
129 commands.add_option('--fastq', default=False, action="store_true",
130 help="generate scripts for making fastq files")
131 commands.add_option('--scan-submission', default=False, action="store_true",
132 help="Import metadata for submission into our model")
133 commands.add_option('--link-daf', default=False, action="store_true",
134 help="link daf into submission directories")
135 commands.add_option('--make-ddf', help='make the ddfs', default=False,
137 commands.add_option('--zip-ddf', default=False, action='store_true',
138 help='zip up just the metadata')
140 parser.add_option_group(commands)
# Miscellaneous top-level options.
142 parser.add_option('--force', default=False, action="store_true",
143 help="Force regenerating fastqs")
144 parser.add_option('--daf', default=None, help='specify daf name')
145 parser.add_option('--library-url', default=None,
146 help="specify an alternate source for library information")
148 parser.add_option('--verbose', default=False, action="store_true",
149 help='verbose logging')
150 parser.add_option('--debug', default=False, action="store_true",
151 help='debug logging')
# Add the shared authentication options (host, credentials) from the api util.
153 api.add_auth_options(parser)
158 def make_all_ddfs(view_map, library_result_map, daf_name, make_condor=True, force=False):
# Build a ddf file for every (library id, result directory) pair, then
# optionally write a condor DAG that archives and uploads each submission.
# NOTE(review): several lines are elided here (gaps in the numbering), e.g.
# the accumulation of dag_fragment from make_ddf's return value.
160 for lib_id, result_dir in library_result_map:
161 submissionNode = view_map.get_submission_node(result_dir)
163 make_ddf(view_map, submissionNode, daf_name, make_condor, result_dir)
# Write the collected DAG fragments to submission.dagman, unless the file
# already exists and --force was not given.
166 if make_condor and len(dag_fragment) > 0:
167 dag_filename = 'submission.dagman'
168 if not force and os.path.exists(dag_filename):
169 logger.warn("%s exists, please delete" % (dag_filename,))
171 f = open(dag_filename,'w')
172 f.write( os.linesep.join(dag_fragment))
173 f.write( os.linesep )
177 def make_ddf(view_map, submissionNode, daf_name, make_condor=False, outdir=None):
# (docstring opening quotes at elided line 178)
179 Make ddf files, and bonus condor file
# SPARQL query: for this submission, fetch each submitted view's file names
# and md5sums plus the optional library metadata columns used in a UCSC
# ddf.  The OPTIONAL clauses keep rows whose metadata is incomplete.
# (No comments are inserted inside the template -- it is a string literal.)
181 query_template = """PREFIX libraryOntology: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
182 PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
183 PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
185 select ?submitView ?files ?md5sum ?view ?cell ?antibody ?sex ?control ?strain ?controlId ?labExpId ?labVersion ?treatment ?protocol ?readType ?insertLength ?replicate ?mapAlgorithm
187 ?file ucscDaf:filename ?files ;
188 ucscDaf:md5sum ?md5sum .
189 ?submitView ucscDaf:has_file ?file ;
190 ucscDaf:view ?dafView ;
191 ucscDaf:submission <%(submission)s> .
192 ?dafView ucscDaf:name ?view .
193 <%(submission)s> submissionOntology:library ?library ;
195 OPTIONAL { ?library libraryOntology:antibody ?antibody }
196 OPTIONAL { ?library libraryOntology:cell_line ?cell }
197 OPTIONAL { <%(submission)s> ucscDaf:control ?control }
198 OPTIONAL { <%(submission)s> ucscDaf:controlId ?controlId }
199 OPTIONAL { ?library ucscDaf:sex ?sex }
200 OPTIONAL { ?library libraryOntology:library_id ?labExpId }
201 OPTIONAL { ?library libraryOntology:library_id ?labVersion }
202 OPTIONAL { ?library libraryOntology:replicate ?replicate }
203 OPTIONAL { ?library libraryOntology:condition_term ?treatment }
204 OPTIONAL { ?library ucscDaf:protocol ?protocol }
205 OPTIONAL { ?library ucscDaf:readType ?readType }
206 OPTIONAL { ?library ucscDaf:strain ?strain }
207 OPTIONAL { ?library libraryOntology:insert_size ?insertLength }
208 OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm }
210 ORDER BY ?submitView"""
# Look up this submission's name in the model; it names the ddf and condor
# files below.
213 name = fromTypedNode(view_map.model.get_target(submissionNode, submissionOntology['name']))
# (the guard for a missing name is elided; only the error report is visible)
215 logger.error("Need name for %s" % (str(submissionNode)))
218 ddf_name = make_ddf_name(name)
219 if outdir is not None:
220 outfile = os.path.join(outdir, ddf_name)
221 output = open(outfile,'w')
# Substitute this submission's URI into the query template and execute it.
226 formatted_query = query_template % {'submission': str(submissionNode.uri)}
228 query = RDF.SPARQLQuery(formatted_query)
229 results = query.execute(view_map.model)
231 # filename goes first
232 variables = view_map.get_daf_variables()
# Header row: tab-separated daf variable names.
234 output.write('\t'.join(variables))
235 output.write(os.linesep)
# Group result rows by view; 'files' and 'md5sum' accumulate into lists
# while other variables keep their last-seen value.  (The loop header over
# `results` and the all_views initialization are elided from this view.)
240 viewname = fromTypedNode(row['view'])
241 current = all_views.setdefault(viewname, {})
242 for variable_name in variables:
243 value = str(fromTypedNode(row[variable_name]))
# NOTE(review): value has already been str()-ed, so `value is None` can
# never be true here -- only the == 'None' comparison can fire.
244 if value is None or value == 'None':
245 logger.warn("{0}: {1} was None".format(outfile, variable_name))
246 if variable_name in ('files', 'md5sum'):
247 current.setdefault(variable_name,[]).append(value)
249 current[variable_name] = value
# Emit one tab-separated ddf line per view; list-valued columns (files,
# md5sum) are comma-joined.
251 for view in all_views.keys():
253 for variable_name in variables:
254 if variable_name in ('files', 'md5sum'):
255 line.append(','.join(all_views[view][variable_name]))
257 line.append(all_views[view][variable_name])
258 output.write("\t".join(line))
259 output.write(os.linesep)
260 all_files.extend(all_views[view]['files'])
# (the opening of this logger call is elided)
263 "Examined {0}, found files: {1}".format(
264 str(submissionNode), ", ".join(all_files)))
# The daf and ddf themselves are part of the archived submission.
266 all_files.append(daf_name)
267 all_files.append(ddf_name)
# Optionally generate condor archive+upload scripts and collect DAG text
# linking them (the guard and the function's return are elided).
270 archive_condor = make_condor_archive_script(name, all_files, outdir)
271 upload_condor = make_condor_upload_script(name, outdir)
273 dag_fragments.extend(
274 make_dag_fragment(name, archive_condor, upload_condor)
280 def zip_ddfs(view_map, library_result_map, daf_name):
281 """zip up just the ddf & daf files
# (docstring closing quotes elided)
# Remember the starting directory: we chdir into each result dir below and
# the zip path is relative to it.
283 rootdir = os.getcwd()
284 for lib_id, result_dir in library_result_map:
285 submissionNode = view_map.get_submission_node(result_dir)
286 nameNode = view_map.model.get_target(submissionNode,
287 submissionOntology['name'])
288 name = fromTypedNode(nameNode)
# (the guard for a missing name is elided; only the error report is visible)
290 logger.error("Need name for %s" % (str(submissionNode)))
# Write <lib_id>.zip one level above the result directory, containing only
# the ddf and daf files.
293 zip_name = '../{0}.zip'.format(lib_id)
294 os.chdir(os.path.join(rootdir, result_dir))
295 with ZipFile(zip_name, 'w') as stream:
296 stream.write(make_ddf_name(name))
297 stream.write(daf_name)
301 def make_condor_archive_script(name, files, outdir=None):
# Condor submit-description template: tar+gzip the submission files into
# ../<archivename> from within the submission directory.
# (No comments are inserted inside the template -- it is a string literal.)
302 script = """Universe = vanilla
304 Executable = /bin/tar
305 arguments = czvhf ../%(archivename)s %(filelist)s
307 Error = compress.out.$(Process).log
308 Output = compress.out.$(Process).log
309 Log = /tmp/submission-compress-%(user)s.log
310 initialdir = %(initialdir)s
311 environment="GZIP=-3"
# (the template's closing quotes and the for-loop header over `files` are
# elided from this view)
# Sanity-check that every file to be archived actually exists in outdir.
319 pathname = os.path.join(outdir, f)
320 if not os.path.exists(pathname):
321 raise RuntimeError("Missing %s from %s" % (f,outdir))
# Fill in the template; os.getlogin() names the condor log after the
# invoking user.
323 context = {'archivename': make_submission_name(name),
324 'filelist': " ".join(files),
325 'initialdir': os.path.abspath(outdir),
326 'user': os.getlogin()}
# Write the archive submit file into outdir (the return of the script path
# is elided from this view).
328 condor_script = os.path.join(outdir, make_condor_name(name, 'archive'))
329 condor_stream = open(condor_script,'w')
330 condor_stream.write(script % context)
331 condor_stream.close()
335 def make_condor_upload_script(name, outdir=None):
# Condor submit-description template: lftp-put the archive to the ENCODE
# ftp host using credentials substituted in below.
# (No comments are inserted inside the template -- it is a string literal.)
336 script = """Universe = vanilla
338 Executable = /usr/bin/lftp
339 arguments = -c put ../%(archivename)s -o ftp://%(ftpuser)s:%(ftppassword)s@%(ftphost)s/%(archivename)s
341 Error = upload.out.$(Process).log
342 Output = upload.out.$(Process).log
343 Log = /tmp/submission-upload-%(user)s.log
344 initialdir = %(initialdir)s
# (the template's closing quotes are elided from this view)
# Read ftp credentials for the UCSC ENCODE ftp host from a netrc file.
# NOTE(review): the hard-coded "~diane/.netrc" path ties this script to a
# single user's account -- consider making it configurable.
351 auth = netrc.netrc(os.path.expanduser("~diane/.netrc"))
353 encodeftp = 'encodeftp.cse.ucsc.edu'
354 ftpuser = auth.hosts[encodeftp][0]
355 ftppassword = auth.hosts[encodeftp][2]
# (the 'ftpuser' context entry at elided line 359 is not visible here)
356 context = {'archivename': make_submission_name(name),
357 'initialdir': os.path.abspath(outdir),
358 'user': os.getlogin(),
360 'ftppassword': ftppassword,
361 'ftphost': encodeftp}
# Write the upload submit file (the return of the script path is elided).
363 condor_script = os.path.join(outdir, make_condor_name(name, 'upload'))
364 condor_stream = open(condor_script,'w')
365 condor_stream.write(script % context)
366 condor_stream.close()
# Owner read/write only -- the submit file embeds the ftp password.
367 os.chmod(condor_script, stat.S_IREAD|stat.S_IWRITE)
372 def make_dag_fragment(ininame, archive_condor, upload_condor):
# (docstring opening quotes at elided line 373)
374 Make the couple of fragments compress and then upload the data.
# Use absolute paths so the DAG works regardless of condor's working dir.
376 cur_dir = os.getcwd()
377 archive_condor = os.path.join(cur_dir, archive_condor)
378 upload_condor = os.path.join(cur_dir, upload_condor)
379 job_basename = make_base_name(ininame)
# DAGMan fragment: the archive job must finish before the upload job runs.
# (the `fragments` initialization and the return are elided from this view)
382 fragments.append('JOB %s_archive %s' % (job_basename, archive_condor))
383 fragments.append('JOB %s_upload %s' % (job_basename, upload_condor))
384 fragments.append('PARENT %s_archive CHILD %s_upload' % (job_basename, job_basename))
389 def make_base_name(pathname):
# Strip the directory and extension from pathname (the return statement,
# presumably `return name`, is elided from this view).
390 base = os.path.basename(pathname)
391 name, ext = os.path.splitext(base)
395 def make_submission_name(ininame):
# Derive the archive filename from the ini name; the return (which
# presumably appends the archive suffix) is elided from this view.
396 name = make_base_name(ininame)
400 def make_ddf_name(pathname):
# Derive the ddf filename; the return (which presumably appends a ddf
# suffix to `name`) is elided from this view.
401 name = make_base_name(pathname)
405 def make_condor_name(pathname, run_type=None):
# Build a dotted condor submit-file name ending in "condor", optionally
# including run_type (e.g. 'archive' or 'upload' as called above).
# (the `elements` initialization, presumably [name], is elided at line 407)
406 name = make_base_name(pathname)
408 if run_type is not None:
409 elements.append(run_type)
410 elements.append("condor")
411 return ".".join(elements)
def parse_filelist(file_string):
    """Split a comma-separated filename list into individual names.

    Reconstructed as valid Python from the garbled listing (fused line
    numbers stripped, indentation restored); behavior is unchanged.

    :param file_string: comma-delimited filenames, e.g. "a.bam,b.bam"
    :return: list of filename strings; plain split(",") semantics -- no
        whitespace stripping and no filtering of empty entries
    """
    return file_string.split(",")
418 def validate_filelist(files):
# (the docstring quotes and the for-loop header over `files` are elided
# from this view; only the existence check and the raise are visible)
420 Die if a file doesn't exist in a file list
423 if not os.path.exists(f):
424 raise RuntimeError("%s does not exist" % (f,))
# Script entry point (the guarded call, presumably main(), is elided).
426 if __name__ == "__main__":