Add options to the trackhub generation script for naming the hub and
manifest output files (or writing them to stdout). Model generation now
records each file's relative path and attaches the library URI directly
to file nodes, which makes some queries faster.
mapper.scan_submission_dirs(results)
if opts.make_hub:
- make_hub(results)
+ make_hub(mapper, results, opts.make_hub)
+ if opts.make_manifest:
+ make_manifest(mapper, results, opts.make_manifest)
+
if opts.sparql:
sparql_query(model, opts.sparql)
print writer.serialize_model_to_string(model)
-def make_hub(results):
+def make_hub(mapper, results, filename=None):
+    """Write the track hub trackDb text for results.
+
+    filename: None or '-' writes to stdout; any other value names the
+    output file to create.
+    """
trackdb = mapper.make_hub(results)
-    manifest = mapper.make_manifest(results)
-    trackstream = sys.stdout
-    #with open('trackDb.txt', 'w') as trackstream:
-    trackstream.write(trackdb)
+    if filename is None or filename == '-':
+        sys.stdout.write(trackdb)
+    else:
+        # honor the user-supplied filename instead of a hard-coded path
+        with open(filename, 'w') as trackstream:
+            trackstream.write(trackdb)
-    #with open('manifest.txt', 'w') as mainifeststream:
-    manifeststream = sys.stdout
-    mainifeststream.write(mainifest)
+def make_manifest(mapper, results, filename=None):
+    """Write the manifest text; None or '-' means stdout."""
+    manifest = mapper.make_manifest(results)
+
+    if filename is None or filename == '-':
+        sys.stdout.write(manifest)
+    else:
+        with open(filename, 'w') as manifeststream:
+            manifeststream.write(manifest)
def make_parser():
parser = OptionParser()
help="generate scripts for making fastq files")
commands.add_option('--scan-submission', default=False, action="store_true",
help="Import metadata for submission into our model")
- commands.add_option('--make-hub', help='make the hub file', default=False,
-                     action="store_true")
+ commands.add_option('--make-hub', default=None,
+                     help='hub output filename, or - for stdout')
+ commands.add_option('--make-manifest',
+                     help='manifest output filename, or - for stdout',
+                     default=None)
+
parser.add_option_group(commands)
RDF.Statement(fileNode,
rdfNS['type'],
file_type))
+ self.model.add_statement(
+ RDF.Statement(fileNode,
+ libraryOntology['library'],
+ libNode))
+
LOGGER.debug("Done.")
def make_file_node(self, pathname, submissionNode):
"""
# add file specific information
path, filename = os.path.split(pathname)
- fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(pathname)))
+ pathname = os.path.abspath(pathname)
+ fileNode = RDF.Node(RDF.Uri('file://'+ pathname))
self.model.add_statement(
RDF.Statement(submissionNode,
dafTermOntology['has_file'],
RDF.Statement(fileNode,
dafTermOntology['filename'],
filename))
+ self.model.add_statement(
+ RDF.Statement(fileNode,
+ dafTermOntology['relative_path'],
+ os.path.relpath(pathname)))
return fileNode
def add_md5s(self, filename, fileNode, analysis_dir):
libraryOntology['has_mappings'],
dafTermOntology['has_file']))
parser = RDF.Parser(name='rdfa')
- new_statements = parser.parse_as_stream(libNode.uri)
+ try:
+ new_statements = parser.parse_as_stream(libNode.uri)
+ except RDF.RedlandError as e:
+ LOGGER.error(e)
+ return
+ LOGGER.debug("Scanning %s", str(libNode.uri))
toadd = []
for s in new_statements:
# always add "collections"
paired = ['Barcoded Illumina',
'Multiplexing',
'Nextera',
- 'Paired End (non-multiplexed)',]
+ 'Paired End (non-multiplexed)',
+ 'Dual Index Illumina',]
if library_type in single:
return False
elif library_type in paired:
metadata = metadata[0]
samples.append(metadata)
- soft_template = loader.get_template('trackDb.txt')
+ template = loader.get_template('trackDb.txt')
context = Context({
'samples': samples,
})
- return str(soft_template.render(context))
+ return str(template.render(context))
- def make_mainifest(self, result_map):
- pass
+ def make_manifest(self, result_map):
+     """Render the trackhub manifest for every submission in result_map.
+
+     Gathers manifest metadata rows for each submission's analysis node
+     and renders them through the manifest.txt template.
+     """
+ files = []
+ for lib_id, result_dir in result_map.items():
+ an_analysis = self.get_submission_node(result_dir)
+ metadata = self.get_manifest_metadata(an_analysis)
+ files.extend(metadata)
+
+ template = loader.get_template('manifest.txt')
+ context = Context({
+ 'files': files
+ })
+ return str(template.render(context))
def get_sample_metadata(self, analysis_node):
"""Gather information for filling out sample section of a SOFT file
results = self.execute_query(query_template, context)
return results
+
+ def get_manifest_metadata(self, analysis_node):
+     """Run the trackhub_manifest SPARQL query for one submission.
+
+     Returns the query result rows used to fill in the manifest
+     template (relative_path, file_format, output_type, etc.).
+     """
+ query_template = loader.get_template('trackhub_manifest.sparql')
+
+ context = Context({
+ 'submission': str(analysis_node.uri),
+ 'submissionSet': str(self.submissionSetNS[''].uri),
+ })
+ results = self.execute_query(query_template, context)
+ return results
--- /dev/null
+#file_name format output_type experiment replicate enriched_in ucsc_db{% for r in files %}
+{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }}{% endfor %}
--- /dev/null
+PREFIX htswlib: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
+PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
+PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
+PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
+PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
+
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db
+WHERE {
+ <{{submission}}> a submissionOntology:submission ;
+ submissionOntology:name ?name ;
+ ucscDaf:has_file ?file .
+
+ ?file ucscDaf:filename ?filename ;
+ ucscDaf:relative_path ?relative_path ;
+ htswlib:library ?library ;
+ a ?fileClass .
+
+ ?fileClass geoSoft:fileTypeLabel ?file_format ;
+ ucscDaf:output_type ?output_type .
+
+ ?library htswlib:replicate ?replicate ;
+ ucscDaf:enriched_in ?enriched_in;
+ ucscDaf:genome_build ?ucsc_db .
+
+ ?library encode3:dataset_id ?dataset_id .
+  # TODO: joining ?library to ?library2 via encode3:dcc_id and a FILTER
+  # did not work as expected; investigate before re-enabling.
+  # ?library2 encode3:dcc_id ?dcc_library_id .
+  # FILTER (?library = ?library2)
+
+}