Instead just query the library directly.
This involved updating the sparql query in make_ddf a bit.
As part of that change I also altered when data is imported
from the RDFa pages, so the web server is contacted only
once per library, instead of once per scanned file per attribute.
That provided a significant performance improvement.
def main(cmdline=None):
parser = make_parser()
opts, args = parser.parse_args(cmdline)
+ submission_uri = None
if opts.debug:
logging.basicConfig(level = logging.DEBUG )
model = get_model(opts.load_model)
if opts.name:
mapper = DAFMapper(opts.name, opts.daf, model)
+ if opts.library_url is not None:
+ mapper.library_url = opts.library_url
submission_uri = get_submission_uri(opts.name)
-
- if opts.library_url is not None:
- mapper.library_url = opts.library_url
+
if opts.load_rdf is not None:
+ if submission_uri is None:
+ parser.error("Please specify the submission name")
load_into_model(model, 'turtle', opts.load_rdf, submission_uri)
if opts.make_ddf and opts.daf is None:
PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
-select ?submitView ?files ?md5sum ?view ?cell ?antibody ?sex ?control ?controlId ?labExpId ?labVersion ?treatment ?protocol
+select ?submitView ?files ?md5sum ?view ?cell ?antibody ?sex ?control ?controlId ?labExpId ?labVersion ?treatment ?protocol ?readType ?insertLength
WHERE {
?file ucscDaf:filename ?files ;
ucscDaf:md5sum ?md5sum .
ucscDaf:view ?dafView ;
ucscDaf:submission <%(submission)s> .
?dafView ucscDaf:name ?view .
- <%(submission)s> submissionOntology:library ?library .
-
- OPTIONAL { ?submitView ucscDaf:antibody ?antibody }
- OPTIONAL { ?submitView ucscDaf:cell ?cell }
- OPTIONAL { ?submitView ucscDaf:control ?control }
- OPTIONAL { ?library ucscDaf:controlId ?controlId }
- OPTIONAL { ?submitView ucscDaf:sex ?sex }
- OPTIONAL { ?submitView ucscDaf:labVersion ?labExpId }
- OPTIONAL { ?submitView ucscDaf:labVersion ?labVersion }
- OPTIONAL { ?library ucscDaf:treatment ?treatment }
- OPTIONAL { ?submitView ucscDaf:protocol ?protocol }
+        <%(submission)s> submissionOntology:library ?library .
+
+ OPTIONAL { ?library libraryOntology:antibody ?antibody }
+ OPTIONAL { ?library libraryOntology:cell_line ?cell }
+ OPTIONAL { <%(submission)s> ucscDaf:control ?control }
+ OPTIONAL { <%(submission)s> ucscDaf:controlId ?controlId }
+ OPTIONAL { ?library ucscDaf:sex ?sex }
+ OPTIONAL { ?library libraryOntology:library_id ?labExpId }
+ OPTIONAL { ?library libraryOntology:library_id ?labVersion }
+ OPTIONAL { ?library libraryOntology:condition ?treatment }
+ OPTIONAL { ?library ucscDaf:protocol ?protocol }
+ OPTIONAL { ?library ucscDaf:readType ?readType }
+ OPTIONAL { ?library libraryOntology:insert_size ?insertLength }
}
ORDER BY ?submitView"""
dag_fragments = []
variables = ['files']
# filename goes first
variables.extend(view_map.get_daf_variables())
- variables += ['controlId', 'labExpId', 'md5sum']
+ # 'controlId',
+ variables += [ 'labExpId', 'md5sum']
output.write('\t'.join(variables))
output.write(os.linesep)
for f in files:
pathname = os.path.join(outdir, f)
if not os.path.exists(pathname):
- raise RuntimeError("Missing %s" % (f,))
+ raise RuntimeError("Missing %s from %s" % (f,outdir))
context = {'archivename': make_submission_name(name),
'filelist': " ".join(files),
#attributes = get_filename_attribute_map(paired)
libNode = self.libraryNS[library_id + "/"]
+ self._add_library_details_to_model(libNode)
+
submission_files = os.listdir(submission_dir)
for f in submission_files:
self.construct_file_attributes(submission_dir, libNode, f)
"""
path, filename = os.path.split(pathname)
+ logger.debug("Searching for view")
view = self.find_view(filename)
if view is None:
logger.warn("Unrecognized file: %s" % (pathname,))
submission_name = self.make_submission_name(submission_dir)
submissionNode = self.get_submission_node(submission_dir)
submission_uri = str(submissionNode.uri)
- view_name = str(fromTypedNode(self.model.get_target(view, dafTermOntology['name'])))
+ view_name = fromTypedNode(self.model.get_target(view, dafTermOntology['name']))
+ if view_name is None:
+            logger.warning('Could not find view name for {0}'.format(str(view)))
+ return
+
+ view_name = str(view_name)
submissionView = RDF.Node(RDF.Uri(submission_uri + '/' + view_name))
self.model.add_statement(
RDF.Statement(self.submissionSet, dafTermOntology['has_submission'], submissionNode))
-
+ logger.debug("Adding statements to {0}".format(str(submissionNode)))
self.model.add_statement(RDF.Statement(submissionNode, submissionOntology['has_view'], submissionView))
self.model.add_statement(RDF.Statement(submissionNode, submissionOntology['name'], toTypedNode(submission_name)))
self.model.add_statement(RDF.Statement(submissionNode, rdfNS['type'], submissionOntology['submission']))
self.model.add_statement(RDF.Statement(submissionNode, submissionOntology['library'], libNode))
-
+
+ logger.debug("Adding statements to {0}".format(str(submissionView)))
# add trac specific information
self.model.add_statement(
RDF.Statement(submissionView, dafTermOntology['view'], view))
]
terms.extend((dafTermOntology[v] for v in self.get_daf_variables()))
- # Add everything I can find
- for term in terms:
- value = str(self._get_library_attribute(libNode, term))
- if value is not None:
- self.model.add_statement(RDF.Statement(submissionView, term, value))
-
# add file specific information
+ logger.debug("Updating file md5sum")
fileNode = RDF.Node(RDF.Uri(submission_uri + '/' + filename))
submission_pathname = os.path.join(submission_dir, filename)
md5 = make_md5sum(submission_pathname)
self.model.add_statement(
RDF.Statement(fileNode, dafTermOntology['md5sum'], md5))
-
+ logger.debug("Done.")
+
def _add_library_details_to_model(self, libNode):
parser = RDF.Parser(name='rdfa')
new_statements = parser.parse_as_stream(libNode.uri)
for s in new_statements:
# don't override things we already have in the model
- q = RDF.Statement(s.subject, s.predicate, None)
- if len(list(self.model.find_statements(q))) == 0:
+ targets = list(self.model.get_targets(s.subject, s.predicate))
+ if len(targets) == 0:
self.model.append(s)
-
- statements = list(self.model.find_statements(q))
- if len(statements) == 0:
- logger.warning("Nothing known about %s" % (str(libNode),))
def get_daf_variables(self):
"""Returns simple variables names that to include in the ddf
if not isinstance(attribute, RDF.Node):
attribute = libraryOntology[attribute]
- # search through the model twice (adding in data from website)
- for i in xrange(2):
- targets = list(self.model.get_targets(libNode, attribute))
- if len(targets) > 0:
- return self._format_library_attribute(targets)
+ targets = list(self.model.get_targets(libNode, attribute))
+ if len(targets) > 0:
+ return self._format_library_attribute(targets)
+ else:
+ return None
- targets = self._search_same_as(libNode, attribute)
- if targets is not None:
- return self._format_library_attribute(targets)
-
- # we don't know anything about this attribute
- self._add_library_details_to_model(libNode)
+ #targets = self._search_same_as(libNode, attribute)
+ #if targets is not None:
+ # return self._format_library_attribute(targets)
+
+ # we don't know anything about this attribute
+ self._add_library_details_to_model(libNode)
+
+ targets = list(self.model.get_targets(libNode, attribute))
+ if len(targets) > 0:
+ return self._format_library_attribute(targets)
return None
-
+
def _format_library_attribute(self, targets):
if len(targets) == 0:
return None
else:
return None
-
+ def get_view_name(self, view):
+ names = list(self.model.get_targets(view, submissionOntology['view_name']))
+ if len(names) == 1:
+ return fromTypedNode(names[0])
+ else:
+ msg = "Found wrong number of view names for {0} len = {1}"
+ msg = msg.format(str(view), len(names))
+ logger.error(msg)
+ raise RuntimeError(msg)
+
+
def _get_filename_view_map(self):
"""Query our model for filename patterns
name = model.get_target(signal_view_node, dafTermOntology['name'])
self.failUnlessEqual(fromTypedNode(name), u'Signal')
-def load_daf_mapper(name, extra_statements=None):
+def load_daf_mapper(name, extra_statements=None, ns=None):
"""Load test model in
"""
model = get_model()
+ if ns is None:
+ ns="http://extra"
+
if extra_statements is not None:
parser = RDF.Parser(name='turtle')
parser.parse_string_into_model(model, extra_statements,
- 'http://extra.extra')
+ ns)
test_daf_stream = StringIO(test_daf)
mapper = daf.DAFMapper(name, daf_file = test_daf_stream, model=model)
#self.failUnlessEqual(search[0].object.literal_value['string'], pattern)
def test_find_one_view(self):
+ name='testfind'
extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
+@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .
-<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
-<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
-''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog'}
-
- daf_mapper = load_daf_mapper('testfind', extra_statements = extra)
+thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
+thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
+'''.format(name)
+ daf_mapper = load_daf_mapper(name, extra_statements = extra)
view = daf_mapper.find_view('filename_r1.fastq')
- self.failUnlessEqual(str(view),
- str(submissionLog['testfind/view/FastqRd1']))
-
- #writer = get_serializer()
- #turtle = writer.serialize_model_to_string(model)
- #print turtle
+
+ # dump_model(daf_mapper.model)
+ view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
+ view_root = view_root.format(name)
+ self.failUnlessEqual(str(view), '<{0}{1}>'.format(view_root,'FastqRd1'))
def test_find_overlapping_view(self):
+ name = 'testfind'
extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
+@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .
-<%(submissionLog)s/testfind/view/fastq> dafTerm:filename_re ".*\\\\.fastq" .
-<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
-''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog'}
-
- daf_mapper = load_daf_mapper('testfind', extra_statements = extra)
+thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
+thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
+'''.format(name)
+ daf_mapper = load_daf_mapper(name, extra_statements = extra)
self.failUnlessRaises(daf.ModelException,
daf_mapper.find_view,
lib_id = '11204'
lib_url = 'http://jumpgate.caltech.edu/library/%s/' %(lib_id)
extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
+@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
+@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
-<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
-<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*\\\\.fastq" .
+thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
+ submissionOntology:view_name "Signal" .
+thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
+ submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
-''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog',
- 'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
+''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
'libUrl': lib_url}
daf_mapper = load_daf_mapper('testfind', extra)
libNode,
filename)
+ #dump_model(daf_mapper.model)
+
sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
submission_name = sub_root + analysis_name
source = daf_mapper.model.get_source(rdfNS['type'], submissionOntology['submission'])
-
self.failUnlessEqual(str(source.uri), submission_name)
view_name = submission_name + '/Signal'
view = daf_mapper.model.get_target(source, submissionOntology['has_view'])
self.failUnlessEqual(str(view.uri), view_name)
+
def test_library_url(self):
daf_mapper = load_daf_mapper('urltest')
os.close(fd)
os.unlink(pathname)
print "unmade", pathname
+
def suite():
suite = unittest.makeSuite(TestDAF, 'test')