LOGGER = logging.getLogger(__name__)
class Submission(object):
- def __init__(self, name, model):
+ def __init__(self, name, model, host):
+ """Set up a submission and the RDF namespaces it uses.
+
+ name is stored and passed to get_submission_uri() to obtain the
+ submission set URI; model is stored as self.model; host is the URL
+ prefix to which '/library/' is appended to form the library
+ namespace.
+ """
self.name = name
self.model = model
self.submissionSet = get_submission_uri(self.name)
self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
- self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
+ self.libraryNS = RDF.NS('{0}/library/'.format(host))
self.__view_map = None
submission_files = os.listdir(analysis_dir)
for filename in submission_files:
- self.construct_file_attributes(analysis_dir, libNode, filename)
+ pathname = os.path.abspath(os.path.join(analysis_dir, filename))
+ self.construct_file_attributes(analysis_dir, libNode, pathname)
def construct_file_attributes(self, analysis_dir, libNode, pathname):
"""Looking for the best extension
rdfNS['type'])
if file_classification is None:
errmsg = 'Could not find class for {0}'
- logger.warning(errmsg.format(str(file_type)))
+ LOGGER.warning(errmsg.format(str(file_type)))
return
self.model.add_statement(
an_analysis))
# add file specific information
- fileNode = self.link_file_to_classes(filename,
+ fileNode = self.link_file_to_classes(pathname,
an_analysis,
an_analysis_uri,
analysis_dir)
file_type))
LOGGER.debug("Done.")
- def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
+ def link_file_to_classes(self, pathname, submissionNode, submission_uri, analysis_dir):
# add file specific information
- fileNode = RDF.Node(RDF.Uri(submission_uri + '/' + filename))
+ path, filename = os.path.split(pathname)
+ fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(pathname)))
self.model.add_statement(
RDF.Statement(submissionNode,
dafTermOntology['has_file'],
RDF.Statement(fileNode, dafTermOntology['md5sum'], md5))
def _add_library_details_to_model(self, libNode):
+ """Import the RDFa statements found at libNode.uri into the model.
+
+ Statements whose predicate is in set_attributes are always imported.
+ Any other statement is imported only when the model has no target
+ yet for its subject/predicate pair. Imports are queued in toadd and
+ appended after the parsed stream has been read; lane details are
+ then loaded through _add_lane_details().
+ """
+ # attributes that can have multiple values
+ set_attributes = set((libraryOntology['has_lane'],
+ libraryOntology['has_mappings'],
+ dafTermOntology['has_file']))
parser = RDF.Parser(name='rdfa')
new_statements = parser.parse_as_stream(libNode.uri)
+ toadd = []
for s in new_statements:
+ # always add "collections"
+ if s.predicate in set_attributes:
+ toadd.append(s)
+ continue
# don't override things we already have in the model
targets = list(self.model.get_targets(s.subject, s.predicate))
if len(targets) == 0:
- self.model.append(s)
+ toadd.append(s)
+
+ for s in toadd:
+ self.model.append(s)
+
+ self._add_lane_details(libNode)
+
+ def _add_lane_details(self, libNode):
+     """Import the RDFa description of each lane attached to libNode.
+
+     The lane nodes are gathered from the model's has_lane statements
+     for libNode first, and only afterwards parsed into the model
+     (presumably so the model is not modified while it is searched).
+
+     :param libNode: RDF node whose has_lane objects are imported.
+     :raises RDF.RedlandError: logged and re-raised when
+         parse_into_model() fails for a lane URI.
+     """
+     query = RDF.Statement(libNode, libraryOntology['has_lane'], None)
+     lanes = [lane_stmt.object
+              for lane_stmt in self.model.find_statements(query)]
+
+     parser = RDF.Parser(name='rdfa')
+     for lane in lanes:
+         LOGGER.debug("Importing %s", lane.uri)
+         try:
+             parser.parse_into_model(self.model, lane.uri)
+         except RDF.RedlandError:
+             LOGGER.error("Error accessing %s", lane.uri)
+             # bare raise keeps the original traceback ("raise e" resets it)
+             raise
def find_best_match(self, filename):
query = RDF.SPARQLQuery(str(formatted_query))
rdfstream = query.execute(self.model)
results = []
- for r in rdfstream:
- results.append(r)
+ for record in rdfstream:
+ d = {}
+ for key, value in record.items():
+ d[key] = fromTypedNode(value)
+ results.append(d)
return results