extractor.create_scripts(results)
if opts.scan_submission:
+ if opts.name is None:
+ parser.error("Please define a submission name")
mapper.scan_submission_dirs(results)
if opts.make_soft:
rdfNS['type'],
submissionOntology['submission']))
self.model.add_statement(RDF.Statement(submissionNode,
- submissionOntology['library'],
+ libraryOntology['library'],
libNode))
LOGGER.debug("Adding statements to {0}".format(str(submissionView)))
for lib_id, result_dir in result_map.items():
an_analysis = self.get_submission_node(result_dir)
metadata = self.get_sample_metadata(an_analysis)
- if len(metadata) > 1:
+ if len(metadata) == 0:
+ errmsg = 'No metadata found for {0}'
+ LOGGER.error(errmsg.format(str(an_analysis),))
+ continue
+ elif len(metadata) > 1:
errmsg = 'Confused there are more than one samples for %s'
- LOGGER.debug(errmsg % (str(an_analysis,)))
+ LOGGER.debug(errmsg % (str(an_analysis),))
metadata = metadata[0]
metadata['raw'] = self.get_raw_files(an_analysis)
metadata['supplimental'] = self.get_sample_files(an_analysis)
rdfNS['type'])
if file_classification is None:
errmsg = 'Could not find class for {0}'
- logger.warning(errmsg.format(str(file_type)))
+ LOGGER.warning(errmsg.format(str(file_type)))
return
self.model.add_statement(
select distinct ?name ?cell ?antibody ?sex ?control ?strain ?controlId ?library_id ?treatment ?protocol ?readType ?insertLength ?replicate, ?mapAlgorithm ?species_name ?taxon_id ?extractMolecule ?growthProtocol ?extractProtocol ?dataProtocol ?experiment_type ?library_selection ?library_source
WHERE {
- <{{submission}}> a submissionOntology:submission .
+ <{{submission}}> a submissionOntology:submission ;
+ submissionOntology:library ?library ;
+ submissionOntology:name ?name .
OPTIONAL { <{{submission}}> ucscDaf:control ?control }
OPTIONAL { <{{submission}}> ucscDaf:controlId ?controlId }
OPTIONAL { ?library libraryOntology:antibody ?antibody }
OPTIONAL { ?library libraryOntology:cell_line ?cell .
- ?cell_line cells:cell ?cell ;
- cells:documents ?growthProtocol . }
+ OPTIONAL { ?cell_line cells:cell ?cell ;
+ cells:documents ?growthProtocol . }}
OPTIONAL { ?library ucscDaf:sex ?sex }
OPTIONAL { ?library libraryOntology:library_id ?library_id }
OPTIONAL { ?library libraryOntology:replicate ?replicate }
- OPTIONAL { ?library libraryOntology:species ?species_name }
+ OPTIONAL { ?library libraryOntology:species ?species_name .
+ ?species libraryOntology:species ?species_name ;
+ libraryOntology:taxon_id ?taxon_id . }
OPTIONAL { ?library libraryOntology:condition_term ?treatment }
OPTIONAL { ?library libraryOntology:experiment_type ?experiment_type }
OPTIONAL { ?library libraryOntology:librarySelection ?library_selection }
OPTIONAL { ?library libraryOntology:insert_size ?insertLength }
OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm }
- <{{submission}}> submissionOntology:library ?library ;
- submissionOntology:name ?name .
?species libraryOntology:species ?species_name ;
libraryOntology:taxon_id ?taxon_id .
if len(url_parts[0]) == 0 or url_parts[0] == 'file':
url_parts[0] = 'file'
url_parts[2] = os.path.abspath(url_parts[2])
- if parser_name is None or parser_name == 'guess':
- parser_name = guess_parser_by_extension(path)
+ if parser_name is None or parser_name == 'guess':
+ parser_name = guess_parser_by_extension(path)
url = urlunparse(url_parts)
logger.info("Opening {0} with parser {1}".format(url, parser_name))
rdf_parser = RDF.Parser(name=parser_name)
+ statements = []
retries = 3
while retries > 0:
try:
retries = 0
except RDF.RedlandError, e:
errmsg = "RDF.RedlandError: {0} {1} tries remaining"
- logger.error(errmsg.format(str(e), tries))
+ logger.error(errmsg.format(str(e), retries))
for s in statements:
conditionally_add_statement(model, s, ns)
return 'turtle'
elif content_type in ('text/html',):
return 'rdfa'
- elif content_type is None:
+ elif content_type is None or content_type in ('text/plain',):
return guess_parser_by_extension(pathname)
def guess_parser_by_extension(pathname):
_, ext = os.path.splitext(pathname)
if ext in ('.xml', '.rdf'):
return 'rdfxml'
- elif ext in ('.html'):
+ elif ext in ('.html',):
return 'rdfa'
- elif ext in ('.turtle'):
+ elif ext in ('.turtle',):
return 'turtle'
return 'guess'
('/a/b/c.rdf', 'rdfxml'),
('/a/b/c.xml', 'rdfxml'),
('/a/b/c.html', 'rdfa'),
- ('/a/b/c.turtle', 'turtle')]
+ ('/a/b/c.turtle', 'turtle'),
+ ('http://foo.bar/bleem.turtle', 'turtle')]
for path, parser in DATA:
self.assertEqual(guess_parser_by_extension(path), parser)
self.assertEqual(guess_parser(None, path), parser)
DATA = [
('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'),
('application/x-turtle', 'http://a.org/b/c', 'turtle'),
- ('text/html', 'http://a.org/b/c', 'rdfa')
+ ('text/html', 'http://a.org/b/c', 'rdfa'),
+ ('text/html', 'http://a.org/b/c.html', 'rdfa'),
+ ('text/plain', 'http://a.org/b/c.turtle', 'turtle'),
+ ('text/plain', 'http://a.org/b/c', 'guess')
]
for contenttype, url, parser in DATA:
self.assertEqual(guess_parser(contenttype, url), parser)