model = get_model(opts.model, opts.db_path)
mapper = None
if opts.name:
- mapper = GEOSubmission(opts.name, model)
- if opts.library_url is not None:
- mapper.library_url = opts.library_url
+ mapper = GEOSubmission(opts.name, model, host=opts.host)
submission_uri = get_submission_uri(opts.name)
imported = False
a_lane = self.model.get_target(r['flowcell'],
libraryOntology['has_lane'])
- print a_lane
if a_lane is None:
imported = True
# we lack information about which lanes were on this flowcell
LOGGER = logging.getLogger(__name__)
class GEOSubmission(Submission):
- def __init__(self, name, model):
- super(GEOSubmission, self).__init__(name, model)
+ def __init__(self, name, model, host):
+ super(GEOSubmission, self).__init__(name, model, host)
def make_soft(self, result_map):
samples = []
LOGGER.error(errmsg.format(str(an_analysis),))
continue
elif len(metadata) > 1:
- errmsg = 'Confused there are more than one samples for %s'
+ errmsg = 'Confused there are more than one sample for %s'
LOGGER.debug(errmsg % (str(an_analysis),))
metadata = metadata[0]
metadata['raw'] = self.get_raw_files(an_analysis)
LOGGER = logging.getLogger(__name__)
class Submission(object):
- def __init__(self, name, model):
+ def __init__(self, name, model, host):
self.name = name
self.model = model
self.submissionSet = get_submission_uri(self.name)
self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
- self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
+ self.libraryNS = RDF.NS('{0}/library/'.format(host))
self.__view_map = None
submission_files = os.listdir(analysis_dir)
for filename in submission_files:
- self.construct_file_attributes(analysis_dir, libNode, filename)
+ pathname = os.path.abspath(os.path.join(analysis_dir, filename))
+ self.construct_file_attributes(analysis_dir, libNode, pathname)
def construct_file_attributes(self, analysis_dir, libNode, pathname):
"""Looking for the best extension
an_analysis))
# add file specific information
- fileNode = self.link_file_to_classes(filename,
+ fileNode = self.link_file_to_classes(pathname,
an_analysis,
an_analysis_uri,
analysis_dir)
file_type))
LOGGER.debug("Done.")
- def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
+ def link_file_to_classes(self, pathname, submissionNode, submission_uri, analysis_dir):
# add file specific information
- fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(filename)))
+ path, filename = os.path.split(pathname)
+ fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(pathname)))
self.model.add_statement(
RDF.Statement(submissionNode,
dafTermOntology['has_file'],
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
select distinct ?lane ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
WHERE {
- <{{submission}}> submissionOntology:library ?library ;
+ <{{submission}}> ucscDaf:has_file ?file ;
+ submissionOntology:library ?library ;
a submissionOntology:submission .
?file ucscDaf:filename ?filename ;
ucscDaf:md5sum ?md5sum ;
- libraryOntology:library ?library ;
+ dc:source ?source ;
a ?file_type .
?file_type a <{{file_class}}> ;
geoSoft:fileTypeLabel ?file_type_label .
- ?library libraryOntology:has_lane ?lane .
- ?lane libraryOntology:flowcell ?flowcell .
+ ?source libraryOntology:flowcell ?flowcell .
+
?flowcell libraryOntology:flowcell_id ?flowcell_id ;
libraryOntology:read_length ?read_length ;
libraryOntology:flowcell_type ?flowcell_type ;
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
select distinct ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
WHERE {
<{{submission}}> ucscDaf:has_file ?file ;
+ submissionOntology:library ?library ;
a submissionOntology:submission .
?file ucscDaf:filename ?filename ;
ucscDaf:md5sum ?md5sum ;
- libraryOntology:has_lane ?lane ;
a ?file_type .
?file_type a <{{file_class}}> ;
geoSoft:fileTypeLabel ?file_type_label .
- OPTIONAL { ?lane libraryOntology:flowcell ?flowcell .
+ OPTIONAL { ?file dc:source ?source_file .
+ ?source_file libraryOntology:flowcell ?flowcell .
?flowcell libraryOntology:flowcell_id ?flowcell_id ;
libraryOntology:read_length ?read_length ;
libraryOntology:flowcell_type ?flowcell_type ;