To make working with the development server easier, I changed
the Submission class to take a host, which it uses to generate
the base library URL.
When constructing URLs for files, I'm now using the actual path names
instead of synthesizing something based on the submission name.
This limits the amount of knowledge that needs to be passed
to and from the fastq generation code.
For fastq files, the SPARQL query looks at the source file to find the
flowcell information. For supplemental files, it looks at the submission
class for that analysis directory and grabs the library ID
from there.
model = get_model(opts.model, opts.db_path)
mapper = None
if opts.name:
model = get_model(opts.model, opts.db_path)
mapper = None
if opts.name:
- mapper = GEOSubmission(opts.name, model)
- if opts.library_url is not None:
- mapper.library_url = opts.library_url
+ mapper = GEOSubmission(opts.name, model, host=opts.host)
submission_uri = get_submission_uri(opts.name)
submission_uri = get_submission_uri(opts.name)
imported = False
a_lane = self.model.get_target(r['flowcell'],
libraryOntology['has_lane'])
imported = False
a_lane = self.model.get_target(r['flowcell'],
libraryOntology['has_lane'])
if a_lane is None:
imported = True
# we lack information about which lanes were on this flowcell
if a_lane is None:
imported = True
# we lack information about which lanes were on this flowcell
LOGGER = logging.getLogger(__name__)
class GEOSubmission(Submission):
LOGGER = logging.getLogger(__name__)
class GEOSubmission(Submission):
- def __init__(self, name, model):
- super(GEOSubmission, self).__init__(name, model)
+ def __init__(self, name, model, host):
+ super(GEOSubmission, self).__init__(name, model, host)
def make_soft(self, result_map):
samples = []
def make_soft(self, result_map):
samples = []
LOGGER.error(errmsg.format(str(an_analysis),))
continue
elif len(metadata) > 1:
LOGGER.error(errmsg.format(str(an_analysis),))
continue
elif len(metadata) > 1:
- errmsg = 'Confused there are more than one samples for %s'
+ errmsg = 'Confused there are more than one sample for %s'
LOGGER.debug(errmsg % (str(an_analysis),))
metadata = metadata[0]
metadata['raw'] = self.get_raw_files(an_analysis)
LOGGER.debug(errmsg % (str(an_analysis),))
metadata = metadata[0]
metadata['raw'] = self.get_raw_files(an_analysis)
LOGGER = logging.getLogger(__name__)
class Submission(object):
LOGGER = logging.getLogger(__name__)
class Submission(object):
- def __init__(self, name, model):
+ def __init__(self, name, model, host):
self.name = name
self.model = model
self.submissionSet = get_submission_uri(self.name)
self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
self.name = name
self.model = model
self.submissionSet = get_submission_uri(self.name)
self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
- self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
+ self.libraryNS = RDF.NS('{0}/library/'.format(host))
submission_files = os.listdir(analysis_dir)
for filename in submission_files:
submission_files = os.listdir(analysis_dir)
for filename in submission_files:
- self.construct_file_attributes(analysis_dir, libNode, filename)
+ pathname = os.path.abspath(os.path.join(analysis_dir, filename))
+ self.construct_file_attributes(analysis_dir, libNode, pathname)
def construct_file_attributes(self, analysis_dir, libNode, pathname):
"""Looking for the best extension
def construct_file_attributes(self, analysis_dir, libNode, pathname):
"""Looking for the best extension
an_analysis))
# add file specific information
an_analysis))
# add file specific information
- fileNode = self.link_file_to_classes(filename,
+ fileNode = self.link_file_to_classes(pathname,
an_analysis,
an_analysis_uri,
analysis_dir)
an_analysis,
an_analysis_uri,
analysis_dir)
file_type))
LOGGER.debug("Done.")
file_type))
LOGGER.debug("Done.")
- def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
+ def link_file_to_classes(self, pathname, submissionNode, submission_uri, analysis_dir):
# add file specific information
# add file specific information
- fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(filename)))
+ path, filename = os.path.split(pathname)
+ fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(pathname)))
self.model.add_statement(
RDF.Statement(submissionNode,
dafTermOntology['has_file'],
self.model.add_statement(
RDF.Statement(submissionNode,
dafTermOntology['has_file'],
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
select distinct ?lane ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
WHERE {
select distinct ?lane ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
WHERE {
- <{{submission}}> submissionOntology:library ?library ;
+ <{{submission}}> ucscDaf:has_file ?file ;
+ submissionOntology:library ?library ;
a submissionOntology:submission .
?file ucscDaf:filename ?filename ;
ucscDaf:md5sum ?md5sum ;
a submissionOntology:submission .
?file ucscDaf:filename ?filename ;
ucscDaf:md5sum ?md5sum ;
- libraryOntology:library ?library ;
a ?file_type .
?file_type a <{{file_class}}> ;
geoSoft:fileTypeLabel ?file_type_label .
a ?file_type .
?file_type a <{{file_class}}> ;
geoSoft:fileTypeLabel ?file_type_label .
- ?library libraryOntology:has_lane ?lane .
- ?lane libraryOntology:flowcell ?flowcell .
+ ?source libraryOntology:flowcell ?flowcell .
+
?flowcell libraryOntology:flowcell_id ?flowcell_id ;
libraryOntology:read_length ?read_length ;
libraryOntology:flowcell_type ?flowcell_type ;
?flowcell libraryOntology:flowcell_id ?flowcell_id ;
libraryOntology:read_length ?read_length ;
libraryOntology:flowcell_type ?flowcell_type ;
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
select distinct ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
WHERE {
<{{submission}}> ucscDaf:has_file ?file ;
select distinct ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
WHERE {
<{{submission}}> ucscDaf:has_file ?file ;
+ submissionOntology:library ?library ;
a submissionOntology:submission .
?file ucscDaf:filename ?filename ;
ucscDaf:md5sum ?md5sum ;
a submissionOntology:submission .
?file ucscDaf:filename ?filename ;
ucscDaf:md5sum ?md5sum ;
- libraryOntology:has_lane ?lane ;
a ?file_type .
?file_type a <{{file_class}}> ;
geoSoft:fileTypeLabel ?file_type_label .
a ?file_type .
?file_type a <{{file_class}}> ;
geoSoft:fileTypeLabel ?file_type_label .
- OPTIONAL { ?lane libraryOntology:flowcell ?flowcell .
+ OPTIONAL { ?file dc:source ?source_file .
+ ?source_file libraryOntology:flowcell ?flowcell .
?flowcell libraryOntology:flowcell_id ?flowcell_id ;
libraryOntology:read_length ?read_length ;
libraryOntology:flowcell_type ?flowcell_type ;
?flowcell libraryOntology:flowcell_id ?flowcell_id ;
libraryOntology:read_length ?read_length ;
libraryOntology:flowcell_type ?flowcell_type ;