From c7c79846910f2f37928b7e51102d4ff6e2c72f36 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 30 Sep 2011 15:57:37 -0700 Subject: [PATCH] Configure daf variables in only one place. (daf.py) When adding things to the daf variable list, check to see if they're present before adding them (control duplicates). Incorporate a couple more terms to select (mapAlgorithm, strain) --- extra/ucsc_encode_submission/ucsc_gather.py | 11 +++++++---- htsworkflow/submission/daf.py | 20 ++++++++++---------- htsworkflow/submission/test/test_daf.py | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/extra/ucsc_encode_submission/ucsc_gather.py b/extra/ucsc_encode_submission/ucsc_gather.py index f1e0920..1e6b6a6 100755 --- a/extra/ucsc_encode_submission/ucsc_gather.py +++ b/extra/ucsc_encode_submission/ucsc_gather.py @@ -215,7 +215,7 @@ def make_ddf(view_map, submissionNode, daf_name, make_condor=False, outdir=None) PREFIX submissionOntology: PREFIX ucscDaf: -select ?submitView ?files ?md5sum ?view ?cell ?antibody ?sex ?control ?controlId ?labExpId ?labVersion ?treatment ?protocol ?readType ?insertLength ?replicate +select ?submitView ?files ?md5sum ?view ?cell ?antibody ?sex ?control ?strain ?controlId ?labExpId ?labVersion ?treatment ?protocol ?readType ?insertLength ?replicate ?mapAlgorithm WHERE { ?file ucscDaf:filename ?files ; ucscDaf:md5sum ?md5sum . 
@@ -236,7 +236,9 @@ WHERE { OPTIONAL { ?library libraryOntology:condition ?treatment } OPTIONAL { ?library ucscDaf:protocol ?protocol } OPTIONAL { ?library ucscDaf:readType ?readType } + OPTIONAL { ?library ucscDaf:strain ?strain } OPTIONAL { ?library libraryOntology:insert_size ?insertLength } + OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm } } ORDER BY ?submitView""" dag_fragments = [] @@ -251,6 +253,7 @@ ORDER BY ?submitView""" outfile = os.path.join(outdir, ddf_name) output = open(outfile,'w') else: + outfile = 'stdout:' output = sys.stdout formatted_query = query_template % {'submission': str(submissionNode.uri)} @@ -258,11 +261,9 @@ ORDER BY ?submitView""" query = RDF.SPARQLQuery(formatted_query) results = query.execute(view_map.model) - variables = ['files'] # filename goes first - variables.extend(view_map.get_daf_variables()) + variables = view_map.get_daf_variables() # 'controlId', - variables += [ 'labExpId', 'md5sum'] output.write('\t'.join(variables)) output.write(os.linesep) @@ -273,6 +274,8 @@ ORDER BY ?submitView""" current = all_views.setdefault(viewname, {}) for variable_name in variables: value = str(fromTypedNode(row[variable_name])) + if value is None or value == 'None': + logging.warn("{0}: {1} was None".format(outfile, variable_name)) if variable_name in ('files', 'md5sum'): current.setdefault(variable_name,[]).append(value) else: diff --git a/htsworkflow/submission/daf.py b/htsworkflow/submission/daf.py index 7b17f69..c59c45c 100644 --- a/htsworkflow/submission/daf.py +++ b/htsworkflow/submission/daf.py @@ -26,6 +26,9 @@ logger = logging.getLogger(__name__) DAF_VARIABLE_NAMES = ("variables", "extraVariables") VARIABLES_TERM_NAME = 'variables' +DAF_PRE_VARIABLES = ['files', 'view'] +DAF_POST_VARIABLES = [ 'labExpId', 'md5sum'] + class ModelException(RuntimeError): """Assumptions about the RDF model failed""" @@ -353,12 +356,6 @@ class DAFMapper(object): dafTermOntology['submission'], submissionNode)) - # extra information - terms = 
[dafTermOntology['type'], - dafTermOntology['filename_re'], - ] - terms.extend((dafTermOntology[v] for v in self.get_daf_variables())) - # add file specific information self.create_file_attributes(filename, submissionView, submission_uri, submission_dir) @@ -399,14 +396,17 @@ class DAFMapper(object): """Returns simple variables names that to include in the ddf """ variables_term = dafTermOntology[VARIABLES_TERM_NAME] - results = ['view'] - if self.need_replicate(): + results = [] + results.extend([v for v in DAF_PRE_VARIABLES if v not in results]) + results = DAF_PRE_VARIABLES[:] + if self.need_replicate() and 'replicate' not in results: results.append('replicate') for obj in self.model.get_targets(self.submissionSet, variables_term): value = str(fromTypedNode(obj)) - results.append(value) - results.append('labVersion') + if value not in results: + results.append(value) + results.extend([v for v in DAF_POST_VARIABLES if v not in results]) return results def make_submission_name(self, submission_dir): diff --git a/htsworkflow/submission/test/test_daf.py b/htsworkflow/submission/test/test_daf.py index 93c6e99..8b16312 100644 --- a/htsworkflow/submission/test/test_daf.py +++ b/htsworkflow/submission/test/test_daf.py @@ -282,7 +282,7 @@ thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ; def test_daf_with_extra(self): daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_extra) variables = daf_mapper.get_daf_variables() - self.assertEqual(len(variables), 9) + self.assertEqual(len(variables), 11) self.failUnless('treatment' in variables) self.failUnless('controlId' in variables) -- 2.30.2