From 6841c02ae11b5396a88210b7f5a93d3039c08ab8 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 30 Sep 2011 14:22:06 -0700 Subject: [PATCH] Add support for new "extraVariables" term in DAF. Its purpose is to list variables that should be included in the DAF for a particular experiment but aren't experimental control variables. --- htsworkflow/submission/daf.py | 16 ++++++----- htsworkflow/submission/test/test_daf.py | 35 ++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/htsworkflow/submission/daf.py b/htsworkflow/submission/daf.py index 1be882b..7b17f69 100644 --- a/htsworkflow/submission/daf.py +++ b/htsworkflow/submission/daf.py @@ -24,6 +24,8 @@ from htsworkflow.util.hashfile import make_md5sum logger = logging.getLogger(__name__) +DAF_VARIABLE_NAMES = ("variables", "extraVariables") +VARIABLES_TERM_NAME = 'variables' class ModelException(RuntimeError): """Assumptions about the RDF model failed""" @@ -108,7 +110,7 @@ def parse_stream(stream): view_name = None view_attributes = {} state = DAF_HEADER - elif state == DAF_HEADER and name == 'variables': + elif state == DAF_HEADER and name in DAF_VARIABLE_NAMES: attributes[name] = [x.strip() for x in value.split(',')] elif state == DAF_HEADER and name == 'view': view_name = value @@ -162,6 +164,7 @@ def convert_to_rdf_statements(attributes, subject): The statements are attached to the provided subject node """ + variables_term = dafTermOntology[VARIABLES_TERM_NAME] statements = [] for daf_key in attributes: predicate = dafTermOntology[daf_key] @@ -169,11 +172,10 @@ def convert_to_rdf_statements(attributes, subject): statements.extend(_views_to_statements(subject, dafTermOntology, attributes[daf_key])) - elif daf_key == 'variables': - #predicate = ddfNS['variables'] - for var in attributes.get('variables', []): + elif daf_key in DAF_VARIABLE_NAMES: + for var in attributes.get(daf_key, []): obj = toTypedNode(var) - statements.append(RDF.Statement(subject, predicate, obj)) + 
statements.append(RDF.Statement(subject, variables_term, obj)) else: value = attributes[daf_key] obj = toTypedNode(value) @@ -396,12 +398,12 @@ class DAFMapper(object): def get_daf_variables(self): """Returns simple variables names that to include in the ddf """ - variableTerm = dafTermOntology['variables'] + variables_term = dafTermOntology[VARIABLES_TERM_NAME] results = ['view'] if self.need_replicate(): results.append('replicate') - for obj in self.model.get_targets(self.submissionSet, variableTerm): + for obj in self.model.get_targets(self.submissionSet, variables_term): value = str(fromTypedNode(obj)) results.append(value) results.append('labVersion') diff --git a/htsworkflow/submission/test/test_daf.py b/htsworkflow/submission/test/test_daf.py index 3d31c95..93c6e99 100644 --- a/htsworkflow/submission/test/test_daf.py +++ b/htsworkflow/submission/test/test_daf.py @@ -59,6 +59,26 @@ hasReplicates no required no """ +test_daf_extra = """# Lab and general info +grant Hardison +lab Caltech-m +dataType ChipSeq +variables cell,antibody,sex,age,strain +extraVariables controlId,treatment +compositeSuffix CaltechHistone +assembly mm9 +dafVersion 2.0 +validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000 + +# Track/view definition +view FastqRd1 +longLabelPrefix Caltech Fastq Read 1 +type fastq +hasReplicates no +required no +""" + + class TestDAF(unittest.TestCase): def test_parse(self): @@ -75,6 +95,7 @@ class TestDAF(unittest.TestCase): self.failUnlessEqual(signal['longLabelPrefix'], 'Caltech Histone Signal') + def test_rdf(self): parsed = daf.fromstring(test_daf) @@ -142,6 +163,7 @@ def dump_model(model): turtle = writer.serialize_model_to_string(model) print turtle + class TestDAFMapper(unittest.TestCase): def test_create_mapper_add_pattern(self): name = 'testsub' @@ -257,22 +279,27 @@ thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ; self.failUnlessEqual(daf_mapper.need_replicate(), False) 
self.failUnless('replicate' not in daf_mapper.get_daf_variables()) + def test_daf_with_extra(self): + daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_extra) + variables = daf_mapper.get_daf_variables() + self.assertEqual(len(variables), 9) + self.failUnless('treatment' in variables) + self.failUnless('controlId' in variables) + + @contextmanager def mktempdir(prefix='tmp'): d = tempfile.mkdtemp(prefix=prefix) - print "made", d yield d shutil.rmtree(d) - print "unmade", d + @contextmanager def mktempfile(suffix='', prefix='tmp', dir=None): fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir) yield pathname - print "made", pathname os.close(fd) os.unlink(pathname) - print "unmade", pathname def suite(): -- 2.30.2