Add support for new "extraVariables" term in DAF.
author Diane Trout <diane@caltech.edu>
Fri, 30 Sep 2011 21:22:06 +0000 (14:22 -0700)
committer Diane Trout <diane@caltech.edu>
Fri, 30 Sep 2011 21:22:06 +0000 (14:22 -0700)
Its purpose is to list variables that should be included in the DAF
for a particular experiment but aren't experimental control variables.

htsworkflow/submission/daf.py
htsworkflow/submission/test/test_daf.py

index 1be882b051eba71ffe274c8b2b905500c70e67ca..7b17f6931a6a291b15173aa732806f9561ccfdfd 100644 (file)
@@ -24,6 +24,8 @@ from htsworkflow.util.hashfile import make_md5sum
 
 logger = logging.getLogger(__name__)
 
+DAF_VARIABLE_NAMES = ("variables", "extraVariables")
+VARIABLES_TERM_NAME = 'variables'
 
 class ModelException(RuntimeError):
     """Assumptions about the RDF model failed"""
@@ -108,7 +110,7 @@ def parse_stream(stream):
                 view_name = None
                 view_attributes = {}
             state = DAF_HEADER
-        elif state == DAF_HEADER and name == 'variables':
+        elif state == DAF_HEADER and name in DAF_VARIABLE_NAMES:
             attributes[name] = [x.strip() for x in value.split(',')]
         elif state == DAF_HEADER and name == 'view':
             view_name = value
@@ -162,6 +164,7 @@ def convert_to_rdf_statements(attributes, subject):
 
     The statements are attached to the provided subject node
     """
+    variables_term = dafTermOntology[VARIABLES_TERM_NAME]
     statements = []
     for daf_key in attributes:
         predicate = dafTermOntology[daf_key]
@@ -169,11 +172,10 @@ def convert_to_rdf_statements(attributes, subject):
             statements.extend(_views_to_statements(subject,
                                                    dafTermOntology,
                                                    attributes[daf_key]))
-        elif daf_key == 'variables':
-            #predicate = ddfNS['variables']
-            for var in attributes.get('variables', []):
+        elif daf_key in DAF_VARIABLE_NAMES:
+            for var in attributes.get(daf_key, []):
                 obj = toTypedNode(var)
-                statements.append(RDF.Statement(subject, predicate, obj))
+                statements.append(RDF.Statement(subject, variables_term, obj))
         else:
             value = attributes[daf_key]
             obj = toTypedNode(value)
@@ -396,12 +398,12 @@ class DAFMapper(object):
     def get_daf_variables(self):
         """Returns simple variables names that to include in the ddf
         """
-        variableTerm = dafTermOntology['variables']
+        variables_term = dafTermOntology[VARIABLES_TERM_NAME]
         results = ['view']
         if self.need_replicate():
             results.append('replicate')
 
-        for obj in self.model.get_targets(self.submissionSet, variableTerm):
+        for obj in self.model.get_targets(self.submissionSet, variables_term):
             value = str(fromTypedNode(obj))
             results.append(value)
         results.append('labVersion')
index 3d31c95807ad38753c4c49735933916c924208e8..93c6e9932b23a03e2d0b5cc86bfe17a9775fcfda 100644 (file)
@@ -59,6 +59,26 @@ hasReplicates    no
 required         no
 """
 
+test_daf_extra = """# Lab and general info
+grant             Hardison
+lab               Caltech-m
+dataType          ChipSeq
+variables         cell,antibody,sex,age,strain
+extraVariables    controlId,treatment
+compositeSuffix   CaltechHistone
+assembly          mm9
+dafVersion        2.0
+validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000
+
+# Track/view definition
+view             FastqRd1
+longLabelPrefix  Caltech Fastq Read 1
+type             fastq
+hasReplicates    no
+required         no
+"""
+
+
 class TestDAF(unittest.TestCase):
     def test_parse(self):
 
@@ -75,6 +95,7 @@ class TestDAF(unittest.TestCase):
         self.failUnlessEqual(signal['longLabelPrefix'],
                              'Caltech Histone Signal')
 
+
     def test_rdf(self):
 
         parsed = daf.fromstring(test_daf)
@@ -142,6 +163,7 @@ def dump_model(model):
     turtle =  writer.serialize_model_to_string(model)
     print turtle
 
+
 class TestDAFMapper(unittest.TestCase):
     def test_create_mapper_add_pattern(self):
         name = 'testsub'
@@ -257,22 +279,27 @@ thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
         self.failUnlessEqual(daf_mapper.need_replicate(), False)
         self.failUnless('replicate' not in daf_mapper.get_daf_variables())
 
+    def test_daf_with_extra(self):
+        daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_extra)
+        variables = daf_mapper.get_daf_variables()
+        self.assertEqual(len(variables), 9)
+        self.failUnless('treatment' in variables)
+        self.failUnless('controlId' in variables)
+
+
 @contextmanager
 def mktempdir(prefix='tmp'):
     d = tempfile.mkdtemp(prefix=prefix)
-    print "made", d
     yield d
     shutil.rmtree(d)
-    print "unmade", d
+
 
 @contextmanager
 def mktempfile(suffix='', prefix='tmp', dir=None):
     fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
     yield pathname
-    print "made", pathname
     os.close(fd)
     os.unlink(pathname)
-    print "unmade", pathname
 
 
 def suite():