Examine the DAF to determine if the DDF needs to include replicate information
author Diane Trout <diane@caltech.edu>
Thu, 28 Jul 2011 18:40:24 +0000 (11:40 -0700)
committer Diane Trout <diane@caltech.edu>
Thu, 28 Jul 2011 18:40:24 +0000 (11:40 -0700)
extra/ucsc_encode_submission/ucsc_gather.py
htsworkflow/submission/daf.py
htsworkflow/submission/test/test_daf.py

index f7cdcf71b938d06306264ec296fa8ff4bf472ffe..bd5ad71180059b0b457a11c5df71646e4f9c9c56 100755 (executable)
@@ -213,7 +213,7 @@ def make_ddf(view_map, submissionNode, daf_name, make_condor=False, outdir=None)
 PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
 PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
 
-select ?submitView  ?files ?md5sum ?view ?cell ?antibody ?sex ?control ?controlId ?labExpId ?labVersion ?treatment ?protocol ?readType ?insertLength
+select ?submitView  ?files ?md5sum ?view ?cell ?antibody ?sex ?control ?controlId ?labExpId ?labVersion ?treatment ?protocol ?readType ?insertLength ?replicate
 WHERE {
   ?file ucscDaf:filename ?files ;
         ucscDaf:md5sum ?md5sum .
@@ -230,6 +230,7 @@ WHERE {
   OPTIONAL { ?library ucscDaf:sex ?sex }
   OPTIONAL { ?library libraryOntology:library_id ?labExpId }
   OPTIONAL { ?library libraryOntology:library_id ?labVersion }
+  OPTIONAL { ?library libraryOntology:replicate ?replicate }
   OPTIONAL { ?library libraryOntology:condition ?treatment }
   OPTIONAL { ?library ucscDaf:protocol ?protocol }
   OPTIONAL { ?library ucscDaf:readType ?readType }
index 1c19339243290f77af81c9a7006d011894d2ce84..b8a4177fd46fd164ea9f1940c1ecc99ce8c92eab 100644 (file)
@@ -326,6 +326,9 @@ class DAFMapper(object):
         """
         variableTerm = dafTermOntology['variables']
         results = ['view']
+        if self.need_replicate():
+            results.append('replicate')
+            
         for obj in self.model.get_targets(self.submissionSet, variableTerm):
             value = str(fromTypedNode(obj))
             results.append(value)
@@ -438,6 +441,12 @@ class DAFMapper(object):
             patterns[literal_re] = view_name
         return patterns
 
+    def _get_library_url(self):
+        return str(self.libraryNS[''].uri)
+    def _set_library_url(self, value):
+        self.libraryNS = RDF.NS(str(value))
+    library_url = property(_get_library_url, _set_library_url)
+
     def _is_paired(self, libNode):
         """Determine if a library is paired end"""
         library_type = self._get_library_attribute(libNode, 'library_type')
@@ -456,8 +465,15 @@ class DAFMapper(object):
                 "Unrecognized library type %s for %s" % \
                 (library_type, str(libNode)))
 
-    def _get_library_url(self):
-        return str(self.libraryNS[''].uri)
-    def _set_library_url(self, value):
-        self.libraryNS = RDF.NS(str(value))
-    library_url = property(_get_library_url, _set_library_url)
+    def need_replicate(self):
+        viewTerm = dafTermOntology['views']
+        replicateTerm = dafTermOntology['hasReplicates']
+
+        views = self.model.get_targets(self.submissionSet, viewTerm)
+
+        for view in views:
+            replicate = self.model.get_target(view, replicateTerm)
+            if fromTypedNode(replicate):
+                return True
+            
+        return False
index 913b0813c8bffcf1c37aa33b082ab8f1c69902b2..a5c3719b0331fc521a0b858f8168bac24e881f0e 100644 (file)
@@ -41,6 +41,24 @@ hasReplicates    yes
 required         no
 """
 
+test_daf_no_rep = """# Lab and general info
+grant             Hardison
+lab               Caltech-m
+dataType          ChipSeq 
+variables         cell, antibody,sex,age,strain,control
+compositeSuffix   CaltechHistone
+assembly          mm9
+dafVersion        2.0
+validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000
+
+# Track/view definition
+view             FastqRd1
+longLabelPrefix  Caltech Fastq Read 1
+type             fastq
+hasReplicates    no
+required         no
+"""
+
 class TestDAF(unittest.TestCase):
     def test_parse(self):
 
@@ -83,7 +101,7 @@ class TestDAF(unittest.TestCase):
         name = model.get_target(signal_view_node, dafTermOntology['name'])
         self.failUnlessEqual(fromTypedNode(name), u'Signal')
 
-def load_daf_mapper(name, extra_statements=None, ns=None):
+def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
     """Load test model in
     """
     model = get_model()
@@ -122,6 +140,7 @@ class TestDAFMapper(unittest.TestCase):
                              str(dafTermOntology['filename_re']))
         #self.failUnlessEqual(search[0].object.literal_value['string'], pattern)
 
+        
     def test_find_one_view(self):
         name='testfind'
         extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@@ -208,6 +227,16 @@ thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
         daf_mapper.library_url = 'http://google.com'
         self.failUnlessEqual(daf_mapper.library_url, 'http://google.com' )
 
+    def test_daf_with_replicate(self):
+        daf_mapper = load_daf_mapper('test_rep')
+        self.failUnlessEqual(daf_mapper.need_replicate(), True)
+        self.failUnless('replicate' in daf_mapper.get_daf_variables())
+                        
+    def test_daf_without_replicate(self):
+        daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_no_rep)
+        self.failUnlessEqual(daf_mapper.need_replicate(), False)
+        self.failUnless('replicate' not in daf_mapper.get_daf_variables())
+        
 @contextmanager
 def mktempdir(prefix='tmp'):
     d = tempfile.mkdtemp(prefix=prefix)