Add function to list the names for submissions from the RDF model.
author Diane Trout <diane@ghic.org>
Wed, 17 Jul 2013 22:28:16 +0000 (15:28 -0700)
committer Diane Trout <diane@ghic.org>
Thu, 18 Jul 2013 00:09:16 +0000 (17:09 -0700)
Currently the model is ill-specified and the name entries
are just pointing at the list of per-library directory names.

Make sure the end of the submission name doesn't have URL separator
characters.

htsworkflow/submission/submission.py
htsworkflow/submission/test/test_submission.py [new file with mode: 0644]

index 3320f1caf3174e6d5636c4dd20841188771050d9..640443557f8d50a574816ce33ba417de35054da5 100644 (file)
@@ -8,16 +8,12 @@ import RDF
 
 from htsworkflow.util.rdfhelp import \
      blankOrUri, \
-     dafTermOntology, \
      dump_model, \
+     fromTypedNode, \
      get_model, \
-     libraryOntology, \
-     owlNS, \
-     rdfNS, \
-     submissionLog, \
-     submissionOntology, \
-     toTypedNode, \
-     fromTypedNode
+     stripNamespace, \
+     toTypedNode
+from htsworkflow.util.rdfns import *
 from htsworkflow.util.hashfile import make_md5sum
 from htsworkflow.submission.fastqname import FastqName
 from htsworkflow.submission.daf import \
@@ -350,3 +346,21 @@ class Submission(object):
                 d[key] = fromTypedNode(value)
             results.append(d)
         return results
+
+
+def list_submissions(model):
+    """Yield a name per distinct subns:has_submission subject in model: the
+    stripNamespace(submissionLog, ...) result minus one trailing '#', '/', '?'."""
+    query_body = """
+      PREFIX subns: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
+
+      select distinct ?submission
+      where { ?submission subns:has_submission ?library_dir }
+    """
+    rdfstream = RDF.SPARQLQuery(query_body).execute(model)
+    for row in rdfstream:
+        s = stripNamespace(submissionLog, row['submission'])
+        # Guard on s: an empty (or missing) name must not raise on s[-1].
+        if s and s[-1] in ('#', '/', '?'):
+            s = s[:-1]
+        yield s
diff --git a/htsworkflow/submission/test/test_submission.py b/htsworkflow/submission/test/test_submission.py
new file mode 100644 (file)
index 0000000..f362cea
--- /dev/null
@@ -0,0 +1,68 @@
+import os
+from StringIO import StringIO
+import shutil
+import tempfile
+from unittest2 import TestCase, TestSuite, defaultTestLoader
+
+from htsworkflow.submission import daf, results
+from htsworkflow.util.rdfhelp import \
+     dafTermOntology, \
+     fromTypedNode, \
+     load_string_into_model, \
+     rdfNS, \
+     submissionLog, \
+     submissionOntology, \
+     get_model, \
+     get_serializer
+from htsworkflow.submission.submission import list_submissions
+import RDF
+
+class TestSubmissionModule(TestCase):
+    """Exercise list_submissions() against small in-memory RDF models."""
+
+    def test_empty_list_submission(self):
+        """A freshly created model lists no submissions."""
+        self.assertEqual(len(list(list_submissions(get_model()))), 0)
+
+    def test_one_submission(self):
+        """Two has_submission statements on one subject give one name."""
+        model = get_model()
+        load_string_into_model(model, "turtle",
+            """
+            @prefix subns: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
+            @prefix test: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/test#> .
+
+            <http://jumpgate.caltech.edu/wiki/SubmissionsLog/test#>
+               subns:has_submission test:lib1 ;
+               subns:has_submission test:lib2.
+            """)
+        self.assertEqual(list(list_submissions(model)), ["test"])
+
+    def test_two_submission(self):
+        """Two distinct subjects give two names, in no particular order."""
+        model = get_model()
+        load_string_into_model(model, "turtle",
+            """
+            @prefix subns: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
+            @prefix test: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/test#> .
+
+            <http://jumpgate.caltech.edu/wiki/SubmissionsLog/test1#>
+               subns:has_submission test:lib1 .
+            <http://jumpgate.caltech.edu/wiki/SubmissionsLog/test2#>
+               subns:has_submission test:lib2 .
+            """)
+        found = list(list_submissions(model))
+        self.assertEqual(len(found), 2)
+        # Compare as sets: the test does not depend on result order.
+        expected = set(["test1", "test2"])
+        self.assertEqual(set(found), expected)
+
+def suite():
+    """Build the TestSuite containing all TestSubmissionModule tests."""
+    collected = TestSuite()
+    collected.addTests(defaultTestLoader.loadTestsFromTestCase(TestSubmissionModule))
+    return collected
+
+if __name__ == "__main__":
+    import unittest2  # only needed when this file is run as a script
+    unittest2.main(defaultTest='suite')