Test changes to submission code.
author: Diane Trout <diane@ghic.org>
Mon, 22 Jul 2013 19:53:39 +0000 (12:53 -0700)
committer: Diane Trout <diane@ghic.org>
Mon, 22 Jul 2013 19:53:39 +0000 (12:53 -0700)
Also there's some commonality in simulating a submission directory
so refactor that code out to a common module.

htsworkflow/submission/test/submission_test_common.py [new file with mode: 0644]
htsworkflow/submission/test/test_results.py
htsworkflow/submission/test/test_submission.py

diff --git a/htsworkflow/submission/test/submission_test_common.py b/htsworkflow/submission/test/submission_test_common.py
new file mode 100644 (file)
index 0000000..1770b87
--- /dev/null
@@ -0,0 +1,70 @@
+"""Code shared between test cases.
+"""
+import RDF
+import os
+import tempfile
+import htsworkflow.util.rdfhelp
+
+# Directory names of the two simulated samples.
+S1_NAME = '1000-sample'
+S2_NAME = '2000-sample'
+
+# Files expected inside the first sample directory.  Each path already
+# includes the sample directory name, so it is relative to the directory
+# that contains the sample directories.
+S1_FILES = [
+    os.path.join(S1_NAME, 'file1_l8_r1.fastq'),
+    os.path.join(S1_NAME, 'file1_l8_r2.fastq'),
+]
+
+# Files expected inside the second sample directory (same convention).
+S2_FILES = [
+    os.path.join(S2_NAME, 'file1.bam'),
+    os.path.join(S2_NAME, 'file1_l5.fastq'),
+]
+
+# Text placed in front of each Turtle document below.  The documents use
+# the htswlib: prefix without declaring it, so presumably this header
+# supplies the @prefix declarations -- confirm against rdfhelp.
+TURTLE_PREFIX = htsworkflow.util.rdfhelp.get_turtle_header()
+
+# Turtle for library 1000 and the one lane (lane 1) attached to it.
+# NOTE(review): the flowcell URL is spelled "flowcel" here and in
+# S2_TURTLE; it looks like a typo for "flowcell".  Confirm that nothing
+# depends on the exact URL before changing it.
+S1_TURTLE = TURTLE_PREFIX + """
+<http://localhost/library/1000/>
+  htswlib:cell_line "Cell1000" ;
+  htswlib:library_id "1000" ;
+  htswlib:library_type "Single" ;
+  htswlib:replicate "1" ;
+  htswlib:has_lane <http://localhost/lane/1> ;
+  a htswlib:IlluminaLibrary .
+
+<http://localhost/lane/1>
+  htswlib:flowcell <http://localhost/flowcel/1234ABXXX> ;
+  htswlib:lane_number "1"@en;
+  a htswlib:IlluminaLane .
+"""
+
+# Turtle for library 2000 and the one lane (lane 2) attached to it.
+# NOTE(review): this library is typed htswlib:Library while S1_TURTLE
+# uses htswlib:IlluminaLibrary -- verify whether the difference is
+# intentional.
+S2_TURTLE = TURTLE_PREFIX + """
+<http://localhost/library/2000/>
+  htswlib:cell_line "Cell2000" ;
+  htswlib:library_id "2000" ;
+  htswlib:library_type "Paired" ;
+  htswlib:replicate "2" ;
+  htswlib:has_lane <http://localhost/lane/2> ;
+  a htswlib:Library .
+
+<http://localhost/lane/2>
+  htswlib:flowcell <http://localhost/flowcel/1234ABXXX> ;
+  htswlib:lane_number "2"@en ;
+  a htswlib:IlluminaLane .
+"""
+
+class MockAddDetails(object):
+    """Callable test double wrapped around an RDF model.
+
+    Turtle text can be loaded into the model when the object is created or
+    later through add_turtle().  Calling the instance with a node asserts
+    that the model holds at least one statement with that node as subject.
+    """
+    def __init__(self, model, turtle=None):
+        self.model = model
+        # Any falsy value (None or an empty string) means nothing to load.
+        if turtle:
+            self.add_turtle(turtle)
+
+    def add_turtle(self, turtle):
+        """Parse turtle into self.model using http://localhost as base URI."""
+        RDF.Parser('turtle').parse_string_into_model(
+            self.model, turtle, "http://localhost")
+
+    def __call__(self, libNode):
+        """Assert that libNode is the subject of some statement in the model."""
+        pattern = RDF.Statement(libNode, None, None)
+        # any() stops pulling from the result stream at the first match.
+        assert any(True for _ in self.model.find_statements(pattern))
+
index b7d6accd73af229fe12ac43dd3db07785a5c8740..e4d28cfd20c1da4f5b5a642f4b02511c808a08b0 100644 (file)
@@ -1,27 +1,12 @@
 #!/usr/bin/env python
 
-import copy
-import os
 from pprint import pprint
 import shutil
-import tempfile
 
 from unittest2 import TestCase, defaultTestLoader
 
 from htsworkflow.submission.results import ResultMap
-
-S1_NAME = '1000-sample'
-S2_NAME = '2000-sample'
-
-S1_FILES = [
-    os.path.join(S1_NAME, 'file1_l8_r1.fastq'),
-    os.path.join(S1_NAME, 'file1_l8_r2.fastq'),
-]
-
-S2_FILES = [
-    os.path.join(S2_NAME, 'file1.bam'),
-    os.path.join(S2_NAME, 'file1_l5.fastq'),
-]
+from submission_test_common import *
 
 def generate_sample_results_tree(obj):
     obj.tempdir = tempfile.mkdtemp(prefix="results_test")
@@ -42,6 +27,7 @@ def generate_sample_results_tree(obj):
         stream.write(f)
         stream.close()
 
+
 class TestResultMap(TestCase):
     def setUp(self):
         generate_sample_results_tree(self)
index f362cea0f2e2a7235024e05296aed127fbf9f1ea..90852ceb766f56a871fc199d675982fc095b98c2 100644 (file)
@@ -7,15 +7,40 @@ from unittest2 import TestCase, TestSuite, defaultTestLoader
 from htsworkflow.submission import daf, results
 from htsworkflow.util.rdfhelp import \
      dafTermOntology, \
+     dump_model, \
      fromTypedNode, \
+     get_turtle_header, \
      load_string_into_model, \
      rdfNS, \
      submissionLog, \
      submissionOntology, \
      get_model, \
      get_serializer
-from htsworkflow.submission.submission import list_submissions
+from htsworkflow.submission.submission import list_submissions, Submission
+from htsworkflow.submission.results import ResultMap
+from submission_test_common import *
+
 import RDF
+#import logging
+#logging.basicConfig(level=logging.DEBUG)
+
+def generate_sample_results_tree(obj):
+    """Build a temporary tree of sample result files and record it on obj.
+
+    Sets obj.tempdir to a freshly created temporary directory, and
+    obj.sourcedir / obj.resultdir to the paths 'source' and 'results'
+    below it (those two paths are only recorded, not created).  One
+    directory is made for each of S1_NAME and S2_NAME, and every file in
+    S1_FILES and S2_FILES is written with its own relative path as its
+    content.
+
+    Nothing here removes the tree; the caller must delete obj.tempdir.
+    """
+    obj.tempdir = tempfile.mkdtemp(prefix="submission_test")
+    obj.sourcedir = os.path.join(obj.tempdir, 'source')
+    obj.resultdir = os.path.join(obj.tempdir, 'results')
+
+    for name in (S1_NAME, S2_NAME):
+        os.mkdir(os.path.join(obj.tempdir, name))
+
+    for relpath in S1_FILES + S2_FILES:
+        # The with block closes the file even when the write raises.
+        with open(os.path.join(obj.tempdir, relpath), 'w') as stream:
+            stream.write(relpath)
 
 class TestSubmissionModule(TestCase):
     def test_empty_list_submission(self):
@@ -57,10 +82,110 @@ class TestSubmissionModule(TestCase):
             testset.add(name)
         self.assertEqual(testset, truth)
 
+class TestSubmission(TestCase):
+    """Exercise Submission against a simulated results tree.
+
+    setUp builds the tree with generate_sample_results_tree() and creates
+    a model through get_model(); tearDown deletes the tree again.
+    """
+    def setUp(self):
+        generate_sample_results_tree(self)
+        self.model = get_model()
+
+    def tearDown(self):
+        shutil.rmtree(self.tempdir)
+
+    def _view_turtle(self):
+        """Return the turtle declaring the four file views used by the tests.
+
+        Each view carries a ucscDaf:filename_re pattern: Fastq, FastqRead1,
+        FastqRead2 and alignments.
+        """
+        return get_turtle_header() + """
+@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/test/view/> .
+thisView:Fastq ucscDaf:filename_re ".*[^12]\\.fastq\\.bz2$" ;
+               a geoSoft:raw ;
+               geoSoft:fileTypeLabel "fastq" ;
+               ucscDaf:output_type "read" .
+thisView:FastqRead1 ucscDaf:filename_re ".*r1\\.fastq\\.bz2$" ;
+               a geoSoft:raw ;
+               geoSoft:fileTypeLabel "fastq" ;
+               ucscDaf:output_type "read1" .
+thisView:FastqRead2 ucscDaf:filename_re ".*r2\\.fastq\\.bz2$" ;
+               a geoSoft:raw ;
+               geoSoft:fileTypeLabel "fastq" ;
+               ucscDaf:output_type "read2" .
+thisView:alignments ucscDaf:filename_re ".*\\.bam$" ;
+               a geoSoft:supplemental ;
+               geoSoft:fileTypeLabel "bam" ;
+               ucscDaf:output_type "alignments" .
+
+        """
+
+    def test_create_submission(self):
+        """A new Submission exposes the expected set URI and namespaces."""
+        s = Submission('foo', self.model, 'http://localhost')
+        self.assertEqual(str(s.submissionSet),
+                         "http://jumpgate.caltech.edu/wiki/SubmissionsLog/foo")
+        self.assertEqual(str(s.submissionSetNS['']),
+                         str(RDF.NS(str(s.submissionSet) + '#')['']))
+        self.assertEqual(str(s.libraryNS['']),
+                         str(RDF.NS('http://localhost/library/')['']))
+
+    def test_scan_submission_dirs(self):
+        """Scanning both sample directories yields one analysis node each."""
+        result_map = ResultMap()
+        result_map['1000'] = os.path.join(self.tempdir, S1_NAME)
+        result_map['2000'] = os.path.join(self.tempdir, S2_NAME)
+
+        s = Submission('foo', self.model, 'http://localhost')
+        mock = MockAddDetails(self.model, self._view_turtle())
+        mock.add_turtle(S1_TURTLE)
+        mock.add_turtle(S2_TURTLE)
+        # Swap the instance's _add_library_details_to_model for the mock,
+        # which only checks that the model already describes the library.
+        s._add_library_details_to_model = mock
+        s.scan_submission_dirs(result_map)
+
+        nodes = list(s.analysis_nodes(result_map))
+        self.assertEqual(len(nodes), 2)
+        expected = set((
+            'http://jumpgate.caltech.edu/wiki/SubmissionsLog/foo#1000-sample',
+            'http://jumpgate.caltech.edu/wiki/SubmissionsLog/foo#2000-sample',
+        ))
+        got = set(str(node) for node in nodes)
+        self.assertEqual(expected, got)
+
+    def test_find_best_match(self):
+        """File names resolve to the view whose filename_re matches them."""
+        load_string_into_model(self.model, 'turtle', self._view_turtle())
+        s = Submission('foo', self.model, 'http://localhost')
+        view_map = s._get_filename_view_map()
+        self.assertEqual(len(view_map), 4)
+
+        fastq = s.find_best_match("asdf.fastq.bz2")
+        self.assertEqual(
+            str(fastq),
+            "http://jumpgate.caltech.edu/wiki/SubmissionsLog/test/view/Fastq")
+
+        read2 = s.find_best_match("asdf.r2.fastq.bz2")
+        self.assertEqual(
+            str(read2),
+            "http://jumpgate.caltech.edu/wiki/SubmissionsLog/test/view/FastqRead2")
+
 def suite():
+    """Return a TestSuite with the tests of TestSubmissionModule and TestSubmission."""
     suite = TestSuite()
     suite.addTests(
         defaultTestLoader.loadTestsFromTestCase(TestSubmissionModule))
+    suite.addTests(
+        defaultTestLoader.loadTestsFromTestCase(TestSubmission))
     return suite
 
 if __name__ == "__main__":