X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=blobdiff_plain;f=htsworkflow%2Fsubmission%2Ftest%2Ftest_condorfastq.py;h=2d232715e5289d93c17b68da92c7dfdac45f6b04;hb=d078bd2653d1d702d54a0ff12f91ef2bd85d8d0e;hp=24d6e4982018dd23ac8e8746d7ade1856ae3d4e7;hpb=20a7487b3db5104392a70ffb62ad92a00e0057c1;p=htsworkflow.git diff --git a/htsworkflow/submission/test/test_condorfastq.py b/htsworkflow/submission/test/test_condorfastq.py index 24d6e49..2d23271 100644 --- a/htsworkflow/submission/test/test_condorfastq.py +++ b/htsworkflow/submission/test/test_condorfastq.py @@ -5,11 +5,18 @@ import os from pprint import pprint import shutil import tempfile -import unittest + +from django.test import TestCase +from django.test.utils import setup_test_environment, \ + teardown_test_environment +from django.db import connection +from django.conf import settings from htsworkflow.submission.condorfastq import CondorFastqExtract from htsworkflow.submission.results import ResultMap -from htsworkflow.util.rdfhelp import load_string_into_model, dump_model +from htsworkflow.util.rdfhelp import \ + add_default_schemas, load_string_into_model, dump_model +from htsworkflow.util.rdfinfer import Infer FCDIRS = [ 'C02F9ACXX', @@ -96,8 +103,13 @@ lib_turtle = """@prefix : . @prefix seqns: . @prefix invns: . + a libns:Library . + a libns:Library . + a libns:Library . + a libns:Library . + - a libns:illumina_flowcell ; + a libns:IlluminaFlowcell ; libns:read_length 33 ; libns:flowcell_type "Single"@en ; libns:date "2012-01-19T20:23:26"^^xsd:dateTime; @@ -112,43 +124,51 @@ lib_turtle = """@prefix : . libns:flowcell_id "30221AAXX"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . # paired_end 1; # read_length 33; # status "Unknown"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . - a libns:illumina_flowcell ; + a libns:IlluminaFlowcell ; libns:read_length 76 ; libns:flowcell_type "Paired"@en ; libns:date "2012-01-19T20:23:26"^^xsd:dateTime; @@ -163,43 +183,51 @@ lib_turtle = """@prefix : . libns:flowcell_id "42JUYAAXX"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . # paired_end 1; # read_length 76; # status "Unknown"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . - a libns:illumina_flowcell ; + a libns:IlluminaFlowcell ; libns:read_length 76 ; libns:flowcell_type "Single"@en ; libns:date "2012-01-19T20:23:26"^^xsd:dateTime; @@ -214,43 +242,51 @@ lib_turtle = """@prefix : . libns:flowcell_id "61MJTAAXX"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . # paired_end 1; # read_length 76; # status "Unknown"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . - a libns:illumina_flowcell ; + a libns:IlluminaFlowcell ; libns:read_length 76 ; libns:flowcell_type "Paired"@en ; libns:date "2012-01-19T20:23:26"^^xsd:dateTime; @@ -265,43 +301,51 @@ lib_turtle = """@prefix : . libns:flowcell_id "30DY0AAXX"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . # paired_end 1; # read_length 76; # status "Unknown"@en . - a libns:illumina_flowcell ; + a libns:IlluminaFlowcell ; libns:read_length 101 ; libns:flowcell_type "Paired"@en ; libns:date "2012-01-19T20:23:26"^^xsd:dateTime; @@ -310,23 +354,25 @@ lib_turtle = """@prefix : . libns:flowcell_id "C02F9ACXX"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . # paired_end 1; # read_length 101; # status "Unknown"@en . + a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . # paired_end 1; # read_length 101; # status "Unknown"@en . - a libns:library ; + a libns:Library ; libns:affiliation "TSR"@en; libns:concentration "29.7"; libns:date "2012-12-28T00:00:00"^^xsd:dateTime ; @@ -343,14 +389,13 @@ lib_turtle = """@prefix : . libns:made_by "Gary Gygax"@en ; libns:name "Paired Ends ASDF"@en ; libns:replicate "1"@en; - libns:species "Mus musculus"@en ; + libns:species_name "Mus musculus"@en ; libns:stopping_point "Completed"@en ; libns:total_unique_locations 8841201 . # cell_line - - a libns:library ; + a libns:Library ; libns:affiliation "TSR"@en; libns:concentration "12.345"; libns:cell_line "Unknown"@en ; @@ -364,14 +409,14 @@ lib_turtle = """@prefix : . libns:made_by "Gary Gygax"@en ; libns:name "Paired Ends THING"@en ; libns:replicate "1"@en; - libns:species "Mus musculus"@en ; + libns:species_name "Mus musculus"@en ; libns:stopping_point "Completed"@en ; libns:total_unique_locations 8841201 . # cell_line """ HOST = "http://localhost" -class TestCondorFastq(unittest.TestCase): +class TestCondorFastq(TestCase): def setUp(self): self.cwd = os.getcwd() @@ -401,39 +446,44 @@ class TestCondorFastq(unittest.TestCase): self.flowcelldir, self.logdir) load_string_into_model(self.extract.model, 'turtle', lib_turtle) + add_default_schemas(self.extract.model) + inference = Infer(self.extract.model) + errmsgs = list(inference.run_validation()) + self.assertEqual(len(errmsgs), 0) + os.chdir(self.tempdir) def tearDown(self): shutil.rmtree(self.tempdir) os.chdir(self.cwd) - def test_find_relavant_flowcell_ids(self): + def test_find_relevant_flowcell_ids(self): expected = set(('30221AAXX', '42JUYAAXX', '61MJTAAXX', '30DY0AAXX', 'C02F9ACXX')) - flowcell_ids = self.extract.find_relavant_flowcell_ids() + flowcell_ids = self.extract.find_relevant_flowcell_ids() self.assertEqual(flowcell_ids, expected) def test_find_archive_sequence(self): seqs = self.extract.find_archive_sequence_files(self.result_map) expected = set([ - (u'11154', u'42JUYAAXX', 5, 1, 76, True, 'qseq'), - (u'11154', u'42JUYAAXX', 5, 2, 76, True, 'qseq'), - (u'11154', u'61MJTAAXX', 6, 1, 76, False, 'qseq'), - (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'11154', u'30221AAXX', 4, 1, 33, False, 'srf'), - (u'11154', u'30DY0AAXX', 8, 1, 151, True, 'srf') + (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'), + (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'), + (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'), + (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'), + (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf') ]) found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs]) self.assertEqual(expected, found) @@ -441,7 +491,7 @@ class TestCondorFastq(unittest.TestCase): def test_find_needed_targets(self): lib_db = self.extract.find_archive_sequence_files(self.result_map) - needed_targets = self.extract.find_missing_targets(self.result_map, + needed_targets = self.extract.update_fastq_targets(self.result_map, lib_db) self.assertEqual(len(needed_targets), 9) srf_30221 = needed_targets[ @@ -631,9 +681,11 @@ class TestCondorFastq(unittest.TestCase): def suite(): - suite = unittest.makeSuite(TestCondorFastq, 'test') + from unittest import TestSuite, defaultTestLoader + suite = TestSuite() + suite.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq)) return suite if __name__ == "__main__": - unittest.main(defaultTest='suite') - + from unittest import main + main(defaultTest='suite')