From 28242245c13a034387a0e0a000c45e814ee12945 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Mon, 24 Sep 2012 16:43:33 -0700 Subject: [PATCH] Merge changing lane_number to string and sequence finding code changes I started using actual file paths instead of synthetic submission paths for naming where my sequence files are. This one still won't generate geo submissions correctly as I'm pretty sure not all of the queries have been updated yet. --- htsworkflow/pipelines/sequences.py | 38 +++---- htsworkflow/pipelines/test/test_sequences.py | 34 +++--- htsworkflow/submission/condorfastq.py | 26 +++-- htsworkflow/submission/geo.py | 4 +- htsworkflow/submission/submission.py | 2 +- .../submission/test/test_condorfastq.py | 103 +++++++++--------- 6 files changed, 106 insertions(+), 101 deletions(-) diff --git a/htsworkflow/pipelines/sequences.py b/htsworkflow/pipelines/sequences.py index 479ce3d..acd1005 100644 --- a/htsworkflow/pipelines/sequences.py +++ b/htsworkflow/pipelines/sequences.py @@ -164,7 +164,7 @@ class SequenceFile(object): # a bit unreliable... 
assumes filesystem is encoded in utf-8 path = os.path.abspath(self.path.encode('utf-8')) fileNode = RDF.Node(RDF.Uri('file://' + path)) - add(model, fileNode, rdfNS['type'], libNS['illumina_result']) + add(model, fileNode, rdfNS['type'], libNS['IlluminaResult']) add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell) add_lit(model, fileNode, libNS['lane_number'], self.lane) if self.read is not None: @@ -217,7 +217,7 @@ class SequenceFile(object): seq_id = RDF.Node(RDF.Uri(seq_id)) result_statement = RDF.Statement(seq_id, rdfNS['type'], - libNS['illumina_result']) + libNS['IlluminaResult']) if not model.contains_statement(result_statement): raise KeyError(u"%s not found" % (unicode(seq_id),)) @@ -275,7 +275,7 @@ def parse_srf(path, filename): basename, ext = os.path.splitext(filename) records = basename.split('_') flowcell = records[4] - lane = int(records[5][0]) + lane = records[5][0] fullpath = os.path.join(path, filename) if flowcell_dir != flowcell: @@ -290,7 +290,7 @@ def parse_qseq(path, filename): records = basename.split('_') fullpath = os.path.join(path, filename) flowcell = records[4] - lane = int(records[5][1]) + lane = records[5][1] read = int(records[6][1]) if flowcell_dir != flowcell: @@ -309,7 +309,7 @@ def parse_fastq(path, filename): if project is not None: # demultiplexed sample! 
flowcell = flowcell_dir - lane = int(records[2][-1]) + lane = records[2][-1] read = int(records[3][-1]) pf = True # as I understand it hiseq runs toss the ones that fail filter index = records[1] @@ -318,7 +318,7 @@ def parse_fastq(path, filename): sequence_type = 'split_fastq' else: flowcell = records[4] - lane = int(records[5][1]) + lane = records[5][1] read = int(records[6][1]) pf = parse_fastq_pf_flag(records) index = None @@ -362,7 +362,7 @@ def parse_eland(path, filename, eland_match=None): fullpath = os.path.join(path, filename) flowcell, start, stop, project = get_flowcell_cycle(path) if eland_match.group('lane'): - lane = int(eland_match.group('lane')) + lane = eland_match.group('lane') else: lane = None if eland_match.group('read'): @@ -415,15 +415,15 @@ def update_model_sequence_library(model, base_url): """Find sequence objects and add library information if its missing """ file_body = """ - prefix libNS: + prefix libns: select ?filenode ?flowcell_id ?lane_id ?library_id ?flowcell ?library where { - ?filenode a libNS:illumina_result ; - libNS:flowcell_id ?flowcell_id ; - libNS:lane_number ?lane_id . - OPTIONAL { ?filenode libNS:flowcell ?flowcell . } - OPTIONAL { ?filenode libNS:library ?library .} - OPTIONAL { ?filenode libNS:library_id ?library_id .} + ?filenode a libns:IlluminaResult ; + libns:flowcell_id ?flowcell_id ; + libns:lane_number ?lane_id . + OPTIONAL { ?filenode libns:flowcell ?flowcell . } + OPTIONAL { ?filenode libns:library ?library .} + OPTIONAL { ?filenode libns:library_id ?library_id .} } """ LOGGER.debug("update_model_sequence_library query %s", file_body) @@ -470,16 +470,16 @@ def guess_library_from_model(model, base_url, flowcell, lane_id): flowcellNode = RDF.Node(flowcell) flowcell = str(flowcell.uri) lane_body = """ - prefix libNS: + prefix libns: prefix rdf: prefix xsd: select ?library ?lane where {{ - <{flowcell}> libNS:has_lane ?lane ; - a libNS:IlluminaFlowcell . 
- ?lane libNS:lane_number {lane_id} ; - libNS:library ?library . + <{flowcell}> libns:has_lane ?lane ; + a libns:IlluminaFlowcell . + ?lane libns:lane_number "{lane_id}" ; + libns:library ?library . }} """ lane_body = lane_body.format(flowcell=flowcell, lane_id=lane_id) diff --git a/htsworkflow/pipelines/test/test_sequences.py b/htsworkflow/pipelines/test/test_sequences.py index e621875..accd43e 100644 --- a/htsworkflow/pipelines/test/test_sequences.py +++ b/htsworkflow/pipelines/test/test_sequences.py @@ -74,7 +74,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(unicode(f0), unicode(pathname)) self.assertEqual(repr(f0), "" % (pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') - self.assertEqual(f0.lane, 4) + self.assertEqual(f0.lane, '4') self.assertEqual(f0.read, None) self.assertEqual(f0.pf, None) self.assertEqual(f0.cycle, 38) @@ -99,7 +99,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(unicode(f0), unicode(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') - self.assertEqual(f0.lane, 4) + self.assertEqual(f0.lane, '4') self.assertEqual(f0.read, 1) self.assertEqual(f0.pf, None) self.assertEqual(f0.cycle, 36) @@ -121,7 +121,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(f0.path, pathname) self.assertEqual(unicode(f0), unicode(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) - self.assertEqual(f0.lane, 1) + self.assertEqual(f0.lane, '1') self.assertEqual(f0.read, 1) self.assertEqual(f0.pf, None) self.assertEqual(f0.cycle, 202) @@ -145,7 +145,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(unicode(f0), unicode(pathname)) self.assertEqual(repr(f0), "" % (pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') - self.assertEqual(f0.lane, 4) + self.assertEqual(f0.lane, '4') self.assertEqual(f0.read, 1) self.assertEqual(f0.pf, True) self.assertEqual(f0.cycle, 38) @@ -167,7 +167,7 @@ class SequenceFileTests(unittest.TestCase): 
self.assertEqual(unicode(f0), unicode(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') - self.assertEqual(f0.lane, 4) + self.assertEqual(f0.lane, '4') self.assertEqual(f0.read, 2) self.assertEqual(f0.pf, False) self.assertEqual(f0.cycle, 38) @@ -191,7 +191,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(unicode(f0), unicode(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') - self.assertEqual(f0.lane, 1) + self.assertEqual(f0.lane, '1') self.assertEqual(f0.read, 1) self.assertEqual(f0.pf, True) self.assertEqual(f0.project, '11111') @@ -215,7 +215,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(unicode(f0), unicode(pathname)) self.assertEqual(repr(f0), "" % (pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') - self.assertEqual(f0.lane, 1) + self.assertEqual(f0.lane, '1') self.assertEqual(f0.read, 2) self.assertEqual(f0.pf, True) self.assertEqual(f0.project, '11112') @@ -275,7 +275,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(f.filetype, 'eland') self.assertEqual(f.path, pathname) self.assertEqual(f.flowcell, '42BW9AAXX') - self.assertEqual(f.lane, 4) + self.assertEqual(f.lane, '4') self.assertEqual(f.read, None) self.assertEqual(f.pf, None) self.assertEqual(f.cycle, 38) @@ -290,7 +290,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(f.filetype, 'eland') self.assertEqual(f.path, pathname) self.assertEqual(f.flowcell, '42BW9AAXX') - self.assertEqual(f.lane, 4) + self.assertEqual(f.lane, '4') self.assertEqual(f.read, 1) self.assertEqual(f.pf, None) self.assertEqual(f.cycle, 152) @@ -346,7 +346,7 @@ class SequenceFileTests(unittest.TestCase): files = list(model.find_statements( RDF.Statement(None, rdfNS['type'], - libraryOntology['illumina_result']))) + libraryOntology['IlluminaResult']))) self.assertEqual(len(files), 5) files = list(model.find_statements( RDF.Statement(None, @@ -411,13 +411,13 @@ 
class SequenceFileTests(unittest.TestCase): a libns:IlluminaFlowcell . <{base}/lane/1169> - libns:lane_number 1 ; libns:library <{base}/library/10923/> . + libns:lane_number "1" ; libns:library <{base}/library/10923/> . <{base}/lane/1170> - libns:lane_number 2 ; libns:library <{base}/library/10924/> . + libns:lane_number "2" ; libns:library <{base}/library/10924/> . <{base}/lane/1171> - libns:lane_number 3 ; libns:library <{base}/library/12345/> . + libns:lane_number "3" ; libns:library <{base}/library/12345/> . <{base}/lane/1172> - libns:lane_number 3 ; libns:library <{base}/library/10930/> . + libns:lane_number "3" ; libns:library <{base}/library/10930/> . """.format(base=base_url) model = get_model() load_string_into_model(model, 'turtle', flowcellInfo) @@ -472,7 +472,7 @@ class SequenceFileTests(unittest.TestCase): self.assertEqual(seq.filetype, seq2.filetype) self.assertEqual(seq2.filetype, 'split_fastq') self.assertEqual(seq.lane, seq2.lane) - self.assertEqual(seq2.lane, 3) + self.assertEqual(seq2.lane, '3') self.assertEqual(seq.read, seq2.read) self.assertEqual(seq2.read, 1) self.assertEqual(seq.project, seq2.project) @@ -491,7 +491,7 @@ class SequenceFileTests(unittest.TestCase): file_types_seen = set() file_types_to_see = set(['fastq', 'srf', 'eland', 'qseq']) lanes = set() - lanes_to_see = set((1,2,3)) + lanes_to_see = set(('1','2','3')) with SimulateSimpleTree() as tree: seqs = sequences.scan_for_sequences([tree.root, '/a/b/c/98345']) for s in seqs: @@ -512,7 +512,7 @@ class SequenceFileTests(unittest.TestCase): file_types_seen = set() file_types_to_see = set(['split_fastq']) lanes = set() - lanes_to_see = set((1,2)) + lanes_to_see = set(('1','2')) projects_seen = set() projects_to_see = set(('11111', '21111', '31111')) with SimulateHiSeqTree() as tree: diff --git a/htsworkflow/submission/condorfastq.py b/htsworkflow/submission/condorfastq.py index 9f4f136..01fe6c5 100644 --- a/htsworkflow/submission/condorfastq.py +++ 
b/htsworkflow/submission/condorfastq.py @@ -117,7 +117,7 @@ class CondorFastqExtract(object): Find archived sequence files associated with our results. """ self.import_libraries(result_map) - flowcell_ids = self.find_relavant_flowcell_ids() + flowcell_ids = self.find_relevant_flowcell_ids() self.import_sequences(flowcell_ids) query_text = """ @@ -138,7 +138,7 @@ class CondorFastqExtract(object): libns:library ?library ; libns:library_id ?library_id ; libns:file_type ?filetype ; - a libns:illumina_result . + a libns:IlluminaResult . ?flowcell libns:read_length ?read_length ; libns:flowcell_type ?flowcell_type . OPTIONAL { ?flowcell libns:flowcell_status ?flowcell_status } @@ -174,12 +174,12 @@ class CondorFastqExtract(object): if not self.model.contains_statement(q): present = True load_into_model(self.model, 'rdfa', library) - LOGGER.debug("Did we import %s: %s", library, present) + LOGGER.debug("Did we import %s: %s", library.uri, present) - def find_relavant_flowcell_ids(self): + def find_relevant_flowcell_ids(self): """Generate set of flowcell ids that had samples of interest on them """ - flowcell_query =RDF.SPARQLQuery(""" + flowcell_query = RDF.SPARQLQuery(""" prefix libns: select distinct ?flowcell ?flowcell_id @@ -192,13 +192,17 @@ WHERE { flowcell_ids = set() for r in flowcell_query.execute(self.model): flowcell_ids.add( fromTypedNode(r['flowcell_id']) ) - LOGGER.debug("Flowcells = %s" %(unicode(flowcell_ids))) - flowcell_test = RDF.Statement(r['flowcell'], - rdfNS['type'], - libraryOntology['IlluminaFlowcell']) - if not self.model.contains_statement(flowcell_test): - # we probably lack full information about the flowcell. 
+ imported = False + a_lane = self.model.get_target(r['flowcell'], + libraryOntology['has_lane']) + print a_lane + if a_lane is None: + imported = True + # we lack information about which lanes were on this flowcell load_into_model(self.model, 'rdfa', r['flowcell']) + LOGGER.debug("Did we imported %s: %s" % (r['flowcell'].uri, + imported)) + return flowcell_ids def import_sequences(self, flowcell_ids): diff --git a/htsworkflow/submission/geo.py b/htsworkflow/submission/geo.py index 413d2c3..8594715 100644 --- a/htsworkflow/submission/geo.py +++ b/htsworkflow/submission/geo.py @@ -111,7 +111,7 @@ class GEOSubmission(Submission): context = Context({ 'submission': str(analysis_node.uri), - 'file_class': str(geoSoftNS['supplemental']) + 'file_class': str(geoSoftNS['supplemental'].uri) }) return self.execute_query(query_template, context) @@ -123,7 +123,7 @@ class GEOSubmission(Submission): context = Context({ 'submission': str(analysis_node.uri), - 'file_class': str(geoSoftNS['raw']), + 'file_class': str(geoSoftNS['raw'].uri), }) lanes = {} diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py index 18fa3b2..2b04ff4 100644 --- a/htsworkflow/submission/submission.py +++ b/htsworkflow/submission/submission.py @@ -126,7 +126,7 @@ class Submission(object): def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir): # add file specific information - fileNode = RDF.Node(RDF.Uri(submission_uri + '/' + filename)) + fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(filename))) self.model.add_statement( RDF.Statement(submissionNode, dafTermOntology['has_file'], diff --git a/htsworkflow/submission/test/test_condorfastq.py b/htsworkflow/submission/test/test_condorfastq.py index f4d2e2c..dd41c61 100644 --- a/htsworkflow/submission/test/test_condorfastq.py +++ b/htsworkflow/submission/test/test_condorfastq.py @@ -122,22 +122,22 @@ lib_turtle = """@prefix : . 
a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . # paired_end 1; # read_length 33; # status "Unknown"@en . @@ -145,22 +145,22 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . a libns:IlluminaFlowcell ; @@ -181,27 +181,27 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . # paired_end 1; # read_length 76; # status "Unknown"@en . @@ -209,17 +209,17 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . 
a libns:IlluminaFlowcell ; @@ -240,32 +240,32 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . # paired_end 1; # read_length 76; # status "Unknown"@en . @@ -273,12 +273,12 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . a libns:IlluminaFlowcell ; @@ -299,42 +299,42 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 1 . + libns:lane_number "1" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 2 . + libns:lane_number "2" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 4 . + libns:lane_number "4" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 5 . + libns:lane_number "5" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 6 . + libns:lane_number "6" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 7 . + libns:lane_number "7" . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 8 . + libns:lane_number "8" . 
# paired_end 1; # read_length 76; # status "Unknown"@en . @@ -352,7 +352,7 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . # paired_end 1; # read_length 101; # status "Unknown"@en . @@ -361,7 +361,7 @@ lib_turtle = """@prefix : . a libns:IlluminaLane ; libns:flowcell ; libns:library ; - libns:lane_number 3 . + libns:lane_number "3" . # paired_end 1; # read_length 101; # status "Unknown"@en . @@ -446,39 +446,40 @@ class TestCondorFastq(unittest.TestCase): inference = Infer(self.extract.model) errmsgs = list(inference.run_validation()) self.assertEqual(len(errmsgs), 0) + os.chdir(self.tempdir) def tearDown(self): shutil.rmtree(self.tempdir) os.chdir(self.cwd) - def test_find_relavant_flowcell_ids(self): + def test_find_relevant_flowcell_ids(self): expected = set(('30221AAXX', '42JUYAAXX', '61MJTAAXX', '30DY0AAXX', 'C02F9ACXX')) - flowcell_ids = self.extract.find_relavant_flowcell_ids() + flowcell_ids = self.extract.find_relevant_flowcell_ids() self.assertEqual(flowcell_ids, expected) def test_find_archive_sequence(self): seqs = self.extract.find_archive_sequence_files(self.result_map) expected = set([ - (u'11154', u'42JUYAAXX', 5, 1, 76, True, 'qseq'), - (u'11154', u'42JUYAAXX', 5, 2, 76, True, 'qseq'), - (u'11154', u'61MJTAAXX', 6, 1, 76, False, 'qseq'), - (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'), - (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'), - (u'11154', u'30221AAXX', 4, 1, 33, 
False, 'srf'), - (u'11154', u'30DY0AAXX', 8, 1, 151, True, 'srf') + (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'), + (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'), + (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'), + (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'), + (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf') ]) found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs]) self.assertEqual(expected, found) -- 2.30.2