From e0e75500a1a22df83207fe327da65591aa0a47b6 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Thu, 12 Apr 2012 14:43:10 -0700 Subject: [PATCH] Extend the adapter id field to support the dual indexed libraries this is implemented by using a '-' to indicate that two indexes are linked together. --- .../experiments/fixtures/test_flowcells.json | 42 +++++++ .../samples/fixtures/initial_data.json | 110 +++++++++++++++++- htsworkflow/frontend/samples/models.py | 40 +++++-- htsworkflow/pipelines/retrieve_config.py | 3 +- .../pipelines/test/test_retrive_config.py | 40 ++++--- 5 files changed, 209 insertions(+), 26 deletions(-) diff --git a/htsworkflow/frontend/experiments/fixtures/test_flowcells.json b/htsworkflow/frontend/experiments/fixtures/test_flowcells.json index e6ad6e3..63cf30b 100644 --- a/htsworkflow/frontend/experiments/fixtures/test_flowcells.json +++ b/htsworkflow/frontend/experiments/fixtures/test_flowcells.json @@ -883,6 +883,36 @@ "antibody": null } }, + { + "pk": "13044", + "model": "samples.library", + "fields": { + "ten_nM_dilution": false, + "gel_cut_size": 225, + "library_name": "Dual Indexed Test", + "creation_date": "2009-08-26", + "cell_line": null, + "library_species": 9, + "library_type": 9, + "multiplex_id": "N701-N501", + "made_by": "Lorian", + "affiliations": [ + 2 + ], + "replicate": 1, + "condition": null, + "hidden": false, + "stopping_point": "2A", + "tags": [], + "made_for": "", + "amplified_from_sample": 11043, + "notes": "8/21/2009 11:57:54\tColor: Orange", + "undiluted_concentration": "22.4", + "successful_pM": null, + "experiment_type": 2, + "antibody": null + } + }, { "pk": "11045", "model": "samples.library", @@ -935,6 +965,18 @@ "pM": "7" } }, + {"pk": 1379, + "model": "experiments.lane", + "fields": { + "comment": "", + "library": "13044", + "cluster_estimate": 196000, + "flowcell": 151, + "lane_number": 4, + "pM": "7" + } + }, + { "pk": "11044", "model": "samples.library", diff --git 
a/htsworkflow/frontend/samples/fixtures/initial_data.json b/htsworkflow/frontend/samples/fixtures/initial_data.json index 2a5af14..76a221e 100644 --- a/htsworkflow/frontend/samples/fixtures/initial_data.json +++ b/htsworkflow/frontend/samples/fixtures/initial_data.json @@ -50,6 +50,15 @@ "is_paired_end": true } }, + { + "model": "samples.LibraryType", + "pk": 9, + "fields": { + "name": "Dual Index Illumina", + "can_multiplex": true, + "is_paired_end": true + } + }, { "model": "samples.ExperimentType", "pk": 1, @@ -792,5 +801,104 @@ "adapter_type": 5, "sequence": "CTTGTA" } - } + }, + {"fields": {"adapter_type": 9, + "multiplex_id": "N501", + "sequence": "TAGATCGC"}, + "model": "samples.multiplexindex", + "pk": 74 + }, + {"fields": {"adapter_type": 9, + "multiplex_id": "N502", + "sequence": "CTCTCTAT"}, + "model": "samples.multiplexindex", + "pk": 75}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N503", + "sequence": "TATCCTCT"}, + "model": "samples.multiplexindex", + "pk": 76}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N504", + "sequence": "AGAGTAGA"}, + "model": "samples.multiplexindex", + "pk": 77}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N505", + "sequence": "GTAAGGAG"}, + "model": "samples.multiplexindex", + "pk": 78}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N506", + "sequence": "ACTGCATA"}, + "model": "samples.multiplexindex", + "pk": 79}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N507", + "sequence": "AAGGAGTA"}, + "model": "samples.multiplexindex", + "pk": 80}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N508", + "sequence": "CTAAGCCT"}, + "model": "samples.multiplexindex", + "pk": 81}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N701", + "sequence": "TAAGGCGA"}, + "model": "samples.multiplexindex", + "pk": 82}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N702", + "sequence": "CGTACTAG"}, + "model": "samples.multiplexindex", + "pk": 83}, + {"fields": {"adapter_type": 9, + 
"multiplex_id": "N703", + "sequence": "AGGCAGAA"}, + "model": "samples.multiplexindex", + "pk": 84}, + {"fields": {"adapter_type": 9, "multiplex_id": "N704", "sequence": "TCCTGAGC"}, + "model": "samples.multiplexindex", + "pk": 85}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N705", + "sequence": "GGACTCCT"}, + "model": "samples.multiplexindex", + "pk": 86}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N706", + "sequence": "TAGGCATG"}, + "model": "samples.multiplexindex", + "pk": 87}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N707", + "sequence": "CTCTCTAC"}, + "model": "samples.multiplexindex", + "pk": 88}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N708", + "sequence": "CAGAGAGG"}, + "model": "samples.multiplexindex", + "pk": 89}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N709", + "sequence": "GCTACGCT"}, + "model": "samples.multiplexindex", + "pk": 90}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N710", + "sequence": "CGAGGCTG"}, + "model": "samples.multiplexindex", + "pk": 91}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N711", + "sequence": "AAGAGGCA"}, + "model": "samples.multiplexindex", + "pk": 92}, + {"fields": {"adapter_type": 9, + "multiplex_id": "N712", + "sequence": "GTAGAGGA"}, + "model": "samples.multiplexindex", + "pk": 93} ] diff --git a/htsworkflow/frontend/samples/models.py b/htsworkflow/frontend/samples/models.py index 2eeaaec..de6fc11 100644 --- a/htsworkflow/frontend/samples/models.py +++ b/htsworkflow/frontend/samples/models.py @@ -144,7 +144,7 @@ class LibraryType(models.Model): class MultiplexIndex(models.Model): """Map adapter types to the multiplex sequence""" adapter_type = models.ForeignKey(LibraryType) - multiplex_id = models.CharField(max_length=3, null=False) + multiplex_id = models.CharField(max_length=6, null=False) sequence = models.CharField(max_length=12, blank=True, null=True) class Meta: @@ -244,23 +244,41 @@ class Library(models.Model): if self.multiplex_id is None or 
len(self.multiplex_id) == 0: return 'Err: id empty' sequences = {} - multiplex_ids = self.multiplex_id.split(',') - for multiplex_id in multiplex_ids: - try: - multiplex = MultiplexIndex.objects.get( - adapter_type = self.library_type.id, - multiplex_id = multiplex_id) - sequences[multiplex_id] = multiplex.sequence - except MultiplexIndex.DoesNotExist, e: - sequences[multiplex_id] = 'Err: index not found' + multiplex_expressions = self.multiplex_id.split(',') + for multiplex_term in multiplex_expressions: + pairs = multiplex_term.split('-') + if len(pairs) == 1: + key = pairs[0] + seq = self._lookup_index(pairs[0]) + elif len(pairs) == 2: + key = pairs[0] + '-' + pairs[1] + seq0 = self._lookup_index(pairs[0]) + seq1 = self._lookup_index(pairs[1]) + if seq0 is None or seq1 is None: + seq = None + else: + seq = seq0 + '-' + seq1 + else: + raise RuntimeError("Too many - seperated sequences") + if seq is None: + seq = 'Err: index not found' + sequences[key] = seq return sequences + def _lookup_index(self, multiplex_id): + try: + multiplex = MultiplexIndex.objects.get( + adapter_type = self.library_type.id, + multiplex_id = multiplex_id) + return multiplex.sequence + except MultiplexIndex.DoesNotExist, e: + return None + def index_sequence_text(self, seperator=' '): """Return formatted multiplex index sequences""" sequences = self.index_sequences() if sequences is None: return "" - multiplex_ids = sequences.keys() multiplex_ids.sort() return seperator.join(( "%s:%s" %(i,sequences[i]) for i in multiplex_ids)) diff --git a/htsworkflow/pipelines/retrieve_config.py b/htsworkflow/pipelines/retrieve_config.py index 94d8f50..bd220a0 100644 --- a/htsworkflow/pipelines/retrieve_config.py +++ b/htsworkflow/pipelines/retrieve_config.py @@ -17,6 +17,7 @@ except ImportError, e: from htsworkflow.frontend.auth import apidata from htsworkflow.util import api +from htsworkflow.util import alphanum from htsworkflow.util.url import normalize_url from htsworkflow.pipelines.genome_mapper 
import \ getAvailableGenomes, \ @@ -410,7 +411,7 @@ def format_pooled_libraries(shared, library): elif (type(sequences) == types.DictType): pooled = [] multiplex_ids = sequences.keys() - multiplex_ids.sort(key=int) + multiplex_ids.sort(cmp=alphanum.alphanum) for multiplex_id in multiplex_ids: sample = {} sample.update(shared) diff --git a/htsworkflow/pipelines/test/test_retrive_config.py b/htsworkflow/pipelines/test/test_retrive_config.py index cb56501..1d0404a 100644 --- a/htsworkflow/pipelines/test/test_retrive_config.py +++ b/htsworkflow/pipelines/test/test_retrive_config.py @@ -54,19 +54,33 @@ class RetrieveTestCases(TestCase): output = StringIO() save_sample_sheet(output, options, flowcell_info) + print output.buf + output.seek(0) sheet = list(csv.DictReader(output)) - expected = [{'SampleProject': '12044_index1', 'Index': 'ATCACG'}, - {'SampleProject': '12044_index2', 'Index': 'CGATGT'}, - {'SampleProject': '12044_index3', 'Index': 'TTAGGC'}, - {'SampleProject': '11045_index1', 'Index': 'ATCACG'}, + expected = [{'SampleProject': '12044_index1', + 'Index': 'ATCACG', + 'Lane': '3', + }, + {'SampleProject': '12044_index2', + 'Index': 'CGATGT', + 'Lane': '3', + }, + {'SampleProject': '12044_index3', + 'Index': 'TTAGGC', + 'Lane': '3', + }, + {'SampleProject': '11045_index1', + 'Index': 'ATCACG', + 'Lane': '3', + }, + {'SampleProject': '13044_indexN701-N501', + 'Index': 'TAAGGCGA-TAGATCGC', + 'Lane': '4', + } ] - for i in range(4): - self.assertEqual(sheet[i]['SampleProject'], - expected[i]['SampleProject']) - self.assertEqual(sheet[i]['Index'], - expected[i]['Index']) - self.assertEqual(sheet[i]['FCID'], fcid) - self.assertEqual(sheet[i]['Lane'], '3') - - + self.failUnlessEqual(len(sheet), len(expected)) + for s, e in zip(sheet, expected): + for key in e.keys(): + self.failUnlessEqual(s[key], e[key], + "%s != %s for key %s" % (s[key],e[key], key)) -- 2.30.2