convert to print_function, and start adding unicode_literals future as well
[htsworkflow.git] / htsworkflow / pipelines / test / simulate_runfolder.py
index c6b4d587bbc245daa638b81971a2bee49c4bfd72..0d48787abb25dfe8d6c77f4933e5ef6ce6c2706a 100644 (file)
@@ -1,6 +1,8 @@
 """
 Create simulated solexa/illumina runfolders for testing
 """
+from __future__ import print_function, unicode_literals
+
 import gzip
 import os
 import shutil
@@ -50,6 +52,31 @@ def make_flowcell_id(runfolder_dir, flowcell_id=None):
     f.write(config)
     f.close()
 
+def make_runinfo(runfolder_dir, flowcell_id):
+    """Simulate a RunInfo.xml file created by >= RTA 1.9
+    """
+    xml = '''<?xml version="1.0"?>
+<RunInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="2">
+  <Run Id="{runfolder}" Number="101">
+    <Flowcell>{flowcell}</Flowcell>
+    <Instrument>SN787</Instrument>
+    <Date>110815</Date>
+    <Reads>
+      <Read Number="1" NumCycles="50" IsIndexedRead="N" />
+      <Read Number="2" NumCycles="7" IsIndexedRead="Y" />
+    </Reads>
+    <FlowcellLayout LaneCount="8" SurfaceCount="2" SwathCount="3" TileCount="8" />
+    <AlignToPhiX />
+  </Run>
+</RunInfo>
+'''
+    path, runfolder = os.path.split(runfolder_dir)
+    runinfo = os.path.join(runfolder_dir, 'RunInfo.xml')
+    stream = open(runinfo, 'w')
+    stream.write(xml.format(runfolder=runfolder, flowcell=flowcell_id))
+    stream.close()
+    return runinfo
+
 def make_bustard_config132(image_dir):
     source = os.path.join(TESTDATA_DIR, 'bustard-config132.xml')
     destination = os.path.join(image_dir, 'config.xml')
@@ -75,6 +102,17 @@ def make_unaligned_config_1_12(unaligned_dir):
     ]
     for src, dest in demultiplex_pairs:
         shutil.copy(src, dest)
+        
+def make_unaligned_status_1_12(unaligned_dir, flowcell_id):
+    basecall_status = ['All.htm', 'Demultiplex_Stats.htm', 'IVC.htm']
+    test_data_root = os.path.join(TESTDATA_DIR, '1_12', 'basecall_stats')
+    basecall_stats = os.path.join(unaligned_dir, 
+                                  'Basecall_Stats_{0}'.format(flowcell_id))
+    os.mkdir(basecall_stats)
+    for filename in basecall_status:
+        source = os.path.join(test_data_root, filename)
+        destination = os.path.join(basecall_stats, filename)
+        shutil.copy(source, destination)
 
 def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
     """
@@ -495,7 +533,7 @@ def make_aligned_eland_export(aligned_dir, flowcell_id):
         summary_dest = os.path.join(paths.summary_dir, 'Sample_Summary.htm')
         shutil.copy(summary_source, summary_dest)
 
-        body = get_unaligned_sample_export(lane, index_seq)
+        body = get_aligned_sample_export(lane, index_seq)
         for split in ['001','002']:
             for read in UNALIGNED_READS:
                 suffix = 'R{0}_{1}_export.txt.gz'.format(read, split)
@@ -617,12 +655,12 @@ class DemultiplexedPaths(object):
         return pathname
 
     def dump(self):
-        print ('index seq: {0}'.format(self.index_seq))
+        print('index seq: {0}'.format(self.index_seq))
 
-        print ('project dir: {0}'.format(self.project_dir))
-        print ('sample dir: {0}'.format(self.sample_dir))
-        print ('rootname: {0}'.format(self.rootname))
-        print ('path: {0}'.format(
+        print('project dir: {0}'.format(self.project_dir))
+        print('sample dir: {0}'.format(self.sample_dir))
+        print('rootname: {0}'.format(self.rootname))
+        print('path: {0}'.format(
             os.path.join(self.project_dir,
                          self.sample_dir,
                          self.rootname+'R1_001.fastq.gz')))
@@ -648,17 +686,20 @@ CCCFFFFFHHHFHJGIGHIJHIIGHIGIGIGEHFIJJJIHIJHJIIJJIH
 """.format(flowcell=flowcell_id, lane=lane, index=index_seq)
     return seq
 
-def get_unaligned_sample_export(lane, index_seq):
+def get_aligned_sample_export(lane, index_seq):
     body = """HWI-ST0787\t102\t{lane}\t1101\t1207\t1993\t{index}\t1\tAANGGATTCGATCCGGCTTAAGAGATGAAAACCGAAAGGGCCGACCGAA\taaBS`ccceg[`ae[dRR_[[SPPPP__ececfYYWaegh^\\ZLLY\\X`\tNM\t\t\t\t\t\t
-HWI-ST0787\t102     {lane}       1101    1478    1997    {index}  1       CAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT       BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB       NM                                                                                      N
-HWI-ST0787      102     {lane}       1101    1625    1994    {index}  1       AANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA       \^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c       chrII.fa
+HWI-ST0787\t102\t{lane}\t1101\t1478\t1997\t{index}\t1\tCAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT\tBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\tNM\t\t\t\t\t\t\t\t\t\t\tN
+HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\tchr9.fa\t67717938\tR\t99\t72
+HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\t3:4:3\t\t\t\t\t\t\t\t\t\t\tY
 """.format(lane=lane, index=index_seq)
     return body
 
-def ls_tree(root):
+def print_ls_tree(root):
+    """List tree contents, useful for debugging.
+    """
     for dirpath, dirnames, filenames in os.walk(root):
         for filename in filenames:
-            print os.path.join(dirpath, filename)
+            print(os.path.join(dirpath, filename))
 
 
 class BaseCallInfo(object):