Initial port to python3
[htsworkflow.git] / htsworkflow / pipelines / test / simulate_runfolder.py
index 0b57631e58f5cb53696968f0feafe23dc947fbcd..b94b91d22b7216fc246d08e2d2717317593356ab 100644 (file)
@@ -7,8 +7,8 @@ import shutil
 
 TEST_CODE_DIR = os.path.split(__file__)[0]
 TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')
-LANE_LIST = range(1,9)
-TILE_LIST = range(1,101)
+LANE_LIST = list(range(1,9))
+TILE_LIST = list(range(1,101))
 HISEQ_TILE_LIST = [1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108,
                    1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208,
                    2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108,
@@ -50,6 +50,31 @@ def make_flowcell_id(runfolder_dir, flowcell_id=None):
     f.write(config)
     f.close()
 
+def make_runinfo(runfolder_dir, flowcell_id):
+    """Simulate a RunInfo.xml file created by >= RTA 1.9
+    """
+    xml = '''<?xml version="1.0"?>
+<RunInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="2">
+  <Run Id="{runfolder}" Number="101">
+    <Flowcell>{flowcell}</Flowcell>
+    <Instrument>SN787</Instrument>
+    <Date>110815</Date>
+    <Reads>
+      <Read Number="1" NumCycles="50" IsIndexedRead="N" />
+      <Read Number="2" NumCycles="7" IsIndexedRead="Y" />
+    </Reads>
+    <FlowcellLayout LaneCount="8" SurfaceCount="2" SwathCount="3" TileCount="8" />
+    <AlignToPhiX />
+  </Run>
+</RunInfo>
+'''
+    path, runfolder = os.path.split(runfolder_dir)
+    runinfo = os.path.join(runfolder_dir, 'RunInfo.xml')
+    stream = open(runinfo, 'w')
+    stream.write(xml.format(runfolder=runfolder, flowcell=flowcell_id))
+    stream.close()
+    return runinfo
+
 def make_bustard_config132(image_dir):
     source = os.path.join(TESTDATA_DIR, 'bustard-config132.xml')
     destination = os.path.join(image_dir, 'config.xml')
@@ -57,22 +82,35 @@ def make_bustard_config132(image_dir):
 
 def make_aligned_config_1_12(aligned_dir):
     """This is rouglhly equivalent to the old gerald file"""
-    source = os.path.join(TESTDATA_DIR, 'aligned_config_1_12.xml')
+    source = os.path.join(TESTDATA_DIR, '1_12', 'aligned_config_1_12.xml')
     destination = os.path.join(aligned_dir, 'config.xml')
     shutil.copy(source, destination)
 
 def make_unaligned_config_1_12(unaligned_dir):
     demultiplex_pairs = [ # (src,
       # dest),
-        (os.path.join(TESTDATA_DIR, 'demultiplex_1.12.4.2.xml'),
-         os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
-        (os.path.join(TESTDATA_DIR, 'demultiplexed_bustard_1.12.4.2.xml'),
-         os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
-        (os.path.join(TESTDATA_DIR, 'demultiplexed_summary_1.12.4.2.xml'),
+        (os.path.join(TESTDATA_DIR, '1_12', 'demultiplex_1.12.4.2.xml'),
          os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
+        (os.path.join(TESTDATA_DIR, '1_12',
+                      'demultiplexed_bustard_1.12.4.2.xml'),
+         os.path.join(unaligned_dir, 'DemultiplexedBustardConfig.xml')),
+        (os.path.join(TESTDATA_DIR, '1_12',
+                      'demultiplexed_summary_1.12.4.2.xml'),
+         os.path.join(unaligned_dir, 'DemultiplexedBustardSummary.xml')),
     ]
     for src, dest in demultiplex_pairs:
         shutil.copy(src, dest)
+        
+def make_unaligned_status_1_12(unaligned_dir, flowcell_id):
+    basecall_status = ['All.htm', 'Demultiplex_Stats.htm', 'IVC.htm']
+    test_data_root = os.path.join(TESTDATA_DIR, '1_12', 'basecall_stats')
+    basecall_stats = os.path.join(unaligned_dir, 
+                                  'Basecall_Stats_{0}'.format(flowcell_id))
+    os.mkdir(basecall_stats)
+    for filename in basecall_status:
+        source = os.path.join(test_data_root, filename)
+        destination = os.path.join(basecall_stats, filename)
+        shutil.copy(source, destination)
 
 def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
     """
@@ -134,7 +172,8 @@ def make_rta_intensities_1_12(data_dir, version='1.12.4.2'):
     if not os.path.exists(intensities_dir):
       os.mkdir(intensities_dir)
 
-    param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1.12.4.2.xml')
+    param_file = os.path.join(TESTDATA_DIR, '1_12',
+                              'rta_intensities_config_1.12.4.2.xml')
     shutil.copy(param_file, os.path.join(intensities_dir, 'RTAConfig.xml'))
 
     return intensities_dir
@@ -175,7 +214,8 @@ def make_rta_basecalls_1_12(intensities_dir):
         os.mkdir(basecalls_dir)
 
     make_qseqs(basecalls_dir, basecall_info=ABXX_BASE_CALL_INFO)
-    param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1.12.4.2.xml')
+    param_file = os.path.join(TESTDATA_DIR, '1_12',
+                              'rta_basecalls_config_1.12.4.2.xml')
     shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
 
     return basecalls_dir
@@ -357,6 +397,19 @@ def make_summary_casava1_7_xml(gerald_dir):
     destination = os.path.join(gerald_dir, 'Summary.xml')
     shutil.copy(source, destination)
 
+def make_status_rta1_12(datadir):
+    sourcedir = os.path.join(TESTDATA_DIR, '1_12')
+    status_htm = os.path.join(sourcedir, 'Status.htm')
+    destination = os.path.join(datadir, 'Status.htm')
+    shutil.copy(status_htm, destination)
+
+    status_dir = os.path.join(datadir, 'Status_Files')
+    status_source_dir = os.path.join(sourcedir, 'Status_Files')
+    shutil.copytree(status_source_dir, status_dir)
+
+    report_source_dir = os.path.join(sourcedir, 'reports')
+    report_dir = os.path.join(datadir, 'reports')
+    shutil.copytree(report_source_dir, report_dir)
 
 def make_eland_results(gerald_dir):
     eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
@@ -478,7 +531,7 @@ def make_aligned_eland_export(aligned_dir, flowcell_id):
         summary_dest = os.path.join(paths.summary_dir, 'Sample_Summary.htm')
         shutil.copy(summary_source, summary_dest)
 
-        body = get_unaligned_sample_export(lane, index_seq)
+        body = get_aligned_sample_export(lane, index_seq)
         for split in ['001','002']:
             for read in UNALIGNED_READS:
                 suffix = 'R{0}_{1}_export.txt.gz'.format(read, split)
@@ -582,7 +635,6 @@ class DemultiplexedPaths(object):
         self.sample_dir = os.path.join(self.project_dir, self.sample_dir)
         self.summary_dir = 'Summary_Stats_{0}'.format(self.flowcell_id)
         self.summary_dir = os.path.join(self.project_dir, self.summary_dir)
-        print "HI:", self.summary_dir
 
 
     def make_sample_dirs(self):
@@ -592,7 +644,6 @@ class DemultiplexedPaths(object):
             os.mkdir(self.sample_dir)
 
     def make_summary_dirs(self):
-        print "HI:", self.summary_dir
         if not os.path.isdir(self.summary_dir):
             os.mkdir(self.summary_dir)
 
@@ -600,16 +651,17 @@ class DemultiplexedPaths(object):
         filename = self.rootname + suffix
         pathname = os.path.join(self.sample_dir, filename)
         return pathname
+
     def dump(self):
-        print ('index seq: {0}'.format(self.index_seq))
+        print(('index seq: {0}'.format(self.index_seq)))
 
-        print ('project dir: {0}'.format(self.project_dir))
-        print ('sample dir: {0}'.format(self.sample_dir))
-        print ('rootname: {0}'.format(self.rootname))
-        print ('path: {0}'.format(
+        print(('project dir: {0}'.format(self.project_dir)))
+        print(('sample dir: {0}'.format(self.sample_dir)))
+        print(('rootname: {0}'.format(self.rootname)))
+        print(('path: {0}'.format(
             os.path.join(self.project_dir,
                          self.sample_dir,
-                         self.rootname+'R1_001.fastq.gz')))
+                         self.rootname+'R1_001.fastq.gz'))))
 
 
 def get_unaligned_sample_fastq_data(flowcell_id, lane, index_seq):
@@ -632,17 +684,20 @@ CCCFFFFFHHHFHJGIGHIJHIIGHIGIGIGEHFIJJJIHIJHJIIJJIH
 """.format(flowcell=flowcell_id, lane=lane, index=index_seq)
     return seq
 
-def get_unaligned_sample_export(lane, index_seq):
+def get_aligned_sample_export(lane, index_seq):
     body = """HWI-ST0787\t102\t{lane}\t1101\t1207\t1993\t{index}\t1\tAANGGATTCGATCCGGCTTAAGAGATGAAAACCGAAAGGGCCGACCGAA\taaBS`ccceg[`ae[dRR_[[SPPPP__ececfYYWaegh^\\ZLLY\\X`\tNM\t\t\t\t\t\t
-HWI-ST0787\t102     {lane}       1101    1478    1997    {index}  1       CAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT       BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB       NM                                                                                      N
-HWI-ST0787      102     {lane}       1101    1625    1994    {index}  1       AANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA       \^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c       chrII.fa
+HWI-ST0787\t102\t{lane}\t1101\t1478\t1997\t{index}\t1\tCAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT\tBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\tNM\t\t\t\t\t\t\t\t\t\t\tN
+HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\tchr9.fa\t67717938\tR\t99\t72
+HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\t3:4:3\t\t\t\t\t\t\t\t\t\t\tY
 """.format(lane=lane, index=index_seq)
     return body
 
-def ls_tree(root):
+def print_ls_tree(root):
+    """List tree contents, useful for debugging.
+    """
     for dirpath, dirnames, filenames in os.walk(root):
         for filename in filenames:
-            print os.path.join(dirpath, filename)
+            print(os.path.join(dirpath, filename))
 
 
 class BaseCallInfo(object):