convert to unicode_literals
[htsworkflow.git] / samples / results.py
1 from __future__ import unicode_literals
2
3 from django.conf import settings
4
5 import glob
6 import os
7 import re
8
# Matches the leading run of word characters (letters, digits, underscore)
# at the start of a string.  Written as a raw string so that "\w" reaches
# the regex engine verbatim instead of being an invalid string escape.
# NOTE(review): despite the name, the pattern does not involve parentheses,
# and nothing in the visible part of this module uses it -- confirm usage.
s_paren = re.compile(r"^\w+")
10
# Captures the lane number that follows the "s_" prefix of an ELAND result
# file name, e.g. "s_5_eland_result.txt" -> "5".
_LANE_RE = re.compile(r"^s_(\d+)")

# Archive name patterns accepted for the scores file.  Only compressed
# archives are listed so that companion files such as "*.md5" are ignored.
_SCORES_PATTERNS = ('scores*.tar.bz2', 'scores*.tar.gz', 'scores*.tgz')


def _single_match(directory, *patterns):
    """Return the only path in ``directory`` matching ``patterns``, or None.

    Raises AssertionError if more than one path matches.  The exception is
    raised explicitly (rather than via an ``assert`` statement) so that the
    duplicate check still runs when Python is started with ``-O``.
    """
    matches = []
    for pattern in patterns:
        matches += glob.glob(os.path.join(directory, pattern))

    if not matches:
        return None
    if len(matches) > 1:
        raise AssertionError(
            'expected at most one match for %s in %s, found %d'
            % (', '.join(patterns), directory, len(matches)))
    return matches[0]


def _eland_results_by_lane(c_dir_path):
    """Map lane number -> ELAND result file path for one C#-## directory."""
    by_lane = {}
    # Sorted so that, if several files belong to the same lane, the one that
    # ends up in the dictionary does not depend on filesystem ordering.
    for filepath in sorted(glob.glob(os.path.join(c_dir_path, 's_*_eland_*'))):
        lane_match = _LANE_RE.match(os.path.basename(filepath))
        if lane_match is None:
            # The name has no lane number after "s_"; skip it rather than
            # failing the whole lookup.
            continue
        by_lane[int(lane_match.group(1))] = filepath
    return by_lane


def get_flowcell_result_dict(flowcell_id):
    """
    Collect the result file paths stored for a flowcell.

    Looks for a directory named after ``flowcell_id`` (surrounding
    whitespace is stripped) inside ``settings.RESULT_HOME_DIR`` and then
    inspects every entry in it whose name starts with 'C' (the C#-##
    directories).

    Returns None if the flowcell directory does not exist, an empty
    dictionary if it contains no C* entries, and otherwise a dictionary
    following this pattern:

    d['C1-33']['summary']           # Summary.htm file path, or None
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path, or None
    d['C1-33']['scores']            # scores archive file path, or None

    Raises AssertionError if the flowcell directory, the run_*.xml file or
    the scores archive is matched more than once.
    """
    flowcell_id = flowcell_id.strip()

    ################################
    # Flowcell Directory
    fc_dir = _single_match(settings.RESULT_HOME_DIR, flowcell_id)
    if fc_dir is None:
        return None

    ################################
    # C#-## dirs
    d = {}
    for c_dir_path in glob.glob(os.path.join(fc_dir, 'C*')):
        c_dir = os.path.basename(c_dir_path)
        d[c_dir] = {
            'summary': _single_match(c_dir_path, 'Summary.htm'),
            'eland_results': _eland_results_by_lane(c_dir_path),
            'run_xml': _single_match(c_dir_path, 'run_*.xml'),
            'scores': _single_match(c_dir_path, *_SCORES_PATTERNS),
        }

    return d
115
116     
def cn_mTobp(cn_m):
    """
    Placeholder for translating a CN-M cycle label (for example C1-33,
    C1-26 or C4-28) into a number of base pairs.

    The conversion is not implemented: the argument is ignored and the
    function always returns None.
    """
    return None
123
124