85e1fd752ef74e039a0618783042a570a0bd30f3
[htsworkflow.git] / gaworkflow / frontend / fctracker / results.py
1 from gaworkflow.frontend import settings
2
3 import glob
4 import os
5 import re
6
# Leading run of word characters at the start of a string; used by
# flowcellIdStrip to keep only the bare flowcell id.  Written as a raw
# string so that "\w" reaches the regex engine as-is instead of relying
# on an invalid string escape.
s_paren = re.compile(r"^\w+")
8
# Lane number of a result file name: the digits between the leading "s_"
# and the next underscore (s_5_eland_result.txt -> 5).
_LANE_RE = re.compile(r"^s_(\d+)_")


def _single_match(pattern):
    """
    Returns the only path matching the glob pattern, or None when
    nothing matches.  More than one match trips an assertion.
    """
    matches = glob.glob(pattern)
    if not matches:
        return None

    # No duplicates!
    assert len(matches) == 1, \
        "expected at most one match for %s, found %r" % (pattern, matches)
    return matches[0]


def get_flowcell_result_dict(flowcell_id, result_home_dir=None):
    """
    returns a dictionary following the following pattern for
    a given flowcell_id:

    d['C1-33']['summary'] = #Summary.htm file path
    d['C1-33']['eland_results'][5] # C1-33 lane 5 file eland results file path
    d['C1-33']['run_xml'] = #run_*.xml file path
    d['C1-33']['scores'] = #scores* file path

    'summary', 'run_xml' and 'scores' are None when the file is missing;
    'eland_results' is an empty dictionary when no result file is found.

    flowcell_id -- name of the flowcell directory; surrounding
                   whitespace is ignored.
    result_home_dir -- directory that holds the flowcell directories;
                       defaults to settings.RESULT_HOME_DIR.

    Returns None when flowcell_id is empty or its directory does not
    exist, and an empty dictionary when the directory has no C* entries.

    Raises AssertionError when a lookup that should be unique matches
    more than one path.
    """
    if result_home_dir is None:
        result_home_dir = settings.RESULT_HOME_DIR

    flowcell_id = flowcell_id.strip()

    # An empty id would make the lookup below resolve to result_home_dir
    # itself, so treat it as "not found".
    if not flowcell_id:
        return None

    ################################
    # Flowcell Directory
    fc_dir = _single_match(os.path.join(result_home_dir, flowcell_id))

    # Not found
    if fc_dir is None:
        return None

    d = {}

    ################################
    # C#-## dirs
    for c_dir_path in glob.glob(os.path.join(fc_dir, 'C*')):
        c_dir = os.path.basename(c_dir_path)

        # Create sub-dictionary
        entry = {}
        d[c_dir] = entry

        ###############################
        # Summary.htm file
        entry['summary'] = _single_match(
            os.path.join(c_dir_path, 'Summary.htm'))

        ###############################
        # Result files
        eland_results = {}
        entry['eland_results'] = eland_results

        result_filepaths = glob.glob(
            os.path.join(c_dir_path, 's_*_eland_result.txt*'))

        # Sorted so the outcome is reproducible when several files map to
        # the same lane (the last path in sorted order is kept).
        for filepath in sorted(result_filepaths):
            mo = _LANE_RE.match(os.path.basename(filepath))

            # The glob also accepts names without a lane number
            # (e.g. s_x_eland_result.txt); skip those instead of failing.
            if mo is None:
                continue

            eland_results[int(mo.group(1))] = filepath

        ###############################
        # run*.xml file
        entry['run_xml'] = _single_match(
            os.path.join(c_dir_path, 'run_*.xml'))

        ###############################
        # scores.tar.gz
        entry['scores'] = _single_match(
            os.path.join(c_dir_path, 'scores*'))

    return d
108
109     
def cn_mTobp(cn_m):
    """
    Placeholder for converting CN-M cycle information (i.e. C1-33,
    C1-26, C4-28) into a number of base pairs.

    Not implemented yet: cn_m is ignored and None is returned.
    """
    return None
116
117
def flowcellIdStrip(flowcell_id):
    """
    Returns the leading run of word characters of flowcell_id, which
    drops a trailing (<words>) annotation from the id.

    When flowcell_id does not start with a word character it is
    returned unchanged.
    """
    leading = s_paren.search(flowcell_id)

    # Nothing usable at the start of the string: hand the input back.
    if not leading:
        return flowcell_id

    return leading.group(0)
128