Ultra-raw output of run_xml data implemented. The sorting of columns
[htsworkflow.git] / gaworkflow / frontend / fctracker / results.py
1 from gaworkflow.frontend import settings
2
3 import glob
4 import os
5 import re
6
# Matches the leading run of word characters ([A-Za-z0-9_]) at the start
# of a string.  Despite the name, it does not match parentheses itself; it
# is used to keep only the part of a flowcell id that precedes any
# " (<words>)" suffix.  Written as a raw string so that \w reaches the
# regex engine without being treated as an (invalid) string escape.
s_paren = re.compile(r"^\w+")
8
def _find_single(pattern):
    """
    Returns the single path matching the glob pattern, or None when
    nothing matches.

    Raises AssertionError if the pattern matches more than one path.
    """
    matches = glob.glob(pattern)

    # Not found
    if not matches:
        return None

    # No duplicates!  Raised explicitly instead of using an assert
    # statement so the check is not stripped when running under python -O
    # (where the first, arbitrarily ordered, match would silently win).
    if len(matches) > 1:
        raise AssertionError(
            "expected at most one match for %r, found %d"
            % (pattern, len(matches)))

    return matches[0]


def get_flowcell_result_dict(flowcell_id):
    """
    returns a dictionary following the following pattern for
    a given flowcell_id:

    d['C1-33']['summary']           # Summary.htm file path
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 file eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path
    d['C1-33']['scores']            # scores.tar.gz file path

    The top level keys are the names of the entries matching 'C*' inside
    the flowcell directory (settings.RESULT_HOME_DIR/<flowcell_id>).
    'summary', 'run_xml' and 'scores' are set to None when the
    corresponding file is missing; 'eland_results' is a dictionary keyed
    by integer lane number and is empty when no result files are found.

    Returns None if the flowcell directory does not exist, and an empty
    dictionary if it exists but contains no 'C*' entries.

    Raises AssertionError if the flowcell directory, Summary.htm,
    run_*.xml or scores* pattern matches more than one path.
    """
    flowcell_id = flowcell_id.strip()

    ################################
    # Flowcell Directory
    fc_dir = _find_single(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))

    # Not found
    if fc_dir is None:
        return None

    d = {}

    ################################
    # C#-## dirs
    for c_dir_path in glob.glob(os.path.join(fc_dir, 'C*')):
        c_dir = os.path.basename(c_dir_path)

        # Create sub-dictionary
        results = {}
        d[c_dir] = results

        ###############################
        # Summary.htm file
        results['summary'] = _find_single(
            os.path.join(c_dir_path, 'Summary.htm'))

        ###############################
        # Result files
        eland_results = {}
        results['eland_results'] = eland_results

        result_pattern = os.path.join(c_dir_path, 's_*_eland_result.txt*')
        for filepath in glob.glob(result_pattern):
            result_name = os.path.basename(filepath)

            # lanes 1-8, single digit, therefore s_#; # == index 2
            lane = int(result_name[2])
            eland_results[lane] = filepath

        ###############################
        # run*.xml file
        results['run_xml'] = _find_single(
            os.path.join(c_dir_path, 'run_*.xml'))

        ###############################
        # scores.tar.gz
        results['scores'] = _find_single(
            os.path.join(c_dir_path, 'scores*'))

    return d
110
111     
def cn_mTobp(cn_m):
    """
    Placeholder for converting CN-M cycle information (e.g. C1-33, C1-26,
    C4-28) into a number of base pairs.

    Not implemented yet: the argument is ignored and None is returned.
    """
    return None
118
119
def flowcellIdStrip(flowcell_id):
    """
    Returns the part of flowcell_id matched by the module-level s_paren
    pattern, i.e. the leading run of word characters, so that anything
    from the first non-word character onwards (such as a trailing
    " (<words>)") is dropped.

    If the pattern does not match, flowcell_id is returned unchanged.
    """
    match = s_paren.search(flowcell_id)

    # No leading word characters: nothing to strip
    if match is None:
        return flowcell_id

    return match.group(0)
130