Show the "Amplified from sample" as the parent library id and allow
[htsworkflow.git] / htsworkflow / frontend / samples / results.py
1 from htsworkflow.frontend import settings
2
3 import glob
4 import os
5 import re
6
# Matches the leading run of word characters (letters, digits, underscore)
# at the start of a string. Written as a raw string so that ``\w`` reaches
# the regex engine untouched instead of being treated as a string escape.
s_paren = re.compile(r"^\w+")
8
def _find_unique(directory, pattern):
    """
    Return the single path inside directory that matches the glob
    pattern, or None when nothing matches.

    More than one match is treated as a broken results layout and
    trips an assertion.
    """
    matches = glob.glob(os.path.join(directory, pattern))
    if not matches:
        return None

    # No duplicates!
    assert len(matches) == 1
    return matches[0]


def _find_eland_results(directory):
    """
    Return a {lane number: file path} dictionary for every
    s_<lane>_eland_* file found inside directory.
    """
    results = {}
    for filepath in glob.glob(os.path.join(directory, 's_*_eland_*')):
        result_name = os.path.basename(filepath)

        # Read the whole run of digits after "s_" rather than a single
        # character, so a multi-digit lane is not truncated.
        lane_match = re.match(r's_(\d+)', result_name)
        if lane_match is None:
            # The glob also accepts names with no lane number at all
            # (e.g. "s_x_eland_foo"); skip them instead of crashing.
            continue

        results[int(lane_match.group(1))] = filepath
    return results


def get_flowcell_result_dict(flowcell_id):
    """
    returns a dictionary following the following pattern for
    a given flowcell_id:

    d['C1-33']['summary']           # Summary.htm file path
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 file eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path
    d['C1-33']['scores']            # scores.tar.gz file path

    The flowcell directory is looked up under settings.RESULT_HOME_DIR
    after stripping surrounding whitespace from flowcell_id.

    Returns None when the flowcell directory does not exist, and an
    empty dictionary when it exists but contains no C* entries.
    'summary', 'run_xml' and 'scores' are None when the corresponding
    file is missing; 'eland_results' is an empty dictionary when there
    are no eland result files.

    Raises AssertionError when more than one candidate is found for
    something that is expected to be unique.
    """
    flowcell_id = flowcell_id.strip()

    ################################
    # Flowcell Directory
    # flowcell_id is used as a glob pattern here, exactly as before.
    fc_dir = _find_unique(settings.RESULT_HOME_DIR, flowcell_id)

    # Not found
    if fc_dir is None:
        return None

    d = {}

    ################################
    # C#-## dirs
    for c_dir_path in glob.glob(os.path.join(fc_dir, 'C*')):
        c_dir = os.path.basename(c_dir_path)

        # Create sub-dictionary
        entry = {}
        entry['summary'] = _find_unique(c_dir_path, 'Summary.htm')
        entry['eland_results'] = _find_eland_results(c_dir_path)
        entry['run_xml'] = _find_unique(c_dir_path, 'run_*.xml')
        entry['scores'] = _find_unique(c_dir_path, 'scores*')
        d[c_dir] = entry

    return d
109
110     
def cn_mTobp(cn_m):
    """
    Placeholder for converting CN-M cycle information (i.e. C1-33,
    C1-26, C4-28) into a number of base pairs.

    The conversion is not implemented: the argument is ignored and
    None is always returned.
    """
    return None
117
118
def parse_flowcell_id(flowcell_id):
    """
    Split a stored flowcell id into (flowcell id, status).

    Status information was historically appended to the flowcell id
    after whitespace, because the database schema was hard to change.
    The first whitespace-separated token is the id and the second is
    the status; any further tokens are ignored. Either element of the
    returned tuple is None when the corresponding token is absent.
    """
    tokens = flowcell_id.split()[:2]

    # Pad with None so that a missing id and/or status unpacks cleanly.
    tokens.extend([None] * (2 - len(tokens)))

    fcid, status = tokens
    return fcid, status
134