Move parse_flowcell_id out of frontend.samples.results
[htsworkflow.git] / htsworkflow / frontend / samples / results.py
1 from django.conf import settings
2
3 import glob
4 import os
5 import re
6
# Matches a leading run of one or more word characters (letters, digits,
# underscore) at the start of a string.  A raw string is used so that ``\w``
# is not treated as an (invalid) string escape sequence.
# NOTE(review): not referenced in the visible part of this file - confirm it
# is still needed by importers before removing.
s_paren = re.compile(r"^\w+")
8
# Captures the lane number that follows the ``s_`` prefix of a result file
# name, e.g. ``5`` in ``s_5_eland_result.txt``.
_LANE_RE = re.compile(r'^s_(\d+)')


def _find_single_file(directory, patterns):
    """
    Return the one path in directory matching any of the glob patterns.

    Returns None when nothing matches.  Raises AssertionError when more
    than one path matches, because callers expect at most one such file.
    """
    matches = []
    for pattern in patterns:
        matches.extend(glob.glob(os.path.join(directory, pattern)))

    if not matches:
        return None

    # Raised explicitly (instead of using an ``assert`` statement) so the
    # duplicate check is still enforced when Python runs with -O.
    if len(matches) > 1:
        raise AssertionError(
            "expected at most one match for %r in %r, found %d"
            % (patterns, directory, len(matches)))

    return matches[0]


def get_flowcell_result_dict(flowcell_id):
    """
    Locate the result files stored on disk for a flowcell.

    The flowcell directory is looked up under settings.RESULT_HOME_DIR and
    every sub-directory whose name starts with 'C' (e.g. 'C1-33') gets an
    entry in the returned dictionary:

    d['C1-33']['summary']           # Summary.htm file path
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path
    d['C1-33']['scores']            # scores.tar.gz file path

    'summary', 'run_xml' and 'scores' are None when the file is missing;
    'eland_results' is a (possibly empty) dictionary keyed by integer lane.

    Returns None if flowcell_id is blank or no flowcell directory is found,
    and an empty dictionary if the flowcell directory has no C* directories.

    Raises AssertionError if more than one flowcell directory, summary,
    run xml or scores file is found where only one is expected.
    """
    flowcell_id = flowcell_id.strip()

    # A blank id would otherwise resolve to RESULT_HOME_DIR itself.
    if not flowcell_id:
        return None

    ################################
    # Flowcell Directory
    # NOTE(review): flowcell_id is used as a glob pattern, so wildcard
    # characters in it are expanded - confirm callers validate the id.
    fc_dir = _find_single_file(settings.RESULT_HOME_DIR, [flowcell_id])
    if fc_dir is None:
        return None

    d = {}

    ################################
    # C#-## dirs
    for c_dir_path in sorted(glob.glob(os.path.join(fc_dir, 'C*'))):

        ###############################
        # Result files, keyed by lane number
        eland_results = {}
        # Sorted so that, when several files share a lane, the one that is
        # kept does not depend on directory listing order.
        result_filepaths = sorted(
            glob.glob(os.path.join(c_dir_path, 's_*_eland_*')))
        for filepath in result_filepaths:
            lane_match = _LANE_RE.match(os.path.basename(filepath))
            if lane_match is None:
                # Not a per-lane file (no number after 's_'); ignore it
                # rather than failing the whole lookup.
                continue
            eland_results[int(lane_match.group(1))] = filepath

        d[os.path.basename(c_dir_path)] = {
            'summary': _find_single_file(c_dir_path, ['Summary.htm']),
            'eland_results': eland_results,
            'run_xml': _find_single_file(c_dir_path, ['run_*.xml']),
            # restrict to only compressed files, so in case there are
            # *.md5 files we don't get confused.
            'scores': _find_single_file(
                c_dir_path,
                ['scores*.tar.bz2', 'scores*.tar.gz', 'scores*.tgz']),
        }

    return d
113
114     
def cn_mTobp(cn_m):
    """
    Translate a CN-M cycle label (for example C1-33, C1-26 or C4-28)
    into a number of base pairs.

    Not implemented yet: the argument is ignored and None is returned.
    """
    return None
121
122