1 from htsworkflow.frontend import settings
# Matches the leading run of word characters (letters, digits, underscore)
# at the start of a string. Written as a raw string so that ``\w`` reaches
# the regex engine unchanged instead of being an invalid string escape.
s_paren = re.compile(r"^\w+")
9 def get_flowcell_result_dict(flowcell_id):
11 Returns a dictionary with the following structure for
15 d['C1-33']['summary'] # Summary.htm file path
16 d['C1-33']['eland_results'][5] # C1-33 lane 5 eland results file path
17 d['C1-33']['run_xml'] # run_*.xml file path
18 d['C1-33']['scores'] # scores.tar.gz file path
20 flowcell_id = flowcell_id.strip()
24 ################################
26 fc_dir = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))
33 assert len(fc_dir) <= 1
38 ################################
40 c_dir_list = glob.glob(os.path.join(fc_dir, 'C*'))
43 if len(c_dir_list) == 0:
46 for c_dir_path in c_dir_list:
47 summary_file = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
48 pathdir, c_dir = os.path.split(c_dir_path)
50 # Create sub-dictionary
54 ###############################
58 if len(summary_file) == 0:
59 d[c_dir]['summary'] = None
64 assert len(summary_file) == 1
66 summary_file = summary_file[0]
67 d[c_dir]['summary'] = summary_file
69 ###############################
72 d[c_dir]['eland_results'] = {}
74 result_filepaths = glob.glob(os.path.join(c_dir_path, 's_*_eland_*'))
76 for filepath in result_filepaths:
78 junk, result_name = os.path.split(filepath)
80 # Lanes are 1-8 (a single digit), so in an 's_#_...' file name the lane digit is at index 2
81 lane = int(result_name[2])
82 d[c_dir]['eland_results'][lane] = filepath
84 ###############################
86 run_xml_filepath = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))
88 if len(run_xml_filepath) == 0:
89 d[c_dir]['run_xml'] = None
92 assert len(run_xml_filepath) == 1
94 d[c_dir]['run_xml'] = run_xml_filepath[0]
96 ###############################
98 scores_filepath = glob.glob(os.path.join(c_dir_path, 'scores*'))
100 if len(scores_filepath) == 0:
101 d[c_dir]['scores'] = None
104 assert len(scores_filepath) == 1
106 d[c_dir]['scores'] = scores_filepath[0]
113 Converts CN-M (e.g. C1-33, C1-26, C4-28) cycle information into
114 a number of base pairs.
119 def parse_flowcell_id(flowcell_id):
121 Return flowcell id and any status encoded in the id
123 We stored the status information in the flowcell id name.
124 This was dumb, but database schemas are hard to update.
126 fields = flowcell_id.split()