1 from htsworkflow.frontend import settings
# Matches the leading run of word characters (letters, digits, underscore)
# at the start of a string.  Written as a raw string so that ``\w`` reaches
# the regex engine untouched instead of relying on Python passing an
# unrecognised string escape through (which newer Pythons warn about).
s_paren = re.compile(r"^\w+")
def _single_match(paths):
    """Return the only path in *paths*, or None when *paths* is empty."""
    if not paths:
        return None
    # No duplicates!
    assert len(paths) == 1, "expected at most one match, found %r" % (paths,)
    return paths[0]


def get_flowcell_result_dict(flowcell_id):
    """
    Return the result file paths found on disk for a given flowcell_id.

    The flowcell directory is looked up under settings.RESULT_HOME_DIR and
    every 'C*' sub-directory found in it gets one entry in the returned
    dictionary, keyed by the sub-directory name:

    d['C1-33']['summary']           # Summary.htm file path
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path
    d['C1-33']['scores']            # scores.tar.gz file path

    'summary', 'run_xml' and 'scores' are None when no matching file exists;
    'eland_results' is an empty dict when there are no eland result files.

    Returns None when no directory matches flowcell_id.
    Raises AssertionError when more than one flowcell directory, summary,
    run xml or scores archive matches.
    """
    flowcell_id = flowcell_id.strip()

    d = {}

    ################################
    # Flowcell Directory
    # glob returns a list; reduce it to the single directory path before
    # it is used as a path component below.
    fc_dir = _single_match(
        glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id)))

    # Not found
    # NOTE(review): confirm callers expect None here (rather than an empty
    # dict), and an empty dict when the flowcell has no C* directories.
    if fc_dir is None:
        return None

    ################################
    # C#-## dirs
    for c_dir_path in glob.glob(os.path.join(fc_dir, 'C*')):
        c_dir = os.path.basename(c_dir_path)

        ###############################
        # eland result files, keyed by lane number
        eland_results = {}
        for filepath in glob.glob(os.path.join(c_dir_path, 's_*_eland_*')):
            result_name = os.path.basename(filepath)
            # lanes 1-8, single digit, therefore s_#; # == index 2
            eland_results[int(result_name[2])] = filepath

        ###############################
        # scores archive
        # restrict to only compressed files, so in case there are *.md5 files
        # we don't get confused.
        scores_filepath = []
        for pattern in ('scores*.tar.bz2', 'scores*.tar.gz', 'scores*.tgz'):
            scores_filepath += glob.glob(os.path.join(c_dir_path, pattern))

        # Create sub-dictionary
        d[c_dir] = {
            'summary': _single_match(
                glob.glob(os.path.join(c_dir_path, 'Summary.htm'))),
            'eland_results': eland_results,
            'run_xml': _single_match(
                glob.glob(os.path.join(c_dir_path, 'run_*.xml'))),
            'scores': _single_match(scores_filepath),
        }

    return d
117 Converts CN-M (e.g. C1-33, C1-26, C4-28) cycle information into
118 number of base pairs.
123 def parse_flowcell_id(flowcell_id):
125 Return flowcell id and any status encoded in the id
127 We stored the status information in the flowcell id name.
128 this was dumb, but database schemas are hard to update.
130 fields = flowcell_id.split()