Move code out of ucsc_gather and into the htsworkflow tree.
[htsworkflow.git] / htsworkflow / submission / results.py
1 """Help collect and process results for submission
2 """
3 import os
4 import logging
5
6 from collections import namedtuple
7
8 LOGGER = logging.getLogger(__name__)
9
class ResultMap(object):
    """Ordered mapping of library id to result directory.

    ``results_order`` holds the library ids in the order they were first
    added and ``results`` maps each id to its result path.
    """
    def __init__(self):
        self.results_order = []
        self.results = {}

    def keys(self):
        """Return the list of library ids in insertion order."""
        return self.results_order

    def values(self):
        """Return a generator over the result paths in insertion order."""
        return (self.results[r] for r in self.results_order)

    def items(self):
        """Return a generator over (library id, result path) pairs."""
        return ((r, self.results[r]) for r in self.results_order)

    def __getitem__(self, key):
        """Return the result path stored for key (KeyError if unknown)."""
        return self.results[key]

    def add_results_from_file(self, filename):
        """Add every (library id, result path) entry listed in filename.

        Relative result paths are resolved against the directory that
        contains filename.
        """
        basepath = os.path.dirname(os.path.abspath(filename))
        for lib_id, lib_path in read_result_list(filename):
            if not os.path.isabs(lib_path):
                lib_path = os.path.join(basepath, lib_path)
            self.add_result(lib_id, lib_path)

    def add_result(self, lib_id, lib_path):
        """Record lib_path as the result directory for lib_id.

        Adding an id that is already present replaces its path but keeps
        its original position in the ordering.
        """
        # Record each id only once so keys()/values()/items() and
        # make_tree_from() never visit the same library twice.
        if lib_id not in self.results:
            self.results_order.append(lib_id)
        self.results[lib_id] = lib_path

    def make_tree_from(self, source_path, destpath=None):
        """Create a tree of symlinks to the data files under source_path.

        For each stored result, a directory is created at
        os.path.join(destpath, lib_path) and every entry listed in
        os.path.join(source_path, lib_path) is symlinked into it.
        Targets that already exist are left untouched.

        :param source_path: directory containing the source result dirs
        :param destpath: directory in which to build the tree; defaults
            to the current working directory
        :raises IOError: if an entry listed in a source directory does
            not exist when checked (e.g. a dangling symlink)
        """
        if destpath is None:
            destpath = os.getcwd()
        LOGGER.debug("source_path: %s destpath: %s", source_path, destpath)

        for lib_id in self.results_order:
            lib_path = self.results[lib_id]
            lib_destination = os.path.join(destpath, lib_path)
            if not os.path.exists(lib_destination):
                LOGGER.info("Making dir {0}".format(lib_destination))
                # makedirs rather than mkdir: result paths may be nested
                # (e.g. result/foo/bar) with parents that do not exist yet.
                os.makedirs(lib_destination)

            source_rel_dir = os.path.join(source_path, lib_path)
            source_lib_dir = os.path.abspath(source_rel_dir)
            LOGGER.debug("source_lib_dir: %s", source_lib_dir)

            for filename in os.listdir(source_lib_dir):
                source_pathname = os.path.join(source_lib_dir, filename)
                target_pathname = os.path.join(lib_destination, filename)
                if not os.path.exists(source_pathname):
                    raise IOError(
                        "{0} does not exist".format(source_pathname))
                LOGGER.debug("target_pathname: %s", target_pathname)
                if not os.path.exists(target_pathname):
                    os.symlink(source_pathname, target_pathname)
                    LOGGER.info(
                        'LINK {0} to {1}'.format(source_pathname,
                                                 target_pathname))
72
def read_result_list(filename):
    """
    Read a file that maps library id to result directory.
    Does not support spaces in filenames.

    For example:
      10000 result/foo/bar

    Returns the list of (library_id, result_dir) tuples produced by
    parse_result_list.
    """
    # The with-block closes the file even when parsing raises.
    with open(filename, 'r') as stream:
        return parse_result_list(stream)
85
86
def parse_result_list(stream):
    """Parse "library_id result_dir" lines from stream.

    Blank lines and lines whose first non-blank character is '#' are
    skipped.

    :param stream: iterable of text lines (e.g. an open file)
    :returns: list of (library_id, result_dir) tuples in input order
    :raises ValueError: if a line does not contain exactly two
        whitespace-separated fields
    """
    results = []
    for lineno, line in enumerate(stream, 1):
        # strip() on both sides so indented comments are recognised too.
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        fields = line.split()
        if len(fields) != 2:
            raise ValueError(
                "line {0}: expected 'library_id result_dir', "
                "got {1!r}".format(lineno, line))
        results.append(tuple(fields))
    return results