3 from pprint import pformat
9 from htsworkflow.submission.submission import Submission
11 from htsworkflow.util.rdfhelp import \
15 from htsworkflow.util.url import parse_ssh_url
16 from htsworkflow.util.ucsc import bigWigInfo
18 from django.conf import settings
19 from django.template import Context, loader
20 from trackhub import default_hub, CompositeTrack, Track, SuperTrack, ViewTrack
21 from trackhub.track import TRACKTYPES, SubGroupDefinition
22 from trackhub.helpers import show_rendered_files
23 from trackhub.upload import upload_track, upload_hub
25 LOGGER = logging.getLogger(__name__)
27 class TrackHubSubmission(Submission):
28 def __init__(self, name, model, baseurl, baseupload, host):
29 """Create a trackhub based submission
32 - `name`: Name of submission
33 - `model`: librdf model reference
34 - `baseurl`: web root where trackhub will be hosted
35 - `baseupload`: filesystem root where trackhub will be hosted
36 - `host`: hostname for library pages.
38 super(TrackHubSubmission, self).__init__(name, model, host)
40 raise ValueError("Need a web root to make a track hub")
41 self.baseurl = os.path.join(baseurl, self.name)
43 sshurl = parse_ssh_url(baseupload)
45 self.user = sshurl.user
46 self.host = sshurl.host
47 self.uploadpath = sshurl.path
49 self.uploadpath = None
51 def make_hub_template(self, result_map):
53 for an_analysis in self.analysis_nodes(result_map):
54 metadata = self.get_sample_metadata(an_analysis)
55 if len(metadata) == 0:
56 errmsg = 'No metadata found for {0}'
57 LOGGER.error(errmsg.format(str(an_analysis),))
59 elif len(metadata) > 1:
60 errmsg = 'Confused there are more than one sample for %s'
61 LOGGER.debug(errmsg % (str(an_analysis),))
62 metadata = metadata[0]
63 samples.append(metadata)
65 template = loader.get_template('trackDb.txt')
69 return str(template.render(context))
71 def make_hub(self, result_map):
73 hub_url = self.baseurl + '/'
74 hub, genomes_file, genome, trackdb = default_hub(
76 short_label=self.name,
81 hub.remote_dir = self.uploadpath
83 # build higher order track types
84 composite = CompositeTrack(
85 name=self.sanitize_name(self.name),
86 short_label = self.sanitize_name(self.name),
87 long_label = str(self.name),
89 dragAndDrop='subtracks',
92 trackdb.add_tracks(composite)
94 subgroups = self.add_subgroups(composite)
99 for track in self.get_tracks():
100 if track['file_type'] not in TRACKTYPES:
101 LOGGER.info('Unrecognized file type %s', track['file_type'])
104 view = self.add_new_view_if_needed(composite, view, track)
105 track_name = self.make_track_name(track)
107 track_subgroup = self.make_track_subgroups(subgroups, track)
108 track_type = self.make_track_type(track)
110 if 'file_label' in track:
111 track_label = self.sanitize_name(track['file_label'])
113 track_label = track_name
117 'tracktype': track_type,
118 'url': hub_url + str(track['relative_path']),
119 'short_label': str(track['library_id']),
120 'long_label': str(track_label),
121 'subgroups': track_subgroup,
124 LOGGER.debug('track attributes: %s', pformat(attributes))
125 newtrack = Track(**attributes)
126 view.add_tracks([newtrack])
128 results = hub.render()
130 LOGGER.info("Uploading to %s @ %s : %s",
131 self.user, self.host, hub.remote_dir)
132 upload_hub(hub=hub, host=self.host, user='diane')
def add_new_view_if_needed(self, composite, view, track):
    """Add a new trackhub view if we've hit a new type of track.

    A fresh ViewTrack is built from the track record and attached to
    ``composite`` when there is no current view, or when the track's
    output type differs from the current view's name. Otherwise the
    current view is handed back untouched.

    :Parameters:
      - `composite`: composite track to attach to
      - `view`: the current view, or a false value if none exists yet
      - `track`: current track record

    Returns the view that the track should be added to.

    NOTE(review): this block was restored from a listing that omitted
    short lines. The ``attributes`` dict delimiters, the truthiness guards
    on the two optional settings and the final ``return view`` are
    reconstructed -- confirm against the original file.
    """
    current_view_type = str(track['output_type'])
    if not view or current_view_type != view.name:
        attributes = {
            'name': current_view_type,
            'view': current_view_type,
            'visibility': str(track.get('visibility', 'squish')),
            'short_label': current_view_type,
            'tracktype': str(track['file_type'])
        }
        # Optional display settings are only forwarded when the track
        # record supplies a truthy value for them.
        for option in ('maxHeightPixels', 'autoScale'):
            setting = track.get(option)
            if setting:
                attributes[option] = str(setting)
        view = ViewTrack(**attributes)
        composite.add_view(view)
    return view
162 def make_manifest(self, result_map):
164 for an_analysis in self.analysis_nodes(result_map):
165 metadata = self.get_manifest_metadata(an_analysis)
166 files.extend(metadata)
168 template = loader.get_template('manifest.txt')
172 return str(template.render(context))
174 def make_track_name(self, track):
175 return '{}_{}_{}'.format(
178 track['output_type'],
def make_track_subgroups(self, subgroups, track):
    """Map subgroup names to the track's sanitized value for each.

    Only names from ``subgroups`` that are present in ``track`` with a
    truthy value are included; each value is passed through
    ``self.sanitize_name``.

    NOTE(review): the listing this was restored from omitted the
    accumulator initialisation and the loop header. This assumes a plain
    dict filled by iterating ``subgroups`` -- confirm against the
    original file.
    """
    populated = (name for name in subgroups if name in track and track[name])
    return {name: self.sanitize_name(track[name]) for name in populated}
def make_track_type(self, track):
    """Return the track type string for a track record.

    For bigWig entries the minimum and maximum reported by bigWigInfo are
    appended, truncated to integers, when both are available. Every other
    file type is passed through unchanged.
    """
    kind = track['file_type']
    is_bigwig = kind.lower() == 'bigwig'
    if is_bigwig:
        # bigWig is the one type we know how to annotate further
        stats = bigWigInfo(track['relative_path'])
        range_missing = stats.min is None or stats.max is None
        if not range_missing:
            kind = '{} {} {}'.format(kind, int(stats.min), int(stats.max))

    LOGGER.debug("track_type: %s", kind)
    return str(kind)
204 def add_subgroups(self, composite):
205 """Add subgroups to composite track"""
206 search = [ ('htswlib:cell_line', 'cell'),
207 ('encode3:rna_type', 'rna_type'),
208 ('encode3:protocol', 'protocol'),
209 ('htswlib:replicate', 'replicate'),
210 ('encode3:library_id', 'library_id'),
211 ('encode3:assay', 'assay'),
216 dimnames = ('dim{}'.format(x) for x in string.ascii_uppercase)
219 for term, name in search:
220 definitions = self.make_subgroupdefinition(term, name)
222 subgroups.append(definitions)
224 sortorder.append("{}=+".format(name))
226 dimensions.append("{}={}".format(d, name))
227 filtercomposite.append("{}=multi".format(d))
229 composite.add_subgroups(subgroups)
230 composite.add_params(sortOrder=' '.join(sortorder))
231 composite.add_params(dimensions=' '.join(dimensions))
232 composite.add_params(filterComposite=' '.join(filtercomposite))
236 def make_subgroupdefinition(self, term, name):
237 """Subgroup attributes need to be an attribute of the library.
239 template = loader.get_template('trackhub_term_values.sparql')
240 context = Context({'term': term})
241 results = self.execute_query(template, context)
244 value = str(row['name'])
245 values[self.sanitize_name(value)] = value
248 return SubGroupDefinition(
256 def get_tracks(self):
257 """Collect information needed to describe trackhub tracks.
259 query_template = loader.get_template('trackhub_samples.sparql')
261 context = Context({ })
263 results = self.execute_query(query_template, context)
266 def sanitize_name(self, name):
267 replacements = [('poly-?a\+', 'PolyAplus'),
268 ('poly-?a-', 'PolyAminus'),
269 ('RNA-Seq', 'RNASeq'),
270 ('rna-seq', 'rnaseq'),
285 for regex, substitution in replacements:
286 name = re.sub(regex, substitution, name, flags=re.IGNORECASE)
290 def get_manifest_metadata(self, analysis_node):
291 query_template = loader.get_template('trackhub_manifest.sparql')
294 'submission': str(analysis_node.uri),
295 'submissionSet': str(self.submissionSetNS[''].uri),
297 results = self.execute_query(query_template, context)
298 LOGGER.info("scanned %s for results found %s",
299 str(analysis_node), len(results))