3 from pprint import pformat
9 from htsworkflow.submission.submission import Submission
11 from htsworkflow.util.rdfhelp import \
16 from htsworkflow.util.url import parse_ssh_url
17 from htsworkflow.util.ucsc import bigWigInfo
19 from django.conf import settings
20 from django.template import Context, loader
21 from trackhub import default_hub, CompositeTrack, Track, SuperTrack, ViewTrack
22 from trackhub.track import TRACKTYPES, SubGroupDefinition
23 from trackhub.helpers import show_rendered_files
24 from trackhub.upload import upload_track, upload_hub
# Module-level logger, named after this module through __name__.
26 LOGGER = logging.getLogger(__name__)
# Submission subclass whose methods assemble a track hub (hub, composite
# track, views and tracks) with the `trackhub` package and render the
# 'trackDb.txt' / 'manifest.txt' Django templates.
28 class TrackHubSubmission(Submission):
# Constructor. Passes name, model and host on to Submission.__init__, then
# stores the hub URL (baseurl joined with self.name) and the upload target
# fields taken from parse_ssh_url(baseupload).
29 def __init__(self, name, model, baseurl, baseupload, host):
30 """Create a trackhub based submission
33 - `name`: Name of submission
34 - `model`: librdf model reference
35 - `baseurl`: web root where trackhub will be hosted
36 - `baseupload`: filesystem root where trackhub will be hosted
37 - `host`: hostname for library pages.
39 super(TrackHubSubmission, self).__init__(name, model, host)
# NOTE(review): the condition guarding this raise is not visible here;
# from the message it is presumably a check that baseurl was supplied.
41 raise ValueError("Need a web root to make a track hub")
# NOTE(review): os.path.join builds this URL with the platform's path
# separator -- confirm that is always '/' where this code runs.
42 self.baseurl = os.path.join(baseurl, self.name)
44 sshurl = parse_ssh_url(baseupload)
# The parsed ssh URL exposes .user, .host and .path; each is copied onto
# the instance.
46 self.user = sshurl.user
# NOTE(review): `host` was already handed to Submission.__init__ above;
# confirm that replacing self.host with the ssh host is intended.
47 self.host = sshurl.host
48 self.uploadpath = sshurl.path
# Alternative value: no upload path is recorded.
50 self.uploadpath = None
# Render the 'trackDb.txt' template and return the output as a str.
# One metadata record per analysis node of result_map is collected in
# `samples` (presumably handed to the template through `context` -- the
# construction of `context` is not visible here).
52 def make_hub_template(self, result_map):
54 for an_analysis in self.analysis_nodes(result_map):
55 metadata = self.get_sample_metadata(an_analysis)
# No metadata at all for this node is reported as an error.
56 if len(metadata) == 0:
57 errmsg = 'No metadata found for {0}'
58 LOGGER.error(errmsg.format(str(an_analysis),))
# More than one record is only reported at debug level.
60 elif len(metadata) > 1:
61 errmsg = 'Confused there are more than one sample for %s'
62 LOGGER.debug(errmsg % (str(an_analysis),))
# Only the first record is kept for the sample list.
63 metadata = metadata[0]
64 samples.append(metadata)
66 template = loader.get_template('trackDb.txt')
70 return str(template.render(context))
# Build a hub with trackhub's default_hub, attach one CompositeTrack named
# after this submission, add a Track for each record from get_tracks(),
# then render the hub and upload it with upload_hub.
72 def make_hub(self, result_map):
# Track URLs are formed below as hub_url + relative_path, hence the
# trailing slash.
74 hub_url = self.baseurl + '/'
75 hub, genomes_file, genome, trackdb = default_hub(
77 short_label=self.name,
# NOTE(review): __init__ can leave self.uploadpath as None; confirm that
# rendering/uploading copes with remote_dir being None.
82 hub.remote_dir = self.uploadpath
84 # build higher order track types
85 composite = CompositeTrack(
86 name=self.sanitize_name(self.name),
87 short_label = self.sanitize_name(self.name),
88 long_label = str(self.name),
90 dragAndDrop='subtracks',
93 trackdb.add_tracks(composite)
95 subgroups = self.add_subgroups(composite)
100 for track in self.get_tracks():
# File types that trackhub does not list in TRACKTYPES are reported at
# info level.
101 if track['file_type'] not in TRACKTYPES:
102 LOGGER.info('Unrecognized file type %s', track['file_type'])
# The current view is replaced whenever the track's output_type changes
# (see add_new_view_if_needed).
105 view = self.add_new_view_if_needed(composite, view, track)
106 track_name = self.make_track_name(track)
108 track_subgroup = self.make_track_subgroups(subgroups, track)
109 track_type = self.make_track_type(track)
# The long label is the sanitized 'file_label' when the record has one;
# track_name is the alternative value.
111 if 'file_label' in track:
112 track_label = self.sanitize_name(track['file_label'])
114 track_label = track_name
118 'tracktype': track_type,
119 'url': hub_url + str(track['relative_path']),
120 'short_label': str(track['library_id']),
121 'long_label': str(track_label),
122 'subgroups': track_subgroup,
125 LOGGER.debug('track attributes: %s', pformat(attributes))
126 newtrack = Track(**attributes)
127 view.add_tracks([newtrack])
129 results = hub.render()
131 LOGGER.info("Uploading to %s @ %s : %s",
132 self.user, self.host, hub.remote_dir)
# NOTE(review): the log line above reports self.user, but the upload is
# performed with the literal user 'diane' -- confirm whether self.user
# was intended here.
133 upload_hub(hub=hub, host=self.host, user='diane')
# Create a ViewTrack and attach it to `composite` when there is no current
# view or the track's output_type differs from the current view's name.
# NOTE(review): the docstring below spells "trakkhub" and documents
# `view_type`, while the parameter is actually named `view`.
135 def add_new_view_if_needed(self, composite, view, track):
136 """Add new trakkhub view if we've hit a new type of track.
139 - `composite`: composite track to attach to
140 - `view_type`: name of view type
141 - `track`: current track record
143 current_view_type = str(track['output_type'])
# A falsy `view` (e.g. None) or a name mismatch both trigger a new view.
144 if not view or current_view_type != view.name:
146 'name': current_view_type,
147 'view': current_view_type,
148 'visibility': str(track.get('visibility', 'squish')),
149 'short_label': current_view_type,
150 'tracktype': str(track['file_type'])
# Optional display settings are read from the track record and stored in
# the view attributes as strings.
152 maxHeightPixels = track.get('maxHeightPixels')
154 attributes['maxHeightPixels'] = str(maxHeightPixels)
155 autoScale = track.get('autoScale')
157 attributes['autoScale'] = str(autoScale)
158 view = ViewTrack(**attributes)
159 composite.add_view(view)
# NOTE(review): view_type is assigned here but is not a parameter and is
# not read in any line shown -- possibly a leftover.
160 view_type = current_view_type
# Render the 'manifest.txt' template and return the output as a str.
# Records from get_manifest_metadata for every analysis node of result_map
# are accumulated in `files` (presumably passed to the template through
# `context` -- its construction is not visible here).
163 def make_manifest(self, result_map):
165 for an_analysis in self.analysis_nodes(result_map):
166 metadata = self.get_manifest_metadata(an_analysis)
167 files.extend(metadata)
169 template = loader.get_template('manifest.txt')
173 return str(template.render(context))
# Build a track name from three values joined by underscores; the track's
# 'output_type' is one of them.
175 def make_track_name(self, track):
176 return '{}_{}_{}'.format(
179 track['output_type'],
# Return a mapping of subgroup key -> sanitized value for one track record.
182 def make_track_subgroups(self, subgroups, track):
# Only keys present in the record with a truthy value are included.
185 if k in track and track[k]:
186 value = self.sanitize_name(track[k])
187 track_subgroups[k] = value
188 return track_subgroups
# Return the track's 'file_type' as a str. For bigWig files (compared
# case-insensitively) the integer min and max reported by bigWigInfo are
# appended, separated by spaces, when both are available.
190 def make_track_type(self, track):
191 """Further annotate tracktype.
193 bigWig files can have additional information. Add it if we can
195 track_type = track['file_type']
196 if track_type.lower() == 'bigwig':
197 # something we can enhance
# NOTE(review): bigWigInfo receives the record's relative_path -- confirm
# the path is resolvable from the directory this runs in.
198 info = bigWigInfo(track['relative_path'])
# min/max are truncated to int before being added to the type string.
199 if info.min is not None and info.max is not None:
200 track_type = '{} {} {}'.format(track_type, int(info.min), int(info.max))
202 LOGGER.debug("track_type: %s", track_type)
203 return str(track_type)
# Build one subgroup definition per (term, name) pair, attach them to the
# composite track, and set its sortOrder, dimensions and filterComposite
# parameters.
205 def add_subgroups(self, composite):
206 """Add subgroups to composite track"""
# (RDF term, subgroup name) pairs.
207 search = [ ('htswlib:cell_line', 'cell'),
208 ('encode3:rna_type', 'rna_type'),
209 ('encode3:protocol', 'protocol'),
210 ('htswlib:replicate', 'replicate'),
211 ('encode3:library_id', 'library_id'),
212 ('encode3:assay', 'assay'),
# Generator of dimension labels dimA, dimB, ... dimZ -- one per uppercase
# ASCII letter, so at most 26 labels are available.
217 dimnames = ('dim{}'.format(x) for x in string.ascii_uppercase)
220 for term, name in search:
221 definitions = self.make_subgroupdefinition(term, name)
223 subgroups.append(definitions)
# Each subgroup name contributes a "<name>=+" sortOrder entry.
225 sortorder.append("{}=+".format(name))
# `d` pairs a dimension label with the subgroup name; the same label is
# marked as a multi-select filter.
227 dimensions.append("{}={}".format(d, name))
228 filtercomposite.append("{}=multi".format(d))
# The three parameters are passed as space-separated strings.
230 composite.add_subgroups(subgroups)
231 composite.add_params(sortOrder=' '.join(sortorder))
232 composite.add_params(dimensions=' '.join(dimensions))
233 composite.add_params(filterComposite=' '.join(filtercomposite))
# Run the 'trackhub_term_values.sparql' query for `term` and build a
# SubGroupDefinition from the names it returns.
237 def make_subgroupdefinition(self, term, name):
238 """Subgroup attributes need to be an attribute of the library.
240 template = loader.get_template('trackhub_term_values.sparql')
241 context = Context({'term': term})
242 results = self.execute_query(template, context)
245 value = str(row['name'])
# Keyed by the sanitized name; the value keeps the original text.
246 values[self.sanitize_name(value)] = value
249 return SubGroupDefinition(
# Execute the 'trackhub_samples.sparql' query template with an empty
# template context.
257 def get_tracks(self):
258 """Collect information needed to describe trackhub tracks.
260 query_template = loader.get_template('trackhub_samples.sparql')
262 context = Context({ })
264 results = self.execute_query(query_template, context)
# Apply each (regex, replacement) pair to `name` in list order, matching
# case-insensitively.
# NOTE(review): 'poly-?a\+' is not a raw string, so `\+` is an invalid
# string escape that newer Python versions warn about; r'...' avoids that.
# NOTE(review): with re.IGNORECASE the 'RNA-Seq' pattern also matches
# 'rna-seq', so the following 'rna-seq' entry can never match once the
# first has run.
267 def sanitize_name(self, name):
268 replacements = [('poly-?a\+', 'PolyAplus'),
269 ('poly-?a-', 'PolyAminus'),
270 ('RNA-Seq', 'RNASeq'),
271 ('rna-seq', 'rnaseq'),
286 for regex, substitution in replacements:
287 name = re.sub(regex, substitution, name, flags=re.IGNORECASE)
291 def get_manifest_metadata(self, analysis_node):
292 query_template = loader.get_template('trackhub_manifest.sparql')
295 'submission': str(analysis_node.uri),
296 'submissionSet': str(self.submissionSetNS[''].uri),
298 results = self.execute_query(query_template, context)
299 LOGGER.info("scanned %s for results found %s",
300 str(analysis_node), len(results))