8 from htsworkflow.submission.submission import Submission
10 from htsworkflow.util.rdfhelp import \
15 from htsworkflow.util.url import parse_ssh_url
17 from django.conf import settings
18 from django.template import Context, loader
19 from trackhub import default_hub, CompositeTrack, Track, SuperTrack, ViewTrack
20 from trackhub.track import TRACKTYPES, SubGroupDefinition
21 from trackhub.helpers import show_rendered_files
22 from trackhub.upload import upload_track, upload_hub
24 LOGGER = logging.getLogger(__name__)
26 class TrackHubSubmission(Submission):
# Construct a track-hub submission rooted at <baseurl>/<name>, parsing
# `baseupload` as an ssh-style URL (user@host:path) for later uploads.
# NOTE(review): this listing has gaps — the guard that raises before
# line 39 (presumably `if baseurl is None:`) and the if/else around the
# sshurl fields (lines 41-48) are not visible; confirm against the full file.
27 def __init__(self, name, model, baseurl, baseupload, host):
28 """Create a trackhub based submission
31 - `name`: Name of submission
32 - `model`: librdf model reference
33 - `baseurl`: web root where trackhub will be hosted
34 - `baseupload`: filesystem root where trackhub will be hosted
35 - `host`: hostname for library pages.
37 super(TrackHubSubmission, self).__init__(name, model, host)
# Raised when no web root is supplied; the condition line is not visible here.
39 raise ValueError("Need a web root to make a track hub")
# The hub is published under a per-submission subdirectory of the web root.
40 self.baseurl = os.path.join(baseurl, self.name)
# Split the upload destination into user/host/path for make_hub()'s upload.
42 sshurl = parse_ssh_url(baseupload)
44 self.user = sshurl.user
# NOTE(review): this overwrites the `host` passed to the superclass with the
# ssh upload host — presumably intentional, but verify the base class's use.
45 self.host = sshurl.host
46 self.uploadpath = sshurl.path
# Fallback branch (else not visible): no upload destination configured.
48 self.uploadpath = None
# Render the 'trackDb.txt' Django template from one sample-metadata record
# per analysis node found in result_map; returns the rendered text.
# NOTE(review): listing gaps — the `samples = []` initialization (line 51),
# the skip/`continue` after the zero-metadata error (line 57), and the
# Context construction (lines 65-67) are not visible here.
50 def make_hub_template(self, result_map):
52 for an_analysis in self.analysis_nodes(result_map):
53 metadata = self.get_sample_metadata(an_analysis)
54 if len(metadata) == 0:
# No metadata is an error; the analysis is presumably skipped (unseen line 57).
55 errmsg = 'No metadata found for {0}'
56 LOGGER.error(errmsg.format(str(an_analysis),))
# Multiple samples is only logged at debug level; the first record wins.
58 elif len(metadata) > 1:
59 errmsg = 'Confused there are more than one sample for %s'
60 LOGGER.debug(errmsg % (str(an_analysis),))
61 metadata = metadata[0]
62 samples.append(metadata)
64 template = loader.get_template('trackDb.txt')
# `context` is built on unseen lines 65-67, presumably from `samples`.
68 return str(template.render(context))
# Build the full trackhub object graph (hub -> genome -> trackDb ->
# composite -> views -> tracks) from get_tracks() results, render it to
# disk, and upload it to the configured ssh destination.
# NOTE(review): listing gaps — default_hub() keyword arguments (lines 74,
# 76-79), CompositeTrack arguments (87, 89-90), the `view = None`
# initialization before the loop (~line 97), the skip/`continue` after the
# unrecognized-file-type log (~101-102), and the `newtrack = Track(`
# constructor head (~112-114) are not visible here.
70 def make_hub(self, result_map):
# Trailing slash so track URLs can be built by simple concatenation below.
72 hub_url = self.baseurl + '/'
73 hub, genomes_file, genome, trackdb = default_hub(
75 short_label=self.name,
# remote_dir drives where upload_hub() places the rendered files.
80 hub.remote_dir = self.uploadpath
82 # build higher order track types
83 composite = CompositeTrack(
84 name=self.sanitize_name(self.name),
85 short_label = self.sanitize_name(self.name),
86 long_label = str(self.name),
88 dragAndDrop='subtracks',
91 trackdb.add_tracks(composite)
# Subgroup definitions are shared by every track added below.
93 subgroups = self.add_subgroups(composite)
98 for track in self.get_tracks():
99 if track['file_type'] not in TRACKTYPES:
# Only logged at info level; the track is presumably skipped (unseen line).
100 LOGGER.info('Unrecognized file type %s', track['file_type'])
# A new ViewTrack is created whenever the output_type changes.
103 view = self.add_new_view_if_needed(composite, view, track)
104 track_name = self.make_track_name(track)
106 track_subgroup = self.make_track_subgroups(subgroups, track)
# Prefer the human-readable file label when present; fall back to the name.
108 if 'file_label' in track:
109 track_label = self.sanitize_name(track['file_label'])
111 track_label = track_name
115 tracktype = str(track['file_type']),
116 url= hub_url + str(track['relative_path']),
117 short_label=str(track['library_id']),
118 long_label=str(track_label),
119 subgroups=track_subgroup,
121 view.add_tracks([newtrack])
123 results = hub.render()
125 LOGGER.info("Uploading to %s @ %s : %s",
126 self.user, self.host, hub.remote_dir)
# NOTE(review): BUG? — the upload user is hardcoded to 'diane' while the
# log line above reports self.user; this should almost certainly be
# upload_hub(hub=hub, host=self.host, user=self.user).
127 upload_hub(hub=hub, host=self.host, user='diane')
# Start a new ViewTrack on the composite whenever the track's output_type
# differs from the current view's name (or no view exists yet).
# NOTE(review): listing gaps — the `view = ViewTrack(` constructor head
# (line 139), some of its arguments (142, 145), and the `return view`
# (presumably line 148) are not visible here.
129 def add_new_view_if_needed(self, composite, view, track):
130 """Add new trackhub view if we've hit a new type of track.
# NOTE(review): the docstring documents `view_type` but the parameter is
# actually `view` (the current ViewTrack or None).
133 - `composite`: composite track to attach to
134 - `view_type`: name of view type
135 - `track`: current track record
137 current_view_type = str(track['output_type'])
138 if not view or current_view_type != view.name:
140 name=current_view_type,
141 view=current_view_type,
143 short_label=current_view_type,
144 tracktype=str(track['file_type']),
146 composite.add_view(view)
# NOTE(review): dead store — `view_type` is assigned but never used; looks
# like leftover code. The useful result is presumably returned on the
# unseen next line.
147 view_type = current_view_type
# Render the 'manifest.txt' template from the manifest metadata of every
# analysis node in result_map; returns the rendered text.
# NOTE(review): listing gaps — the `files = []` initialization (line 151)
# and the Context construction (lines 157-159) are not visible here.
150 def make_manifest(self, result_map):
152 for an_analysis in self.analysis_nodes(result_map):
153 metadata = self.get_manifest_metadata(an_analysis)
# get_manifest_metadata returns a sequence of rows; flatten into one list.
154 files.extend(metadata)
156 template = loader.get_template('manifest.txt')
# `context` is built on unseen lines, presumably from `files`.
160 return str(template.render(context))
# Build a composite track name as three underscore-joined track fields.
# NOTE(review): only one of the three format arguments (output_type) is
# visible — lines 164-165 and the closing of the call (167-168) are
# missing from this listing.
162 def make_track_name(self, track):
163 return '{}_{}_{}'.format(
166 track['output_type'],
# Map each defined subgroup key to the track's sanitized value for that
# key, skipping keys the track lacks or whose value is falsy.
# NOTE(review): listing gaps — the `track_subgroups = {}` initialization
# and the `for k in subgroups:` loop header (lines 170-171) are not
# visible here.
169 def make_track_subgroups(self, subgroups, track):
172 if k in track and track[k]:
# sanitize_name ensures the value is safe for trackhub identifiers.
173 value = self.sanitize_name(track[k])
174 track_subgroups[k] = value
175 return track_subgroups
# Define the composite track's subgroup dimensions: one SubGroupDefinition
# per (RDF term, trackhub name) pair, plus the matching sortOrder,
# dimensions (dimA, dimB, ...), and filterComposite parameters.
# NOTE(review): listing gaps — the end of the `search` list (185-188) and
# the initializations of `subgroups`/`sortorder`/`dimensions`/
# `filtercomposite` (~190-191), plus the `d = next(dimnames)` style line
# (~196/198), are not visible here.
177 def add_subgroups(self, composite):
178 """Add subgroups to composite track"""
# (RDF predicate curie, trackhub subgroup name) pairs to define.
179 search = [ ('htswlib:cell_line', 'cell'),
180 ('encode3:rna_type', 'rna_type'),
181 ('encode3:protocol', 'protocol'),
182 ('htswlib:replicate', 'replicate'),
183 ('encode3:library_id', 'library_id'),
184 ('encode3:assay', 'assay'),
# Generator of dimension names dimA, dimB, ... consumed one per subgroup.
189 dimnames = ('dim{}'.format(x) for x in string.ascii_uppercase)
192 for term, name in search:
193 definitions = self.make_subgroupdefinition(term, name)
195 subgroups.append(definitions)
# Each subgroup participates in sorting ("name=+") ...
197 sortorder.append("{}=+".format(name))
# ... is bound to a UI dimension, and gets a multi-select filter.
199 dimensions.append("{}={}".format(d, name))
200 filtercomposite.append("{}=multi".format(d))
202 composite.add_subgroups(subgroups)
203 composite.add_params(sortOrder=' '.join(sortorder))
204 composite.add_params(dimensions=' '.join(dimensions))
205 composite.add_params(filterComposite=' '.join(filtercomposite))
# Query the RDF model (via trackhub_term_values.sparql) for the values a
# library attribute `term` can take, and wrap them in a SubGroupDefinition
# keyed by sanitized value -> original value.
# NOTE(review): listing gaps — the `values = {}` initialization and the
# `for row in results:` loop header (~215-216), and the arguments to
# SubGroupDefinition (lines 222-228), are not visible here.
209 def make_subgroupdefinition(self, term, name):
210 """Subgroup attributes need to be an attribute of the library.
212 template = loader.get_template('trackhub_term_values.sparql')
213 context = Context({'term': term})
214 results = self.execute_query(template, context)
217 value = str(row['name'])
# Sanitized form is the trackhub-safe key; original string is the label.
218 values[self.sanitize_name(value)] = value
221 return SubGroupDefinition(
# Run the trackhub_samples.sparql query against the model to collect the
# per-track records consumed by make_hub().
# NOTE(review): the return statement (~lines 237-238, presumably
# `return results`) is not visible in this listing.
229 def get_tracks(self):
230 """Collect information needed to describe trackhub tracks.
232 query_template = loader.get_template('trackhub_samples.sparql')
# No template variables are needed for this query.
234 context = Context({ })
236 results = self.execute_query(query_template, context)
# Normalize a free-text name into a trackhub-safe identifier by applying a
# list of case-insensitive regex substitutions.
# NOTE(review): listing gaps — replacement entries on lines 244-257 and
# the final `return name` (~260-262) are not visible here.
239 def sanitize_name(self, name):
# NOTE(review): 'poly-?a\+' is a non-raw string, so '\+' is an invalid
# string escape (DeprecationWarning on modern Python); these patterns
# should be raw strings, e.g. r'poly-?a\+'.
240 replacements = [('poly-?a\+', 'PolyAplus'),
241 ('poly-?a-', 'PolyAminus'),
# With re.IGNORECASE below, the two differently-cased RNA-Seq entries are
# redundant as *matchers* — but they map to different replacement casings,
# and the first match applied wins, so order matters here.
242 ('RNA-Seq', 'RNASeq'),
243 ('rna-seq', 'rnaseq'),
258 for regex, substitution in replacements:
259 name = re.sub(regex, substitution, name, flags=re.IGNORECASE)
# Run trackhub_manifest.sparql for one analysis node, parameterized by the
# submission URI and the submission-set namespace URI.
# NOTE(review): this definition is truncated at the end of the visible
# chunk — the Context( head (~265-266) and the return of `results`
# (~273) are not visible; do not assume behavior past line 272.
263 def get_manifest_metadata(self, analysis_node):
264 query_template = loader.get_template('trackhub_manifest.sparql')
267 'submission': str(analysis_node.uri),
268 'submissionSet': str(self.submissionSetNS[''].uri),
270 results = self.execute_query(query_template, context)
271 LOGGER.info("scanned %s for results found %s",
272 str(analysis_node), len(results))