8 from htsworkflow.submission.submission import Submission
10 from htsworkflow.util.rdfhelp import \
15 from htsworkflow.util.url import parse_ssh_url
17 from django.conf import settings
18 from django.template import Context, loader
19 from trackhub import default_hub, CompositeTrack, Track, SuperTrack, ViewTrack
20 from trackhub.track import TRACKTYPES, SubGroupDefinition
21 from trackhub.helpers import show_rendered_files
22 from trackhub.upload import upload_track, upload_hub
# Module-level logger, named after this module via __name__.
24 LOGGER = logging.getLogger(__name__)
26 class TrackHubSubmission(Submission):
# Constructor. Forwards name, model and host to the Submission base class,
# sets self.baseurl to baseurl joined with self.name, and records
# self.user, self.host and self.uploadpath from the ssh URL parsed out of
# baseupload. Raises ValueError("Need a web root to make a track hub").
# NOTE(review): this listing has gaps (the embedded line numbers skip), so
# the conditions guarding the ValueError, the ssh-URL parsing and the
# `self.uploadpath = None` fallback are not visible here -- confirm against
# the full file before relying on this summary.
# NOTE(review): self.host is assigned from the ssh URL after `host` was
# handed to the base class; if Submission also stores it as self.host, that
# value would be replaced -- TODO confirm.
# NOTE(review): os.path.join builds what the docstring calls a web root; it
# uses the OS path separator, which is only "/" on POSIX -- confirm intent.
27 def __init__(self, name, model, baseurl, baseupload, host):
28 """Create a trackhub based submission
31 - `name`: Name of submission
32 - `model`: librdf model reference
33 - `baseurl`: web root where trackhub will be hosted
34 - `baseupload`: filesystem root where trackhub will be hosted
35 - `host`: hostname for library pages.
37 super(TrackHubSubmission, self).__init__(name, model, host)
39 raise ValueError("Need a web root to make a track hub")
40 self.baseurl = os.path.join(baseurl, self.name)
42 sshurl = parse_ssh_url(baseupload)
44 self.user = sshurl.user
45 self.host = sshurl.host
46 self.uploadpath = sshurl.path
48 self.uploadpath = None
# Render the 'trackDb.txt' template and return the result as a str.
# Walks self.analysis_nodes(result_map), fetches sample metadata for each
# node, and appends the first metadata entry to `samples`.
# NOTE(review): the initialisation of `samples`, whatever follows the
# "No metadata found" log call, and the construction of `context` fall in
# gaps of this listing -- confirm against the full file.
50 def make_hub_template(self, result_map):
52 for an_analysis in self.analysis_nodes(result_map):
53 metadata = self.get_sample_metadata(an_analysis)
54 if len(metadata) == 0:
55 errmsg = 'No metadata found for {0}'
56 LOGGER.error(errmsg.format(str(an_analysis),))
58 elif len(metadata) > 1:
59 errmsg = 'Confused there are more than one sample for %s'
# NOTE(review): an ambiguous-sample condition is only logged at debug
# level, while the empty case above uses error -- confirm this is intended.
60 LOGGER.debug(errmsg % (str(an_analysis),))
# Only the first metadata entry is kept.
61 metadata = metadata[0]
62 samples.append(metadata)
64 template = loader.get_template('trackDb.txt')
68 return str(template.render(context))
# Build a track hub for this submission, render it, and upload it.
# Visible steps: create the hub objects with default_hub, point
# hub.remote_dir at self.uploadpath, add a CompositeTrack named after the
# sanitized submission name to trackdb, attach subgroups, then for every
# record from self.get_tracks() obtain a view, build a track whose url is
# hub_url + relative_path, and add it to that view. Finally hub.render()
# and upload_hub() are called.
# NOTE(review): several argument lists and the Track constructor call are
# only partially visible in this listing (embedded line numbers skip), as
# is the initial value of `view` -- confirm against the full file.
70 def make_hub(self, result_map):
72 hub_url = self.baseurl + '/'
73 hub, genomes_file, genome, trackdb = default_hub(
75 short_label=self.name,
80 hub.remote_dir = self.uploadpath
82 # build higher order track types
83 composite = CompositeTrack(
84 name=self.sanitize_name(self.name),
85 short_label = self.sanitize_name(self.name),
86 long_label = str(self.name),
88 dragAndDrop='subtracks',
91 trackdb.add_tracks(composite)
93 subgroups = self.add_subgroups(composite)
98 for track in self.get_tracks():
# File types not listed in trackhub's TRACKTYPES are logged; whether the
# record is then skipped is not visible in this listing.
99 if track['file_type'] not in TRACKTYPES:
100 LOGGER.info('Unrecognized file type %s', track['file_type'])
103 view = self.add_new_view_if_needed(composite, view, track)
104 track_name = self.make_track_name(track)
106 track_subgroup = self.make_track_subgroups(subgroups, track)
110 tracktype = str(track['file_type']),
111 url= hub_url + str(track['relative_path']),
112 short_label=str(track['library_id']),
113 long_label=track_name,
114 subgroups=track_subgroup,
116 view.add_tracks([newtrack])
118 results = hub.render()
120 LOGGER.info("Uploading to %s @ %s : %s",
121 self.user, self.host, hub.remote_dir)
# NOTE(review): the upload user is hard-coded to 'diane' although the log
# call just above reports self.user; this looks like it should be
# user=self.user -- confirm and fix in the full file.
122 upload_hub(hub=hub, host=self.host, user='diane')
# Return-a-view helper used while iterating tracks: when there is no
# current `view`, or its name differs from str(track['output_type']), a new
# view is configured with that output type as name, view and short_label
# and the track's file_type as tracktype, then attached with
# composite.add_view().
# NOTE(review): the constructor call that creates the new view and the
# return statement fall in gaps of this listing -- confirm against the full
# file.
# NOTE(review): the docstring below lists a `view_type` parameter, but the
# signature's second parameter is `view`; the local `view_type` assigned at
# the end is not read in any visible line.
124 def add_new_view_if_needed(self, composite, view, track):
125 """Add new trakkhub view if we've hit a new type of track.
128 - `composite`: composite track to attach to
129 - `view_type`: name of view type
130 - `track`: current track record
132 current_view_type = str(track['output_type'])
133 if not view or current_view_type != view.name:
135 name=current_view_type,
136 view=current_view_type,
138 short_label=current_view_type,
139 tracktype=str(track['file_type']),
141 composite.add_view(view)
142 view_type = current_view_type
# Render the 'manifest.txt' template and return the result as a str.
# Manifest metadata is gathered for every node yielded by
# self.analysis_nodes(result_map) and accumulated into `files`.
# NOTE(review): the initialisation of `files` and the construction of
# `context` are not visible in this listing -- confirm against the full file.
145 def make_manifest(self, result_map):
147 for an_analysis in self.analysis_nodes(result_map):
148 metadata = self.get_manifest_metadata(an_analysis)
149 files.extend(metadata)
151 template = loader.get_template('manifest.txt')
155 return str(template.render(context))
# Build a track name from a track record using the three-field pattern
# '{}_{}_{}'; track['output_type'] is one of the fields.
# NOTE(review): the other two format arguments and the return statement are
# not visible in this listing -- confirm against the full file.
157 def make_track_name(self, track):
158 name = '{}_{}_{}'.format(
161 track['output_type'],
# Return the `track_subgroups` mapping for one track record: for each key
# `k` that is present in `track` with a truthy value, the value is passed
# through self.sanitize_name() and stored under `k`.
# NOTE(review): the initialisation of `track_subgroups` and the loop that
# binds `k` are not visible in this listing (presumably iterating over
# `subgroups`) -- confirm against the full file.
165 def make_track_subgroups(self, subgroups, track):
168 if k in track and track[k]:
169 value = self.sanitize_name(track[k])
170 track_subgroups[k] = value
171 return track_subgroups
# NOTE(review): this listing has gaps (embedded line numbers skip): the end
# of the `search` list, the initialisation of the accumulator lists, the
# binding of `d`, and the return statement are not visible -- confirm
# against the full file.
173 def add_subgroups(self, composite):
174 """Add subgroups to composite track"""
# (RDF term, subgroup name) pairs; each becomes one subgroup definition.
175 search = [ ('htswlib:cell_line', 'cell'),
176 ('encode3:rna_type', 'rna_type'),
177 ('encode3:protocol', 'protocol'),
178 ('htswlib:replicate', 'replicate'),
179 ('encode3:library_id', 'library_id'),
180 ('encode3:assay', 'assay'),
# Lazily yields 'dimA', 'dimB', ... one per uppercase ASCII letter.
185 dimnames = ('dim{}'.format(x) for x in string.ascii_uppercase)
188 for term, name in search:
189 definitions = self.make_subgroupdefinition(term, name)
191 subgroups.append(definitions)
193 sortorder.append("{}=+".format(name))
# `d` is presumably the next name drawn from `dimnames` -- TODO confirm.
195 dimensions.append("{}={}".format(d, name))
196 filtercomposite.append("{}=multi".format(d))
# The collected settings are applied to the composite as space-joined
# sortOrder, dimensions and filterComposite parameters.
198 composite.add_subgroups(subgroups)
199 composite.add_params(sortOrder=' '.join(sortorder))
200 composite.add_params(dimensions=' '.join(dimensions))
201 composite.add_params(filterComposite=' '.join(filtercomposite))
# Build a SubGroupDefinition for one RDF term. Runs the
# 'trackhub_term_values.sparql' query template with {'term': term} and
# fills `values` so that each sanitized name maps to the original
# str(row['name']) value.
# NOTE(review): the initialisation of `values`, the loop that binds `row`,
# and the arguments passed to SubGroupDefinition are not visible in this
# listing -- confirm against the full file.
205 def make_subgroupdefinition(self, term, name):
206 """Subgroup attributes need to be an attribute of the library.
208 template = loader.get_template('trackhub_term_values.sparql')
209 context = Context({'term': term})
210 results = self.execute_query(template, context)
213 value = str(row['name'])
214 values[self.sanitize_name(value)] = value
217 return SubGroupDefinition(
# Runs the 'trackhub_samples.sparql' query template with an empty Context
# through self.execute_query().
# NOTE(review): the return statement is not visible in this listing;
# make_hub iterates the result and indexes each item by key, so this
# presumably returns `results` -- confirm against the full file.
225 def get_tracks(self):
226 """Collect information needed to describe trackhub tracks.
228 query_template = loader.get_template('trackhub_samples.sparql')
230 context = Context({ })
232 results = self.execute_query(query_template, context)
# Apply a list of (regex, substitution) pairs to `name`, in order, with
# case-insensitive matching.
# NOTE(review): further replacement entries and the return statement fall
# in gaps of this listing -- confirm against the full file.
235 def sanitize_name(self, name):
# NOTE(review): these patterns are not raw strings; '\+' is an invalid
# string escape that newer Python versions warn about -- prefer r'...'.
236 replacements = [('poly-?a\+', 'PolyAplus'),
237 ('poly-?a-', 'PolyAminus'),
238 ('RNA-Seq', 'RNASeq'),
# NOTE(review): with re.IGNORECASE below, the 'RNA-Seq' entry already
# rewrites every case variant, so this entry appears unable to match.
239 ('rna-seq', 'rnaseq'),
254 for regex, substitution in replacements:
255 name = re.sub(regex, substitution, name, flags=re.IGNORECASE)
# Runs the 'trackhub_manifest.sparql' query template for one analysis node,
# passing the node URI as 'submission' and self.submissionSetNS[''].uri as
# 'submissionSet', then logs how many results were found.
# NOTE(review): the construction of `context` is only partly visible and no
# return statement appears before this listing ends; make_manifest extends
# a list with the return value, so this presumably returns `results` --
# confirm against the full file.
259 def get_manifest_metadata(self, analysis_node):
261 query_template = loader.get_template('trackhub_manifest.sparql')
264 'submission': str(analysis_node.uri),
265 'submissionSet': str(self.submissionSetNS[''].uri),
267 results = self.execute_query(query_template, context)
268 LOGGER.info("scanned %s for results found %s",
269 str(analysis_node), len(results))