7 from htsworkflow.submission.submission import Submission
9 from htsworkflow.util.rdfhelp import \
14 from htsworkflow.util.url import parse_ssh_url
16 from django.conf import settings
17 from django.template import Context, loader
18 from trackhub import default_hub, CompositeTrack, Track, SuperTrack, ViewTrack
19 from trackhub.track import TRACKTYPES, SubGroupDefinition
20 from trackhub.helpers import show_rendered_files
21 from trackhub.upload import upload_track, upload_hub
# Module-level logger, named after this module via __name__.
23 LOGGER = logging.getLogger(__name__)
25 class TrackHubSubmission(Submission):
# Initialise a track hub submission.
#
# Passes `name`, `model` and `host` to the parent Submission initialiser,
# stores the hub URL as os.path.join(baseurl, self.name), and records the
# user, host and path components of `baseupload` as parsed by parse_ssh_url.
#
# Raises ValueError("Need a web root to make a track hub"); the condition
# guarding the raise is not visible in this excerpt -- presumably it tests
# for a missing `baseurl`; confirm against the full file.
#
# NOTE(review): the docstring calls `baseupload` a "filesystem root", but
# the code hands it to parse_ssh_url, i.e. it is treated as an SSH URL.
# NOTE(review): `self.host = sshurl.host` reassigns self.host after `host`
# was given to the parent initialiser. If the parent also stores the
# library-page host in self.host, that value is overwritten -- confirm that
# this is intended.
# NOTE(review): self.user is only assigned next to the parse_ssh_url call;
# the line that sets self.uploadpath to None does not set self.user, yet
# make_hub logs self.user. Presumably that is the no-upload branch -- verify
# that make_hub cannot be reached with self.user undefined.
26 def __init__(self, name, model, baseurl, baseupload, host):
27 """Create a trackhub based submission
30 - `name`: Name of submission
31 - `model`: librdf model reference
32 - `baseurl`: web root where trackhub will be hosted
33 - `baseupload`: filesystem root where trackhub will be hosted
34 - `host`: hostname for library pages.
36 super(TrackHubSubmission, self).__init__(name, model, host)
38 raise ValueError("Need a web root to make a track hub")
39 self.baseurl = os.path.join(baseurl, self.name)
41 sshurl = parse_ssh_url(baseupload)
43 self.user = sshurl.user
44 self.host = sshurl.host
45 self.uploadpath = sshurl.path
47 self.uploadpath = None
# Render the 'trackDb.txt' template and return the result as a str.
#
# Walks self.analysis_nodes(result_map), fetches each node's sample metadata
# with self.get_sample_metadata and appends entries to `samples`.
#
# NOTE(review): this excerpt has gaps -- the initialisation of `samples`,
# whatever follows the "No metadata found" error (presumably a `continue`),
# and the construction of `context` are not visible here.
49 def make_hub_template(self, result_map):
51 for an_analysis in self.analysis_nodes(result_map):
52 metadata = self.get_sample_metadata(an_analysis)
53 if len(metadata) == 0:
54 errmsg = 'No metadata found for {0}'
55 LOGGER.error(errmsg.format(str(an_analysis),))
# More than one metadata row is only reported at debug level.
57 elif len(metadata) > 1:
58 errmsg = 'Confused there are more than one sample for %s'
59 LOGGER.debug(errmsg % (str(an_analysis),))
# Only the first metadata entry is kept. Indentation is lost in this
# excerpt, so whether this applies to every analysis or only to the
# multi-sample case cannot be told from here -- TODO confirm.
60 metadata = metadata[0]
61 samples.append(metadata)
63 template = loader.get_template('trackDb.txt')
67 return str(template.render(context))
# Build the track hub for this submission, render it and upload it.
#
# Visible steps: create the hub objects with default_hub, point
# hub.remote_dir at self.uploadpath, attach a CompositeTrack named after
# the sanitized submission name, register subgroups, then walk
# self.get_tracks() adding views and tracks before calling hub.render()
# and upload_hub().
#
# NOTE(review): this excerpt has gaps -- several keyword arguments of
# default_hub and CompositeTrack, the initial value of `view`, and the call
# that builds `newtrack` are not visible. `result_map` is not referenced in
# any visible line.
69 def make_hub(self, result_map):
# Every track URL below is formed by appending the track's relative path
# to this prefix.
71 hub_url = self.baseurl + '/'
72 hub, genomes_file, genome, trackdb = default_hub(
74 short_label=self.name,
79 hub.remote_dir = self.uploadpath
81 # build higher order track types
82 composite = CompositeTrack(
83 name=self.sanitize_name(self.name),
84 short_label = self.sanitize_name(self.name),
85 long_label = str(self.name),
87 dragAndDrop='subtracks',
90 trackdb.add_tracks(composite)
92 subgroups = self.add_subgroups(composite)
97 for track in self.get_tracks():
# File types missing from trackhub's TRACKTYPES are reported at info level;
# whether the track is then skipped is not visible in this excerpt.
98 if track['file_type'] not in TRACKTYPES:
99 LOGGER.info('Unrecognized file type %s', track['file_type'])
102 view = self.add_new_view_if_needed(composite, view, track)
103 track_name = self.make_track_name(track)
105 track_subgroup = self.make_track_subgroups(subgroups, track)
109 tracktype = str(track['file_type']),
110 url= hub_url + str(track['relative_path']),
111 short_label=str(track['library_id']),
112 long_label=track_name,
113 subgroups=track_subgroup,
115 view.add_tracks([newtrack])
# `results` is not referenced again in the visible lines.
117 results = hub.render()
119 LOGGER.info("Uploading to %s @ %s : %s",
120 self.user, self.host, hub.remote_dir)
# NOTE(review): the log message above reports self.user, but the upload is
# performed with the hard-coded user 'diane' -- this looks like it should
# be user=self.user; confirm and fix in the full file.
121 upload_hub(hub=hub, host=self.host, user='diane')
# Start a new view on `composite` when `view` is unset or when the track's
# 'output_type' differs from the current view's name.
#
# The visible keyword arguments (name, view, short_label, tracktype) are
# all derived from the track's 'output_type' and 'file_type'; the call they
# belong to is not visible in this excerpt (presumably a ViewTrack
# constructor -- confirm). The new view is registered with
# composite.add_view(view).
#
# NOTE(review): the docstring lists a `view_type` parameter, but the
# signature takes `view`.
# NOTE(review): the local `view_type` assigned at the end is not used in
# any visible line.
# NOTE(review): the return statement is not visible; the caller in make_hub
# assigns the result back to `view`, so presumably the (possibly new) view
# is returned -- confirm.
123 def add_new_view_if_needed(self, composite, view, track):
124 """Add new trakkhub view if we've hit a new type of track.
127 - `composite`: composite track to attach to
128 - `view_type`: name of view type
129 - `track`: current track record
131 current_view_type = str(track['output_type'])
132 if not view or current_view_type != view.name:
134 name=current_view_type,
135 view=current_view_type,
137 short_label=current_view_type,
138 tracktype=str(track['file_type']),
140 composite.add_view(view)
141 view_type = current_view_type
# Render the 'manifest.txt' template and return the result as a str.
#
# Gathers the manifest metadata of every node from
# self.analysis_nodes(result_map) into `files` via
# self.get_manifest_metadata.
#
# NOTE(review): the initialisation of `files` and the construction of
# `context` are not visible in this excerpt.
144 def make_manifest(self, result_map):
146 for an_analysis in self.analysis_nodes(result_map):
147 metadata = self.get_manifest_metadata(an_analysis)
# extend() is used, so get_manifest_metadata must return an iterable of
# records.
148 files.extend(metadata)
150 template = loader.get_template('manifest.txt')
154 return str(template.render(context))
# Build a display name for a track from three underscore-joined fields.
#
# NOTE(review): only one of the three format arguments
# (track['output_type']) is visible in this excerpt, and the return
# statement is missing. The caller in make_hub uses the result as the
# track's long label, so presumably `name` is returned -- confirm.
156 def make_track_name(self, track):
157 name = '{}_{}_{}'.format(
160 track['output_type'],
# Collect the subgroup values that apply to one track.
#
# For each key `k` that is present in `track` with a truthy value, the
# value is passed through self.sanitize_name and stored under `k`; the
# resulting mapping is returned.
#
# NOTE(review): the initialisation of `track_subgroups` and the loop header
# that binds `k` are not visible in this excerpt; presumably `k` iterates
# over `subgroups` -- confirm.
164 def make_track_subgroups(self, subgroups, track):
167 if k in track and track[k]:
168 value = self.sanitize_name(track[k])
169 track_subgroups[k] = value
170 return track_subgroups
# Each (term, name) pair in `search` is turned into a subgroup definition
# by self.make_subgroupdefinition, and the collected definitions are
# attached with composite.add_subgroups.
#
# NOTE(review): the end of the `search` list, the initialisation of
# `subgroups` and the return statement are not visible in this excerpt.
# make_hub assigns this method's result to a variable and passes it to
# make_track_subgroups, so presumably something is returned -- confirm.
172 def add_subgroups(self, composite):
173 """Add subgroups to composite track"""
174 search = [ ('htswlib:cell_line', 'cell'),
175 ('htswlib:replicate', 'replicate'),
176 ('encode3:library_id', 'library_id'),
177 ('encode3:assay', 'assay'),
178 ('encode3:rna_type', 'rna_type'),
179 ('encode3:protocol', 'protocol'),
183 for term, name in search:
184 subgroups.append(self.make_subgroupdefinition(term, name))
186 composite.add_subgroups(subgroups)
# Build a SubGroupDefinition for one term.
#
# Runs the 'trackhub_term_values.sparql' template with `term` in its
# context through self.execute_query, then maps the sanitized form of each
# row's 'name' value to the original value in `values`.
#
# NOTE(review): the initialisation of `values`, the loop header that binds
# `row`, and the arguments passed to SubGroupDefinition are not visible in
# this excerpt, so how `name` and `values` are used cannot be confirmed
# from here.
190 def make_subgroupdefinition(self, term, name):
191 """Subgroup attributes need to be an attribute of the library.
193 template = loader.get_template('trackhub_term_values.sparql')
194 context = Context({'term': term})
195 results = self.execute_query(template, context)
198 value = str(row['name'])
199 values[self.sanitize_name(value)] = value
201 return SubGroupDefinition(
# Runs the 'trackhub_samples.sparql' template with an empty context through
# self.execute_query.
#
# NOTE(review): the return statement is not visible in this excerpt.
# make_hub iterates over the result and indexes each item by keys such as
# 'file_type', 'relative_path' and 'library_id', so presumably the query
# results are returned as-is -- confirm.
207 def get_tracks(self):
208 """Collect information needed to describe trackhub tracks.
210 query_template = loader.get_template('trackhub_samples.sparql')
212 context = Context({ })
214 results = self.execute_query(query_template, context)
# Rewrite `name` by applying a list of (regex, substitution) pairs in
# order, each with re.IGNORECASE.
#
# NOTE(review): most of the replacement table and the return statement are
# not visible in this excerpt; callers use the result as a string, so
# presumably the rewritten `name` is returned -- confirm.
# NOTE(review): 'poly-?a\+' is a regular (non-raw) string containing a
# backslash; a raw string (r'poly-?a\+') would express the same pattern
# without relying on the unrecognised '\+' escape.
# NOTE(review): because matching is case-insensitive, the 'RNA-Seq' entry
# already rewrites every case variant of "rna-seq", so the following
# 'rna-seq' entry appears unable to match anything -- confirm.
217 def sanitize_name(self, name):
218 replacements = [('poly-?a\+', 'PolyAplus'),
219 ('poly-?a-', 'PolyAminus'),
220 ('RNA-Seq', 'RNASeq'),
221 ('rna-seq', 'rnaseq'),
236 for regex, substitution in replacements:
237 name = re.sub(regex, substitution, name, flags=re.IGNORECASE)
241 def get_manifest_metadata(self, analysis_node):
243 query_template = loader.get_template('trackhub_manifest.sparql')
246 'submission': str(analysis_node.uri),
247 'submissionSet': str(self.submissionSetNS[''].uri),
249 results = self.execute_query(query_template, context)
250 LOGGER.info("scanned %s for results found %s",
251 str(analysis_node), len(results))