sparql_query, \
submissionOntology
from htsworkflow.submission.daf import get_submission_uri
+from htsworkflow.submission.submission import list_submissions
from htsworkflow.submission.results import ResultMap
-from htsworkflow.submission.trackhub import TrackHubSubmission
+from htsworkflow.submission.trackhub_submission import TrackHubSubmission
from htsworkflow.submission.condorfastq import CondorFastqExtract
logger = logging.getLogger(__name__)
+INDENTED = " " + os.linesep
+
def main(cmdline=None):
parser = make_parser()
opts, args = parser.parse_args(cmdline)
apidata = api.make_auth_from_opts(opts, parser)
model = get_model(opts.model, opts.db_path)
+
+ submission_names = list(list_submissions(model))
+ name = opts.name
+ if len(submission_names) == 0 and opts.name is None:
+ parser.error("Please name this submission")
+ elif opts.name and submission_names and opts.name not in submission_names:
+ parser.error("{} is not in this model. Choose from: {}{}".format(
+ opts.name,
+ os.linesep,
+ INDENTED.join(submission_names)))
+ elif opts.name is None and len(submission_names) > 1:
+ parser.error("Please choose submission name from: {}{}".format(
+ os.linesep,
+ INDENTED.join(submission_names)))
+ elif len(submission_names) == 1:
+ name = submission_names[0]
+
mapper = None
- if opts.name:
- mapper = TrackHubSubmission(opts.name, model, host=opts.host)
- submission_uri = get_submission_uri(opts.name)
+ if opts.make_track_hub:
+ mapper = TrackHubSubmission(name,
+ model,
+ baseurl=opts.make_track_hub,
+ baseupload=opts.track_hub_upload,
+ host=opts.host)
+ submission_uri = get_submission_uri(name)
if opts.load_rdf is not None:
extractor.create_scripts(results)
if opts.scan_submission:
- if opts.name is None:
+ if name is None:
parser.error("Please define a submission name")
mapper.scan_submission_dirs(results)
- if opts.make_hub:
- make_hub(mapper, results, opts.make_hub)
+ if opts.make_track_hub:
+ trackdb = mapper.make_hub(results)
if opts.make_manifest:
make_manifest(mapper, results, opts.make_manifest)
print writer.serialize_model_to_string(model)
-def make_hub(mapper, results, filename=None):
- trackdb = mapper.make_hub(results)
-
- if filename is None or filename == '-':
- sys.stdout.write(trackdb)
- else:
- with open('trackDb.txt', 'w') as trackstream:
- trackstream.write(trackdb)
-
def make_manifest(mapper, results, filename=None):
manifest = mapper.make_manifest(results)
help="generate scripts for making fastq files")
commands.add_option('--scan-submission', default=False, action="store_true",
help="Import metadata for submission into our model")
- commands.add_option('--make-hub', default=None,
- help='name the hub file or - for stdout to create it')
+ commands.add_option('--make-track-hub', default=None,
+ help='web root that will host the trackhub.')
+ commands.add_option('--track-hub-upload', default=None,
+ help='where to upload track hub <host>:<path>')
commands.add_option('--make-manifest',
help='name the manifest file name or - for stdout to create it',
default=None)
import logging
import os
+import re
import RDF
geoSoftNS, \
stripNamespace, \
submissionOntology
+from htsworkflow.util.url import parse_ssh_url
from django.conf import settings
from django.template import Context, loader
+from trackhub import default_hub, CompositeTrack, Track, SuperTrack, ViewTrack
+from trackhub.track import TRACKTYPES, SubGroupDefinition
+from trackhub.helpers import show_rendered_files
+from trackhub.upload import upload_track, upload_hub
LOGGER = logging.getLogger(__name__)
class TrackHubSubmission(Submission):
- def __init__(self, name, model, host):
+    def __init__(self, name, model, baseurl, baseupload, host):
+        """Create a trackhub based submission
+
+        :Parameters:
+          - `name`: Name of submission
+          - `model`: librdf model reference
+          - `baseurl`: web root where trackhub will be hosted
+          - `baseupload`: ssh style upload target for the trackhub,
+            parsed with parse_ssh_url; a false value disables uploading
+          - `host`: hostname for library pages.
+
+        :Raises:
+          ValueError when `baseurl` is None.
+        """
         super(TrackHubSubmission, self).__init__(name, model, host)
+        if baseurl is None:
+            raise ValueError("Need a web root to make a track hub")
+        self.baseurl = os.path.join(baseurl, self.name)
+        if baseupload:
+            sshurl = parse_ssh_url(baseupload)
+            # Log instead of printing: stdout is reserved for script output.
+            LOGGER.debug("Parsed track hub upload target: %s", sshurl)
+            self.user = sshurl.user
+            # NOTE(review): `host` was also handed to Submission.__init__
+            # above; if the base class stores it as self.host this
+            # assignment replaces it with the upload host -- confirm.
+            self.host = sshurl.host
+            self.uploadpath = sshurl.path
+        else:
+            # Keep the attributes defined even when uploading is disabled.
+            self.user = None
+            self.uploadpath = None
- def make_hub(self, result_map):
+ def make_hub_template(self, result_map):
samples = []
- for lib_id, result_dir in result_map.items():
- an_analysis = self.get_submission_node(result_dir)
+ for an_analysis in self.analysis_nodes(result_map):
metadata = self.get_sample_metadata(an_analysis)
if len(metadata) == 0:
errmsg = 'No metadata found for {0}'
})
return str(template.render(context))
+    def make_hub(self, result_map):
+        """Build the track hub, render it, and upload it when configured.
+
+        One bigWig composite track named after the submission is added
+        to the hub's trackdb.  Each record from get_tracks() whose file
+        type is listed in TRACKTYPES becomes a Track attached to a view
+        keyed by the record's output type; other records are logged and
+        skipped.
+
+        :Parameters:
+          - `result_map`: not consulted here; the track list comes from
+            get_tracks().
+
+        Nothing is returned.  hub.render() is always called, and
+        upload_hub() is called only when an upload path was configured.
+        """
+        genome_db = 'hg19'
+        hub_url = self.baseurl + '/'
+        hub, genomes_file, genome, trackdb = default_hub(
+            hub_name=self.name,
+            short_label=self.name,
+            long_label=self.name,
+            email='email',
+            genome=genome_db)
+
+        hub.remote_dir = self.uploadpath
+
+        # build higher order track types
+        composite_label = self.sanitize_name(self.name)
+        composite = CompositeTrack(
+            name=composite_label,
+            short_label=composite_label,
+            long_label=str(self.name),
+            tracktype="bigWig",
+            dragAndDrop='subtracks',
+            visibility='full',
+        )
+        trackdb.add_tracks(composite)
+
+        subgroups = self.add_subgroups(composite)
+
+        # The current view is carried across iterations so that a new
+        # one is only created when the output type changes.
+        view = None
+
+        for track in self.get_tracks():
+            if track['file_type'] not in TRACKTYPES:
+                LOGGER.info('Unrecognized file type %s', track['file_type'])
+                continue
+
+            view = self.add_new_view_if_needed(composite, view, track)
+            track_name = self.make_track_name(track)
+
+            track_subgroup = self.make_track_subgroups(subgroups, track)
+
+            newtrack = Track(
+                name=track_name,
+                tracktype=str(track['file_type']),
+                url=hub_url + str(track['relative_path']),
+                short_label=str(track['library_id']),
+                long_label=track_name,
+                subgroups=track_subgroup,
+            )
+            view.add_tracks([newtrack])
+
+        hub.render()
+        if hub.remote_dir:
+            LOGGER.info("Uploading to %s @ %s : %s",
+                        self.user, self.host, hub.remote_dir)
+            # Upload as the user parsed from the upload URL rather than
+            # a hard-coded account name.
+            upload_hub(hub=hub, host=self.host, user=self.user)
+
+    def add_new_view_if_needed(self, composite, view, track):
+        """Add a new trackhub view if we've hit a new type of track.
+
+        :Parameters:
+          - `composite`: composite track to attach to
+          - `view`: view used for the previous track, or None
+          - `track`: current track record
+
+        Returns `view` unchanged when its name equals the track's
+        output type; otherwise returns a newly created ViewTrack that
+        has been added to `composite`.
+        """
+        current_view_type = str(track['output_type'])
+        if view and current_view_type == view.name:
+            return view
+
+        view = ViewTrack(
+            name=current_view_type,
+            view=current_view_type,
+            visibility='squish',
+            short_label=current_view_type,
+            tracktype=str(track['file_type']),
+        )
+        composite.add_view(view)
+        return view
+
def make_manifest(self, result_map):
files = []
- for lib_id, result_dir in result_map.items():
- an_analysis = self.get_submission_node(result_dir)
+ for an_analysis in self.analysis_nodes(result_map):
metadata = self.get_manifest_metadata(an_analysis)
files.extend(metadata)
'files': files
})
return str(template.render(context))
-
- def get_sample_metadata(self, analysis_node):
- """Gather information for filling out sample section of a SOFT file
+
+    def make_track_name(self, track):
+        """Return ``<library_id>_<replicate>_<output_type>`` for `track`."""
+        fields = ('library_id', 'replicate', 'output_type')
+        return '_'.join('{}'.format(track[field]) for field in fields)
+
+    def make_track_subgroups(self, subgroups, track):
+        """Map subgroup names to the sanitized values carried by `track`.
+
+        :Parameters:
+          - `subgroups`: iterable of subgroup names to look up
+          - `track`: track record supporting ``in`` and item access
+
+        Names that are missing from `track`, or whose value is false,
+        are left out of the returned dictionary.
+        """
+        track_subgroups = {}
+        for key in subgroups:
+            if key in track and track[key]:
+                # Coerce to str first, as make_subgroupdefinition does,
+                # so non-string values do not break the regex
+                # substitutions in sanitize_name.
+                track_subgroups[key] = self.sanitize_name(str(track[key]))
+        return track_subgroups
+
+    def add_subgroups(self, composite):
+        """Add subgroups to composite track
+
+        One definition is built per (term, name) pair below and the
+        whole list is handed to `composite` in a single call.  Returns
+        the list of subgroup names in the same order.
+        """
+        term_names = (
+            ('htswlib:cell_line', 'cell'),
+            ('htswlib:replicate', 'replicate'),
+            ('encode3:library_id', 'library_id'),
+            ('encode3:assay', 'assay'),
+            ('encode3:rna_type', 'rna_type'),
+            ('encode3:protocol', 'protocol'),
+        )
+        definitions = [self.make_subgroupdefinition(term, label)
+                       for term, label in term_names]
+        composite.add_subgroups(definitions)
+        return [label for _term, label in term_names]
+
+
+    def make_subgroupdefinition(self, term, name):
+        """Build the SubGroupDefinition called `name` for `term`.
+
+        Subgroup attributes need to be an attribute of the library.
+        The 'trackhub_term_values.sparql' template is run with `term`
+        and every returned ``name`` value is mapped from its sanitized
+        form to its original text.
+        """
+        query = loader.get_template('trackhub_term_values.sparql')
+        rows = self.execute_query(query, Context({'term': term}))
+        labels = [str(row['name']) for row in rows]
+        mapping = {self.sanitize_name(label): label for label in labels}
+        return SubGroupDefinition(name=name, label=name, mapping=mapping)
+
+ def get_tracks(self):
+ """Collect information needed to describe trackhub tracks.
"""
query_template = loader.get_template('trackhub_samples.sparql')
- context = Context({
- 'submission': str(analysis_node.uri),
- 'submissionSet': str(self.submissionSetNS[''].uri),
- })
+ context = Context({ })
results = self.execute_query(query_template, context)
return results
+    def sanitize_name(self, name):
+        """Return `name` rewritten without '+', '-', spaces or a leading digit.
+
+        Substitutions are case-insensitive and applied in order:
+        poly-A+/poly-A- spellings become PolyAplus/PolyAminus, any
+        casing of "rna-seq" becomes RNASeq, remaining hyphens and
+        spaces become underscores, and a leading ASCII digit is
+        spelled out (for example ``'2'`` becomes ``'Two'``).
+
+        `name` must be a string; it is passed straight to re.sub.
+        """
+        # Raw strings keep the regex escapes intact.  A separate
+        # lower-case 'rna-seq' rule would never fire because the
+        # 'RNA-Seq' rule already matches every casing.
+        replacements = [(r'poly-?a\+', 'PolyAplus'),
+                        (r'poly-?a-', 'PolyAminus'),
+                        (r'RNA-Seq', 'RNASeq'),
+                        (r'-', '_'),
+                        (r' ', '_'),
+                        ]
+
+        for regex, substitution in replacements:
+            name = re.sub(regex, substitution, name, flags=re.IGNORECASE)
+
+        # Only the first character is affected, so one anchored pattern
+        # with a lookup covers all ten digits.
+        digit_words = {'0': 'Zero', '1': 'One', '2': 'Two', '3': 'Three',
+                       '4': 'Four', '5': 'Five', '6': 'Six', '7': 'Seven',
+                       '8': 'Eight', '9': 'Nine'}
+        return re.sub(r'^[0-9]',
+                      lambda match: digit_words[match.group()],
+                      name)
+
def get_manifest_metadata(self, analysis_node):
+
query_template = loader.get_template('trackhub_manifest.sparql')
context = Context({
'submissionSet': str(self.submissionSetNS[''].uri),
})
results = self.execute_query(query_template, context)
+ LOGGER.info("scanned %s for results found %s",
+ str(analysis_node), len(results))
return results
-PREFIX libraryOntology: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+PREFIX htswlib: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
+PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
-select distinct ?name ?bam ?cell ?antibody ?sex ?control ?strain ?controlId ?library_id ?treatment ?protocol ?readType ?insertLength ?replicate ?mapAlgorithm ?species_name ?taxon_id ?extractMolecule ?growthProtocol ?extractProtocol ?dataProtocol ?tier ?experiment_type ?library_selection ?library_source ?input_quantity
-WHERE {
- <{{submission}}> a submissionOntology:submission ;
- submissionOntology:library ?library ;
- submissionOntology:name ?name ;
- ucscDaf:has_file ?file .
- ?file ucscDaf:filename ?bam .
- OPTIONAL { <{{submission}}> ucscDaf:control ?control }
- OPTIONAL { <{{submission}}> ucscDaf:controlId ?controlId }
- OPTIONAL { ?library libraryOntology:antibody ?antibody }
- OPTIONAL { ?library libraryOntology:cell_line ?cell .
- OPTIONAL { ?cell_line cells:cell ?cell .
- OPTIONAL { ?cell_line cells:documents ?growthProtocol . }
- OPTIONAL { ?cell_line cells:tier ?tier . } } }
- OPTIONAL { ?library ucscDaf:sex ?sex }
- OPTIONAL { ?library libraryOntology:library_id ?library_id }
- OPTIONAL { ?library libraryOntology:replicate ?replicate }
- OPTIONAL { ?library libraryOntology:species ?species_name .
- ?species libraryOntology:species ?species_name ;
- libraryOntology:taxon_id ?taxon_id . }
- OPTIONAL { ?library libraryOntology:condition_term ?treatment }
- OPTIONAL { ?library libraryOntology:experiment_type ?experiment_type }
- OPTIONAL { ?library libraryOntology:librarySelection ?library_selection }
- OPTIONAL { ?library libraryOntology:librarySource ?library_source }
- OPTIONAL { <{{submissionSet}}> geoSoft:data_processing ?dataProtocol }
- OPTIONAL { ?library libraryOntology:extractMolecule ?extractMolecule }
- OPTIONAL { ?library libraryOntology:extractProtocol ?extractProtocol }
- OPTIONAL { ?library ucscDaf:protocol ?protocol }
- OPTIONAL { ?library ucscDaf:readType ?readType }
- OPTIONAL { ?library ucscDaf:strain ?strain }
- OPTIONAL { ?library libraryOntology:insert_size ?insertLength }
- OPTIONAL { ?library libraryOntology:inputQuantity ?input_quantity }
- OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm }
+select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol
-}
\ No newline at end of file
+WHERE {
+ ?trackType geoSoft:fileTypeLabel ?file_type ;
+ ucscDaf:output_type ?output_type .
+ ?file ucscDaf:filename ?filename ;
+ ucscDaf:relative_path ?relative_path ;
+ htswlib:library ?library ;
+ a ?trackType .
+ OPTIONAL { ?library htswlib:library_id ?lab_library_id }
+ OPTIONAL { ?library encode3:library_id ?library_id }
+ OPTIONAL { ?library htswlib:cell_line ?cell . }
+ OPTIONAL { ?library htswlib:replicate ?replicate }
+ OPTIONAL { ?library encode3:assay ?assay . }
+ OPTIONAL { ?library encode3:rna_type ?rna_type. }
+ OPTIONAL { ?library encode3:protocol ?protocol. }
+ #OPTIONAL { ?library ucscDaf:readType ?read_type }
+}
+order by ?trackType