+#!/usr/bin/env python
"""Create a track hub
"""
+from __future__ import print_function, unicode_literals
-#!/usr/bin/env python
from ConfigParser import SafeConfigParser
import fnmatch
from glob import glob
import os
from pprint import pprint, pformat
import shlex
-from StringIO import StringIO
+from six.moves import StringIO
import stat
import sys
import time
import types
-import urllib
-import urllib2
-import urlparse
from zipfile import ZipFile
import RDF
from htsworkflow.submission.daf import get_submission_uri
from htsworkflow.submission.submission import list_submissions
from htsworkflow.submission.results import ResultMap
- from htsworkflow.submission.trackhub_submission import TrackHubSubmission
from htsworkflow.submission.condorfastq import CondorFastqExtract
-
+ from htsworkflow.submission.aws_submission import AWSSubmission
logger = logging.getLogger(__name__)
INDENTED = " " + os.linesep
from django.conf import settings
if opts.debug:
- settings.LOGGING['loggers']['level'] = 'DEBUG'
+ settings.LOGGING['loggers']['htsworkflow']['level'] = 'DEBUG'
elif opts.verbose:
- settings.LOGGING['loggers']['level'] = 'INFO'
+ settings.LOGGING['loggers']['htsworkflow']['level'] = 'INFO'
- model = get_model(opts.model, opts.db_path)
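+ # django.setup() populates the app registry and applies settings.LOGGING
+ # (including the level overrides above) before the model is loaded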
+ django.setup()
+ model = get_model(opts.model, opts.db_path)
submission_names = list(list_submissions(model))
name = opts.name
if len(submission_names) == 0 and opts.name is None:
if name:
submission_uri = get_submission_uri(name)
- logger.info('Submission URI: %s', name)
- else:
- logger.debug('No name, unable to create submission ur')
-
- mapper = None
- if opts.make_track_hub:
- mapper = TrackHubSubmission(name,
- model,
- baseurl=opts.make_track_hub,
- baseupload=opts.track_hub_upload,
- host=opts.host)
+ logger.info('Submission URI: %s', submission_uri)
+
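+ # AWSSubmission (htsworkflow.submission.aws_submission) replaces the old
+ # TrackHubSubmission mapper; opts.encoded is the ENCODE server, opts.host the local LIMS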
+ mapper = AWSSubmission(name, model, encode_host=opts.encoded, lims_host=opts.host)
if opts.load_rdf is not None:
if submission_uri is None:
if opts.scan_submission:
if name is None:
parser.error("Please define a submission name")
- if mapper is None:
- parser.error("Scan submission needs --make-track-hub=public-url")
mapper.scan_submission_dirs(results)
- if opts.make_track_hub:
- trackdb = mapper.make_hub(results)
-
- if opts.make_manifest:
- make_manifest(mapper, results, opts.make_manifest)
+ if opts.upload:
+ mapper.upload(results, opts.dry_run)
if opts.sparql:
sparql_query(model, opts.sparql)
if opts.print_rdf:
writer = get_serializer()
- print writer.serialize_model_to_string(model)
+ print(writer.serialize_model_to_string(model))
- def make_manifest(mapper, results, filename=None):
- manifest = mapper.make_manifest(results)
-
- if filename is None or filename == '-':
- sys.stdout.write(manifest)
- else:
- with open(filename, 'w') as mainifeststream:
- mainifeststream.write(manifest)
-
-
def make_parser():
parser = OptionParser()
commands.add_option('--fastq', default=False, action="store_true",
help="generate scripts for making fastq files")
commands.add_option('--scan-submission', default=False, action="store_true",
- help="Import metadata for submission into our model")
- commands.add_option('--make-track-hub', default=None,
- help='web root that will host the trackhub.')
- commands.add_option('--track-hub-upload', default=None,
- help='where to upload track hub <host>:<path>')
- commands.add_option('--make-manifest',
- help='name the manifest file name or - for stdout to create it',
- default=None)
+ help="cache md5 sums")
+ commands.add_option('--upload', default=False, action="store_true",
+ help="Upload files")
parser.add_option_group(commands)
parser.add_option('--compression', default=None, type='choice',
choices=['gzip'],
help='select compression type for fastq files')
- parser.add_option('--daf', default=None, help='specify daf name')
parser.add_option('--library-url', default=None,
help="specify an alternate source for library information")
+ parser.add_option('--encoded', default='www.encodeproject.org',
+ help='base url for talking to encode server')
+ parser.add_option('--dry-run', default=False, action='store_true',
+ help='avoid making changes to encoded')
# debugging
parser.add_option('--verbose', default=False, action="store_true",
help='verbose logging')
return parser
if __name__ == "__main__":
- django.setup()
-
main()
self.submissionSet = get_submission_uri(self.name)
self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
self.libraryNS = RDF.NS('{0}/library/'.format(host))
+ self.flowcellNS = RDF.NS('{0}/flowcell/'.format(host))
self.__view_map = None
LOGGER.info("Importing %s from %s" % (lib_id, result_dir))
try:
self.import_analysis_dir(result_dir, lib_id)
- except MetadataLookupException, e:
+ except MetadataLookupException as e:
LOGGER.error("Skipping %s: %s" % (lib_id, str(e)))
def import_analysis_dir(self, analysis_dir, library_id):
# add file specific information
fileNode = self.make_file_node(pathname, an_analysis)
self.add_md5s(filename, fileNode, analysis_dir)
+ self.add_file_size(filename, fileNode, analysis_dir)
self.add_fastq_metadata(filename, fileNode)
self.add_label(file_type, fileNode, libNode)
self.model.add_statement(
self.model.add_statement(
RDF.Statement(fileNode, dafTermOntology['md5sum'], md5))
+ def add_file_size(self, filename, fileNode, analysis_dir):
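+ # stat the file in the analysis directory and record its size on the file node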
+ LOGGER.debug("Updating file size")
+ submission_pathname = os.path.join(analysis_dir, filename)
+ file_size = os.stat(submission_pathname).st_size
+ self.model.add_statement(
+ RDF.Statement(fileNode, dafTermOntology['file_size'], toTypedNode(file_size)))
+
def add_fastq_metadata(self, filename, fileNode):
# How should I detect if this is actually a fastq file?
try:
s = RDF.Statement(fileNode, model_term, toTypedNode(value))
self.model.append(s)
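+ # if the parsed fastq name includes a flowcell id, link the file node to the
+ # flowcell resource so its details can be imported later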
+ if 'flowcell' in fqname:
+ value = self.flowcellNS[fqname['flowcell'] + '/']
+ s = RDF.Statement(fileNode, libraryOntology['flowcell'], value)
+ self.model.append(s)
+
def add_label(self, file_type, file_node, lib_node):
"""Add rdfs:label to a file node
"""
self.model.append(s)
self._add_lane_details(libNode)
+ self._add_flowcell_details()
def _add_lane_details(self, libNode):
"""Import lane details
LOGGER.debug("Importing %s" % (lane.uri,))
try:
parser.parse_into_model(self.model, lane.uri)
- except RDF.RedlandError, e:
+ except RDF.RedlandError as e:
LOGGER.error("Error accessing %s" % (lane.uri,))
raise e
+ def _add_flowcell_details(self):
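+ # run the aws_flowcell.sparql query and pull each referenced flowcell's
+ # RDFa metadata into the model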
+ template = loader.get_template('aws_flowcell.sparql')
+ results = self.execute_query(template, Context())
+
+ parser = RDF.Parser(name='rdfa')
+ for r in results:
+ flowcell = r['flowcell']
+ try:
+ parser.parse_into_model(self.model, flowcell.uri)
+ except RDF.RedlandError as e:
+ LOGGER.error("Error accessing %s" % (str(flowcell)))
+ raise e
+
+
def find_best_match(self, filename):
"""Search through potential filename matching patterns
"""
LOGGER.debug("Found: %s" % (literal_re,))
try:
filename_re = re.compile(literal_re)
- except re.error, e:
+ except re.error as e:
LOGGER.error("Unable to compile: %s" % (literal_re,))
patterns[literal_re] = view_name
return patterns