Merge branch 'django1.7' of mus.cacr.caltech.edu:htsworkflow into django1.7
author Diane Trout <diane@ghic.org>
Fri, 27 Mar 2015 18:13:50 +0000 (11:13 -0700)
committer Diane Trout <diane@ghic.org>
Fri, 27 Mar 2015 18:13:50 +0000 (11:13 -0700)
encode_submission/encode3.py
htsworkflow/submission/submission.py

diff --combined encode_submission/encode3.py
index 82e0401215e5fc3bb85cf25ee8b3b9a5159c9999,a3f6d29642bd2e391d8f6802bd138c72a76a86f1..1e1a54a64c8f5e441e6b6f085f5b0b55924c6da8
@@@ -1,8 -1,7 +1,8 @@@
 +#!/usr/bin/env python
  """Create a track hub
  """
 +from __future__ import print_function, unicode_literals
  
 -#!/usr/bin/env python
  from ConfigParser import SafeConfigParser
  import fnmatch
  from glob import glob
@@@ -13,11 -12,14 +13,11 @@@ from optparse import OptionParser, Opti
  import os
  from pprint import pprint, pformat
  import shlex
 -from StringIO import StringIO
 +from six.moves import StringIO
  import stat
  import sys
  import time
  import types
 -import urllib
 -import urllib2
 -import urlparse
  from zipfile import ZipFile
  
  import RDF
@@@ -34,9 -36,8 +34,8 @@@ from htsworkflow.util.rdfhelp import 
  from htsworkflow.submission.daf import get_submission_uri
  from htsworkflow.submission.submission import list_submissions
  from htsworkflow.submission.results import ResultMap
- from htsworkflow.submission.trackhub_submission import TrackHubSubmission
  from htsworkflow.submission.condorfastq import CondorFastqExtract
+ from htsworkflow.submission.aws_submission import AWSSubmission
  logger = logging.getLogger(__name__)
  
  INDENTED = "  " + os.linesep
@@@ -53,12 -54,13 +52,13 @@@ def main(cmdline=None)
      from django.conf import settings
  
      if opts.debug:
-         settings.LOGGING['loggers']['level'] = 'DEBUG'
+         settings.LOGGING['loggers']['htsworkflow']['level'] = 'DEBUG'
      elif opts.verbose:
-         settings.LOGGING['loggers']['level'] = 'INFO'
+         settings.LOGGING['loggers']['htsworkflow']['level'] = 'INFO'
  
-     model = get_model(opts.model, opts.db_path)
+     django.setup()
  
+     model = get_model(opts.model, opts.db_path)
      submission_names = list(list_submissions(model))
      name = opts.name
      if len(submission_names) == 0 and opts.name is None:
  
      if name:
          submission_uri = get_submission_uri(name)
-         logger.info('Submission URI: %s', name)
-     else:
-         logger.debug('No name, unable to create submission ur')
-     mapper = None
-     if opts.make_track_hub:
-         mapper = TrackHubSubmission(name,
-                                     model,
-                                     baseurl=opts.make_track_hub,
-                                     baseupload=opts.track_hub_upload,
-                                     host=opts.host)
+         logger.info('Submission URI: %s', submission_uri)
+     mapper = AWSSubmission(name, model, encode_host=opts.encoded, lims_host=opts.host)
  
      if opts.load_rdf is not None:
          if submission_uri is None:
      if opts.scan_submission:
          if name is None:
              parser.error("Please define a submission name")
-         if mapper is None:
-             parser.error("Scan submission needs --make-track-hub=public-url")
          mapper.scan_submission_dirs(results)
  
-     if opts.make_track_hub:
-         trackdb = mapper.make_hub(results)
-     if opts.make_manifest:
-         make_manifest(mapper, results, opts.make_manifest)
+     if opts.upload:
+         mapper.upload(results, opts.dry_run)
  
      if opts.sparql:
          sparql_query(model, opts.sparql)
  
      if opts.print_rdf:
          writer = get_serializer()
 -        print writer.serialize_model_to_string(model)
 +        print(writer.serialize_model_to_string(model))
  
  
- def make_manifest(mapper, results, filename=None):
-     manifest = mapper.make_manifest(results)
-     if filename is None or filename == '-':
-         sys.stdout.write(manifest)
-     else:
-         with open(filename, 'w') as mainifeststream:
-             mainifeststream.write(manifest)
  def make_parser():
      parser = OptionParser()
  
      commands.add_option('--fastq', default=False, action="store_true",
                          help="generate scripts for making fastq files")
      commands.add_option('--scan-submission', default=False, action="store_true",
-                         help="Import metadata for submission into our model")
-     commands.add_option('--make-track-hub', default=None,
-                         help='web root that will host the trackhub.')
-     commands.add_option('--track-hub-upload', default=None,
-                         help='where to upload track hub <host>:<path>')
-     commands.add_option('--make-manifest',
-                         help='name the manifest file name or - for stdout to create it',
-                         default=None)
+                         help="cache md5 sums")
+     commands.add_option('--upload', default=False, action="store_true",
+                         help="Upload files")
  
      parser.add_option_group(commands)
  
      parser.add_option('--compression', default=None, type='choice',
                        choices=['gzip'],
                        help='select compression type for fastq files')
-     parser.add_option('--daf', default=None, help='specify daf name')
      parser.add_option('--library-url', default=None,
                        help="specify an alternate source for library information")
+     parser.add_option('--encoded', default='www.encodeproject.org',
+                       help='base url for talking to encode server')
+     parser.add_option('--dry-run', default=False, action='store_true',
+                       help='avoid making changes to encoded')
      # debugging
      parser.add_option('--verbose', default=False, action="store_true",
                        help='verbose logging')
      return parser
  
  if __name__ == "__main__":
-     django.setup()
      main()
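
The encode3.py half of this merge is a straight Python 2/3 compatibility pass: the shebang and `__future__` imports move to the top of the module, `StringIO` comes from `six.moves` instead of the Python 2 `StringIO` module, the unused `urllib`/`urllib2`/`urlparse` imports are dropped, and `print` becomes a function. A minimal sketch of that module-header pattern (the `urljoin` import is illustrative only, not part of this commit):

    #!/usr/bin/env python
    """Module header that runs unchanged on Python 2 and Python 3."""
    from __future__ import print_function, unicode_literals

    # six.moves gives one import path for names that were relocated in
    # Python 3: StringIO.StringIO (py2) vs io.StringIO (py3), and
    # urlparse vs urllib.parse.
    from six.moves import StringIO
    from six.moves.urllib.parse import urljoin  # illustrative only

    buf = StringIO()
    print("works on both interpreters", file=buf)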
diff --combined htsworkflow/submission/submission.py
index cb204557cecf8a74dbd230e9475ad55f76b05e66,ee28e91440f4d0056257b73c0140d2dd5ccfb290..b13138ac047109332a9a4dd9de0fbe4f0fbf9ad9
@@@ -34,6 -34,7 +34,7 @@@ class Submission(object)
          self.submissionSet = get_submission_uri(self.name)
          self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
          self.libraryNS = RDF.NS('{0}/library/'.format(host))
+         self.flowcellNS = RDF.NS('{0}/flowcell/'.format(host))
  
          self.__view_map = None
  
@@@ -44,7 -45,7 +45,7 @@@
              LOGGER.info("Importing %s from %s" % (lib_id, result_dir))
              try:
                  self.import_analysis_dir(result_dir, lib_id)
 -            except MetadataLookupException, e:
 +            except MetadataLookupException as e:
                  LOGGER.error("Skipping %s: %s" % (lib_id, str(e)))
  
      def import_analysis_dir(self, analysis_dir, library_id):
          # add file specific information
          fileNode = self.make_file_node(pathname, an_analysis)
          self.add_md5s(filename, fileNode, analysis_dir)
+         self.add_file_size(filename, fileNode, analysis_dir)
          self.add_fastq_metadata(filename, fileNode)
          self.add_label(file_type, fileNode, libNode)
          self.model.add_statement(
              self.model.add_statement(
                  RDF.Statement(fileNode, dafTermOntology['md5sum'], md5))
  
+     def add_file_size(self, filename, fileNode, analysis_dir):
+         LOGGER.debug("Updating file size")
+         submission_pathname = os.path.join(analysis_dir, filename)
+         file_size = os.stat(submission_pathname).st_size
+         self.model.add_statement(
+             RDF.Statement(fileNode, dafTermOntology['file_size'], toTypedNode(file_size)))
      def add_fastq_metadata(self, filename, fileNode):
          # How should I detect if this is actually a fastq file?
          try:
                  s = RDF.Statement(fileNode, model_term, toTypedNode(value))
                  self.model.append(s)
  
+         if 'flowcell' in fqname:
+             value = self.flowcellNS[fqname['flowcell'] + '/']
+             s = RDF.Statement(fileNode, libraryOntology['flowcell'], value)
+             self.model.append(s)
      def add_label(self, file_type, file_node, lib_node):
          """Add rdfs:label to a file node
          """
              self.model.append(s)
  
          self._add_lane_details(libNode)
+         self._add_flowcell_details()
  
      def _add_lane_details(self, libNode):
          """Import lane details
              LOGGER.debug("Importing %s" % (lane.uri,))
              try:
                  parser.parse_into_model(self.model, lane.uri)
 -            except RDF.RedlandError, e:
 +            except RDF.RedlandError as e:
                  LOGGER.error("Error accessing %s" % (lane.uri,))
                  raise e
  
  
+     def _add_flowcell_details(self):
+         template = loader.get_template('aws_flowcell.sparql')
+         results = self.execute_query(template, Context())
+         parser = RDF.Parser(name='rdfa')
+         for r in self.execute_query(template, Context()):
+             flowcell = r['flowcell']
+             try:
+                 parser.parse_into_model(self.model, flowcell.uri)
+             except RDF.RedlandError as e:
+                 LOGGER.error("Error accessing %s" % (str(flowcell)))
+                 raise e
      def find_best_match(self, filename):
          """Search through potential filename matching patterns
          """
              LOGGER.debug("Found: %s" % (literal_re,))
              try:
                  filename_re = re.compile(literal_re)
 -            except re.error, e:
 +            except re.error as e:
                  LOGGER.error("Unable to compile: %s" % (literal_re,))
              patterns[literal_re] = view_name
          return patterns
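
Beyond the `except ... as e` syntax fixes, the submission.py half of the merge adds file-size and flowcell statements to the RDF model. The size itself comes from a plain `os.stat` call; a standalone sketch of that piece, with a dict standing in for the Redland model (an assumption made so the sketch has no RDF dependency):

    import os

    def file_size_metadata(analysis_dir, filename):
        """Size in bytes of a submitted file, as add_file_size records it.

        In the real method the value is wrapped with toTypedNode() and
        added to the model under dafTermOntology['file_size'].
        """
        submission_pathname = os.path.join(analysis_dir, filename)
        return {'file_size': os.stat(submission_pathname).st_size}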