Merge branch 'django1.7' of mus.cacr.caltech.edu:htsworkflow into django1.7
author Diane Trout <diane@ghic.org>
Fri, 27 Mar 2015 18:13:50 +0000 (11:13 -0700)
committer Diane Trout <diane@ghic.org>
Fri, 27 Mar 2015 18:13:50 +0000 (11:13 -0700)
encode_submission/encode3.py
htsworkflow/submission/submission.py

diff --combined encode_submission/encode3.py
index 82e0401215e5fc3bb85cf25ee8b3b9a5159c9999,a3f6d29642bd2e391d8f6802bd138c72a76a86f1..1e1a54a64c8f5e441e6b6f085f5b0b55924c6da8
@@@ -1,8 -1,7 +1,8 @@@
 +#!/usr/bin/env python
  """Create a track hub
  """
 +from __future__ import print_function, unicode_literals
  
 -#!/usr/bin/env python
  from ConfigParser import SafeConfigParser
  import fnmatch
  from glob import glob
@@@ -13,11 -12,14 +13,11 @@@ from optparse import OptionParser, Opti
  import os
  from pprint import pprint, pformat
  import shlex
 -from StringIO import StringIO
 +from six.moves import StringIO
  import stat
  import sys
  import time
  import types
 -import urllib
 -import urllib2
 -import urlparse
  from zipfile import ZipFile
  
  import RDF
@@@ -34,9 -36,8 +34,8 @@@ from htsworkflow.util.rdfhelp import 
  from htsworkflow.submission.daf import get_submission_uri
  from htsworkflow.submission.submission import list_submissions
  from htsworkflow.submission.results import ResultMap
- from htsworkflow.submission.trackhub_submission import TrackHubSubmission
  from htsworkflow.submission.condorfastq import CondorFastqExtract
+ from htsworkflow.submission.aws_submission import AWSSubmission
  logger = logging.getLogger(__name__)
  
  INDENTED = "  " + os.linesep
@@@ -53,12 -54,13 +52,13 @@@ def main(cmdline=None)
      from django.conf import settings
  
      if opts.debug:
-         settings.LOGGING['loggers']['level'] = 'DEBUG'
+         settings.LOGGING['loggers']['htsworkflow']['level'] = 'DEBUG'
      elif opts.verbose:
-         settings.LOGGING['loggers']['level'] = 'INFO'
+         settings.LOGGING['loggers']['htsworkflow']['level'] = 'INFO'
  
-     model = get_model(opts.model, opts.db_path)
+     django.setup()
  
+     model = get_model(opts.model, opts.db_path)
      submission_names = list(list_submissions(model))
      name = opts.name
      if len(submission_names) == 0 and opts.name is None:
  
      if name:
          submission_uri = get_submission_uri(name)
-         logger.info('Submission URI: %s', name)
-     else:
-         logger.debug('No name, unable to create submission ur')
-     mapper = None
-     if opts.make_track_hub:
-         mapper = TrackHubSubmission(name,
-                                     model,
-                                     baseurl=opts.make_track_hub,
-                                     baseupload=opts.track_hub_upload,
-                                     host=opts.host)
+         logger.info('Submission URI: %s', submission_uri)
+     mapper = AWSSubmission(name, model, encode_host=opts.encoded, lims_host=opts.host)
  
      if opts.load_rdf is not None:
          if submission_uri is None:
      if opts.scan_submission:
          if name is None:
              parser.error("Please define a submission name")
-         if mapper is None:
-             parser.error("Scan submission needs --make-track-hub=public-url")
          mapper.scan_submission_dirs(results)
  
-     if opts.make_track_hub:
-         trackdb = mapper.make_hub(results)
-     if opts.make_manifest:
-         make_manifest(mapper, results, opts.make_manifest)
+     if opts.upload:
+         mapper.upload(results, opts.dry_run)
  
      if opts.sparql:
          sparql_query(model, opts.sparql)
  
      if opts.print_rdf:
          writer = get_serializer()
 -        print writer.serialize_model_to_string(model)
 +        print(writer.serialize_model_to_string(model))
  
  
- def make_manifest(mapper, results, filename=None):
-     manifest = mapper.make_manifest(results)
-     if filename is None or filename == '-':
-         sys.stdout.write(manifest)
-     else:
-         with open(filename, 'w') as mainifeststream:
-             mainifeststream.write(manifest)
  def make_parser():
      parser = OptionParser()
  
      commands.add_option('--fastq', default=False, action="store_true",
                          help="generate scripts for making fastq files")
      commands.add_option('--scan-submission', default=False, action="store_true",
-                         help="Import metadata for submission into our model")
-     commands.add_option('--make-track-hub', default=None,
-                         help='web root that will host the trackhub.')
-     commands.add_option('--track-hub-upload', default=None,
-                         help='where to upload track hub <host>:<path>')
-     commands.add_option('--make-manifest',
-                         help='name the manifest file name or - for stdout to create it',
-                         default=None)
+                         help="cache md5 sums")
+     commands.add_option('--upload', default=False, action="store_true",
+                         help="Upload files")
  
      parser.add_option_group(commands)
  
      parser.add_option('--compression', default=None, type='choice',
                        choices=['gzip'],
                        help='select compression type for fastq files')
-     parser.add_option('--daf', default=None, help='specify daf name')
      parser.add_option('--library-url', default=None,
                        help="specify an alternate source for library information")
+     parser.add_option('--encoded', default='www.encodeproject.org',
+                       help='base url for talking to encode server')
+     parser.add_option('--dry-run', default=False, action='store_true',
+                       help='avoid making changes to encoded')
      # debugging
      parser.add_option('--verbose', default=False, action="store_true",
                        help='verbose logging')
      return parser
  
  if __name__ == "__main__":
-     django.setup()
      main()
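
The encode3.py half of this merge is a straight Python 2/3 compatibility pass: the shebang and `__future__` imports move to the top of the module, `StringIO` comes from `six.moves` instead of the Python 2 `StringIO` module, the unused `urllib`/`urllib2`/`urlparse` imports are dropped, and `print` becomes a function. A minimal sketch of that module-header pattern (the `urljoin` import is illustrative only, not part of this commit):

    #!/usr/bin/env python
    """Module header that runs unchanged on Python 2 and Python 3."""
    from __future__ import print_function, unicode_literals

    # six.moves gives one import path for names that were relocated in
    # Python 3: StringIO.StringIO (py2) vs io.StringIO (py3), and
    # urlparse vs urllib.parse.
    from six.moves import StringIO
    from six.moves.urllib.parse import urljoin  # illustrative only

    buf = StringIO()
    print("works on both interpreters", file=buf)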
diff --combined htsworkflow/submission/submission.py
index cb204557cecf8a74dbd230e9475ad55f76b05e66,ee28e91440f4d0056257b73c0140d2dd5ccfb290..b13138ac047109332a9a4dd9de0fbe4f0fbf9ad9
@@@ -34,6 -34,7 +34,7 @@@ class Submission(object)
          self.submissionSet = get_submission_uri(self.name)
          self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
          self.libraryNS = RDF.NS('{0}/library/'.format(host))
+         self.flowcellNS = RDF.NS('{0}/flowcell/'.format(host))
  
          self.__view_map = None
  
@@@ -44,7 -45,7 +45,7 @@@
              LOGGER.info("Importing %s from %s" % (lib_id, result_dir))
              try:
                  self.import_analysis_dir(result_dir, lib_id)
 -            except MetadataLookupException, e:
 +            except MetadataLookupException as e:
                  LOGGER.error("Skipping %s: %s" % (lib_id, str(e)))
  
      def import_analysis_dir(self, analysis_dir, library_id):
          # add file specific information
          fileNode = self.make_file_node(pathname, an_analysis)
          self.add_md5s(filename, fileNode, analysis_dir)
+         self.add_file_size(filename, fileNode, analysis_dir)
          self.add_fastq_metadata(filename, fileNode)
          self.add_label(file_type, fileNode, libNode)
          self.model.add_statement(
              self.model.add_statement(
                  RDF.Statement(fileNode, dafTermOntology['md5sum'], md5))
  
+     def add_file_size(self, filename, fileNode, analysis_dir):
+         LOGGER.debug("Updating file size")
+         submission_pathname = os.path.join(analysis_dir, filename)
+         file_size = os.stat(submission_pathname).st_size
+         self.model.add_statement(
+             RDF.Statement(fileNode, dafTermOntology['file_size'], toTypedNode(file_size)))
      def add_fastq_metadata(self, filename, fileNode):
          # How should I detect if this is actually a fastq file?
          try:
                  s = RDF.Statement(fileNode, model_term, toTypedNode(value))
                  self.model.append(s)
  
+         if 'flowcell' in fqname:
+             value = self.flowcellNS[fqname['flowcell'] + '/']
+             s = RDF.Statement(fileNode, libraryOntology['flowcell'], value)
+             self.model.append(s)
      def add_label(self, file_type, file_node, lib_node):
          """Add rdfs:label to a file node
          """
              self.model.append(s)
  
          self._add_lane_details(libNode)
+         self._add_flowcell_details()
  
      def _add_lane_details(self, libNode):
          """Import lane details
              LOGGER.debug("Importing %s" % (lane.uri,))
              try:
                  parser.parse_into_model(self.model, lane.uri)
 -            except RDF.RedlandError, e:
 +            except RDF.RedlandError as e:
                  LOGGER.error("Error accessing %s" % (lane.uri,))
                  raise e
  
  
+     def _add_flowcell_details(self):
+         template = loader.get_template('aws_flowcell.sparql')
+         results = self.execute_query(template, Context())
+         parser = RDF.Parser(name='rdfa')
+         for r in self.execute_query(template, Context()):
+             flowcell = r['flowcell']
+             try:
+                 parser.parse_into_model(self.model, flowcell.uri)
+             except RDF.RedlandError as e:
+                 LOGGER.error("Error accessing %s" % (str(flowcell)))
+                 raise e
      def find_best_match(self, filename):
          """Search through potential filename matching patterns
          """
              LOGGER.debug("Found: %s" % (literal_re,))
              try:
                  filename_re = re.compile(literal_re)
 -            except re.error, e:
 +            except re.error as e:
                  LOGGER.error("Unable to compile: %s" % (literal_re,))
              patterns[literal_re] = view_name
          return patterns
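
Beyond the `except ... as e` syntax fixes, the submission.py half of the merge adds file-size and flowcell statements to the RDF model. The size itself comes from a plain `os.stat` call; a standalone sketch of that piece, with a dict standing in for the Redland model (an assumption made so the sketch has no RDF dependency):

    import os

    def file_size_metadata(analysis_dir, filename):
        """Size in bytes of a submitted file, as add_file_size records it.

        In the real method the value is wrapped with toTypedNode() and
        added to the model under dafTermOntology['file_size'].
        """
        submission_pathname = os.path.join(analysis_dir, filename)
        return {'file_size': os.stat(submission_pathname).st_size}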