mark the example submission rule files as raw, so the escapes don't get confused
[htsworkflow.git] / encode_submission / ucsc_gather.py
index c929524abd6e97f606187e15929f2179758c6282..a9fa72a83ddc1c4263e509070f50053d238fa405 100644
@@ -17,9 +17,13 @@ import types
 import urllib
 import urllib2
 import urlparse
+from zipfile import ZipFile
 
 import RDF
 
+if 'DJANGO_SETTINGS_MODULE' not in os.environ:
+    os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.settings'
+
 from htsworkflow.util import api
 from htsworkflow.util.rdfhelp import \
      dafTermOntology, \
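
The guard just added matters because the htsworkflow imports that follow presumably pull in Django models, which cannot be imported without a settings module. A minimal sketch of the same idea, assuming only the standard library; os.environ.setdefault is an equivalent spelling, not what the commit uses:

    import os

    # Point Django at htsworkflow's settings only if the caller has not
    # already chosen a settings module; setdefault is a no-op otherwise.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings')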
@@ -30,18 +34,27 @@ from htsworkflow.util.rdfhelp import \
      sparql_query, \
      submissionOntology
 from htsworkflow.submission.daf import \
-     DAFMapper, \
+     UCSCSubmission, \
      MetadataLookupException, \
      get_submission_uri
+from htsworkflow.submission.results import ResultMap
 from htsworkflow.submission.condorfastq import CondorFastqExtract
 
 logger = logging.getLogger('ucsc_gather')
 
+TAR = '/bin/tar'
+LFTP = '/usr/bin/lftp'
+
 def main(cmdline=None):
     parser = make_parser()
     opts, args = parser.parse_args(cmdline)
     submission_uri = None
 
+    global TAR
+    global LFTP
+    TAR = opts.tar
+    LFTP = opts.lftp
+
     if opts.debug:
         logging.basicConfig(level = logging.DEBUG )
     elif opts.verbose:
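
TAR and LFTP are module-level defaults that main() rebinds from the new --tar and --lftp options before any helper formats a condor script. A minimal sketch of that pattern, with hypothetical names:

    TOOL = '/bin/tool'               # module default, read by the helpers

    def make_script():
        return 'Executable = %s' % TOOL

    def main(opts):
        global TOOL                  # rebind the module global, not a local
        TOOL = opts.tool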
@@ -52,8 +65,9 @@ def main(cmdline=None):
     apidata = api.make_auth_from_opts(opts, parser)
 
     model = get_model(opts.model, opts.db_path)
+    mapper = None
     if opts.name:
-        mapper = DAFMapper(opts.name, opts.daf,  model)
+        mapper = UCSCSubmission(opts.name, opts.daf, model)
         if opts.library_url is not None:
             mapper.library_url = opts.library_url
         submission_uri = get_submission_uri(opts.name)
@@ -67,28 +81,38 @@ def main(cmdline=None):
     if opts.make_ddf and opts.daf is None:
         parser.error("Please specify your daf when making ddf files")
 
-    library_result_map = []
+    results = ResultMap()
     for a in args:
-        library_result_map.extend(read_library_result_map(a))
+        results.add_results_from_file(a)
 
     if opts.make_tree_from is not None:
-        make_tree_from(opts.make_tree_from, library_result_map)
+        results.make_tree_from(opts.make_tree_from)
 
     if opts.link_daf:
-        if opts.daf is None:
-            parser.error("Please specify daf filename with --daf")
-        link_daf(opts.daf, library_result_map)
+        if mapper is None:
+            parser.error("Specify a submission model")
+        if mapper.daf is None:
+            parser.error("Please load a daf first")
+        mapper.link_daf(results)
 
     if opts.fastq:
-        extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
+        flowcells = os.path.join(opts.sequence, 'flowcells')
+        extractor = CondorFastqExtract(opts.host, flowcells,
                                        force=opts.force)
-        extractor.build_fastqs(library_result_map)
+        extractor.create_scripts(results)
 
     if opts.scan_submission:
-        scan_submission_dirs(mapper, library_result_map)
+        mapper.scan_submission_dirs(results)
 
     if opts.make_ddf:
-        make_all_ddfs(mapper, library_result_map, opts.daf, force=opts.force)
+        if not os.path.exists(TAR):
+            parser.error("%s does not exist, please specify --tar" % (TAR,))
+        if not os.path.exists(LFTP):
+            parser.error("%s does not exist, please specify --lftp" % (LFTP,))
+        make_all_ddfs(mapper, results, opts.daf, force=opts.force)
+
+    if opts.zip_ddf:
+        zip_ddfs(mapper, results, opts.daf)
 
     if opts.sparql:
         sparql_query(model, opts.sparql)
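
ResultMap replaces the hand-rolled read_library_result_map deleted further down; judging from that function's docstring, the mapping files keep the same whitespace-separated "library_id result_directory" lines. A hedged sketch of the entry points used above, where results.map is a made-up filename:

    from htsworkflow.submission.results import ResultMap

    results = ResultMap()
    # one "library_id result_directory" pair per line, '#' for comments
    results.add_results_from_file('results.map')

    # consumers iterate pairs, as make_all_ddfs now does via .items()
    for lib_id, result_dir in results.items():
        print('%s -> %s' % (lib_id, result_dir))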
@@ -112,6 +136,10 @@ def make_parser():
     model.add_option('--sparql', default=None, help="execute sparql query")
     model.add_option('--print-rdf', action="store_true", default=False,
       help="print ending model state")
+    model.add_option('--tar', default=TAR,
+                     help="override path to tar command")
+    model.add_option('--lftp', default=LFTP,
+                     help="override path to lftp command")
     parser.add_option_group(model)
     # commands
     commands = OptionGroup(parser, 'commands')
@@ -126,6 +154,9 @@ def make_parser():
                         help="link daf into submission directories")
     commands.add_option('--make-ddf', help='make the ddfs', default=False,
                       action="store_true")
+    commands.add_option('--zip-ddf', default=False, action='store_true',
+                        help='zip up just the metadata')
+
     parser.add_option_group(commands)
 
     parser.add_option('--force', default=False, action="store_true",
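
With the new options wired up, a full run might look like the following; every path and name here is hypothetical:

    ucsc_gather.py --name mysub --daf my.daf --scan-submission \
                   --make-ddf --zip-ddf --tar /bin/tar results.map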
@@ -143,56 +174,10 @@ def make_parser():
 
     return parser
 
-def make_tree_from(source_path, library_result_map):
-    """Create a tree using data files from source path.
-    """
-    for lib_id, lib_path in library_result_map:
-        if not os.path.exists(lib_path):
-            logger.info("Making dir {0}".format(lib_path))
-            os.mkdir(lib_path)
-        source_lib_dir = os.path.abspath(os.path.join(source_path, lib_path))
-        if os.path.exists(source_lib_dir):
-            pass
-        for filename in os.listdir(source_lib_dir):
-            source_pathname = os.path.join(source_lib_dir, filename)
-            target_pathname = os.path.join(lib_path, filename)
-            if not os.path.exists(source_pathname):
-                raise IOError("{0} does not exist".format(source_pathname))
-            if not os.path.exists(target_pathname):
-                os.symlink(source_pathname, target_pathname)
-                logger.info(
-                    'LINK {0} to {1}'.format(source_pathname, target_pathname))
-
-
-def link_daf(daf_path, library_result_map):
-    if not os.path.exists(daf_path):
-        raise RuntimeError("%s does not exist, how can I link to it?" % (daf_path,))
-
-    base_daf = os.path.basename(daf_path)
-
-    for lib_id, result_dir in library_result_map:
-        if not os.path.exists(result_dir):
-            raise RuntimeError("Couldn't find target directory %s" %(result_dir,))
-        submission_daf = os.path.join(result_dir, base_daf)
-        if not os.path.exists(submission_daf):
-            if not os.path.exists(daf_path):
-                raise RuntimeError("Couldn't find daf: %s" %(daf_path,))
-            os.link(daf_path, submission_daf)
-
-
-def scan_submission_dirs(view_map, library_result_map):
-    """Look through our submission directories and collect needed information
-    """
-    for lib_id, result_dir in library_result_map:
-        logger.info("Importing %s from %s" % (lib_id, result_dir))
-        try:
-            view_map.import_submission_dir(result_dir, lib_id)
-        except MetadataLookupException, e:
-            logger.error("Skipping %s: %s" % (lib_id, str(e)))
 
 def make_all_ddfs(view_map, library_result_map, daf_name, make_condor=True, force=False):
     dag_fragment = []
-    for lib_id, result_dir in library_result_map:
+    for lib_id, result_dir in library_result_map.items():
         submissionNode = view_map.get_submission_node(result_dir)
         dag_fragment.extend(
             make_ddf(view_map, submissionNode, daf_name, make_condor, result_dir)
@@ -235,7 +220,7 @@ WHERE {
   OPTIONAL { ?library libraryOntology:library_id ?labExpId }
   OPTIONAL { ?library libraryOntology:library_id ?labVersion }
   OPTIONAL { ?library libraryOntology:replicate ?replicate }
-  OPTIONAL { ?library libraryOntology:condition ?treatment }
+  OPTIONAL { ?library libraryOntology:condition_term ?treatment }
   OPTIONAL { ?library ucscDaf:protocol ?protocol }
   OPTIONAL { ?library ucscDaf:readType ?readType }
   OPTIONAL { ?library ucscDaf:strain ?strain }
@@ -250,7 +235,7 @@ ORDER BY  ?submitView"""
         logger.error("Need name for %s" % (str(submissionNode)))
         return []
 
-    ddf_name = name + '.ddf'
+    ddf_name = make_ddf_name(name)
     if outdir is not None:
         outfile = os.path.join(outdir, ddf_name)
         output = open(outfile,'w')
@@ -312,29 +297,31 @@ ORDER BY  ?submitView"""
     return dag_fragments
 
 
-def read_library_result_map(filename):
+def zip_ddfs(view_map, library_result_map, daf_name):
+    """zip up just the ddf & daf files
     """
-    Read a file that maps library id to result directory.
-    Does not support spaces in filenames.
-
-    For example:
-      10000 result/foo/bar
-    """
-    stream = open(filename,'r')
+    rootdir = os.getcwd()
+    for lib_id, result_dir in library_result_map.items():
+        submissionNode = view_map.get_submission_node(result_dir)
+        nameNode = view_map.model.get_target(submissionNode,
+                                             submissionOntology['name'])
+        name = fromTypedNode(nameNode)
+        if name is None:
+            logger.error("Need name for %s" % (str(submissionNode)))
+            continue
 
-    results = []
-    for line in stream:
-        line = line.rstrip()
-        if not line.startswith('#') and len(line) > 0 :
-            library_id, result_dir = line.split()
-            results.append((library_id, result_dir))
-    return results
+        zip_name = '../{0}.zip'.format(lib_id)
+        os.chdir(os.path.join(rootdir, result_dir))
+        with ZipFile(zip_name, 'w') as stream:
+            stream.write(make_ddf_name(name))
+            stream.write(daf_name)
+        os.chdir(rootdir)
 
 
 def make_condor_archive_script(name, files, outdir=None):
     script = """Universe = vanilla
 
-Executable = /bin/tar
+Executable = %(tar)s
 arguments = czvhf ../%(archivename)s %(filelist)s
 
 Error = compress.out.$(Process).log
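
The zip_ddfs added in this hunk leans on os.chdir so the archive entries come out relative to each result directory. ZipFile.write also accepts an arcname, which yields the same layout without changing directories; a sketch under that assumption, with zip_metadata a hypothetical helper:

    import os
    from zipfile import ZipFile

    def zip_metadata(result_dir, ddf_name, daf_name, zip_path):
        # store each file under its bare name, as the chdir version does
        with ZipFile(zip_path, 'w') as stream:
            for filename in (ddf_name, daf_name):
                stream.write(os.path.join(result_dir, filename),
                             arcname=filename)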
@@ -356,7 +343,8 @@ queue
     context = {'archivename': make_submission_name(name),
                'filelist': " ".join(files),
                'initialdir': os.path.abspath(outdir),
-               'user': os.getlogin()}
+               'user': os.getlogin(),
+               'tar': TAR}
 
     condor_script = os.path.join(outdir, make_condor_name(name, 'archive'))
     condor_stream = open(condor_script,'w')
@@ -365,11 +353,11 @@ queue
     return condor_script
 
 
-def make_condor_upload_script(name, outdir=None):
+def make_condor_upload_script(name, lftp, outdir=None):
     script = """Universe = vanilla
 
-Executable = /usr/bin/lftp
-arguments = -c put ../%(archivename)s -o ftp://%(ftpuser)s:%(ftppassword)s@%(ftphost)s/%(archivename)s
+Executable = %(lftp)s
+arguments = -c put %(archivename)s -o ftp://%(ftpuser)s:%(ftppassword)s@%(ftphost)s/%(archivename)s
 
 Error = upload.out.$(Process).log
 Output = upload.out.$(Process).log
@@ -391,7 +379,8 @@ queue
                'user': os.getlogin(),
                'ftpuser': ftpuser,
                'ftppassword': ftppassword,
-               'ftphost': encodeftp}
+               'ftphost': encodeftp,
+               'lftp': lftp}
 
     condor_script = os.path.join(outdir, make_condor_name(name, 'upload'))
     condor_stream = open(condor_script,'w')
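
Both condor templates are filled by classic %-interpolation from a context dict; every %(key)s named in a template must exist in the dict or the formatting raises KeyError, which is why the 'tar' and 'lftp' entries land alongside the template edits. A minimal illustration:

    template = 'Executable = %(lftp)s\narguments = -c put %(archivename)s\n'
    print(template % {'lftp': '/usr/bin/lftp', 'archivename': 'demo.tgz'})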
@@ -419,24 +408,6 @@ def make_dag_fragment(ininame, archive_condor, upload_condor):
     return fragments
 
 
-def get_library_info(host, apidata, library_id):
-    url = api.library_url(host, library_id)
-    contents = api.retrieve_info(url, apidata)
-    return contents
-
-
-def make_submission_section(line_counter, files, attributes):
-    """
-    Create a section in the submission ini file
-    """
-    inifile = [ "[line%s]" % (line_counter,) ]
-    inifile += ["files=%s" % (",".join(files))]
-
-    for k,v in attributes.items():
-        inifile += ["%s=%s" % (k,v)]
-    return inifile
-
-
 def make_base_name(pathname):
     base = os.path.basename(pathname)
     name, ext = os.path.splitext(base)