Move code out of ucsc_gather and into the htsworkflow tree.
authorDiane Trout <diane@caltech.edu>
Mon, 2 Apr 2012 21:55:21 +0000 (14:55 -0700)
committerDiane Trout <diane@caltech.edu>
Wed, 11 Apr 2012 22:21:30 +0000 (15:21 -0700)
I thought it'd be better to refactor ucsc_gather into more
targeted chunks so I could test the components better and
start sharing code with the SOFT / GEO submission tool I need
to write.

encode_submission/ucsc_gather.py
htsworkflow/submission/condorfastq.py
htsworkflow/submission/daf.py
htsworkflow/submission/results.py [new file with mode: 0644]
htsworkflow/submission/test/__init__.py [new file with mode: 0644]
htsworkflow/submission/test/test_condorfastq.py
htsworkflow/submission/test/test_daf.py
htsworkflow/submission/test/test_results.py [new file with mode: 0644]

index 8ad4f3ee297427c3d9c26252b4aa57718cd61bdb..1cdf0118f6f990943adec84ab90492be44f360fe 100644 (file)
@@ -31,9 +31,10 @@ from htsworkflow.util.rdfhelp import \
      sparql_query, \
      submissionOntology
 from htsworkflow.submission.daf import \
      sparql_query, \
      submissionOntology
 from htsworkflow.submission.daf import \
-     DAFMapper, \
+     UCSCSubmission, \
      MetadataLookupException, \
      get_submission_uri
      MetadataLookupException, \
      get_submission_uri
+from htsworkflow.submission.results import ResultMap
 from htsworkflow.submission.condorfastq import CondorFastqExtract
 
 logger = logging.getLogger('ucsc_gather')
 from htsworkflow.submission.condorfastq import CondorFastqExtract
 
 logger = logging.getLogger('ucsc_gather')
@@ -53,8 +54,9 @@ def main(cmdline=None):
     apidata = api.make_auth_from_opts(opts, parser)
 
     model = get_model(opts.model, opts.db_path)
     apidata = api.make_auth_from_opts(opts, parser)
 
     model = get_model(opts.model, opts.db_path)
+    mapper = None
     if opts.name:
     if opts.name:
-        mapper = DAFMapper(opts.name, opts.daf,  model)
+        mapper = UCSCSubmission(opts.name, opts.daf,  model)
         if opts.library_url is not None:
             mapper.library_url = opts.library_url
         submission_uri = get_submission_uri(opts.name)
         if opts.library_url is not None:
             mapper.library_url = opts.library_url
         submission_uri = get_submission_uri(opts.name)
@@ -68,31 +70,33 @@ def main(cmdline=None):
     if opts.make_ddf and opts.daf is None:
         parser.error("Please specify your daf when making ddf files")
 
     if opts.make_ddf and opts.daf is None:
         parser.error("Please specify your daf when making ddf files")
 
-    library_result_map = []
+    results = ResultMap()
     for a in args:
     for a in args:
-        library_result_map.extend(read_library_result_map(a))
+        results.add_results_from_file(a)
 
     if opts.make_tree_from is not None:
 
     if opts.make_tree_from is not None:
-        make_tree_from(opts.make_tree_from, library_result_map)
+        results.make_tree_from(opts.make_tree_from)
 
     if opts.link_daf:
 
     if opts.link_daf:
-        if opts.daf is None:
-            parser.error("Please specify daf filename with --daf")
-        link_daf(opts.daf, library_result_map)
+        if mapper is None:
+            parser.error("Specify a submission model")
+        if mapper.daf is None:
+            parser.error("Please load a daf first")
+        mapper.link_daf(results)
 
     if opts.fastq:
         extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
                                        force=opts.force)
 
     if opts.fastq:
         extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
                                        force=opts.force)
-        extractor.create_scripts(library_result_map)
+        extractor.create_scripts(results)
 
     if opts.scan_submission:
 
     if opts.scan_submission:
-        scan_submission_dirs(mapper, library_result_map)
+        mapper.scan_submission_dirs(results)
 
     if opts.make_ddf:
 
     if opts.make_ddf:
-        make_all_ddfs(mapper, library_result_map, opts.daf, force=opts.force)
+        make_all_ddfs(mapper, results, opts.daf, force=opts.force)
 
     if opts.zip_ddf:
 
     if opts.zip_ddf:
-        zip_ddfs(mapper, library_result_map, opts.daf)
+        zip_ddfs(mapper, results, opts.daf)
 
     if opts.sparql:
         sparql_query(model, opts.sparql)
 
     if opts.sparql:
         sparql_query(model, opts.sparql)
@@ -150,53 +154,6 @@ def make_parser():
 
     return parser
 
 
     return parser
 
-def make_tree_from(source_path, library_result_map):
-    """Create a tree using data files from source path.
-    """
-    for lib_id, lib_path in library_result_map:
-        if not os.path.exists(lib_path):
-            logger.info("Making dir {0}".format(lib_path))
-            os.mkdir(lib_path)
-        source_lib_dir = os.path.abspath(os.path.join(source_path, lib_path))
-        if os.path.exists(source_lib_dir):
-            pass
-        for filename in os.listdir(source_lib_dir):
-            source_pathname = os.path.join(source_lib_dir, filename)
-            target_pathname = os.path.join(lib_path, filename)
-            if not os.path.exists(source_pathname):
-                raise IOError("{0} does not exist".format(source_pathname))
-            if not os.path.exists(target_pathname):
-                os.symlink(source_pathname, target_pathname)
-                logger.info(
-                    'LINK {0} to {1}'.format(source_pathname, target_pathname))
-
-
-def link_daf(daf_path, library_result_map):
-    if not os.path.exists(daf_path):
-        raise RuntimeError("%s does not exist, how can I link to it?" % (daf_path,))
-
-    base_daf = os.path.basename(daf_path)
-
-    for lib_id, result_dir in library_result_map:
-        if not os.path.exists(result_dir):
-            raise RuntimeError("Couldn't find target directory %s" %(result_dir,))
-        submission_daf = os.path.join(result_dir, base_daf)
-        if not os.path.exists(submission_daf):
-            if not os.path.exists(daf_path):
-                raise RuntimeError("Couldn't find daf: %s" %(daf_path,))
-            os.link(daf_path, submission_daf)
-
-
-def scan_submission_dirs(view_map, library_result_map):
-    """Look through our submission directories and collect needed information
-    """
-    for lib_id, result_dir in library_result_map:
-        logger.info("Importing %s from %s" % (lib_id, result_dir))
-        try:
-            view_map.import_submission_dir(result_dir, lib_id)
-        except MetadataLookupException, e:
-            logger.error("Skipping %s: %s" % (lib_id, str(e)))
-
 
 def make_all_ddfs(view_map, library_result_map, daf_name, make_condor=True, force=False):
     dag_fragment = []
 
 def make_all_ddfs(view_map, library_result_map, daf_name, make_condor=True, force=False):
     dag_fragment = []
@@ -341,25 +298,6 @@ def zip_ddfs(view_map, library_result_map, daf_name):
         os.chdir(rootdir)
 
 
         os.chdir(rootdir)
 
 
-def read_library_result_map(filename):
-    """
-    Read a file that maps library id to result directory.
-    Does not support spaces in filenames.
-
-    For example:
-      10000 result/foo/bar
-    """
-    stream = open(filename,'r')
-
-    results = []
-    for line in stream:
-        line = line.rstrip()
-        if not line.startswith('#') and len(line) > 0 :
-            library_id, result_dir = line.split()
-            results.append((library_id, result_dir))
-    return results
-
-
 def make_condor_archive_script(name, files, outdir=None):
     script = """Universe = vanilla
 
 def make_condor_archive_script(name, files, outdir=None):
     script = """Universe = vanilla
 
@@ -448,12 +386,6 @@ def make_dag_fragment(ininame, archive_condor, upload_condor):
     return fragments
 
 
     return fragments
 
 
-def get_library_info(host, apidata, library_id):
-    url = api.library_url(host, library_id)
-    contents = api.retrieve_info(url, apidata)
-    return contents
-
-
 def make_base_name(pathname):
     base = os.path.basename(pathname)
     name, ext = os.path.splitext(base)
 def make_base_name(pathname):
     base = os.path.basename(pathname)
     name, ext = os.path.splitext(base)
index 70ad32ecf09486a395ebc8c0927368ccc9ea057f..8513f7b2da2b266803f12ec32acf650242fb165e 100644 (file)
@@ -36,18 +36,18 @@ class CondorFastqExtract(object):
         self.log_path = log_path
         self.force = force
 
         self.log_path = log_path
         self.force = force
 
-    def create_scripts(self, library_result_map ):
+    def create_scripts(self, result_map ):
         """
         Generate condor scripts to build any needed fastq files
 
         Args:
         """
         Generate condor scripts to build any needed fastq files
 
         Args:
-          library_result_map (list):  [(library_id, destination directory), ...]
+          result_map: htsworkflow.submission.results.ResultMap()
         """
         template_map = {'srf': 'srf.condor',
                         'qseq': 'qseq.condor',
                         'split_fastq': 'split_fastq.condor'}
 
         """
         template_map = {'srf': 'srf.condor',
                         'qseq': 'qseq.condor',
                         'split_fastq': 'split_fastq.condor'}
 
-        condor_entries = self.build_condor_arguments(library_result_map)
+        condor_entries = self.build_condor_arguments(result_map)
         for script_type in template_map.keys():
             template = loader.get_template(template_map[script_type])
             variables = {'python': sys.executable,
         for script_type in template_map.keys():
             template = loader.get_template(template_map[script_type])
             variables = {'python': sys.executable,
@@ -61,7 +61,7 @@ class CondorFastqExtract(object):
             with open(script_type + '.condor','w+') as outstream:
                 outstream.write(template.render(context))
 
             with open(script_type + '.condor','w+') as outstream:
                 outstream.write(template.render(context))
 
-    def build_condor_arguments(self, library_result_map):
+    def build_condor_arguments(self, result_map):
         condor_entries = {'srf': [],
                           'qseq': [],
                           'split_fastq': []}
         condor_entries = {'srf': [],
                           'qseq': [],
                           'split_fastq': []}
@@ -69,9 +69,9 @@ class CondorFastqExtract(object):
                             'qseq': self.condor_qseq_to_fastq,
                             'split_fastq': self.condor_desplit_fastq
                             }
                             'qseq': self.condor_qseq_to_fastq,
                             'split_fastq': self.condor_desplit_fastq
                             }
-        lib_db = self.find_archive_sequence_files(library_result_map)
 
 
-        needed_targets = self.find_missing_targets(library_result_map, lib_db)
+        lib_db = self.find_archive_sequence_files(result_map)
+        needed_targets = self.find_missing_targets(result_map, lib_db)
 
         for target_pathname, available_sources in needed_targets.items():
             LOGGER.debug(' target : %s' % (target_pathname,))
 
         for target_pathname, available_sources in needed_targets.items():
             LOGGER.debug(' target : %s' % (target_pathname,))
@@ -93,7 +93,7 @@ class CondorFastqExtract(object):
 
         return condor_entries
 
 
         return condor_entries
 
-    def find_archive_sequence_files(self,  library_result_map):
+    def find_archive_sequence_files(self,  result_map):
         """
         Find archived sequence files associated with our results.
         """
         """
         Find archived sequence files associated with our results.
         """
@@ -102,7 +102,7 @@ class CondorFastqExtract(object):
         lib_db = {}
         seq_dirs = set()
         candidate_lanes = {}
         lib_db = {}
         seq_dirs = set()
         candidate_lanes = {}
-        for lib_id, result_dir in library_result_map:
+        for lib_id in result_map.keys():
             lib_info = self.api.get_library(lib_id)
             lib_info['lanes'] = {}
             lib_db[lib_id] = lib_info
             lib_info = self.api.get_library(lib_id)
             lib_info['lanes'] = {}
             lib_db[lib_id] = lib_info
@@ -129,7 +129,7 @@ class CondorFastqExtract(object):
 
         return lib_db
 
 
         return lib_db
 
-    def find_missing_targets(self, library_result_map, lib_db):
+    def find_missing_targets(self, result_map, lib_db):
         """
         Check if the sequence file exists.
         This requires computing what the sequence name is and checking
         """
         Check if the sequence file exists.
         This requires computing what the sequence name is and checking
@@ -141,7 +141,8 @@ class CondorFastqExtract(object):
         fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
         # find what targets we're missing
         needed_targets = {}
         fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
         # find what targets we're missing
         needed_targets = {}
-        for lib_id, result_dir in library_result_map:
+        for lib_id in result_map.keys():
+            result_dir = result_map[lib_id]
             lib = lib_db[lib_id]
             lane_dict = make_lane_dict(lib_db, lib_id)
 
             lib = lib_db[lib_id]
             lane_dict = make_lane_dict(lib_db, lib_id)
 
@@ -188,7 +189,7 @@ class CondorFastqExtract(object):
             raise ValueError("srf to fastq can only handle one file")
 
         return {
             raise ValueError("srf to fastq can only handle one file")
 
         return {
-            'sources': [sources[0].path],
+            'sources': [os.path.abspath(sources[0].path)],
             'pyscript': srf2fastq.__file__,
             'flowcell': sources[0].flowcell,
             'ispaired': sources[0].paired,
             'pyscript': srf2fastq.__file__,
             'flowcell': sources[0].flowcell,
             'ispaired': sources[0].paired,
index 7534a8d9ef6082ffd901d80fa8ea87c6b43b55d4..96037b9647c93146a9887334c3ec1ebdf9266b0d 100644 (file)
@@ -237,8 +237,8 @@ def get_view_namespace(submission_uri):
     return viewNS
 
 
     return viewNS
 
 
-class DAFMapper(object):
-    """Convert filenames to views in the UCSC Daf
+class UCSCSubmission(object):
+    """Build a submission by examining the DAF for what we need to submit
     """
     def __init__(self, name, daf_file=None, model=None):
         """Construct a RDF backed model of a UCSC DAF
     """
     def __init__(self, name, daf_file=None, model=None):
         """Construct a RDF backed model of a UCSC DAF
@@ -267,15 +267,23 @@ class DAFMapper(object):
 
         if hasattr(daf_file, 'next'):
             # its some kind of stream
 
         if hasattr(daf_file, 'next'):
             # its some kind of stream
-            fromstream_into_model(self.model, self.submissionSet, daf_file)
+            self.daf = daf_file.read()
         else:
             # file
         else:
             # file
-            parse_into_model(self.model, self.submissionSet, daf_file)
+            stream = open(daf_file, 'r')
+            self.daf = stream.read()
+            stream.close()
+
+        fromstring_into_model(self.model, self.submissionSet, self.daf)
 
         self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
         self.submissionSetNS = RDF.NS(str(self.submissionSet) + '/')
         self.__view_map = None
 
 
         self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
         self.submissionSetNS = RDF.NS(str(self.submissionSet) + '/')
         self.__view_map = None
 
+    def _get_daf_name(self):
+        return self.name + '.daf'
+    daf_name = property(_get_daf_name,doc="construct name for DAF file")
+
     def add_pattern(self, view_name, filename_pattern):
         """Map a filename regular expression to a view name
         """
     def add_pattern(self, view_name, filename_pattern):
         """Map a filename regular expression to a view name
         """
@@ -285,6 +293,16 @@ class DAFMapper(object):
                           dafTermOntology['filename_re'],
                           obj))
 
                           dafTermOntology['filename_re'],
                           obj))
 
+    def scan_submission_dirs(self, result_map):
+        """Examine files in our result directory
+        """
+        for lib_id, result_dir in result_map.items():
+            logger.info("Importing %s from %s" % (lib_id, result_dir))
+            try:
+                self.import_submission_dir(result_dir, lib_id)
+            except MetadataLookupException, e:
+                logger.error("Skipping %s: %s" % (lib_id, str(e)))
+
     def import_submission_dir(self, submission_dir, library_id):
         """Import a submission directories and update our model as needed
         """
     def import_submission_dir(self, submission_dir, library_id):
         """Import a submission directories and update our model as needed
         """
@@ -562,6 +580,28 @@ class DAFMapper(object):
 
         return False
 
 
         return False
 
+
+    def link_daf(self, result_map):
+        if self.daf is None or len(self.daf) == 0:
+            raise RuntimeError(
+                "DAF data does not exist, how can I link to it?")
+
+        base_daf = self.daf_name
+
+        for result_dir in result_map.values():
+            if not os.path.exists(result_dir):
+                raise RuntimeError(
+                    "Couldn't find target directory %s" %(result_dir,))
+            submission_daf = os.path.join(result_dir, base_daf)
+            if os.path.exists(submission_daf):
+                previous_daf = open(submission_daf, 'r').read()
+                if self.daf != previous_daf:
+                    LOGGER.info("Old daf is different, overwriting it.")
+            stream = open(submission_daf, 'w')
+            stream.write(self.daf)
+            stream.close()
+
+
 if __name__ == "__main__":
     example_daf = """# Lab and general info
 grant             Hardison
 if __name__ == "__main__":
     example_daf = """# Lab and general info
 grant             Hardison
@@ -591,3 +631,5 @@ required         no
     name = "test_rep"
     mapper = DAFMapper(name, daf_file = example_daf_stream, model=model)
     dump_model(model)
     name = "test_rep"
     mapper = DAFMapper(name, daf_file = example_daf_stream, model=model)
     dump_model(model)
+
+
diff --git a/htsworkflow/submission/results.py b/htsworkflow/submission/results.py
new file mode 100644 (file)
index 0000000..daeb7d1
--- /dev/null
@@ -0,0 +1,94 @@
+"""Help collect and process results for submission
+"""
+import os
+import logging
+
+from collections import namedtuple
+
+LOGGER = logging.getLogger(__name__)
+
+class ResultMap(object):
+    """Store list of results
+    """
+    def __init__(self):
+        self.results_order = []
+        self.results = {}
+
+    def keys(self):
+        return self.results_order
+
+    def values(self):
+        return ( self.results[r] for r in self.results_order )
+
+    def items(self):
+        return ( (r, self.results[r]) for r in self.results_order )
+
+    def __getitem__(self, key):
+        return self.results[key]
+
+    def add_results_from_file(self, filename):
+        pathname = os.path.abspath(filename)
+        basepath, name = os.path.split(pathname)
+        results = read_result_list(filename)
+        for lib_id, lib_path in results:
+            if not os.path.isabs(lib_path):
+                lib_path = os.path.join(basepath, lib_path)
+            self.add_result(lib_id, lib_path)
+
+    def add_result(self, lib_id, lib_path):
+        self.results_order.append(lib_id)
+        self.results[lib_id] = lib_path
+
+    def make_tree_from(self, source_path, destpath = None):
+        """Create a tree using data files from source path.
+        """
+        print source_path, destpath
+        if destpath is None:
+            destpath = os.getcwd()
+
+        for lib_id in self.results_order:
+            lib_path = self.results[lib_id]
+            lib_destination = os.path.join(destpath, lib_path)
+            if not os.path.exists(lib_destination):
+                LOGGER.info("Making dir {0}".format(lib_destination))
+                os.mkdir(lib_destination)
+
+            source_rel_dir = os.path.join(source_path, lib_path)
+            source_lib_dir = os.path.abspath(source_rel_dir)
+
+            print "source_lib_dir", source_lib_dir
+            for filename in os.listdir(source_lib_dir):
+                source_pathname = os.path.join(source_lib_dir, filename)
+                target_pathname = os.path.join(lib_destination, filename)
+                if not os.path.exists(source_pathname):
+                    raise IOError(
+                        "{0} does not exist".format(source_pathname))
+                print target_pathname
+                if not os.path.exists(target_pathname):
+                    os.symlink(source_pathname, target_pathname)
+                    LOGGER.info(
+                        'LINK {0} to {1}'.format(source_pathname,
+                                                 target_pathname))
+
+def read_result_list(filename):
+    """
+    Read a file that maps library id to result directory.
+    Does not support spaces in filenames.
+
+    For example:
+      10000 result/foo/bar
+    """
+    stream = open(filename, 'r')
+    results = parse_result_list(stream)
+    stream.close()
+    return results
+
+
+def parse_result_list(stream):
+    results = []
+    for line in stream:
+        line = line.rstrip()
+        if not line.startswith('#') and len(line) > 0:
+            library_id, result_dir = line.split()
+            results.append((library_id, result_dir))
+    return results
diff --git a/htsworkflow/submission/test/__init__.py b/htsworkflow/submission/test/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
index 74893eb5facfde2a8e2154816e500f7278cbef14..27b9997e21d61eeefcf555a047f1a5ba1c2e8c4e 100644 (file)
@@ -8,6 +8,7 @@ import tempfile
 import unittest
 
 from htsworkflow.submission import condorfastq
 import unittest
 
 from htsworkflow.submission import condorfastq
+from htsworkflow.submission.results import ResultMap
 
 FCDIRS = [
     'C02F9ACXX',
 
 FCDIRS = [
     'C02F9ACXX',
@@ -164,6 +165,9 @@ class TestCondorFastq(unittest.TestCase):
         self.subdir = os.path.join(self.tempdir, self.subname)
         os.mkdir(self.subdir)
 
         self.subdir = os.path.join(self.tempdir, self.subname)
         os.mkdir(self.subdir)
 
+        self.result_map = ResultMap()
+        self.result_map.add_result('11154', self.subname)
+
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
@@ -174,8 +178,8 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subname)]
-        lib_db = extract.find_archive_sequence_files(result_map)
+
+        lib_db = extract.find_archive_sequence_files(self.result_map)
 
         self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
         lanes = [
 
         self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
         lanes = [
@@ -198,10 +202,9 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subname)]
-        lib_db = extract.find_archive_sequence_files(result_map)
+        lib_db = extract.find_archive_sequence_files(self.result_map)
 
 
-        needed_targets = extract.find_missing_targets(result_map,
+        needed_targets = extract.find_missing_targets(self.result_map,
                                                       lib_db)
         self.failUnlessEqual(len(needed_targets), 7)
         srf_30221 = needed_targets[
                                                       lib_db)
         self.failUnlessEqual(len(needed_targets), 7)
         srf_30221 = needed_targets[
@@ -233,8 +236,7 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subdir)]
-        commands = extract.build_condor_arguments(result_map)
+        commands = extract.build_condor_arguments(self.result_map)
 
         srf = commands['srf']
         qseq = commands['qseq']
 
         srf = commands['srf']
         qseq = commands['qseq']
@@ -245,25 +247,25 @@ class TestCondorFastq(unittest.TestCase):
         self.failUnlessEqual(len(split), 2)
 
         srf_data = {
         self.failUnlessEqual(len(split), 2)
 
         srf_data = {
-            os.path.join(self.subdir, '11154_30221AAXX_c33_l4.fastq'): {
+            os.path.join(self.subname, '11154_30221AAXX_c33_l4.fastq'): {
                 'mid': None,
                 'ispaired': False,
                 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                 'flowcell': u'30221AAXX',
                 'mid': None,
                 'ispaired': False,
                 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                 'flowcell': u'30221AAXX',
-                'target': os.path.join(self.subdir,
+                'target': os.path.join(self.subname,
                                        u'11154_30221AAXX_c33_l4.fastq'),
             },
                                        u'11154_30221AAXX_c33_l4.fastq'),
             },
-            os.path.join(self.subdir, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
+            os.path.join(self.subname, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
                 'mid': None,
                 'ispaired': True,
                 'flowcell': u'30DY0AAXX',
                 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                 'mid': 76,
                 'target':
                 'mid': None,
                 'ispaired': True,
                 'flowcell': u'30DY0AAXX',
                 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                 'mid': 76,
                 'target':
-                    os.path.join(self.subdir,
+                    os.path.join(self.subname,
                                  u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                 'target_right':
                                  u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                 'target_right':
-                    os.path.join(self.subdir,
+                    os.path.join(self.subname,
                                  u'11154_30DY0AAXX_c151_l8_r2.fastq'),
             }
         }
                                  u'11154_30DY0AAXX_c151_l8_r2.fastq'),
             }
         }
@@ -281,19 +283,19 @@ class TestCondorFastq(unittest.TestCase):
                 self.failUnlessEqual(args['mid'], expected['mid'])
 
         qseq_data = {
                 self.failUnlessEqual(args['mid'], expected['mid'])
 
         qseq_data = {
-            os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
+            os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
             },
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
             },
-            os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
+            os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
             },
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
             },
-            os.path.join(self.subdir, '11154_61MJTAAXX_c76_l6.fastq'): {
+            os.path.join(self.subname, '11154_61MJTAAXX_c76_l6.fastq'): {
                 'istar': True,
                 'ispaired': False,
                 'sources': [
                 'istar': True,
                 'ispaired': False,
                 'sources': [
@@ -340,8 +342,7 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subname)]
-        extract.create_scripts(result_map)
+        extract.create_scripts(self.result_map)
 
         self.failUnless(os.path.exists('srf.condor'))
         with open('srf.condor', 'r') as srf:
 
         self.failUnless(os.path.exists('srf.condor'))
         with open('srf.condor', 'r') as srf:
index b730144d227af3a1bf7fc6b73783ede6a7f3a1ff..334a71a9203e7ca5d621b634a62bfc9b5fd299a6 100644 (file)
@@ -5,7 +5,7 @@ import shutil
 import tempfile
 import unittest
 
 import tempfile
 import unittest
 
-from htsworkflow.submission import daf
+from htsworkflow.submission import daf, results
 from htsworkflow.util.rdfhelp import \
      dafTermOntology, \
      fromTypedNode, \
 from htsworkflow.util.rdfhelp import \
      dafTermOntology, \
      fromTypedNode, \
@@ -15,6 +15,7 @@ from htsworkflow.util.rdfhelp import \
      get_model, \
      get_serializer
 
      get_model, \
      get_serializer
 
+from htsworkflow.submission.test import test_results
 import RDF
 
 test_daf = """# Lab and general info
 import RDF
 
 test_daf = """# Lab and general info
@@ -155,7 +156,7 @@ def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
                                        ns)
 
     test_daf_stream = StringIO(test_daf)
                                        ns)
 
     test_daf_stream = StringIO(test_daf)
-    mapper = daf.DAFMapper(name, daf_file = test_daf_stream, model=model)
+    mapper = daf.UCSCSubmission(name, daf_file = test_daf_stream, model=model)
     return mapper
 
 def dump_model(model):
     return mapper
 
 def dump_model(model):
@@ -164,7 +165,14 @@ def dump_model(model):
     print turtle
 
 
     print turtle
 
 
-class TestDAFMapper(unittest.TestCase):
+class TestUCSCSubmission(unittest.TestCase):
+    def setUp(self):
+        test_results.generate_sample_results_tree(self)
+
+    def tearDown(self):
+        # clean up things created by test_results.generate_sample_results_tree
+        shutil.rmtree(self.tempdir)
+
     def test_create_mapper_add_pattern(self):
         name = 'testsub'
         mapper = load_daf_mapper(name)
     def test_create_mapper_add_pattern(self):
         name = 'testsub'
         mapper = load_daf_mapper(name)
@@ -287,6 +295,26 @@ thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
         self.failUnless('controlId' in variables)
 
 
         self.failUnless('controlId' in variables)
 
 
+    def test_link_daf(self):
+        name = 'testsub'
+        submission = load_daf_mapper(name, test_daf=test_daf)
+        result_map = results.ResultMap()
+        result_dir = os.path.join(self.sourcedir,
+                                  test_results.S1_NAME)
+        result_map.add_result('1000', result_dir)
+
+        submission.link_daf(result_map)
+
+        # make sure daf gets linked
+        created_daf = os.path.join(result_dir, name+'.daf')
+        self.failUnless(os.path.exists(created_daf))
+        stream = open(created_daf,'r')
+        daf_body = stream.read()
+        stream.close()
+
+        self.failUnlessEqual(test_daf, daf_body)
+
+
 @contextmanager
 def mktempdir(prefix='tmp'):
     d = tempfile.mkdtemp(prefix=prefix)
 @contextmanager
 def mktempdir(prefix='tmp'):
     d = tempfile.mkdtemp(prefix=prefix)
@@ -304,7 +332,7 @@ def mktempfile(suffix='', prefix='tmp', dir=None):
 
 def suite():
     suite = unittest.makeSuite(TestDAF, 'test')
 
 def suite():
     suite = unittest.makeSuite(TestDAF, 'test')
-    suite.addTest(unittest.makeSuite(TestDAFMapper, 'test'))
+    suite.addTest(unittest.makeSuite(TestUCSCSubmission, 'test'))
     return suite
 
 if __name__ == "__main__":
     return suite
 
 if __name__ == "__main__":
diff --git a/htsworkflow/submission/test/test_results.py b/htsworkflow/submission/test/test_results.py
new file mode 100644 (file)
index 0000000..8579c58
--- /dev/null
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+import copy
+import os
+from pprint import pprint
+import shutil
+import tempfile
+import unittest
+
+from htsworkflow.submission.results import ResultMap
+
+S1_NAME = '1000-sample'
+S2_NAME = '2000-sample'
+
+S1_FILES = [
+    os.path.join(S1_NAME, 'file1_l8_r1.fastq'),
+    os.path.join(S1_NAME, 'file1_l8_r2.fastq'),
+]
+
+S2_FILES = [
+    os.path.join(S2_NAME, 'file1.bam'),
+    os.path.join(S2_NAME, 'file1_l5.fastq'),
+]
+
def generate_sample_results_tree(obj):
    """Build a temporary source/results directory tree with sample files.

    Sets obj.tempdir, obj.sourcedir and obj.resultdir as side effects so
    TestCase instances can use this from setUp().  The caller is
    responsible for removing obj.tempdir (e.g. shutil.rmtree in tearDown).
    """
    obj.tempdir = tempfile.mkdtemp(prefix="results_test")
    obj.sourcedir = os.path.join(obj.tempdir, 'source')
    obj.resultdir = os.path.join(obj.tempdir, 'results')

    # These are already absolute paths rooted at tempdir, so create them
    # directly.  (The previous version re-joined them against tempdir,
    # which only worked because os.path.join discards its first argument
    # when the second one is absolute.)
    for d in [obj.sourcedir,
              os.path.join(obj.sourcedir, S1_NAME),
              os.path.join(obj.sourcedir, S2_NAME),
              obj.resultdir]:
        os.mkdir(d)

    # Each sample file's contents are its own relative path, which gives
    # the tests a cheap way to check a link points at the right file.
    for f in S1_FILES + S2_FILES:
        with open(os.path.join(obj.sourcedir, f), 'w') as stream:
            stream.write(f)
+
class TestResultMap(unittest.TestCase):
    """Exercise ResultMap's ordered-mapping behavior and tree linking."""

    def setUp(self):
        # creates self.tempdir / self.sourcedir / self.resultdir plus files
        generate_sample_results_tree(self)

    def tearDown(self):
        # everything lives under one temp root; a single rmtree cleans up
        shutil.rmtree(self.tempdir)


    def test_dict_like(self):
        """Make sure the result map works like an ordered dictionary
        """
        expected = [('1000', 'dir1000'),
                    ('2000', 'dir2000'),
                    ('1500', 'dir1500')]
        results = ResultMap()
        for key, value in expected:
            results.add_result(key, value)

        # insertion order must be preserved by keys/values/items
        self.failUnlessEqual(results.keys(),
                             [key for key, _ in expected])
        self.failUnlessEqual(list(results.values()),
                             [value for _, value in expected])
        self.failUnlessEqual(list(results.items()), expected)

        # subscript lookup works independently of insertion order
        for key, value in expected:
            self.failUnlessEqual(results[key], value)

    def test_make_from(self):
        """make_tree_from should mirror the source tree with symlinks."""
        results = ResultMap()
        results.add_result('1000', S1_NAME)
        results.add_result('2000', S2_NAME)

        results.make_tree_from(self.sourcedir, self.resultdir)

        # one real directory per sample under the result root
        for name in (S1_NAME, S2_NAME):
            self.failUnless(
                os.path.isdir(os.path.join(self.resultdir, name)))

        # every source file appears as a symlink in the result tree
        for relpath in S1_FILES + S2_FILES:
            self.failUnless(
                os.path.islink(os.path.join(self.resultdir, relpath)))
+
+
+
+
def suite():
    """Return the TestResultMap test methods as a unittest suite."""
    # return directly instead of binding a local that shadows this function
    return unittest.makeSuite(TestResultMap, 'test')

if __name__ == "__main__":
    unittest.main(defaultTest='suite')
+