Move code out of ucsc_gather and into the htsworkflow tree.
authorDiane Trout <diane@caltech.edu>
Mon, 2 Apr 2012 21:55:21 +0000 (14:55 -0700)
committerDiane Trout <diane@caltech.edu>
Wed, 11 Apr 2012 22:21:30 +0000 (15:21 -0700)
I thought it'd be better to refactor ucsc_gather into more
targeted chunks so I could test the components better and
start sharing code with the SOFT / GEO submission tool I need
to write.

encode_submission/ucsc_gather.py
htsworkflow/submission/condorfastq.py
htsworkflow/submission/daf.py
htsworkflow/submission/results.py [new file with mode: 0644]
htsworkflow/submission/test/__init__.py [new file with mode: 0644]
htsworkflow/submission/test/test_condorfastq.py
htsworkflow/submission/test/test_daf.py
htsworkflow/submission/test/test_results.py [new file with mode: 0644]

index 8ad4f3ee297427c3d9c26252b4aa57718cd61bdb..1cdf0118f6f990943adec84ab90492be44f360fe 100644 (file)
@@ -31,9 +31,10 @@ from htsworkflow.util.rdfhelp import \
      sparql_query, \
      submissionOntology
 from htsworkflow.submission.daf import \
      sparql_query, \
      submissionOntology
 from htsworkflow.submission.daf import \
-     DAFMapper, \
+     UCSCSubmission, \
      MetadataLookupException, \
      get_submission_uri
      MetadataLookupException, \
      get_submission_uri
+from htsworkflow.submission.results import ResultMap
 from htsworkflow.submission.condorfastq import CondorFastqExtract
 
 logger = logging.getLogger('ucsc_gather')
 from htsworkflow.submission.condorfastq import CondorFastqExtract
 
 logger = logging.getLogger('ucsc_gather')
@@ -53,8 +54,9 @@ def main(cmdline=None):
     apidata = api.make_auth_from_opts(opts, parser)
 
     model = get_model(opts.model, opts.db_path)
     apidata = api.make_auth_from_opts(opts, parser)
 
     model = get_model(opts.model, opts.db_path)
+    mapper = None
     if opts.name:
     if opts.name:
-        mapper = DAFMapper(opts.name, opts.daf,  model)
+        mapper = UCSCSubmission(opts.name, opts.daf,  model)
         if opts.library_url is not None:
             mapper.library_url = opts.library_url
         submission_uri = get_submission_uri(opts.name)
         if opts.library_url is not None:
             mapper.library_url = opts.library_url
         submission_uri = get_submission_uri(opts.name)
@@ -68,31 +70,33 @@ def main(cmdline=None):
     if opts.make_ddf and opts.daf is None:
         parser.error("Please specify your daf when making ddf files")
 
     if opts.make_ddf and opts.daf is None:
         parser.error("Please specify your daf when making ddf files")
 
-    library_result_map = []
+    results = ResultMap()
     for a in args:
     for a in args:
-        library_result_map.extend(read_library_result_map(a))
+        results.add_results_from_file(a)
 
     if opts.make_tree_from is not None:
 
     if opts.make_tree_from is not None:
-        make_tree_from(opts.make_tree_from, library_result_map)
+        results.make_tree_from(opts.make_tree_from)
 
     if opts.link_daf:
 
     if opts.link_daf:
-        if opts.daf is None:
-            parser.error("Please specify daf filename with --daf")
-        link_daf(opts.daf, library_result_map)
+        if mapper is None:
+            parser.error("Specify a submission model")
+        if mapper.daf is None:
+            parser.error("Please load a daf first")
+        mapper.link_daf(results)
 
     if opts.fastq:
         extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
                                        force=opts.force)
 
     if opts.fastq:
         extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
                                        force=opts.force)
-        extractor.create_scripts(library_result_map)
+        extractor.create_scripts(results)
 
     if opts.scan_submission:
 
     if opts.scan_submission:
-        scan_submission_dirs(mapper, library_result_map)
+        mapper.scan_submission_dirs(results)
 
     if opts.make_ddf:
 
     if opts.make_ddf:
-        make_all_ddfs(mapper, library_result_map, opts.daf, force=opts.force)
+        make_all_ddfs(mapper, results, opts.daf, force=opts.force)
 
     if opts.zip_ddf:
 
     if opts.zip_ddf:
-        zip_ddfs(mapper, library_result_map, opts.daf)
+        zip_ddfs(mapper, results, opts.daf)
 
     if opts.sparql:
         sparql_query(model, opts.sparql)
 
     if opts.sparql:
         sparql_query(model, opts.sparql)
@@ -150,53 +154,6 @@ def make_parser():
 
     return parser
 
 
     return parser
 
-def make_tree_from(source_path, library_result_map):
-    """Create a tree using data files from source path.
-    """
-    for lib_id, lib_path in library_result_map:
-        if not os.path.exists(lib_path):
-            logger.info("Making dir {0}".format(lib_path))
-            os.mkdir(lib_path)
-        source_lib_dir = os.path.abspath(os.path.join(source_path, lib_path))
-        if os.path.exists(source_lib_dir):
-            pass
-        for filename in os.listdir(source_lib_dir):
-            source_pathname = os.path.join(source_lib_dir, filename)
-            target_pathname = os.path.join(lib_path, filename)
-            if not os.path.exists(source_pathname):
-                raise IOError("{0} does not exist".format(source_pathname))
-            if not os.path.exists(target_pathname):
-                os.symlink(source_pathname, target_pathname)
-                logger.info(
-                    'LINK {0} to {1}'.format(source_pathname, target_pathname))
-
-
-def link_daf(daf_path, library_result_map):
-    if not os.path.exists(daf_path):
-        raise RuntimeError("%s does not exist, how can I link to it?" % (daf_path,))
-
-    base_daf = os.path.basename(daf_path)
-
-    for lib_id, result_dir in library_result_map:
-        if not os.path.exists(result_dir):
-            raise RuntimeError("Couldn't find target directory %s" %(result_dir,))
-        submission_daf = os.path.join(result_dir, base_daf)
-        if not os.path.exists(submission_daf):
-            if not os.path.exists(daf_path):
-                raise RuntimeError("Couldn't find daf: %s" %(daf_path,))
-            os.link(daf_path, submission_daf)
-
-
-def scan_submission_dirs(view_map, library_result_map):
-    """Look through our submission directories and collect needed information
-    """
-    for lib_id, result_dir in library_result_map:
-        logger.info("Importing %s from %s" % (lib_id, result_dir))
-        try:
-            view_map.import_submission_dir(result_dir, lib_id)
-        except MetadataLookupException, e:
-            logger.error("Skipping %s: %s" % (lib_id, str(e)))
-
 
 def make_all_ddfs(view_map, library_result_map, daf_name, make_condor=True, force=False):
     dag_fragment = []
 
 def make_all_ddfs(view_map, library_result_map, daf_name, make_condor=True, force=False):
     dag_fragment = []
@@ -341,25 +298,6 @@ def zip_ddfs(view_map, library_result_map, daf_name):
         os.chdir(rootdir)
 
 
         os.chdir(rootdir)
 
 
-def read_library_result_map(filename):
-    """
-    Read a file that maps library id to result directory.
-    Does not support spaces in filenames.
-
-    For example:
-      10000 result/foo/bar
-    """
-    stream = open(filename,'r')
-
-    results = []
-    for line in stream:
-        line = line.rstrip()
-        if not line.startswith('#') and len(line) > 0 :
-            library_id, result_dir = line.split()
-            results.append((library_id, result_dir))
-    return results
-
-
 def make_condor_archive_script(name, files, outdir=None):
     script = """Universe = vanilla
 
 def make_condor_archive_script(name, files, outdir=None):
     script = """Universe = vanilla
 
@@ -448,12 +386,6 @@ def make_dag_fragment(ininame, archive_condor, upload_condor):
     return fragments
 
 
     return fragments
 
 
-def get_library_info(host, apidata, library_id):
-    url = api.library_url(host, library_id)
-    contents = api.retrieve_info(url, apidata)
-    return contents
-
-
 def make_base_name(pathname):
     base = os.path.basename(pathname)
     name, ext = os.path.splitext(base)
 def make_base_name(pathname):
     base = os.path.basename(pathname)
     name, ext = os.path.splitext(base)
index 70ad32ecf09486a395ebc8c0927368ccc9ea057f..8513f7b2da2b266803f12ec32acf650242fb165e 100644 (file)
@@ -36,18 +36,18 @@ class CondorFastqExtract(object):
         self.log_path = log_path
         self.force = force
 
         self.log_path = log_path
         self.force = force
 
-    def create_scripts(self, library_result_map ):
+    def create_scripts(self, result_map ):
         """
         Generate condor scripts to build any needed fastq files
 
         Args:
         """
         Generate condor scripts to build any needed fastq files
 
         Args:
-          library_result_map (list):  [(library_id, destination directory), ...]
+          result_map: htsworkflow.submission.results.ResultMap()
         """
         template_map = {'srf': 'srf.condor',
                         'qseq': 'qseq.condor',
                         'split_fastq': 'split_fastq.condor'}
 
         """
         template_map = {'srf': 'srf.condor',
                         'qseq': 'qseq.condor',
                         'split_fastq': 'split_fastq.condor'}
 
-        condor_entries = self.build_condor_arguments(library_result_map)
+        condor_entries = self.build_condor_arguments(result_map)
         for script_type in template_map.keys():
             template = loader.get_template(template_map[script_type])
             variables = {'python': sys.executable,
         for script_type in template_map.keys():
             template = loader.get_template(template_map[script_type])
             variables = {'python': sys.executable,
@@ -61,7 +61,7 @@ class CondorFastqExtract(object):
             with open(script_type + '.condor','w+') as outstream:
                 outstream.write(template.render(context))
 
             with open(script_type + '.condor','w+') as outstream:
                 outstream.write(template.render(context))
 
-    def build_condor_arguments(self, library_result_map):
+    def build_condor_arguments(self, result_map):
         condor_entries = {'srf': [],
                           'qseq': [],
                           'split_fastq': []}
         condor_entries = {'srf': [],
                           'qseq': [],
                           'split_fastq': []}
@@ -69,9 +69,9 @@ class CondorFastqExtract(object):
                             'qseq': self.condor_qseq_to_fastq,
                             'split_fastq': self.condor_desplit_fastq
                             }
                             'qseq': self.condor_qseq_to_fastq,
                             'split_fastq': self.condor_desplit_fastq
                             }
-        lib_db = self.find_archive_sequence_files(library_result_map)
 
 
-        needed_targets = self.find_missing_targets(library_result_map, lib_db)
+        lib_db = self.find_archive_sequence_files(result_map)
+        needed_targets = self.find_missing_targets(result_map, lib_db)
 
         for target_pathname, available_sources in needed_targets.items():
             LOGGER.debug(' target : %s' % (target_pathname,))
 
         for target_pathname, available_sources in needed_targets.items():
             LOGGER.debug(' target : %s' % (target_pathname,))
@@ -93,7 +93,7 @@ class CondorFastqExtract(object):
 
         return condor_entries
 
 
         return condor_entries
 
-    def find_archive_sequence_files(self,  library_result_map):
+    def find_archive_sequence_files(self,  result_map):
         """
         Find archived sequence files associated with our results.
         """
         """
         Find archived sequence files associated with our results.
         """
@@ -102,7 +102,7 @@ class CondorFastqExtract(object):
         lib_db = {}
         seq_dirs = set()
         candidate_lanes = {}
         lib_db = {}
         seq_dirs = set()
         candidate_lanes = {}
-        for lib_id, result_dir in library_result_map:
+        for lib_id in result_map.keys():
             lib_info = self.api.get_library(lib_id)
             lib_info['lanes'] = {}
             lib_db[lib_id] = lib_info
             lib_info = self.api.get_library(lib_id)
             lib_info['lanes'] = {}
             lib_db[lib_id] = lib_info
@@ -129,7 +129,7 @@ class CondorFastqExtract(object):
 
         return lib_db
 
 
         return lib_db
 
-    def find_missing_targets(self, library_result_map, lib_db):
+    def find_missing_targets(self, result_map, lib_db):
         """
         Check if the sequence file exists.
         This requires computing what the sequence name is and checking
         """
         Check if the sequence file exists.
         This requires computing what the sequence name is and checking
@@ -141,7 +141,8 @@ class CondorFastqExtract(object):
         fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
         # find what targets we're missing
         needed_targets = {}
         fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
         # find what targets we're missing
         needed_targets = {}
-        for lib_id, result_dir in library_result_map:
+        for lib_id in result_map.keys():
+            result_dir = result_map[lib_id]
             lib = lib_db[lib_id]
             lane_dict = make_lane_dict(lib_db, lib_id)
 
             lib = lib_db[lib_id]
             lane_dict = make_lane_dict(lib_db, lib_id)
 
@@ -188,7 +189,7 @@ class CondorFastqExtract(object):
             raise ValueError("srf to fastq can only handle one file")
 
         return {
             raise ValueError("srf to fastq can only handle one file")
 
         return {
-            'sources': [sources[0].path],
+            'sources': [os.path.abspath(sources[0].path)],
             'pyscript': srf2fastq.__file__,
             'flowcell': sources[0].flowcell,
             'ispaired': sources[0].paired,
             'pyscript': srf2fastq.__file__,
             'flowcell': sources[0].flowcell,
             'ispaired': sources[0].paired,
index 7534a8d9ef6082ffd901d80fa8ea87c6b43b55d4..96037b9647c93146a9887334c3ec1ebdf9266b0d 100644 (file)
@@ -237,8 +237,8 @@ def get_view_namespace(submission_uri):
     return viewNS
 
 
     return viewNS
 
 
-class DAFMapper(object):
-    """Convert filenames to views in the UCSC Daf
+class UCSCSubmission(object):
+    """Build a submission by examining the DAF for what we need to submit
     """
     def __init__(self, name, daf_file=None, model=None):
         """Construct a RDF backed model of a UCSC DAF
     """
     def __init__(self, name, daf_file=None, model=None):
         """Construct a RDF backed model of a UCSC DAF
@@ -267,15 +267,23 @@ class DAFMapper(object):
 
         if hasattr(daf_file, 'next'):
             # its some kind of stream
 
         if hasattr(daf_file, 'next'):
             # its some kind of stream
-            fromstream_into_model(self.model, self.submissionSet, daf_file)
+            self.daf = daf_file.read()
         else:
             # file
         else:
             # file
-            parse_into_model(self.model, self.submissionSet, daf_file)
+            stream = open(daf_file, 'r')
+            self.daf = stream.read()
+            stream.close()
+
+        fromstring_into_model(self.model, self.submissionSet, self.daf)
 
         self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
         self.submissionSetNS = RDF.NS(str(self.submissionSet) + '/')
         self.__view_map = None
 
 
         self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
         self.submissionSetNS = RDF.NS(str(self.submissionSet) + '/')
         self.__view_map = None
 
+    def _get_daf_name(self):
+        return self.name + '.daf'
+    daf_name = property(_get_daf_name,doc="construct name for DAF file")
+
     def add_pattern(self, view_name, filename_pattern):
         """Map a filename regular expression to a view name
         """
     def add_pattern(self, view_name, filename_pattern):
         """Map a filename regular expression to a view name
         """
@@ -285,6 +293,16 @@ class DAFMapper(object):
                           dafTermOntology['filename_re'],
                           obj))
 
                           dafTermOntology['filename_re'],
                           obj))
 
+    def scan_submission_dirs(self, result_map):
+        """Examine files in our result directory
+        """
+        for lib_id, result_dir in result_map.items():
+            logger.info("Importing %s from %s" % (lib_id, result_dir))
+            try:
+                self.import_submission_dir(result_dir, lib_id)
+            except MetadataLookupException, e:
+                logger.error("Skipping %s: %s" % (lib_id, str(e)))
+
     def import_submission_dir(self, submission_dir, library_id):
         """Import a submission directories and update our model as needed
         """
     def import_submission_dir(self, submission_dir, library_id):
         """Import a submission directories and update our model as needed
         """
@@ -562,6 +580,28 @@ class DAFMapper(object):
 
         return False
 
 
         return False
 
+
+    def link_daf(self, result_map):
+        if self.daf is None or len(self.daf) == 0:
+            raise RuntimeError(
+                "DAF data does not exist, how can I link to it?")
+
+        base_daf = self.daf_name
+
+        for result_dir in result_map.values():
+            if not os.path.exists(result_dir):
+                raise RuntimeError(
+                    "Couldn't find target directory %s" %(result_dir,))
+            submission_daf = os.path.join(result_dir, base_daf)
+            if os.path.exists(submission_daf):
+                previous_daf = open(submission_daf, 'r').read()
+                if self.daf != previous_daf:
+                    LOGGER.info("Old daf is different, overwriting it.")
+            stream = open(submission_daf, 'w')
+            stream.write(self.daf)
+            stream.close()
+
+
 if __name__ == "__main__":
     example_daf = """# Lab and general info
 grant             Hardison
 if __name__ == "__main__":
     example_daf = """# Lab and general info
 grant             Hardison
@@ -591,3 +631,5 @@ required         no
     name = "test_rep"
     mapper = DAFMapper(name, daf_file = example_daf_stream, model=model)
     dump_model(model)
     name = "test_rep"
     mapper = DAFMapper(name, daf_file = example_daf_stream, model=model)
     dump_model(model)
+
+
diff --git a/htsworkflow/submission/results.py b/htsworkflow/submission/results.py
new file mode 100644 (file)
index 0000000..daeb7d1
--- /dev/null
@@ -0,0 +1,94 @@
+"""Help collect and process results for submission
+"""
+import os
+import logging
+
+from collections import namedtuple
+
+LOGGER = logging.getLogger(__name__)
+
+class ResultMap(object):
+    """Store list of results
+    """
+    def __init__(self):
+        self.results_order = []
+        self.results = {}
+
+    def keys(self):
+        return self.results_order
+
+    def values(self):
+        return ( self.results[r] for r in self.results_order )
+
+    def items(self):
+        return ( (r, self.results[r]) for r in self.results_order )
+
+    def __getitem__(self, key):
+        return self.results[key]
+
+    def add_results_from_file(self, filename):
+        pathname = os.path.abspath(filename)
+        basepath, name = os.path.split(pathname)
+        results = read_result_list(filename)
+        for lib_id, lib_path in results:
+            if not os.path.isabs(lib_path):
+                lib_path = os.path.join(basepath, lib_path)
+            self.add_result(lib_id, lib_path)
+
+    def add_result(self, lib_id, lib_path):
+        self.results_order.append(lib_id)
+        self.results[lib_id] = lib_path
+
+    def make_tree_from(self, source_path, destpath = None):
+        """Create a tree using data files from source path.
+        """
+        print source_path, destpath
+        if destpath is None:
+            destpath = os.getcwd()
+
+        for lib_id in self.results_order:
+            lib_path = self.results[lib_id]
+            lib_destination = os.path.join(destpath, lib_path)
+            if not os.path.exists(lib_destination):
+                LOGGER.info("Making dir {0}".format(lib_destination))
+                os.mkdir(lib_destination)
+
+            source_rel_dir = os.path.join(source_path, lib_path)
+            source_lib_dir = os.path.abspath(source_rel_dir)
+
+            print "source_lib_dir", source_lib_dir
+            for filename in os.listdir(source_lib_dir):
+                source_pathname = os.path.join(source_lib_dir, filename)
+                target_pathname = os.path.join(lib_destination, filename)
+                if not os.path.exists(source_pathname):
+                    raise IOError(
+                        "{0} does not exist".format(source_pathname))
+                print target_pathname
+                if not os.path.exists(target_pathname):
+                    os.symlink(source_pathname, target_pathname)
+                    LOGGER.info(
+                        'LINK {0} to {1}'.format(source_pathname,
+                                                 target_pathname))
+
+def read_result_list(filename):
+    """
+    Read a file that maps library id to result directory.
+    Does not support spaces in filenames.
+
+    For example:
+      10000 result/foo/bar
+    """
+    stream = open(filename, 'r')
+    results = parse_result_list(stream)
+    stream.close()
+    return results
+
+
+def parse_result_list(stream):
+    results = []
+    for line in stream:
+        line = line.rstrip()
+        if not line.startswith('#') and len(line) > 0:
+            library_id, result_dir = line.split()
+            results.append((library_id, result_dir))
+    return results
diff --git a/htsworkflow/submission/test/__init__.py b/htsworkflow/submission/test/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
index 74893eb5facfde2a8e2154816e500f7278cbef14..27b9997e21d61eeefcf555a047f1a5ba1c2e8c4e 100644 (file)
@@ -8,6 +8,7 @@ import tempfile
 import unittest
 
 from htsworkflow.submission import condorfastq
 import unittest
 
 from htsworkflow.submission import condorfastq
+from htsworkflow.submission.results import ResultMap
 
 FCDIRS = [
     'C02F9ACXX',
 
 FCDIRS = [
     'C02F9ACXX',
@@ -164,6 +165,9 @@ class TestCondorFastq(unittest.TestCase):
         self.subdir = os.path.join(self.tempdir, self.subname)
         os.mkdir(self.subdir)
 
         self.subdir = os.path.join(self.tempdir, self.subname)
         os.mkdir(self.subdir)
 
+        self.result_map = ResultMap()
+        self.result_map.add_result('11154', self.subname)
+
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
@@ -174,8 +178,8 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subname)]
-        lib_db = extract.find_archive_sequence_files(result_map)
+
+        lib_db = extract.find_archive_sequence_files(self.result_map)
 
         self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
         lanes = [
 
         self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
         lanes = [
@@ -198,10 +202,9 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subname)]
-        lib_db = extract.find_archive_sequence_files(result_map)
+        lib_db = extract.find_archive_sequence_files(self.result_map)
 
 
-        needed_targets = extract.find_missing_targets(result_map,
+        needed_targets = extract.find_missing_targets(self.result_map,
                                                       lib_db)
         self.failUnlessEqual(len(needed_targets), 7)
         srf_30221 = needed_targets[
                                                       lib_db)
         self.failUnlessEqual(len(needed_targets), 7)
         srf_30221 = needed_targets[
@@ -233,8 +236,7 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subdir)]
-        commands = extract.build_condor_arguments(result_map)
+        commands = extract.build_condor_arguments(self.result_map)
 
         srf = commands['srf']
         qseq = commands['qseq']
 
         srf = commands['srf']
         qseq = commands['qseq']
@@ -245,25 +247,25 @@ class TestCondorFastq(unittest.TestCase):
         self.failUnlessEqual(len(split), 2)
 
         srf_data = {
         self.failUnlessEqual(len(split), 2)
 
         srf_data = {
-            os.path.join(self.subdir, '11154_30221AAXX_c33_l4.fastq'): {
+            os.path.join(self.subname, '11154_30221AAXX_c33_l4.fastq'): {
                 'mid': None,
                 'ispaired': False,
                 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                 'flowcell': u'30221AAXX',
                 'mid': None,
                 'ispaired': False,
                 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                 'flowcell': u'30221AAXX',
-                'target': os.path.join(self.subdir,
+                'target': os.path.join(self.subname,
                                        u'11154_30221AAXX_c33_l4.fastq'),
             },
                                        u'11154_30221AAXX_c33_l4.fastq'),
             },
-            os.path.join(self.subdir, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
+            os.path.join(self.subname, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
                 'mid': None,
                 'ispaired': True,
                 'flowcell': u'30DY0AAXX',
                 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                 'mid': 76,
                 'target':
                 'mid': None,
                 'ispaired': True,
                 'flowcell': u'30DY0AAXX',
                 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                 'mid': 76,
                 'target':
-                    os.path.join(self.subdir,
+                    os.path.join(self.subname,
                                  u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                 'target_right':
                                  u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                 'target_right':
-                    os.path.join(self.subdir,
+                    os.path.join(self.subname,
                                  u'11154_30DY0AAXX_c151_l8_r2.fastq'),
             }
         }
                                  u'11154_30DY0AAXX_c151_l8_r2.fastq'),
             }
         }
@@ -281,19 +283,19 @@ class TestCondorFastq(unittest.TestCase):
                 self.failUnlessEqual(args['mid'], expected['mid'])
 
         qseq_data = {
                 self.failUnlessEqual(args['mid'], expected['mid'])
 
         qseq_data = {
-            os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
+            os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
             },
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
             },
-            os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
+            os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
             },
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
             },
-            os.path.join(self.subdir, '11154_61MJTAAXX_c76_l6.fastq'): {
+            os.path.join(self.subname, '11154_61MJTAAXX_c76_l6.fastq'): {
                 'istar': True,
                 'ispaired': False,
                 'sources': [
                 'istar': True,
                 'ispaired': False,
                 'sources': [
@@ -340,8 +342,7 @@ class TestCondorFastq(unittest.TestCase):
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
                                                  self.tempdir,
                                                  self.logdir)
         extract.api = FakeApi()
-        result_map = [('11154', self.subname)]
-        extract.create_scripts(result_map)
+        extract.create_scripts(self.result_map)
 
         self.failUnless(os.path.exists('srf.condor'))
         with open('srf.condor', 'r') as srf:
 
         self.failUnless(os.path.exists('srf.condor'))
         with open('srf.condor', 'r') as srf:
index b730144d227af3a1bf7fc6b73783ede6a7f3a1ff..334a71a9203e7ca5d621b634a62bfc9b5fd299a6 100644 (file)
@@ -5,7 +5,7 @@ import shutil
 import tempfile
 import unittest
 
 import tempfile
 import unittest
 
-from htsworkflow.submission import daf
+from htsworkflow.submission import daf, results
 from htsworkflow.util.rdfhelp import \
      dafTermOntology, \
      fromTypedNode, \
 from htsworkflow.util.rdfhelp import \
      dafTermOntology, \
      fromTypedNode, \
@@ -15,6 +15,7 @@ from htsworkflow.util.rdfhelp import \
      get_model, \
      get_serializer
 
      get_model, \
      get_serializer
 
+from htsworkflow.submission.test import test_results
 import RDF
 
 test_daf = """# Lab and general info
 import RDF
 
 test_daf = """# Lab and general info
@@ -155,7 +156,7 @@ def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
                                        ns)
 
     test_daf_stream = StringIO(test_daf)
                                        ns)
 
     test_daf_stream = StringIO(test_daf)
-    mapper = daf.DAFMapper(name, daf_file = test_daf_stream, model=model)
+    mapper = daf.UCSCSubmission(name, daf_file = test_daf_stream, model=model)
     return mapper
 
 def dump_model(model):
     return mapper
 
 def dump_model(model):
@@ -164,7 +165,14 @@ def dump_model(model):
     print turtle
 
 
     print turtle
 
 
-class TestDAFMapper(unittest.TestCase):
+class TestUCSCSubmission(unittest.TestCase):
+    def setUp(self):
+        test_results.generate_sample_results_tree(self)
+
+    def tearDown(self):
+        # clean up things created by test_results.generate_sample_results_tree
+        shutil.rmtree(self.tempdir)
+
     def test_create_mapper_add_pattern(self):
         name = 'testsub'
         mapper = load_daf_mapper(name)
     def test_create_mapper_add_pattern(self):
         name = 'testsub'
         mapper = load_daf_mapper(name)
@@ -287,6 +295,26 @@ thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
         self.failUnless('controlId' in variables)
 
 
         self.failUnless('controlId' in variables)
 
 
+    def test_link_daf(self):
+        name = 'testsub'
+        submission = load_daf_mapper(name, test_daf=test_daf)
+        result_map = results.ResultMap()
+        result_dir = os.path.join(self.sourcedir,
+                                  test_results.S1_NAME)
+        result_map.add_result('1000', result_dir)
+
+        submission.link_daf(result_map)
+
+        # make sure daf gets linked
+        created_daf = os.path.join(result_dir, name+'.daf')
+        self.failUnless(os.path.exists(created_daf))
+        stream = open(created_daf,'r')
+        daf_body = stream.read()
+        stream.close()
+
+        self.failUnlessEqual(test_daf, daf_body)
+
+
 @contextmanager
 def mktempdir(prefix='tmp'):
     d = tempfile.mkdtemp(prefix=prefix)
 @contextmanager
 def mktempdir(prefix='tmp'):
     d = tempfile.mkdtemp(prefix=prefix)
@@ -304,7 +332,7 @@ def mktempfile(suffix='', prefix='tmp', dir=None):
 
 def suite():
     suite = unittest.makeSuite(TestDAF, 'test')
 
 def suite():
     suite = unittest.makeSuite(TestDAF, 'test')
-    suite.addTest(unittest.makeSuite(TestDAFMapper, 'test'))
+    suite.addTest(unittest.makeSuite(TestUCSCSubmission, 'test'))
     return suite
 
 if __name__ == "__main__":
     return suite
 
 if __name__ == "__main__":
diff --git a/htsworkflow/submission/test/test_results.py b/htsworkflow/submission/test/test_results.py
new file mode 100644 (file)
index 0000000..8579c58
--- /dev/null
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+import copy
+import os
+from pprint import pprint
+import shutil
+import tempfile
+import unittest
+
+from htsworkflow.submission.results import ResultMap
+
+S1_NAME = '1000-sample'
+S2_NAME = '2000-sample'
+
+S1_FILES = [
+    os.path.join(S1_NAME, 'file1_l8_r1.fastq'),
+    os.path.join(S1_NAME, 'file1_l8_r2.fastq'),
+]
+
+S2_FILES = [
+    os.path.join(S2_NAME, 'file1.bam'),
+    os.path.join(S2_NAME, 'file1_l5.fastq'),
+]
+
def generate_sample_results_tree(obj):
    """Build a temporary source/results directory tree with sample files.

    Sets obj.tempdir, obj.sourcedir and obj.resultdir as side effects so
    TestCase instances can use this from setUp().  The caller is
    responsible for removing obj.tempdir (e.g. shutil.rmtree in tearDown).
    """
    obj.tempdir = tempfile.mkdtemp(prefix="results_test")
    obj.sourcedir = os.path.join(obj.tempdir, 'source')
    obj.resultdir = os.path.join(obj.tempdir, 'results')

    # These are already absolute paths rooted at tempdir, so create them
    # directly.  (The previous version re-joined them against tempdir,
    # which only worked because os.path.join discards its first argument
    # when the second one is absolute.)
    for d in [obj.sourcedir,
              os.path.join(obj.sourcedir, S1_NAME),
              os.path.join(obj.sourcedir, S2_NAME),
              obj.resultdir]:
        os.mkdir(d)

    # Each sample file's contents are its own relative path, which gives
    # the tests a cheap way to check a link points at the right file.
    for f in S1_FILES + S2_FILES:
        with open(os.path.join(obj.sourcedir, f), 'w') as stream:
            stream.write(f)
+
class TestResultMap(unittest.TestCase):
    """Exercise ResultMap's ordered-mapping behavior and tree linking."""

    def setUp(self):
        # creates self.tempdir / self.sourcedir / self.resultdir plus files
        generate_sample_results_tree(self)

    def tearDown(self):
        # everything lives under one temp root; a single rmtree cleans up
        shutil.rmtree(self.tempdir)


    def test_dict_like(self):
        """Make sure the result map works like an ordered dictionary
        """
        expected = [('1000', 'dir1000'),
                    ('2000', 'dir2000'),
                    ('1500', 'dir1500')]
        results = ResultMap()
        for key, value in expected:
            results.add_result(key, value)

        # insertion order must be preserved by keys/values/items
        self.failUnlessEqual(results.keys(),
                             [key for key, _ in expected])
        self.failUnlessEqual(list(results.values()),
                             [value for _, value in expected])
        self.failUnlessEqual(list(results.items()), expected)

        # subscript lookup works independently of insertion order
        for key, value in expected:
            self.failUnlessEqual(results[key], value)

    def test_make_from(self):
        """make_tree_from should mirror the source tree with symlinks."""
        results = ResultMap()
        results.add_result('1000', S1_NAME)
        results.add_result('2000', S2_NAME)

        results.make_tree_from(self.sourcedir, self.resultdir)

        # one real directory per sample under the result root
        for name in (S1_NAME, S2_NAME):
            self.failUnless(
                os.path.isdir(os.path.join(self.resultdir, name)))

        # every source file appears as a symlink in the result tree
        for relpath in S1_FILES + S2_FILES:
            self.failUnless(
                os.path.islink(os.path.join(self.resultdir, relpath)))
+
+
+
+
def suite():
    """Return the TestResultMap test methods as a unittest suite."""
    # return directly instead of binding a local that shadows this function
    return unittest.makeSuite(TestResultMap, 'test')

if __name__ == "__main__":
    unittest.main(defaultTest='suite')
+