From 5eba67d1e483b3b9abbb68ec3efdb1ef0e95bf46 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 21 Jan 2011 14:07:49 -0800 Subject: [PATCH] Add function to ucsc submission tool to make symlink tree. I needed a way to read data files from one tree and build archives in another. This also changed the condor scripts to tell tar to read the contents of the file being pointed at by the symlink. In addition, the patterns that match file names to UCSC views were slightly extended and loosened (as Georgi slightly changed his file names). --- extra/ucsc_encode_submission/ucsc_gather.py | 56 ++++++++++++++++----- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/extra/ucsc_encode_submission/ucsc_gather.py b/extra/ucsc_encode_submission/ucsc_gather.py index d160b21..4cea711 100755 --- a/extra/ucsc_encode_submission/ucsc_gather.py +++ b/extra/ucsc_encode_submission/ucsc_gather.py @@ -46,6 +46,9 @@ def main(cmdline=None): for a in args: library_result_map.extend(read_library_result_map(a)) + if opts.make_tree_from is not None: + make_tree_from(opts.make_tree_from, library_result_map) + if opts.daf is not None: link_daf(opts.daf, library_result_map) @@ -83,6 +86,9 @@ def make_parser(): parser = OptionParser() # commands + parser.add_option('--make-tree-from', + help="create directories & link data files", + default=None) parser.add_option('--fastq', help="generate scripts for making fastq files", default=False, action="store_true") @@ -113,6 +119,26 @@ def make_parser(): return parser +def make_tree_from(source_path, library_result_map): + """Create a tree using data files from source path. 
+ """ + for lib_id, lib_path in library_result_map: + if not os.path.exists(lib_path): + logging.info("Making dir {0}".format(lib_path)) + os.mkdir(lib_path) + source_lib_dir = os.path.join(source_path, lib_path) + if os.path.exists(source_lib_dir): + pass + for filename in os.listdir(source_lib_dir): + source_pathname = os.path.join(source_lib_dir, filename) + target_pathname = os.path.join(lib_path, filename) + if not os.path.exists(source_pathname): + raise IOError("{0} does not exist".format(source_pathname)) + if not os.path.exists(target_pathname): + os.symlink(source_pathname, target_pathname) + logging.info( + 'LINK {0} to {1}'.format(source_pathname, target_pathname)) + def build_fastqs(host, apidata, sequences_path, library_result_map, force=False ): """ @@ -423,12 +449,14 @@ def make_condor_archive_script(ininame, files): script = """Universe = vanilla Executable = /bin/tar -arguments = czvf ../%(archivename)s %(filelist)s +arguments = czvhf ../%(archivename)s %(filelist)s Error = compress.err.$(Process).log Output = compress.out.$(Process).log Log = /tmp/submission-compress-%(user)s.log initialdir = %(initialdir)s +environment="GZIP=-3" +request_memory = 20 queue """ @@ -596,8 +624,8 @@ class NameToViewMap(object): self.patterns = [ ('*.bai', None), - ('*.bam', self._guess_bam_view), ('*.splices.bam', 'Splices'), + ('*.bam', self._guess_bam_view), ('junctions.bed', 'Junctions'), ('*.jnct', 'Junctions'), ('*.plus.bigwig', 'PlusSignal'), @@ -607,21 +635,24 @@ class NameToViewMap(object): ('*.condor', None), ('*.daf', None), ('*.ddf', None), - ('cufflinks-0.9.0-genes.expr', 'GeneDeNovo'), - ('cufflinks-0.9.0-transcripts.expr', 'TranscriptDeNovo'), - ('cufflinks-0.9.0-transcripts.gtf', 'GeneModel'), - ('GENCODE-v3c-genes.expr', 'GeneGencV3c'), - ('GENCODE-v3c-transcripts.expr', 'TranscriptGencV3c'), - ('GENCODE-v4-genes.expr', 'GeneGencV4'), - ('GENCODE-v4-transcripts.expr', 'TranscriptGencV4'), - ('GENCODE-v4-transcript.expr', 'TranscriptGencV4'), + 
('*.?ufflinks-0.9.0?genes.expr', 'GeneDeNovo'), + ('*.?ufflinks-0.9.0?transcripts.expr', 'TranscriptDeNovo'), + ('*.?ufflinks-0.9.0?transcripts.gtf', 'GeneModel'), + ('*.GENCODE-v3c?genes.expr', 'GeneGencV3c'), + ('*.GENCODE-v3c?transcripts.expr', 'TranscriptGencV3c'), + ('*.GENCODE-v4?genes.expr', 'GeneGencV4'), + ('*.GENCODE-v4?transcripts.expr', 'TranscriptGencV4'), + ('*.GENCODE-v4?transcript.expr', 'TranscriptGencV4'), + ('*_1.75mers.fastq', 'FastqRd1'), + ('*_2.75mers.fastq', 'FastqRd2'), ('*_r1.fastq', 'FastqRd1'), ('*_r2.fastq', 'FastqRd2'), ('*.fastq', 'Fastq'), ('*.gtf', 'GeneModel'), ('*.ini', None), ('*.log', None), - ('*.stats.txt', 'InsLength'), + ('paired-end-distribution*', 'InsLength'), + ('*.stats.txt', 'InsLength'), ('*.srf', None), ('*.wig', None), ('*.zip', None), @@ -630,6 +661,7 @@ class NameToViewMap(object): self.views = { None: {"MapAlgorithm": "NA"}, "Paired": {"MapAlgorithm": ma}, + "Aligns": {"MapAlgorithm": ma}, "Single": {"MapAlgorithm": ma}, "Splices": {"MapAlgorithm": ma}, "Junctions": {"MapAlgorithm": ma}, @@ -695,7 +727,7 @@ class NameToViewMap(object): if is_paired: return "Paired" else: - return "Align" + return "Aligns" def _is_paired(self, lib_id, lib_info): -- 2.30.2