extra/ucsc_encode_submission/ucsc_gather.py

   1 #!/usr/bin/env python
   2 from ConfigParser import SafeConfigParser
   3 from glob import glob
   4 import json
   5 import logging
   6 from optparse import OptionParser
   7 import os
   8 from pprint import pprint, pformat
   9 import shlex
  10 from StringIO import StringIO
  11 import time
  12 import sys
  13 import types
  14 import urllib
  15 import urllib2
  16 import urlparse
  17
  18 from htsworkflow.util import api
  19 from htsworkflow.pipelines.sequences import \
  20     create_sequence_table, \
  21     scan_for_sequences
  22
  23 def main(cmdline=None):
  24     parser = make_parser()
  25     opts, args = parser.parse_args(cmdline)
  26
  27     if opts.debug:
  28         logging.basicConfig(level = logging.DEBUG )
  29     elif opts.verbose:
  30         logging.basicConfig(level = logging.INFO )
  31     else:
  32         logging.basicConfig(level = logging.WARNING )
  33
  34     apidata = {'apiid': opts.apiid, 'apikey': opts.apikey }
  35
  36     if opts.host is None or opts.apiid is None or opts.apikey is None:
  37         parser.error("Please specify host url, apiid, apikey")
  38
  39     if len(args) == 0:
  40         parser.error("I need at least one library submission-dir input file")
  41
  42     library_result_map = []
  43     for a in args:
  44         library_result_map.extend(read_library_result_map(a))
  45
  46     if opts.daf is not None:
  47         link_daf(opts.daf, library_result_map)
  48
  49     if opts.fastq:
  50         build_fastqs(opts.host,
  51                      apidata,
  52                      opts.sequence,
  53                      library_result_map,
  54                      force=opts.force)
  55
  56     if opts.ini:
  57         make_submission_ini(opts.host, apidata, library_result_map)
  58
  59     if opts.makeddf:
  60         make_all_ddfs(library_result_map, opts.daf)
  61
  62
  63 def make_parser():
  64     # Load defaults from the config files
  65     config = SafeConfigParser()
  66     config.read([os.path.expanduser('~/.htsworkflow.ini'), '/etc/htsworkflow.ini'])
  67
  68     sequence_archive = None
  69     apiid = None
  70     apikey = None
  71     apihost = None
  72     SECTION = 'sequence_archive'
  73     if config.has_section(SECTION):
  74         sequence_archive = config.get(SECTION, 'sequence_archive',sequence_archive)
  75         sequence_archive = os.path.expanduser(sequence_archive)
  76         apiid = config.get(SECTION, 'apiid', apiid)
  77         apikey = config.get(SECTION, 'apikey', apikey)
  78         apihost = config.get(SECTION, 'host', apihost)
  79
  80     parser = OptionParser()
  81
  82     # commands
  83     parser.add_option('--fastq', help="generate scripts for making fastq files",
  84                       default=False, action="store_true")
  85
  86     parser.add_option('--ini', help="generate submission ini file", default=False,
  87                       action="store_true")
  88
  89     parser.add_option('--makeddf', help='make the ddfs', default=False,
  90                       action="store_true")
  91
  92     parser.add_option('--daf', default=None, help='specify daf name')
  93     parser.add_option('--force', default=False, action="store_true",
  94                       help="Force regenerating fastqs")
  95
  96     # configuration options
  97     parser.add_option('--apiid', default=apiid, help="Specify API ID")
  98     parser.add_option('--apikey', default=apikey, help="Specify API KEY")
  99     parser.add_option('--host',  default=apihost,
 100                       help="specify HTSWorkflow host",)
 101     parser.add_option('--sequence', default=sequence_archive,
 102                       help="sequence repository")
 103
 104     # debugging
 105     parser.add_option('--verbose', default=False, action="store_true",
 106                       help='verbose logging')
 107     parser.add_option('--debug', default=False, action="store_true",
 108                       help='debug logging')
 109
 110     return parser
 111
 112
 113 def build_fastqs(host, apidata, sequences_path, library_result_map,
 114                  force=False ):
 115     """
 116     Generate condor scripts to build any needed fastq files
 117
 118     Args:
 119       host (str): root of the htsworkflow api server
 120       apidata (dict): id & key to post to the server
 121       sequences_path (str): root of the directory tree to scan for files
 122       library_result_map (list):  [(library_id, destination directory), ...]
 123     """
 124     qseq_condor_header = """
 125 Universe=vanilla
 126 executable=/woldlab/rattus/lvol0/mus/home/diane/proj/solexa/gaworkflow/scripts/qseq2fastq
 127 error=log/qseq2fastq.err.$(process).log
 128 output=log/qseq2fastq.out.$(process).log
 129 log=log/qseq2fastq.log
 130
 131 """
 132     qseq_condor_entries = []
 133     srf_condor_header = """
 134 Universe=vanilla
 135 executable=/woldlab/rattus/lvol0/mus/home/diane/proj/solexa/gaworkflow/scripts/srf2fastq
 136 output=log/srf_pair_fastq.out.$(process).log
 137 error=log/srf_pair_fastq.err.$(process).log
 138 log=log/srf_pair_fastq.log
 139 environment="PYTHONPATH=/home/diane/lib/python2.6/site-packages:/home/diane/proj/solexa/gaworkflow PATH=/woldlab/rattus/lvol0/mus/home/diane/bin:/usr/bin:/bin"
 140
 141 """
 142     srf_condor_entries = []
 143     lib_db = find_archive_sequence_files(host,
 144                                          apidata,
 145                                          sequences_path,
 146                                          library_result_map)
 147
 148     needed_targets = find_missing_targets(library_result_map, lib_db, force)
 149
 150     for target_pathname, available_sources in needed_targets.items():
 151         logging.debug(' target : %s' % (target_pathname,))
 152         logging.debug(' candidate sources: %s' % (available_sources,))
 153         if available_sources.has_key('qseq'):
 154             source = available_sources['qseq']
 155             qseq_condor_entries.append(
 156                 condor_qseq_to_fastq(source.path,
 157                                      target_pathname,
 158                                      source.flowcell,
 159                                      force=force)
 160             )
 161         elif available_sources.has_key('srf'):
 162             source = available_sources['srf']
 163             mid = getattr(source, 'mid_point', None)
 164             srf_condor_entries.append(
 165                 condor_srf_to_fastq(source.path,
 166                                     target_pathname,
 167                                     source.paired,
 168                                     source.flowcell,
 169                                     mid,
 170                                     force=force)
 171             )
 172         else:
 173             print " need file", target_pathname
 174
 175     if len(srf_condor_entries) > 0:
 176         make_submit_script('srf.fastq.condor',
 177                            srf_condor_header,
 178                            srf_condor_entries)
 179
 180     if len(qseq_condor_entries) > 0:
 181         make_submit_script('qseq.fastq.condor',
 182                            qseq_condor_header,
 183                            qseq_condor_entries)
 184
 185
 186 def find_missing_targets(library_result_map, lib_db, force=False):
 187     """
 188     Check if the sequence file exists.
 189     This requires computing what the sequence name is and checking
 190     to see if it can be found in the sequence location.
 191
 192     Adds seq.paired flag to sequences listed in lib_db[*]['lanes']
 193     """
 194     fastq_paired_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s_r%(read)s.fastq'
 195     fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
 196     # find what targets we're missing
 197     needed_targets = {}
 198     for lib_id, result_dir in library_result_map:
 199         lib = lib_db[lib_id]
 200         lane_dict = make_lane_dict(lib_db, lib_id)
 201
 202         for lane_key, sequences in lib['lanes'].items():
 203             for seq in sequences:
 204                 seq.paired = lane_dict[seq.flowcell]['paired_end']
 205                 lane_status = lane_dict[seq.flowcell]['status']
 206
 207                 if seq.paired and seq.read is None:
 208                     seq.read = 1
 209                 filename_attributes = {
 210                     'flowcell': seq.flowcell,
 211                     'lib_id': lib_id,
 212                     'lane': seq.lane,
 213                     'read': seq.read,
 214                     'cycle': seq.cycle
 215                     }
 216                 # skip bad runs
 217                 if lane_status == 'Failed':
 218                     continue
 219                 if seq.flowcell == '30DY0AAXX':
 220                     # 30DY0 only ran for 151 bases instead of 152
 221                     # it is actually 76 1st read, 75 2nd read
 222                     seq.mid_point = 76
 223
 224                 # end filters
 225                 if seq.paired:
 226                     target_name = fastq_paired_template % filename_attributes
 227                 else:
 228                     target_name = fastq_single_template % filename_attributes
 229
 230                 target_pathname = os.path.join(result_dir, target_name)
 231                 if force or not os.path.exists(target_pathname):
 232                     t = needed_targets.setdefault(target_pathname, {})
 233                     t[seq.filetype] = seq
 234
 235     return needed_targets
 236
 237
 238 def link_daf(daf_path, library_result_map):
 239     if not os.path.exists(daf_path):
 240         raise RuntimeError("%s does not exist, how can I link to it?" % (daf_path,))
 241
 242     base_daf = os.path.basename(daf_path)
 243
 244     for lib_id, result_dir in library_result_map:
 245         submission_daf = os.path.join(result_dir, base_daf)
 246         if not os.path.exists(submission_daf):
 247             os.link(daf_path, submission_daf)
 248
 249
 250 def make_submission_ini(host, apidata, library_result_map, paired=True):
 251     # ma is "map algorithm"
 252     ma = 'TH1014'
 253
 254     if paired:
 255         aligns = "Paired"
 256     else:
 257         aligns = "Aligns"
 258
 259     attributes = {
 260       # bam index
 261      '.bai':                   {'view': None, 'MapAlgorithm': 'NA'},
 262      '.bam':                   {'view': aligns, 'MapAlgorithm': ma},
 263      '.splices.bam':           {'view': 'Splices', 'MapAlgorithm': ma},
 264      '.jnct':                  {'view': 'Junctions', 'MapAlgorithm': ma},
 265      '.plus.bigwig':           {'view': 'PlusSignal', 'MapAlgorithm': ma},
 266      '.minus.bigwig':          {'view': 'MinusSignal', 'MapAlgorithm': ma},
 267      '.bigwig':                {'view': 'Signal', 'MapAlgorithm': ma},
 268      '.tar.bz2':               {'view': None},
 269      '.condor':                {'view': None},
 270      '.daf':                   {'view': None},
 271      '.ddf':                   {'view': None},
 272      'denovo.genes.expr':       {'view': 'GeneDeNovo', 'MapAlgorithm': ma},
 273      'denovo.transcripts.expr': {'view': 'TranscriptDeNovo', 'MapAlgorithm': ma},
 274      'novel.genes.expr':       {'view': 'GeneDeNovo', 'MapAlgorithm': ma},
 275      'novel.transcripts.expr': {'view': 'TranscriptDeNovo', 'MapAlgorithm': ma},
 276      '.genes.expr':            {'view': 'GeneFPKM', 'MapAlgorithm': ma},
 277      '.transcripts.expr':      {'view': 'TranscriptFPKM', 'MapAlgorithm': ma},
 278      '.transcript.expr':       {'view': 'TranscriptFPKM', 'MapAlgorithm': ma},
 279      '.fastq':                 {'view': 'Fastq', 'MapAlgorithm': 'NA' },
 280      '_r1.fastq':              {'view': 'FastqRd1', 'MapAlgorithm': 'NA'},
 281      '_r2.fastq':              {'view': 'FastqRd2', 'MapAlgorithm': 'NA'},
 282      '.gtf':                   {'view': 'GeneModel', 'MapAlgorithm': ma},
 283      '.ini':                   {'view': None},
 284      '.log':                   {'view': None},
 285      '.stats.txt':             {'view': 'InsLength', 'MapAlgorithm': ma},
 286      '.srf':                   {'view': None},
 287      '.wig':                   {'view': None},
 288      '.zip':                   {'view': None},
 289     }
 290
 291     candidate_fastq_src = {}
 292
 293     for lib_id, result_dir in library_result_map:
 294         order_by = ['order_by=files', 'view', 'replicate', 'cell',
 295                     'readType', 'mapAlgorithm', 'insertLength' ]
 296         inifile =  ['[config]']
 297         inifile += [" ".join(order_by)]
 298         inifile += ['']
 299         line_counter = 1
 300         lib_info = get_library_info(host, apidata, lib_id)
 301         result_ini = os.path.join(result_dir, result_dir+'.ini')
 302
 303         if lib_info['cell_line'].lower() == 'unknown':
 304             logging.warn("Library %s missing cell_line" % (lib_id,))
 305
 306         standard_attributes = {'cell': lib_info['cell_line'],
 307                                'replicate': lib_info['replicate'],
 308                                }
 309         if paired:
 310             if lib_info['insert_size'] is None:
 311                 errmsg = "Library %s is missing insert_size, assuming 200"
 312                 logging.warn(errmsg % (lib_id,))
 313                 insert_size = 200
 314             else:
 315                 insert_size = lib_info['insert_size']
 316             standard_attributes['insertLength'] = insert_size
 317             standard_attributes['readType'] = '2x75'
 318         else:
 319             standard_attributes['insertLength'] = 'ilNA'
 320             standard_attributes['readType'] = '1x75D'
 321
 322         # write other lines
 323         submission_files = os.listdir(result_dir)
 324         fastqs = {}
 325         for f in submission_files:
 326             best_ext = find_best_extension(attributes, f)
 327
 328             if best_ext is not None:
 329                if attributes[best_ext]['view'] is None:
 330
 331                    continue
 332                elif best_ext.endswith('fastq'):
 333                    fastqs.setdefault(best_ext, set()).add(f)
 334                else:
 335                    inifile.extend(
 336                        make_submission_section(line_counter,
 337                                                [f],
 338                                                standard_attributes,
 339                                                attributes[best_ext]
 340                                                )
 341                        )
 342                    inifile += ['']
 343                    line_counter += 1
 344             else:
 345                 raise ValueError("Unrecognized file: %s" % (f,))
 346
 347         # add in fastqs on a single line.
 348         for extension, fastq_set in fastqs.items():
 349             inifile.extend(
 350                 make_submission_section(line_counter,
 351                                         fastq_set,
 352                                         standard_attributes,
 353                                         attributes[extension])
 354             )
 355             inifile += ['']
 356             line_counter += 1
 357
 358         f = open(result_ini,'w')
 359         f.write(os.linesep.join(inifile))
 360
 361
 362 def make_lane_dict(lib_db, lib_id):
 363     """
 364     Convert the lane_set in a lib_db to a dictionary
 365     indexed by flowcell ID
 366     """
 367     result = []
 368     for lane in lib_db[lib_id]['lane_set']:
 369         result.append((lane['flowcell'], lane))
 370     return dict(result)
 371
 372
 373 def make_all_ddfs(library_result_map, daf_name, make_condor=True):
 374     dag_fragment = []
 375     for lib_id, result_dir in library_result_map:
 376         ininame = result_dir+'.ini'
 377         inipathname = os.path.join(result_dir, ininame)
 378         if os.path.exists(inipathname):
 379             dag_fragment.extend(
 380                 make_ddf(ininame, daf_name, True, make_condor, result_dir)
 381             )
 382
 383     if make_condor and len(dag_fragment) > 0:
 384         dag_filename = 'submission.dagman'
 385         if os.path.exists(dag_filename):
 386             logging.warn("%s exists, please delete" % (dag_filename,))
 387         else:
 388             f = open(dag_filename,'w')
 389             f.write( os.linesep.join(dag_fragment))
 390             f.write( os.linesep )
 391             f.close()
 392
 393
 394 def make_ddf(ininame,  daf_name, guess_ddf=False, make_condor=False, outdir=None):
 395     """
 396     Make ddf files, and bonus condor file
 397     """
 398     dag_fragments = []
 399     curdir = os.getcwd()
 400     if outdir is not None:
 401         os.chdir(outdir)
 402     output = sys.stdout
 403     ddf_name = None
 404     if guess_ddf:
 405         ddf_name = make_ddf_name(ininame)
 406         print ddf_name
 407         output = open(ddf_name,'w')
 408
 409     file_list = read_ddf_ini(ininame, output)
 410
 411     file_list.append(daf_name)
 412     if ddf_name is not None:
 413         file_list.append(ddf_name)
 414
 415     if make_condor:
 416         archive_condor = make_condor_archive_script(ininame, file_list)
 417         upload_condor = make_condor_upload_script(ininame)
 418
 419         dag_fragments.extend(
 420             make_dag_fragment(ininame, archive_condor, upload_condor)
 421         )
 422
 423     os.chdir(curdir)
 424
 425     return dag_fragments
 426
 427
 428 def read_ddf_ini(filename, output=sys.stdout):
 429     """
 430     Read a ini file and dump out a tab delmited text file
 431     """
 432     file_list = []
 433     config = SafeConfigParser()
 434     config.read(filename)
 435
 436     order_by = shlex.split(config.get("config", "order_by"))
 437
 438     output.write("\t".join(order_by))
 439     output.write(os.linesep)
 440     sections = config.sections()
 441     sections.sort()
 442     for section in sections:
 443         if section == "config":
 444             # skip the config block
 445             continue
 446         values = []
 447         for key in order_by:
 448             v = config.get(section, key)
 449             values.append(v)
 450             if key == 'files':
 451                 file_list.extend(parse_filelist(v))
 452
 453         output.write("\t".join(values))
 454         output.write(os.linesep)
 455     return file_list
 456
 457
 458 def read_library_result_map(filename):
 459     """
 460     Read a file that maps library id to result directory.
 461     Does not support spaces in filenames.
 462
 463     For example:
 464       10000 result/foo/bar
 465     """
 466     stream = open(filename,'r')
 467
 468     results = []
 469     for line in stream:
 470         line = line.rstrip()
 471         if not line.startswith('#') and len(line) > 0 :
 472             library_id, result_dir = line.split()
 473             results.append((library_id, result_dir))
 474     return results
 475
 476
 477 def make_condor_archive_script(ininame, files):
 478     script = """Universe = vanilla
 479
 480 Executable = /bin/tar
 481 arguments = czvf ../%(archivename)s %(filelist)s
 482
 483 Error = compress.err.$(Process).log
 484 Output = compress.out.$(Process).log
 485 Log = /tmp/submission-compress.log
 486 initialdir = %(initialdir)s
 487
 488 queue
 489 """
 490     for f in files:
 491         if not os.path.exists(f):
 492             raise RuntimeError("Missing %s" % (f,))
 493
 494     context = {'archivename': make_submission_name(ininame),
 495                'filelist': " ".join(files),
 496                'initialdir': os.getcwd()}
 497
 498     condor_script = make_condor_name(ininame, 'archive')
 499     condor_stream = open(condor_script,'w')
 500     condor_stream.write(script % context)
 501     condor_stream.close()
 502     return condor_script
 503
 504
 505 def make_condor_upload_script(ininame):
 506     script = """Universe = vanilla
 507
 508 Executable = /usr/bin/lftp
 509 arguments = -c put ../%(archivename)s -o ftp://detrout@encodeftp.cse.ucsc.edu/
 510
 511 Error = upload.err.$(Process).log
 512 Output = upload.out.$(Process).log
 513 Log = /tmp/submission-upload.log
 514 initialdir = %(initialdir)s
 515
 516 queue
 517 """
 518     context = {'archivename': make_submission_name(ininame),
 519                'initialdir': os.getcwd()}
 520
 521     condor_script = make_condor_name(ininame, 'upload')
 522     condor_stream = open(condor_script,'w')
 523     condor_stream.write(script % context)
 524     condor_stream.close()
 525     return condor_script
 526
 527
 528 def make_dag_fragment(ininame, archive_condor, upload_condor):
 529     """
 530     Make the couple of fragments compress and then upload the data.
 531     """
 532     cur_dir = os.getcwd()
 533     archive_condor = os.path.join(cur_dir, archive_condor)
 534     upload_condor = os.path.join(cur_dir, upload_condor)
 535     job_basename = make_base_name(ininame)
 536
 537     fragments = []
 538     fragments.append('JOB %s_archive %s' % (job_basename, archive_condor))
 539     fragments.append('JOB %s_upload %s' % (job_basename,  upload_condor))
 540     fragments.append('PARENT %s_archive CHILD %s_upload' % (job_basename, job_basename))
 541
 542     return fragments
 543
 544
 545 def get_library_info(host, apidata, library_id):
 546     url = api.library_url(host, library_id)
 547     contents = api.retrieve_info(url, apidata)
 548     return contents
 549
 550
 551 def condor_srf_to_fastq(srf_file, target_pathname, paired, flowcell=None,
 552                         mid=None, force=False):
 553     args = [ srf_file, ]
 554     if paired:
 555         args.extend(['--left', target_pathname])
 556         # this is ugly. I did it because I was pregenerating the target
 557         # names before I tried to figure out what sources could generate
 558         # those targets, and everything up to this point had been
 559         # one-to-one. So I couldn't figure out how to pair the
 560         # target names.
 561         # With this at least the command will run correctly.
 562         # however if we rename the default targets, this'll break
 563         # also I think it'll generate it twice.
 564         args.extend(['--right',
 565                      target_pathname.replace('_r1.fastq', '_r2.fastq')])
 566     else:
 567         args.extend(['--single', target_pathname ])
 568     if flowcell is not None:
 569         args.extend(['--flowcell', flowcell])
 570
 571     if mid is not None:
 572         args.extend(['-m', str(mid)])
 573
 574     if force:
 575         args.extend(['--force'])
 576
 577     script = """
 578 arguments="%s"
 579 queue
 580 """ % (" ".join(args),)
 581
 582     return  script
 583
 584
 585 def condor_qseq_to_fastq(qseq_file, target_pathname, flowcell=None, force=False):
 586     args = ['-i', qseq_file, '-o', target_pathname ]
 587     if flowcell is not None:
 588         args.extend(['-f', flowcell])
 589     script = """
 590 arguments="%s"
 591 queue
 592 """ % (" ".join(args))
 593
 594     return script
 595
 596 def find_archive_sequence_files(host, apidata, sequences_path,
 597                                 library_result_map):
 598     """
 599     Find all the archive sequence files possibly associated with our results.
 600
 601     """
 602     logging.debug("Searching for sequence files in: %s" %(sequences_path,))
 603
 604     lib_db = {}
 605     seq_dirs = set()
 606     #seq_dirs = set(os.path.join(sequences_path, 'srfs'))
 607     candidate_lanes = {}
 608     for lib_id, result_dir in library_result_map:
 609         lib_info = get_library_info(host, apidata, lib_id)
 610         lib_db[lib_id] = lib_info
 611
 612         for lane in lib_info['lane_set']:
 613             lane_key = (lane['flowcell'], lane['lane_number'])
 614             candidate_lanes[lane_key] = lib_id
 615             seq_dirs.add(os.path.join(sequences_path,
 616                                          'flowcells',
 617                                          lane['flowcell']))
 618     logging.debug("Seq_dirs = %s" %(unicode(seq_dirs)))
 619     candidate_seq_list = scan_for_sequences(seq_dirs)
 620
 621     # at this point we have too many sequences as scan_for_sequences
 622     # returns all the sequences in a flowcell directory
 623     # so lets filter out the extras
 624
 625     for seq in candidate_seq_list:
 626         lane_key = (seq.flowcell, seq.lane)
 627         lib_id = candidate_lanes.get(lane_key, None)
 628         if lib_id is not None:
 629             lib_info = lib_db[lib_id]
 630             lanes = lib_info.setdefault('lanes', {})
 631             lanes.setdefault(lane_key, set()).add(seq)
 632
 633     return lib_db
 634
 635
 636 def find_best_extension(extension_map, filename):
 637     """
 638     Search through extension_map looking for the best extension
 639     The 'best' is the longest match
 640
 641     :Args:
 642       extension_map (dict): '.ext' -> { 'view': 'name' or None }
 643       filename (str): the filename whose extention we are about to examine
 644     """
 645     best_ext = None
 646     path, last_ext = os.path.splitext(filename)
 647
 648     for ext in extension_map.keys():
 649         if filename.endswith(ext):
 650             if best_ext is None:
 651                 best_ext = ext
 652             elif len(ext) > len(best_ext):
 653                 best_ext = ext
 654     return best_ext
 655
 656
 657 def make_submission_section(line_counter, files, standard_attributes, file_attributes):
 658     """
 659     Create a section in the submission ini file
 660     """
 661     inifile = [ '[line%s]' % (line_counter,) ]
 662     inifile += ["files=%s" % (",".join(files))]
 663     cur_attributes = {}
 664     cur_attributes.update(standard_attributes)
 665     cur_attributes.update(file_attributes)
 666
 667     for k,v in cur_attributes.items():
 668         inifile += ["%s=%s" % (k,v)]
 669     return inifile
 670
 671
 672 def make_base_name(pathname):
 673     base = os.path.basename(pathname)
 674     name, ext = os.path.splitext(base)
 675     return name
 676
 677
 678 def make_submission_name(ininame):
 679     name = make_base_name(ininame)
 680     return name + '.tgz'
 681
 682
 683 def make_ddf_name(pathname):
 684     name = make_base_name(pathname)
 685     return name + '.ddf'
 686
 687
 688 def make_condor_name(pathname, run_type=None):
 689     name = make_base_name(pathname)
 690     elements = [name]
 691     if run_type is not None:
 692         elements.append(run_type)
 693     elements.append('condor')
 694     return ".".join(elements)
 695
 696
 697 def make_submit_script(target, header, body_list):
 698     """
 699     write out a text file
 700
 701     this was intended for condor submit scripts
 702
 703     Args:
 704       target (str or stream):
 705         if target is a string, we will open and close the file
 706         if target is a stream, the caller is responsible.
 707
 708       header (str);
 709         header to write at the beginning of the file
 710       body_list (list of strs):
 711         a list of blocks to add to the file.
 712     """
 713     if type(target) in types.StringTypes:
 714         f = open(target,'w')
 715     else:
 716         f = target
 717     f.write(header)
 718     for entry in body_list:
 719         f.write(entry)
 720     if type(target) in types.StringTypes:
 721         f.close()
 722
 723 def parse_filelist(file_string):
 724     return file_string.split(',')
 725
 726
 727 def validate_filelist(files):
 728     """
 729     Die if a file doesn't exist in a file list
 730     """
 731     for f in files:
 732         if not os.path.exists(f):
 733             raise RuntimeError("%s does not exist" % (f,))
 734
 735
# Run the command line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()