import RDF
+# Fall back to the htsworkflow settings module when the environment does not
+# already name one; an existing DJANGO_SETTINGS_MODULE value is left untouched.
+# NOTE(review): this sits ahead of the htsworkflow imports below, presumably
+# because they read Django settings at import time -- confirm before moving it.
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings')
+
from htsworkflow.util import api
from htsworkflow.util.rdfhelp import \
dafTermOntology, \
sparql_query, \
submissionOntology
from htsworkflow.submission.daf import \
- DAFMapper, \
+ UCSCSubmission, \
MetadataLookupException, \
get_submission_uri
+from htsworkflow.submission.results import ResultMap
from htsworkflow.submission.condorfastq import CondorFastqExtract
logger = logging.getLogger('ucsc_gather')
+TAR = '/bin/tar'
+LFTP = '/usr/bin/lftp'
+
def main(cmdline=None):
parser = make_parser()
opts, args = parser.parse_args(cmdline)
submission_uri = None
+ global TAR
+ global LFTP
+ TAR = opts.tar
+ LFTP = opts.lftp
+
if opts.debug:
logging.basicConfig(level = logging.DEBUG )
elif opts.verbose:
apidata = api.make_auth_from_opts(opts, parser)
model = get_model(opts.model, opts.db_path)
+ mapper = None
if opts.name:
- mapper = DAFMapper(opts.name, opts.daf, model)
+ mapper = UCSCSubmission(opts.name, opts.daf, model)
if opts.library_url is not None:
mapper.library_url = opts.library_url
submission_uri = get_submission_uri(opts.name)
if opts.make_ddf and opts.daf is None:
parser.error("Please specify your daf when making ddf files")
- library_result_map = []
+ results = ResultMap()
for a in args:
- library_result_map.extend(read_library_result_map(a))
+ results.add_results_from_file(a)
if opts.make_tree_from is not None:
- make_tree_from(opts.make_tree_from, library_result_map)
+ results.make_tree_from(opts.make_tree_from)
if opts.link_daf:
- if opts.daf is None:
- parser.error("Please specify daf filename with --daf")
- link_daf(opts.daf, library_result_map)
+ if mapper is None:
+ parser.error("Specify a submission model")
+ if mapper.daf is None:
+ parser.error("Please load a daf first")
+ mapper.link_daf(results)
if opts.fastq:
- extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
+ flowcells = os.path.join(opts.sequence, 'flowcells')
+ extractor = CondorFastqExtract(opts.host, flowcells,
force=opts.force)
- extractor.create_scripts(library_result_map)
+ extractor.create_scripts(results)
if opts.scan_submission:
- scan_submission_dirs(mapper, library_result_map)
+ mapper.scan_submission_dirs(results)
if opts.make_ddf:
- make_all_ddfs(mapper, library_result_map, opts.daf, force=opts.force)
+ if not os.path.exists(TAR):
+ parser.error("%s does not exist, please specify --tar" % (TAR,))
+ if not os.path.exists(LFTP):
+ parser.error("%s does not exist, please specify --lftp" % (LFTP,))
+ make_all_ddfs(mapper, results, opts.daf, force=opts.force)
if opts.zip_ddf:
- zip_ddfs(mapper, library_result_map, opts.daf)
+ zip_ddfs(mapper, results, opts.daf)
if opts.sparql:
sparql_query(model, opts.sparql)
model.add_option('--sparql', default=None, help="execute sparql query")
model.add_option('--print-rdf', action="store_true", default=False,
help="print ending model state")
+ model.add_option('--tar', default=TAR,
+ help="override path to tar command")
+ model.add_option('--lftp', default=LFTP,
+ help="override path to lftp command")
parser.add_option_group(model)
# commands
commands = OptionGroup(parser, 'commands')
return parser
-def make_tree_from(source_path, library_result_map):
- """Create a tree using data files from source path.
- """
- for lib_id, lib_path in library_result_map:
- if not os.path.exists(lib_path):
- logger.info("Making dir {0}".format(lib_path))
- os.mkdir(lib_path)
- source_lib_dir = os.path.abspath(os.path.join(source_path, lib_path))
- if os.path.exists(source_lib_dir):
- pass
- for filename in os.listdir(source_lib_dir):
- source_pathname = os.path.join(source_lib_dir, filename)
- target_pathname = os.path.join(lib_path, filename)
- if not os.path.exists(source_pathname):
- raise IOError("{0} does not exist".format(source_pathname))
- if not os.path.exists(target_pathname):
- os.symlink(source_pathname, target_pathname)
- logger.info(
- 'LINK {0} to {1}'.format(source_pathname, target_pathname))
-
-
-def link_daf(daf_path, library_result_map):
- if not os.path.exists(daf_path):
- raise RuntimeError("%s does not exist, how can I link to it?" % (daf_path,))
-
- base_daf = os.path.basename(daf_path)
-
- for lib_id, result_dir in library_result_map:
- if not os.path.exists(result_dir):
- raise RuntimeError("Couldn't find target directory %s" %(result_dir,))
- submission_daf = os.path.join(result_dir, base_daf)
- if not os.path.exists(submission_daf):
- if not os.path.exists(daf_path):
- raise RuntimeError("Couldn't find daf: %s" %(daf_path,))
- os.link(daf_path, submission_daf)
-
-
-def scan_submission_dirs(view_map, library_result_map):
- """Look through our submission directories and collect needed information
- """
- for lib_id, result_dir in library_result_map:
- logger.info("Importing %s from %s" % (lib_id, result_dir))
- try:
- view_map.import_submission_dir(result_dir, lib_id)
- except MetadataLookupException, e:
- logger.error("Skipping %s: %s" % (lib_id, str(e)))
-
def make_all_ddfs(view_map, library_result_map, daf_name, make_condor=True, force=False):
dag_fragment = []
- for lib_id, result_dir in library_result_map:
+ for lib_id, result_dir in library_result_map.items():
submissionNode = view_map.get_submission_node(result_dir)
dag_fragment.extend(
make_ddf(view_map, submissionNode, daf_name, make_condor, result_dir)
os.chdir(rootdir)
-def read_library_result_map(filename):
- """
- Read a file that maps library id to result directory.
- Does not support spaces in filenames.
-
- For example:
- 10000 result/foo/bar
- """
- stream = open(filename,'r')
-
- results = []
- for line in stream:
- line = line.rstrip()
- if not line.startswith('#') and len(line) > 0 :
- library_id, result_dir = line.split()
- results.append((library_id, result_dir))
- return results
-
-
def make_condor_archive_script(name, files, outdir=None):
script = """Universe = vanilla
-Executable = /bin/tar
+Executable = %(tar)s
arguments = czvhf ../%(archivename)s %(filelist)s
Error = compress.out.$(Process).log
context = {'archivename': make_submission_name(name),
'filelist': " ".join(files),
'initialdir': os.path.abspath(outdir),
- 'user': os.getlogin()}
+ 'user': os.getlogin(),
+ 'tar': TAR}
condor_script = os.path.join(outdir, make_condor_name(name, 'archive'))
condor_stream = open(condor_script,'w')
return condor_script
-def make_condor_upload_script(name, outdir=None):
+def make_condor_upload_script(name, lftp, outdir=None):
script = """Universe = vanilla
-Executable = /usr/bin/lftp
-arguments = -c put ../%(archivename)s -o ftp://%(ftpuser)s:%(ftppassword)s@%(ftphost)s/%(archivename)s
+Executable = %(lftp)s
+arguments = -c put %(archivename)s -o ftp://%(ftpuser)s:%(ftppassword)s@%(ftphost)s/%(archivename)s
Error = upload.out.$(Process).log
Output = upload.out.$(Process).log
'user': os.getlogin(),
'ftpuser': ftpuser,
'ftppassword': ftppassword,
- 'ftphost': encodeftp}
+ 'ftphost': encodeftp,
+ 'lftp': LFTP}
condor_script = os.path.join(outdir, make_condor_name(name, 'upload'))
condor_stream = open(condor_script,'w')
return fragments
-def get_library_info(host, apidata, library_id):
- url = api.library_url(host, library_id)
- contents = api.retrieve_info(url, apidata)
- return contents
-
-
def make_base_name(pathname):
base = os.path.basename(pathname)
name, ext = os.path.splitext(base)