From: Diane Trout
Date: Mon, 8 Jul 2013 22:02:42 +0000 (-0700)
Subject: Update test to work with species -> species_name rename.
X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=23b5117c641c50d397fd46c1d567558466766559

Update test to work with species -> species_name rename.
---

diff --git a/encode_submission/encode3.py b/encode_submission/encode3.py
new file mode 100644
index 0000000..197d7f3
--- /dev/null
+++ b/encode_submission/encode3.py
@@ -0,0 +1,182 @@
+"""Create a track hub
+"""
+
+#!/usr/bin/env python
+from ConfigParser import SafeConfigParser
+import fnmatch
+from glob import glob
+import json
+import logging
+import netrc
+from optparse import OptionParser, OptionGroup
+import os
+from pprint import pprint, pformat
+import shlex
+from StringIO import StringIO
+import stat
+import sys
+import time
+import types
+import urllib
+import urllib2
+import urlparse
+from zipfile import ZipFile
+
+import RDF
+
+if not 'DJANGO_SETTINGS_MODULE' in os.environ:
+    os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.settings'
+
+from htsworkflow.util import api
+from htsworkflow.util.rdfhelp import \
+    dafTermOntology, \
+    fromTypedNode, \
+    get_model, \
+    get_serializer, \
+    load_into_model, \
+    sparql_query, \
+    submissionOntology
+from htsworkflow.submission.daf import get_submission_uri
+from htsworkflow.submission.results import ResultMap
+from htsworkflow.submission.trackhub import TrackHubSubmission
+from htsworkflow.submission.condorfastq import CondorFastqExtract
+
+logger = logging.getLogger(__name__)
+
+def main(cmdline=None):
+    parser = make_parser()
+    opts, args = parser.parse_args(cmdline)
+    submission_uri = None
+
+    if opts.debug:
+        logging.basicConfig(level = logging.DEBUG )
+    elif opts.verbose:
+        logging.basicConfig(level = logging.INFO )
+    else:
+        logging.basicConfig(level = logging.WARNING )
+
+    apidata = api.make_auth_from_opts(opts, parser)
+
+    model = get_model(opts.model, opts.db_path)
+    mapper = None
+    if opts.name:
+        mapper = TrackHubSubmission(opts.name, model, host=opts.host)
+        submission_uri = get_submission_uri(opts.name)
+
+
+    if opts.load_rdf is not None:
+        if submission_uri is None:
+            parser.error("Please specify the submission name")
+        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)
+
+    results = ResultMap()
+    for a in args:
+        if os.path.exists(a):
+            results.add_results_from_file(a)
+        else:
+            logger.warn("File %s doesn't exist.", a)
+
+    if opts.make_link_tree_from is not None:
+        results.make_tree_from(opts.make_link_tree_from, link=True)
+
+    if opts.copy_tree_from is not None:
+        results.make_tree_from(opts.copy_tree_from, link=False)
+
+    if opts.fastq:
+        logger.info("Building fastq extraction scripts")
+        flowcells = os.path.join(opts.sequence, 'flowcells')
+        extractor = CondorFastqExtract(opts.host, flowcells,
+                                       model=opts.model,
+                                       force=opts.force)
+        extractor.create_scripts(results)
+
+    if opts.scan_submission:
+        if opts.name is None:
+            parser.error("Please define a submission name")
+        mapper.scan_submission_dirs(results)
+
+    if opts.make_hub:
+        make_hub(mapper, results, opts.make_hub)
+
+    if opts.make_manifest:
+        make_manifest(mapper, results, opts.make_manifest)
+
+    if opts.sparql:
+        sparql_query(model, opts.sparql)
+
+    if opts.print_rdf:
+        writer = get_serializer()
+        print writer.serialize_model_to_string(model)
+
+
+def make_hub(mapper, results, filename=None):
+    trackdb = mapper.make_hub(results)
+
+    if filename is None or filename == '-':
+        sys.stdout.write(trackdb)
+    else:
+        with open('trackDb.txt', 'w') as trackstream:
+            trackstream.write(trackdb)
+
+def make_manifest(mapper, results, filename=None):
+    manifest = mapper.make_manifest(results)
+
+    if filename is None or filename == '-':
+        sys.stdout.write(manifest)
+    else:
+        with open(filename, 'w') as mainifeststream:
+            mainifeststream.write(manifest)
+
+def make_parser():
+    parser = OptionParser()
+
+    model = OptionGroup(parser, 'model')
+    model.add_option('--name', help="Set submission name")
+    model.add_option('--db-path', default=None,
+                     help="set rdf database path")
+    model.add_option('--model', default=None,
+                     help="Load model database")
+    model.add_option('--load-rdf', default=None,
+                     help="load rdf statements into model")
+    model.add_option('--sparql', default=None, help="execute sparql query")
+    model.add_option('--print-rdf', action="store_true", default=False,
+                     help="print ending model state")
+    parser.add_option_group(model)
+    # commands
+    commands = OptionGroup(parser, 'commands')
+    commands.add_option('--make-link-tree-from',
+                        help="create directories & link data files",
+                        default=None)
+    commands.add_option('--copy-tree-from',
+                        help="create directories & copy data files",
+                        default=None)
+    commands.add_option('--fastq', default=False, action="store_true",
+                        help="generate scripts for making fastq files")
+    commands.add_option('--scan-submission', default=False, action="store_true",
+                        help="Import metadata for submission into our model")
+    commands.add_option('--make-hub', default=None,
+                        help='name the hub file or - for stdout to create it')
+    commands.add_option('--make-manifest',
+                        help='name the manifest file name or - for stdout to create it',
+                        default=None)
+
+
+    parser.add_option_group(commands)
+
+    parser.add_option('--force', default=False, action="store_true",
+                      help="Force regenerating fastqs")
+    parser.add_option('--daf', default=None, help='specify daf name')
+    parser.add_option('--library-url', default=None,
+                      help="specify an alternate source for library information")
+    # debugging
+    parser.add_option('--verbose', default=False, action="store_true",
+                      help='verbose logging')
+    parser.add_option('--debug', default=False, action="store_true",
+                      help='debug logging')
+
+    api.add_auth_options(parser)
+
+    return parser
+
+if __name__ == "__main__":
+    main()
diff --git a/encode_submission/trackhub.py b/encode_submission/trackhub.py
deleted file mode 100644
index 197d7f3..0000000
--- a/encode_submission/trackhub.py
+++ /dev/null
@@ -1,182 +0,0 @@
-"""Create a track hub
-"""
-
-#!/usr/bin/env python
-from ConfigParser import SafeConfigParser
-import fnmatch
-from glob import glob
-import json
-import logging
-import netrc
-from optparse import OptionParser, OptionGroup
-import os
-from pprint import pprint, pformat
-import shlex
-from StringIO import StringIO
-import stat
-import sys
-import time
-import types
-import urllib
-import urllib2
-import urlparse
-from zipfile import ZipFile
-
-import RDF
-
-if not 'DJANGO_SETTINGS_MODULE' in os.environ:
-    os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.settings'
-
-from htsworkflow.util import api
-from htsworkflow.util.rdfhelp import \
-    dafTermOntology, \
-    fromTypedNode, \
-    get_model, \
-    get_serializer, \
-    load_into_model, \
-    sparql_query, \
-    submissionOntology
-from htsworkflow.submission.daf import get_submission_uri
-from htsworkflow.submission.results import ResultMap
-from htsworkflow.submission.trackhub import TrackHubSubmission
-from htsworkflow.submission.condorfastq import CondorFastqExtract
-
-logger = logging.getLogger(__name__)
-
-def main(cmdline=None):
-    parser = make_parser()
-    opts, args = parser.parse_args(cmdline)
-    submission_uri = None
-
-    if opts.debug:
-        logging.basicConfig(level = logging.DEBUG )
-    elif opts.verbose:
-        logging.basicConfig(level = logging.INFO )
-    else:
-        logging.basicConfig(level = logging.WARNING )
-
-    apidata = api.make_auth_from_opts(opts, parser)
-
-    model = get_model(opts.model, opts.db_path)
-    mapper = None
-    if opts.name:
-        mapper = TrackHubSubmission(opts.name, model, host=opts.host)
-        submission_uri = get_submission_uri(opts.name)
-
-
-    if opts.load_rdf is not None:
-        if submission_uri is None:
-            parser.error("Please specify the submission name")
-        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)
-
-    results = ResultMap()
-    for a in args:
-        if os.path.exists(a):
-            results.add_results_from_file(a)
-        else:
-            logger.warn("File %s doesn't exist.", a)
-
-    if opts.make_link_tree_from is not None:
-        results.make_tree_from(opts.make_link_tree_from, link=True)
-
-    if opts.copy_tree_from is not None:
-        results.make_tree_from(opts.copy_tree_from, link=False)
-
-    if opts.fastq:
-        logger.info("Building fastq extraction scripts")
-        flowcells = os.path.join(opts.sequence, 'flowcells')
-        extractor = CondorFastqExtract(opts.host, flowcells,
-                                       model=opts.model,
-                                       force=opts.force)
-        extractor.create_scripts(results)
-
-    if opts.scan_submission:
-        if opts.name is None:
-            parser.error("Please define a submission name")
-        mapper.scan_submission_dirs(results)
-
-    if opts.make_hub:
-        make_hub(mapper, results, opts.make_hub)
-
-    if opts.make_manifest:
-        make_manifest(mapper, results, opts.make_manifest)
-
-    if opts.sparql:
-        sparql_query(model, opts.sparql)
-
-    if opts.print_rdf:
-        writer = get_serializer()
-        print writer.serialize_model_to_string(model)
-
-
-def make_hub(mapper, results, filename=None):
-    trackdb = mapper.make_hub(results)
-
-    if filename is None or filename == '-':
-        sys.stdout.write(trackdb)
-    else:
-        with open('trackDb.txt', 'w') as trackstream:
-            trackstream.write(trackdb)
-
-def make_manifest(mapper, results, filename=None):
-    manifest = mapper.make_manifest(results)
-
-    if filename is None or filename == '-':
-        sys.stdout.write(manifest)
-    else:
-        with open(filename, 'w') as mainifeststream:
-            mainifeststream.write(manifest)
-
-def make_parser():
-    parser = OptionParser()
-
-    model = OptionGroup(parser, 'model')
-    model.add_option('--name', help="Set submission name")
-    model.add_option('--db-path', default=None,
-                     help="set rdf database path")
-    model.add_option('--model', default=None,
-                     help="Load model database")
-    model.add_option('--load-rdf', default=None,
-                     help="load rdf statements into model")
-    model.add_option('--sparql', default=None, help="execute sparql query")
-    model.add_option('--print-rdf', action="store_true", default=False,
-                     help="print ending model state")
-    parser.add_option_group(model)
-    # commands
-    commands = OptionGroup(parser, 'commands')
-    commands.add_option('--make-link-tree-from',
-                        help="create directories & link data files",
-                        default=None)
-    commands.add_option('--copy-tree-from',
-                        help="create directories & copy data files",
-                        default=None)
-    commands.add_option('--fastq', default=False, action="store_true",
-                        help="generate scripts for making fastq files")
-    commands.add_option('--scan-submission', default=False, action="store_true",
-                        help="Import metadata for submission into our model")
-    commands.add_option('--make-hub', default=None,
-                        help='name the hub file or - for stdout to create it')
-    commands.add_option('--make-manifest',
-                        help='name the manifest file name or - for stdout to create it',
-                        default=None)
-
-
-    parser.add_option_group(commands)
-
-    parser.add_option('--force', default=False, action="store_true",
-                      help="Force regenerating fastqs")
-    parser.add_option('--daf', default=None, help='specify daf name')
-    parser.add_option('--library-url', default=None,
-                      help="specify an alternate source for library information")
-    # debugging
-    parser.add_option('--verbose', default=False, action="store_true",
-                      help='verbose logging')
-    parser.add_option('--debug', default=False, action="store_true",
-                      help='debug logging')
-
-    api.add_auth_options(parser)
-
-    return parser
-
-if __name__ == "__main__":
-    main()
diff --git a/htsworkflow/submission/test/test_daf.py b/htsworkflow/submission/test/test_daf.py
index c722721..7959d1e 100644
--- a/htsworkflow/submission/test/test_daf.py
+++ b/htsworkflow/submission/test/test_daf.py
@@ -247,7 +247,7 @@ thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
 
         # server is 500 for this library
         self.failUnlessEqual(gel_cut, 100)
-        species = daf_mapper._get_library_attribute(libNode, 'species')
+        species = daf_mapper._get_library_attribute(libNode, 'species_name')
         self.failUnlessEqual(species, "Homo sapiens")
 
         with mktempdir('analysis') as analysis_dir:
diff --git a/htsworkflow/submission/trackhub.py b/htsworkflow/submission/trackhub.py
deleted file mode 100644
index 8b7b424..0000000
--- a/htsworkflow/submission/trackhub.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import logging
-import os
-
-import RDF
-
-from htsworkflow.submission.submission import Submission
-
-from htsworkflow.util.rdfhelp import \
-    fromTypedNode, \
-    geoSoftNS, \
-    stripNamespace, \
-    submissionOntology
-
-from django.conf import settings
-from django.template import Context, loader
-
-LOGGER = logging.getLogger(__name__)
-
-class TrackHubSubmission(Submission):
-    def __init__(self, name, model, host):
-        super(TrackHubSubmission, self).__init__(name, model, host)
-
-    def make_hub(self, result_map):
-        samples = []
-        for lib_id, result_dir in result_map.items():
-            an_analysis = self.get_submission_node(result_dir)
-            metadata = self.get_sample_metadata(an_analysis)
-            if len(metadata) == 0:
-                errmsg = 'No metadata found for {0}'
-                LOGGER.error(errmsg.format(str(an_analysis),))
-                continue
-            elif len(metadata) > 1:
-                errmsg = 'Confused there are more than one sample for %s'
-                LOGGER.debug(errmsg % (str(an_analysis),))
-            metadata = metadata[0]
-            samples.append(metadata)
-
-        template = loader.get_template('trackDb.txt')
-        context = Context({
-            'samples': samples,
-        })
-        return str(template.render(context))
-
-    def make_manifest(self, result_map):
-        files = []
-        for lib_id, result_dir in result_map.items():
-            an_analysis = self.get_submission_node(result_dir)
-            metadata = self.get_manifest_metadata(an_analysis)
-            files.extend(metadata)
-
-        template = loader.get_template('manifest.txt')
-        context = Context({
-            'files': files
-        })
-        return str(template.render(context))
-
-    def get_sample_metadata(self, analysis_node):
-        """Gather information for filling out sample section of a SOFT file
-        """
-        query_template = loader.get_template('trackhub_samples.sparql')
-
-        context = Context({
-            'submission': str(analysis_node.uri),
-            'submissionSet': str(self.submissionSetNS[''].uri),
-        })
-
-        results = self.execute_query(query_template, context)
-        return results
-
-    def get_manifest_metadata(self, analysis_node):
-        query_template = loader.get_template('trackhub_manifest.sparql')
-
-        context = Context({
-            'submission': str(analysis_node.uri),
-            'submissionSet': str(self.submissionSetNS[''].uri),
-        })
-        results = self.execute_query(query_template, context)
-        return results
diff --git a/htsworkflow/submission/trackhub_submission.py b/htsworkflow/submission/trackhub_submission.py
new file mode 100644
index 0000000..8b7b424
--- /dev/null
+++ b/htsworkflow/submission/trackhub_submission.py
@@ -0,0 +1,78 @@
+import logging
+import os
+
+import RDF
+
+from htsworkflow.submission.submission import Submission
+
+from htsworkflow.util.rdfhelp import \
+    fromTypedNode, \
+    geoSoftNS, \
+    stripNamespace, \
+    submissionOntology
+
+from django.conf import settings
+from django.template import Context, loader
+
+LOGGER = logging.getLogger(__name__)
+
+class TrackHubSubmission(Submission):
+    def __init__(self, name, model, host):
+        super(TrackHubSubmission, self).__init__(name, model, host)
+
+    def make_hub(self, result_map):
+        samples = []
+        for lib_id, result_dir in result_map.items():
+            an_analysis = self.get_submission_node(result_dir)
+            metadata = self.get_sample_metadata(an_analysis)
+            if len(metadata) == 0:
+                errmsg = 'No metadata found for {0}'
+                LOGGER.error(errmsg.format(str(an_analysis),))
+                continue
+            elif len(metadata) > 1:
+                errmsg = 'Confused there are more than one sample for %s'
+                LOGGER.debug(errmsg % (str(an_analysis),))
+            metadata = metadata[0]
+            samples.append(metadata)
+
+        template = loader.get_template('trackDb.txt')
+        context = Context({
+            'samples': samples,
+        })
+        return str(template.render(context))
+
+    def make_manifest(self, result_map):
+        files = []
+        for lib_id, result_dir in result_map.items():
+            an_analysis = self.get_submission_node(result_dir)
+            metadata = self.get_manifest_metadata(an_analysis)
+            files.extend(metadata)
+
+        template = loader.get_template('manifest.txt')
+        context = Context({
+            'files': files
+        })
+        return str(template.render(context))
+
+    def get_sample_metadata(self, analysis_node):
+        """Gather information for filling out sample section of a SOFT file
+        """
+        query_template = loader.get_template('trackhub_samples.sparql')
+
+        context = Context({
+            'submission': str(analysis_node.uri),
+            'submissionSet': str(self.submissionSetNS[''].uri),
+        })
+
+        results = self.execute_query(query_template, context)
+        return results
+
+    def get_manifest_metadata(self, analysis_node):
+        query_template = loader.get_template('trackhub_manifest.sparql')
+
+        context = Context({
+            'submission': str(analysis_node.uri),
+            'submissionSet': str(self.submissionSetNS[''].uri),
+        })
+        results = self.execute_query(query_template, context)
+        return results