Switch encode3 submitter to use aws.
[htsworkflow.git] / encode_submission / encode3.py
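"""Prepare and upload an ENCODE3 submission.

Collects result files for a named submission, optionally builds link/copy
trees and fastq extraction scripts, scans the submission directories and
uploads the files through AWSSubmission.
"""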
3
4 #!/usr/bin/env python
5 from ConfigParser import SafeConfigParser
6 import fnmatch
7 from glob import glob
8 import json
9 import logging
10 import netrc
11 from optparse import OptionParser, OptionGroup
12 import os
13 from pprint import pprint, pformat
14 import shlex
15 from StringIO import StringIO
16 import stat
17 import sys
18 import time
19 import types
20 import urllib
21 import urllib2
22 import urlparse
23 from zipfile import ZipFile
24
25 import RDF
26
27 from htsworkflow.util import api
28 from htsworkflow.util.rdfhelp import \
29     dafTermOntology, \
30     fromTypedNode, \
31     get_model, \
32     get_serializer, \
33     load_into_model, \
34     sparql_query, \
35     submissionOntology
36 from htsworkflow.submission.daf import get_submission_uri
37 from htsworkflow.submission.submission import list_submissions
38 from htsworkflow.submission.results import ResultMap
39 from htsworkflow.submission.condorfastq import CondorFastqExtract
40 from htsworkflow.submission.aws_submission import AWSSubmission
# Module level logger, named after this module.
logger = logging.getLogger(__name__)

# Two spaces followed by the platform line separator.
# NOTE(review): when this is used as a str.join() separator the two spaces end
# up trailing each item rather than indenting the item that follows.
INDENTED = "  " + os.linesep
44
45 import django
# Fall back to the local htsworkflow settings unless the caller's environment
# already names a Django settings module.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings.local')
48
def main(cmdline=None):
    """Run the ENCODE3 submission tool.

    Parses the command line, opens the RDF model, works out which submission
    to operate on and then runs every requested step in a fixed order:
    load RDF, collect result maps, build link/copy trees, generate fastq
    extraction scripts, scan submission directories, upload, run a SPARQL
    query and finally print the model.

    :param cmdline: list of argument strings to parse; ``None`` lets optparse
        fall back to ``sys.argv[1:]``.

    Invalid or missing submission names are reported through
    ``parser.error``, which prints a usage message and exits.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)
    submission_uri = None

    from django.conf import settings

    # Adjust the configured log level before django.setup() runs.
    if opts.debug:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'DEBUG'
    elif opts.verbose:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'INFO'

    django.setup()

    model = get_model(opts.model, opts.db_path)
    submission_names = list(list_submissions(model))
    # One known submission per line, each indented by two spaces, for the
    # error messages below.
    choices = os.linesep.join("  " + known for known in submission_names)

    name = opts.name
    if not submission_names and opts.name is None:
        # Empty model and no name given: nothing to default to.
        parser.error("Please name this submission")
    elif opts.name and submission_names and opts.name not in submission_names:
        parser.error("{} is not in this model. Choose from: {}{}".format(
            opts.name,
            os.linesep,
            choices))
    elif opts.name is None and len(submission_names) > 1:
        parser.error("Please choose submission name from: {}{}".format(
            os.linesep,
            choices))
    elif len(submission_names) == 1:
        # Only one submission in the model, so use it.
        name = submission_names[0]

    if name:
        submission_uri = get_submission_uri(name)
        logger.info('Submission URI: %s', submission_uri)

    # NOTE(review): opts.host (and opts.sequence below) are not declared in
    # make_parser itself; presumably api.add_auth_options adds them - confirm.
    mapper = AWSSubmission(name, model, encode_host=opts.encoded, lims_host=opts.host)

    if opts.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)

    # Positional arguments are result map files; missing ones are skipped
    # with a warning rather than aborting the run.
    results = ResultMap()
    for a in args:
        if os.path.exists(a):
            results.add_results_from_file(a)
        else:
            logger.warning("File %s doesn't exist.", a)

    if opts.make_link_tree_from is not None:
        results.make_tree_from(opts.make_link_tree_from, link=True)

    if opts.copy_tree_from is not None:
        results.make_tree_from(opts.copy_tree_from, link=False)

    if opts.fastq:
        logger.info("Building fastq extraction scripts")
        flowcells = os.path.join(opts.sequence, 'flowcells')
        extractor = CondorFastqExtract(opts.host, flowcells,
                                       model=opts.model,
                                       compression=opts.compression,
                                       force=opts.force)
        extractor.create_scripts(results)

    if opts.scan_submission:
        if name is None:
            parser.error("Please define a submission name")
        mapper.scan_submission_dirs(results)

    if opts.upload:
        mapper.upload(results, opts.dry_run)

    if opts.sparql:
        sparql_query(model, opts.sparql)

    if opts.print_rdf:
        writer = get_serializer()
        # Single-argument call form: prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print(writer.serialize_model_to_string(model))
127
128
def make_parser():
    """Build the command line parser for the submission tool.

    Options are registered in a 'model' group, a 'commands' group and as
    ungrouped general/debugging options on the parser itself; finally
    ``api.add_auth_options`` is given the parser to add its own options.

    Returns the configured ``OptionParser``.
    """
    def register(target, specs):
        # Add every (flag, keyword arguments) pair to a parser or group,
        # preserving the listed order.
        for flag, settings in specs:
            target.add_option(flag, **settings)

    parser = OptionParser()

    model_group = OptionGroup(parser, 'model')
    register(model_group, [
        ('--name', {'help': "Set submission name"}),
        ('--db-path', {'default': None,
                       'help': "set rdf database path"}),
        ('--model', {'default': None,
                     'help': "Load model database"}),
        ('--load-rdf', {'default': None,
                        'help': "load rdf statements into model"}),
        ('--sparql', {'default': None,
                      'help': "execute sparql query"}),
        ('--print-rdf', {'action': "store_true", 'default': False,
                         'help': "print ending model state"}),
    ])
    parser.add_option_group(model_group)

    # commands
    command_group = OptionGroup(parser, 'commands')
    register(command_group, [
        ('--make-link-tree-from', {'default': None,
                                   'help': "create directories & link data files"}),
        ('--copy-tree-from', {'default': None,
                              'help': "create directories & copy data files"}),
        ('--fastq', {'action': "store_true", 'default': False,
                     'help': "generate scripts for making fastq files"}),
        ('--scan-submission', {'action': "store_true", 'default': False,
                               'help': "cache md5 sums"}),
        ('--upload', {'action': "store_true", 'default': False,
                      'help': "Upload files"}),
    ])
    parser.add_option_group(command_group)

    # general options, followed by the debugging switches
    register(parser, [
        ('--force', {'action': "store_true", 'default': False,
                     'help': "Force regenerating fastqs"}),
        ('--compression', {'default': None, 'type': 'choice',
                           'choices': ['gzip'],
                           'help': 'select compression type for fastq files'}),
        ('--library-url', {'default': None,
                           'help': "specify an alternate source for library information"}),
        ('--encoded', {'default': 'www.encodeproject.org',
                       'help': 'base url for talking to encode server'}),
        ('--dry-run', {'action': 'store_true', 'default': False,
                       'help': 'avoid making changes to encoded'}),
        ('--verbose', {'action': "store_true", 'default': False,
                       'help': 'verbose logging'}),
        ('--debug', {'action': "store_true", 'default': False,
                     'help': 'debug logging'}),
    ])

    api.add_auth_options(parser)

    return parser
181
# Run the submission tool only when this file is executed as a script;
# main() then parses sys.argv because no argument list is passed.
if __name__ == "__main__":
    main()