use six.moves to work around urllib / urllib2 / urlparse to urllib 2to3 cleanup
[htsworkflow.git] / encode_submission / encode3.py
1 #!/usr/bin/env python
2 """Create a track hub
3 """
4 from __future__ import print_function, unicode_literals
5
6 from ConfigParser import SafeConfigParser
7 import fnmatch
8 from glob import glob
9 import json
10 import logging
11 import netrc
12 from optparse import OptionParser, OptionGroup
13 import os
14 from pprint import pprint, pformat
15 import shlex
16 from six.moves import StringIO
17 import stat
18 import sys
19 import time
20 import types
21 from zipfile import ZipFile
22
23 import RDF
24
25 from htsworkflow.util import api
26 from htsworkflow.util.rdfhelp import \
27     dafTermOntology, \
28     fromTypedNode, \
29     get_model, \
30     get_serializer, \
31     load_into_model, \
32     sparql_query, \
33     submissionOntology
34 from htsworkflow.submission.daf import get_submission_uri
35 from htsworkflow.submission.submission import list_submissions
36 from htsworkflow.submission.results import ResultMap
37 from htsworkflow.submission.trackhub_submission import TrackHubSubmission
38 from htsworkflow.submission.condorfastq import CondorFastqExtract
39
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Separator used when joining submission names for error messages:
# two spaces followed by the platform line separator.
INDENTED = "".join(("  ", os.linesep))
43
44 import django
# Fall back to the local settings module unless the environment already
# names one; an existing value is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings.local')
47
def main(cmdline=None):
    """Run the submission tool for one command line.

    Parses the options, opens the RDF model, works out which submission
    is being operated on and then performs each requested action in a
    fixed order: build the track hub mapper, load RDF, collect result
    directories, build link/copy trees, generate fastq scripts, scan the
    submission, make the hub, write the manifest, run a SPARQL query and
    finally print the model.

    :param cmdline: list of argument strings handed to
        ``parser.parse_args``; ``None`` is passed through unchanged.

    Invalid option combinations are reported through ``parser.error``,
    which terminates the program.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)
    submission_uri = None

    from django.conf import settings

    # NOTE(review): this stores a 'level' key directly in the 'loggers'
    # mapping rather than inside a named logger's configuration, and when
    # run as a script django.setup() has already been called before
    # main().  Presumably this does not change the effective log level;
    # confirm the intended logger before changing it.
    if opts.debug:
        settings.LOGGING['loggers']['level'] = 'DEBUG'
    elif opts.verbose:
        settings.LOGGING['loggers']['level'] = 'INFO'

    model = get_model(opts.model, opts.db_path)

    submission_names = list(list_submissions(model))
    # One submission name per line, each indented by two spaces, for use
    # in the error messages below.
    choices = os.linesep.join("  " + known for known in submission_names)

    name = opts.name
    if not submission_names and opts.name is None:
        parser.error("Please name this submission")
    elif opts.name and submission_names and opts.name not in submission_names:
        parser.error("{} is not in this model. Choose from: {}{}".format(
            opts.name,
            os.linesep,
            choices))
    elif opts.name is None and len(submission_names) > 1:
        parser.error("Please choose submission name from: {}{}".format(
            os.linesep,
            choices))
    elif len(submission_names) == 1:
        # Only one submission in the model: use it without being asked.
        name = submission_names[0]

    if name:
        submission_uri = get_submission_uri(name)
        logger.info('Submission URI: %s', submission_uri)
    else:
        logger.debug('No name, unable to create submission uri')

    mapper = None
    if opts.make_track_hub:
        mapper = TrackHubSubmission(name,
                                    model,
                                    baseurl=opts.make_track_hub,
                                    baseupload=opts.track_hub_upload,
                                    host=opts.host)

    if opts.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)

    results = ResultMap()
    for a in args:
        if os.path.exists(a):
            results.add_results_from_file(a)
        else:
            logger.warning("File %s doesn't exist.", a)

    if opts.make_link_tree_from is not None:
        results.make_tree_from(opts.make_link_tree_from, link=True)

    if opts.copy_tree_from is not None:
        results.make_tree_from(opts.copy_tree_from, link=False)

    if opts.fastq:
        logger.info("Building fastq extraction scripts")
        flowcells = os.path.join(opts.sequence, 'flowcells')
        extractor = CondorFastqExtract(opts.host, flowcells,
                                       model=opts.model,
                                       compression=opts.compression,
                                       force=opts.force)
        extractor.create_scripts(results)

    if opts.scan_submission:
        if name is None:
            parser.error("Please define a submission name")
        if mapper is None:
            parser.error("Scan submission needs --make-track-hub=public-url")
        mapper.scan_submission_dirs(results)

    if opts.make_track_hub:
        # mapper is always set when --make-track-hub was given (see above).
        mapper.make_hub(results)

    if opts.make_manifest:
        # The manifest is produced by the track hub mapper, so fail with a
        # usage message instead of an AttributeError on None.
        if mapper is None:
            parser.error("Make manifest needs --make-track-hub=public-url")
        make_manifest(mapper, results, opts.make_manifest)

    if opts.sparql:
        sparql_query(model, opts.sparql)

    if opts.print_rdf:
        writer = get_serializer()
        print(writer.serialize_model_to_string(model))
138
139
def make_manifest(mapper, results, filename=None):
    """Write the manifest produced by ``mapper`` for ``results``.

    :param mapper: object providing ``make_manifest(results)``.
    :param results: passed straight through to ``mapper.make_manifest``.
    :param filename: destination path; ``None`` or ``'-'`` sends the
        manifest to standard output instead of a file.
    """
    text = mapper.make_manifest(results)

    to_stdout = filename is None or filename == '-'
    if to_stdout:
        sys.stdout.write(text)
        return

    with open(filename, 'w') as outstream:
        outstream.write(text)
148
149
def _add_value(target, flag, text):
    """Register a value-taking option on ``target`` that defaults to None."""
    target.add_option(flag, default=None, help=text)


def _add_flag(target, flag, text):
    """Register a store_true switch on ``target`` that defaults to False."""
    target.add_option(flag, action="store_true", default=False, help=text)


def _model_group(parser):
    """Build the 'model' option group."""
    group = OptionGroup(parser, 'model')
    group.add_option('--name', help="Set submission name")
    _add_value(group, '--db-path', "set rdf database path")
    _add_value(group, '--model', "Load model database")
    _add_value(group, '--load-rdf', "load rdf statements into model")
    _add_value(group, '--sparql', "execute sparql query")
    _add_flag(group, '--print-rdf', "print ending model state")
    return group


def _commands_group(parser):
    """Build the 'commands' option group."""
    group = OptionGroup(parser, 'commands')
    _add_value(group, '--make-link-tree-from',
               "create directories & link data files")
    _add_value(group, '--copy-tree-from',
               "create directories & copy data files")
    _add_flag(group, '--fastq', "generate scripts for making fastq files")
    _add_flag(group, '--scan-submission',
              "Import metadata for submission into our model")
    _add_value(group, '--make-track-hub',
               'web root that will host the trackhub.')
    _add_value(group, '--track-hub-upload',
               'where to upload track hub <host>:<path>')
    _add_value(group, '--make-manifest',
               'name the manifest file name or - for stdout to create it')
    return group


def make_parser():
    """Return the OptionParser describing this tool's command line.

    Options are registered in this order: the 'model' group, the
    'commands' group, the ungrouped fastq/daf/library options, the
    logging switches, and finally whatever ``api.add_auth_options`` adds.
    """
    parser = OptionParser()

    parser.add_option_group(_model_group(parser))
    parser.add_option_group(_commands_group(parser))

    _add_flag(parser, '--force', "Force regenerating fastqs")
    parser.add_option('--compression', type='choice', choices=['gzip'],
                      default=None,
                      help='select compression type for fastq files')
    _add_value(parser, '--daf', 'specify daf name')
    _add_value(parser, '--library-url',
               "specify an alternate source for library information")

    # debugging
    _add_flag(parser, '--verbose', 'verbose logging')
    _add_flag(parser, '--debug', 'debug logging')

    api.add_auth_options(parser)

    return parser
204
if __name__ == "__main__":
    # Initialise Django before main() runs; main() imports
    # django.conf.settings and the htsworkflow modules it drives.
    django.setup()

    main()