put #!/usr/bin/env python in the right place
[htsworkflow.git] / encode_submission / encode3.py
1 #!/usr/bin/env python
2 """Create a track hub
3 """
4 from ConfigParser import SafeConfigParser
5 import fnmatch
6 from glob import glob
7 import json
8 import logging
9 import netrc
10 from optparse import OptionParser, OptionGroup
11 import os
12 from pprint import pprint, pformat
13 import shlex
14 from six.moves import StringIO
15 import stat
16 import sys
17 import time
18 import types
19 import urllib
20 import urllib2
21 import urlparse
22 from zipfile import ZipFile
23
24 import RDF
25
26 from htsworkflow.util import api
27 from htsworkflow.util.rdfhelp import \
28     dafTermOntology, \
29     fromTypedNode, \
30     get_model, \
31     get_serializer, \
32     load_into_model, \
33     sparql_query, \
34     submissionOntology
35 from htsworkflow.submission.daf import get_submission_uri
36 from htsworkflow.submission.submission import list_submissions
37 from htsworkflow.submission.results import ResultMap
38 from htsworkflow.submission.trackhub_submission import TrackHubSubmission
39 from htsworkflow.submission.condorfastq import CondorFastqExtract
40
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Separator used when joining submission names for parser error messages.
# NOTE(review): the two spaces precede the line separator, so joined names
# end up with trailing spaces rather than a leading indent -- confirm intent.
INDENTED = "  " + os.linesep

import django
# Supply a default settings module only; a value already present in the
# environment is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings.local')
48
def main(cmdline=None):
    """Run the submission tool: parse options, then execute requested steps.

    The steps run in a fixed order: optional RDF load, result-map loading
    from the positional arguments, link/copy tree creation, fastq script
    generation, submission scan, track hub creation, manifest creation,
    SPARQL query and finally printing the model.

    :param cmdline: argument list handed to ``OptionParser.parse_args``;
        ``None`` lets optparse fall back to ``sys.argv[1:]``.

    Usage problems are reported through ``parser.error``, which prints the
    message and exits the process.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)
    submission_uri = None

    from django.conf import settings

    # NOTE(review): the ``__main__`` guard calls django.setup() before
    # main(), so changing settings.LOGGING here may be too late to affect
    # logging; the 'level' key is also written directly under 'loggers'
    # rather than under a named logger -- confirm this does what is wanted.
    if opts.debug:
        settings.LOGGING['loggers']['level'] = 'DEBUG'
    elif opts.verbose:
        settings.LOGGING['loggers']['level'] = 'INFO'

    model = get_model(opts.model, opts.db_path)

    # Work out which submission we are operating on: an explicit --name
    # must match the model (when the model has any), and a model holding
    # exactly one submission supplies the name implicitly.
    submission_names = list(list_submissions(model))
    name = opts.name
    if not submission_names and opts.name is None:
        parser.error("Please name this submission")
    elif opts.name and submission_names and opts.name not in submission_names:
        parser.error("{} is not in this model. Choose from: {}{}".format(
            opts.name,
            os.linesep,
            INDENTED.join(submission_names)))
    elif opts.name is None and len(submission_names) > 1:
        parser.error("Please choose submission name from: {}{}".format(
            os.linesep,
            INDENTED.join(submission_names)))
    elif len(submission_names) == 1:
        name = submission_names[0]

    if name:
        submission_uri = get_submission_uri(name)
        # Log the URI itself; the original logged the bare name here.
        logger.info('Submission URI: %s', submission_uri)
    else:
        logger.debug('No name, unable to create submission uri')

    # The mapper only exists when a track hub base URL was supplied; the
    # scan, hub and manifest steps below all depend on it.
    mapper = None
    if opts.make_track_hub:
        mapper = TrackHubSubmission(name,
                                    model,
                                    baseurl=opts.make_track_hub,
                                    baseupload=opts.track_hub_upload,
                                    host=opts.host)

    if opts.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)

    # Positional arguments are result-map files; missing ones are skipped
    # with a warning instead of aborting the run.
    results = ResultMap()
    for a in args:
        if os.path.exists(a):
            results.add_results_from_file(a)
        else:
            logger.warning("File %s doesn't exist.", a)

    if opts.make_link_tree_from is not None:
        results.make_tree_from(opts.make_link_tree_from, link=True)

    if opts.copy_tree_from is not None:
        results.make_tree_from(opts.copy_tree_from, link=False)

    if opts.fastq:
        logger.info("Building fastq extraction scripts")
        # NOTE(review): --host and --sequence are not declared among
        # make_parser's own options; presumably api.add_auth_options
        # provides them -- verify.
        flowcells = os.path.join(opts.sequence, 'flowcells')
        extractor = CondorFastqExtract(opts.host, flowcells,
                                       model=opts.model,
                                       compression=opts.compression,
                                       force=opts.force)
        extractor.create_scripts(results)

    if opts.scan_submission:
        if name is None:
            parser.error("Please define a submission name")
        if mapper is None:
            parser.error("Scan submission needs --make-track-hub=public-url")
        mapper.scan_submission_dirs(results)

    if opts.make_track_hub:
        mapper.make_hub(results)

    if opts.make_manifest:
        # Without --make-track-hub there is no mapper; report that as a
        # usage error rather than failing on ``None.make_manifest``.
        if mapper is None:
            parser.error("Make manifest needs --make-track-hub=public-url")
        make_manifest(mapper, results, opts.make_manifest)

    if opts.sparql:
        sparql_query(model, opts.sparql)

    if opts.print_rdf:
        writer = get_serializer()
        # Parenthesised single-argument print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print(writer.serialize_model_to_string(model))
139
140
def make_manifest(mapper, results, filename=None):
    """Build a manifest via ``mapper`` and write it out.

    :param mapper: object providing ``make_manifest(results)``.
    :param results: handed unchanged to ``mapper.make_manifest``.
    :param filename: path to write to; ``None`` or ``'-'`` sends the
        manifest to standard output instead.
    """
    text = mapper.make_manifest(results)

    # A real path was given: write the file and finish.
    if filename is not None and filename != '-':
        with open(filename, 'w') as stream:
            stream.write(text)
        return

    sys.stdout.write(text)
149
150
def make_parser():
    """Build and return the OptionParser for this script.

    Options are registered in three places: a 'model' group, a 'commands'
    group, and directly on the parser (fastq tuning and logging switches).
    ``api.add_auth_options`` is applied last to add its own options.
    """
    parser = OptionParser()

    # --- options describing the RDF model -------------------------------
    model_group = OptionGroup(parser, 'model')
    model_group.add_option('--name', help="Set submission name")
    for flag, text in (
            ('--db-path', "set rdf database path"),
            ('--model', "Load model database"),
            ('--load-rdf', "load rdf statements into model"),
            ('--sparql', "execute sparql query"),
    ):
        model_group.add_option(flag, default=None, help=text)
    model_group.add_option('--print-rdf', action="store_true", default=False,
                           help="print ending model state")
    parser.add_option_group(model_group)

    # --- commands ---------------------------------------------------------
    command_group = OptionGroup(parser, 'commands')
    for flag, text in (
            ('--make-link-tree-from', "create directories & link data files"),
            ('--copy-tree-from', "create directories & copy data files"),
    ):
        command_group.add_option(flag, default=None, help=text)
    for flag, text in (
            ('--fastq', "generate scripts for making fastq files"),
            ('--scan-submission',
             "Import metadata for submission into our model"),
    ):
        command_group.add_option(flag, action="store_true", default=False,
                                 help=text)
    for flag, text in (
            ('--make-track-hub', 'web root that will host the trackhub.'),
            ('--track-hub-upload', 'where to upload track hub <host>:<path>'),
            ('--make-manifest',
             'name the manifest file name or - for stdout to create it'),
    ):
        command_group.add_option(flag, default=None, help=text)
    parser.add_option_group(command_group)

    # --- ungrouped options ------------------------------------------------
    parser.add_option('--force', action="store_true", default=False,
                      help="Force regenerating fastqs")
    parser.add_option('--compression', type='choice', choices=['gzip'],
                      default=None,
                      help='select compression type for fastq files')
    parser.add_option('--daf', help='specify daf name', default=None)
    parser.add_option('--library-url', default=None,
                      help="specify an alternate source for library information")

    # debugging switches
    for flag, text in (
            ('--verbose', 'verbose logging'),
            ('--debug', 'debug logging'),
    ):
        parser.add_option(flag, action="store_true", default=False, help=text)

    api.add_auth_options(parser)

    return parser
205
if __name__ == "__main__":
    # Initialise Django before main() runs; main() imports
    # django.conf.settings and reads settings.LOGGING.
    django.setup()

    main()