17f4f657d4bb2a7134ad62affb369c0b48bbd836
[htsworkflow.git] / encode_submission / encode3.py
1 """Create a track hub
2 """
3
4 #!/usr/bin/env python
5 from ConfigParser import SafeConfigParser
6 import fnmatch
7 from glob import glob
8 import json
9 import logging
10 import netrc
11 from optparse import OptionParser, OptionGroup
12 import os
13 from pprint import pprint, pformat
14 import shlex
15 from StringIO import StringIO
16 import stat
17 import sys
18 import time
19 import types
20 import urllib
21 import urllib2
22 import urlparse
23 from zipfile import ZipFile
24
25 import RDF
26
27 from htsworkflow.util import api
28 from htsworkflow.util.rdfhelp import \
29     dafTermOntology, \
30     fromTypedNode, \
31     get_model, \
32     get_serializer, \
33     load_into_model, \
34     sparql_query, \
35     submissionOntology
36 from htsworkflow.submission.daf import get_submission_uri
37 from htsworkflow.submission.submission import list_submissions
38 from htsworkflow.submission.results import ResultMap
39 from htsworkflow.submission.trackhub_submission import TrackHubSubmission
40 from htsworkflow.submission.condorfastq import CondorFastqExtract
41
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Two spaces followed by the platform line separator.
INDENTED = "  {}".format(os.linesep)
45
46 import django
# Use the local htsworkflow settings module unless the environment already
# names one; an existing value is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings.local')
49
def main(cmdline=None):
    """Parse the command line and run every requested submission step.

    The steps run in a fixed order: pick the submission name, optionally
    load RDF into the model, read the result map files given as positional
    arguments, build link/copy trees, write fastq extraction scripts, scan
    the submission directories, build the track hub, write the manifest,
    run a SPARQL query and finally print the model.

    :param cmdline: argument list handed to ``parser.parse_args``; with the
        default ``None`` optparse falls back to ``sys.argv[1:]``.

    Invalid option combinations are reported through ``parser.error``,
    which prints the message and exits.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)
    submission_uri = None

    from django.conf import settings

    # Adjust the configured log level before django.setup() is called.
    if opts.debug:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'DEBUG'
    elif opts.verbose:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'INFO'

    django.setup()

    model = get_model(opts.model, opts.db_path)
    submission_names = list(list_submissions(model))
    name = opts.name
    # One known submission name per line, each indented by two spaces.
    known_names = os.linesep.join(
        "  " + known for known in submission_names)
    if not submission_names and opts.name is None:
        parser.error("Please name this submission")
    elif opts.name and submission_names and opts.name not in submission_names:
        parser.error("{} is not in this model. Choose from: {}{}".format(
            opts.name,
            os.linesep,
            known_names))
    elif opts.name is None and len(submission_names) > 1:
        parser.error("Please choose submission name from: {}{}".format(
            os.linesep,
            known_names))
    elif len(submission_names) == 1:
        # Only one submission in the model: use it without asking.
        name = submission_names[0]

    if name:
        submission_uri = get_submission_uri(name)
        logger.info('Submission URI: %s', submission_uri)
    else:
        logger.debug('No name, unable to create submission uri')

    # The track hub mapper only exists when --make-track-hub was given;
    # the scan and manifest steps below depend on it.
    mapper = None
    if opts.make_track_hub:
        mapper = TrackHubSubmission(name,
                                    model,
                                    baseurl=opts.make_track_hub,
                                    baseupload=opts.track_hub_upload,
                                    host=opts.host)

    if opts.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)

    results = ResultMap()
    for a in args:
        if os.path.exists(a):
            results.add_results_from_file(a)
        else:
            logger.warning("File %s doesn't exist.", a)

    if opts.make_link_tree_from is not None:
        results.make_tree_from(opts.make_link_tree_from, link=True)

    if opts.copy_tree_from is not None:
        results.make_tree_from(opts.copy_tree_from, link=False)

    if opts.fastq:
        logger.info("Building fastq extraction scripts")
        # NOTE(review): opts.sequence and opts.host are not defined by
        # make_parser() directly; presumably api.add_auth_options() adds
        # them -- confirm.
        flowcells = os.path.join(opts.sequence, 'flowcells')
        extractor = CondorFastqExtract(opts.host, flowcells,
                                       model=opts.model,
                                       compression=opts.compression,
                                       force=opts.force)
        extractor.create_scripts(results)

    if opts.scan_submission:
        if name is None:
            parser.error("Please define a submission name")
        if mapper is None:
            parser.error("Scan submission needs --make-track-hub=public-url")
        mapper.scan_submission_dirs(results)

    if opts.make_track_hub:
        mapper.make_hub(results)

    if opts.make_manifest:
        # Without a mapper the manifest call would fail with an
        # AttributeError on None, so report the missing option instead.
        if mapper is None:
            parser.error("Make manifest needs --make-track-hub=public-url")
        make_manifest(mapper, results, opts.make_manifest)

    if opts.sparql:
        sparql_query(model, opts.sparql)

    if opts.print_rdf:
        writer = get_serializer()
        # The parenthesised single-argument form behaves the same as a
        # Python 2 print statement and as the Python 3 print function.
        print(writer.serialize_model_to_string(model))
141
142
def make_manifest(mapper, results, filename=None):
    """Write the manifest built by ``mapper`` for ``results``.

    :param mapper: object providing ``make_manifest(results)``, which
        returns the manifest text.
    :param results: passed through unchanged to ``mapper.make_manifest``.
    :param filename: path of the file to write; ``None`` or ``'-'`` sends
        the manifest to standard output instead.
    """
    text = mapper.make_manifest(results)

    if filename in (None, '-'):
        sys.stdout.write(text)
        return

    with open(filename, 'w') as manifest_stream:
        manifest_stream.write(text)
151
152
def make_parser():
    """Build the optparse parser for this script.

    Options are registered in three places: a 'model' group, a 'commands'
    group, and directly on the parser (fastq tuning and logging switches).
    ``api.add_auth_options`` then gets the parser to add its own options.

    :returns: the configured ``OptionParser``.
    """
    def add_value(target, flag, text):
        # Option taking a value, defaulting to None.
        target.add_option(flag, default=None, help=text)

    def add_switch(target, flag, text):
        # Boolean flag, False unless given on the command line.
        target.add_option(flag, default=False, action="store_true", help=text)

    parser = OptionParser()

    model_group = OptionGroup(parser, 'model')
    model_group.add_option('--name', help="Set submission name")
    add_value(model_group, '--db-path', "set rdf database path")
    add_value(model_group, '--model', "Load model database")
    add_value(model_group, '--load-rdf', "load rdf statements into model")
    add_value(model_group, '--sparql', "execute sparql query")
    add_switch(model_group, '--print-rdf', "print ending model state")
    parser.add_option_group(model_group)

    # commands
    command_group = OptionGroup(parser, 'commands')
    add_value(command_group, '--make-link-tree-from',
              "create directories & link data files")
    add_value(command_group, '--copy-tree-from',
              "create directories & copy data files")
    add_switch(command_group, '--fastq',
               "generate scripts for making fastq files")
    add_switch(command_group, '--scan-submission',
               "Import metadata for submission into our model")
    add_value(command_group, '--make-track-hub',
              'web root that will host the trackhub.')
    add_value(command_group, '--track-hub-upload',
              'where to upload track hub <host>:<path>')
    add_value(command_group, '--make-manifest',
              'name the manifest file name or - for stdout to create it')
    parser.add_option_group(command_group)

    add_switch(parser, '--force', "Force regenerating fastqs")
    parser.add_option('--compression', default=None, type='choice',
                      choices=['gzip'],
                      help='select compression type for fastq files')
    add_value(parser, '--daf', 'specify daf name')
    add_value(parser, '--library-url',
              "specify an alternate source for library information")

    # debugging
    add_switch(parser, '--verbose', 'verbose logging')
    add_switch(parser, '--debug', 'debug logging')

    api.add_auth_options(parser)

    return parser
207
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()