921988a700bcb6be16f68dc5d5ed2e14849e6870
[htsworkflow.git] / encode_submission / encode3.py
1 """Create a track hub
2 """
3
4 #!/usr/bin/env python
5 from ConfigParser import SafeConfigParser
6 import fnmatch
7 from glob import glob
8 import json
9 import logging
10 import netrc
11 from optparse import OptionParser, OptionGroup
12 import os
13 from pprint import pprint, pformat
14 import shlex
15 from StringIO import StringIO
16 import stat
17 import sys
18 import time
19 import types
20 import urllib
21 import urllib2
22 import urlparse
23 from zipfile import ZipFile
24
25 import RDF
26
27 from htsworkflow.util import api
28 from htsworkflow.util.rdfhelp import \
29     dafTermOntology, \
30     fromTypedNode, \
31     get_model, \
32     get_serializer, \
33     load_into_model, \
34     sparql_query, \
35     submissionOntology
36 from htsworkflow.submission.daf import get_submission_uri
37 from htsworkflow.submission.submission import list_submissions
38 from htsworkflow.submission.results import ResultMap
39 from htsworkflow.submission.trackhub_submission import TrackHubSubmission
40 from htsworkflow.submission.condorfastq import CondorFastqExtract
41
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Two spaces followed by the platform line separator; used as a join
# separator when listing submission names in error messages.
INDENTED = "  " + os.linesep

import django
# Select the local settings module unless the environment already names one.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings.local')
49
def _indent_names(names):
    """Return *names* one per line, each line prefixed with two spaces."""
    return os.linesep.join("  " + entry for entry in names)


def _select_submission_name(parser, requested, available):
    """Pick the submission name to work on, or abort via ``parser.error``.

    :param parser: option parser whose ``error`` method reports bad usage
    :param requested: value of ``--name`` (``None`` when not given)
    :param available: list of submission names found in the model
    :returns: the single available name when there is exactly one,
        otherwise *requested* (which may be ``None``)
    """
    if not available and requested is None:
        parser.error("Please name this submission")
    elif requested and available and requested not in available:
        parser.error("{} is not in this model. Choose from:{}{}".format(
            requested,
            os.linesep,
            _indent_names(available)))
    elif requested is None and len(available) > 1:
        parser.error("Please choose submission name from:{}{}".format(
            os.linesep,
            _indent_names(available)))
    elif len(available) == 1:
        return available[0]
    return requested


def main(cmdline=None):
    """Run the submission tool for the given command line.

    Parses options, opens the RDF model, resolves the submission name and
    then performs each requested action in a fixed order: load RDF, read
    result maps, build link/copy trees, generate fastq extraction scripts,
    scan submission directories, build the track hub, write the manifest,
    run a SPARQL query and finally print the model.

    :param cmdline: list of argument strings handed to
        ``parser.parse_args``; ``None`` lets optparse use ``sys.argv[1:]``.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)
    submission_uri = None

    from django.conf import settings

    # NOTE(review): this sets a 'level' key directly on the 'loggers'
    # mapping; whether the logging configuration honours it depends on the
    # settings module, which is not visible here -- confirm.
    if opts.debug:
        settings.LOGGING['loggers']['level'] = 'DEBUG'
    elif opts.verbose:
        settings.LOGGING['loggers']['level'] = 'INFO'

    # The returned value is not used below; the call itself is retained in
    # case it validates the authentication options -- TODO confirm.
    api.make_auth_from_opts(opts, parser)

    model = get_model(opts.model, opts.db_path)

    submission_names = list(list_submissions(model))
    name = _select_submission_name(parser, opts.name, submission_names)

    if name:
        submission_uri = get_submission_uri(name)
        logger.info('Submission URI: %s', submission_uri)
    else:
        logger.debug('No name, unable to create submission uri')

    mapper = None
    if opts.make_track_hub:
        mapper = TrackHubSubmission(name,
                                    model,
                                    baseurl=opts.make_track_hub,
                                    baseupload=opts.track_hub_upload,
                                    host=opts.host)

    if opts.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)

    results = ResultMap()
    for a in args:
        if os.path.exists(a):
            results.add_results_from_file(a)
        else:
            logger.warning("File %s doesn't exist.", a)

    if opts.make_link_tree_from is not None:
        results.make_tree_from(opts.make_link_tree_from, link=True)

    if opts.copy_tree_from is not None:
        results.make_tree_from(opts.copy_tree_from, link=False)

    if opts.fastq:
        logger.info("Building fastq extraction scripts")
        flowcells = os.path.join(opts.sequence, 'flowcells')
        extractor = CondorFastqExtract(opts.host, flowcells,
                                       model=opts.model,
                                       compression=opts.compression,
                                       force=opts.force)
        extractor.create_scripts(results)

    if opts.scan_submission:
        if name is None:
            parser.error("Please define a submission name")
        if mapper is None:
            parser.error("Scan submission needs --make-track-hub=public-url")
        mapper.scan_submission_dirs(results)

    if opts.make_track_hub:
        mapper.make_hub(results)

    if opts.make_manifest:
        # The mapper only exists when --make-track-hub was given; report
        # that as a usage error instead of failing on a None mapper.
        if mapper is None:
            parser.error("Make manifest needs --make-track-hub=public-url")
        make_manifest(mapper, results, opts.make_manifest)

    if opts.sparql:
        sparql_query(model, opts.sparql)

    if opts.print_rdf:
        writer = get_serializer()
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print(writer.serialize_model_to_string(model))
142
143
def make_manifest(mapper, results, filename=None):
    """Write the manifest produced by *mapper* for *results*.

    :param mapper: object providing ``make_manifest(results)``
    :param results: passed through unchanged to ``mapper.make_manifest``
    :param filename: destination path; ``None`` or ``'-'`` writes to
        standard output instead of a file
    """
    text = mapper.make_manifest(results)

    to_stdout = filename is None or filename == '-'
    if to_stdout:
        sys.stdout.write(text)
        return

    with open(filename, 'w') as manifest_stream:
        manifest_stream.write(text)
152
153
def make_parser():
    """Build the command-line option parser for this script.

    Options are declared in three tables: a 'model' group, a 'commands'
    group, and ungrouped general/debugging flags.  They are registered in
    declaration order, after which ``api.add_auth_options`` adds its own
    options to the parser.

    :returns: the configured ``OptionParser``
    """
    def register(container, specs):
        # Add every (flag, keyword-arguments) pair in declaration order.
        for flag, kwargs in specs:
            container.add_option(flag, **kwargs)

    model_specs = [
        ('--name', dict(help="Set submission name")),
        ('--db-path', dict(default=None,
                           help="set rdf database path")),
        ('--model', dict(default=None,
                         help="Load model database")),
        ('--load-rdf', dict(default=None,
                            help="load rdf statements into model")),
        ('--sparql', dict(default=None, help="execute sparql query")),
        ('--print-rdf', dict(action="store_true", default=False,
                             help="print ending model state")),
    ]

    command_specs = [
        ('--make-link-tree-from',
         dict(default=None,
              help="create directories & link data files")),
        ('--copy-tree-from',
         dict(default=None,
              help="create directories & copy data files")),
        ('--fastq',
         dict(default=False, action="store_true",
              help="generate scripts for making fastq files")),
        ('--scan-submission',
         dict(default=False, action="store_true",
              help="Import metadata for submission into our model")),
        ('--make-track-hub',
         dict(default=None,
              help='web root that will host the trackhub.')),
        ('--track-hub-upload',
         dict(default=None,
              help='where to upload track hub <host>:<path>')),
        ('--make-manifest',
         dict(default=None,
              help='name the manifest file name or - for stdout to create it')),
    ]

    general_specs = [
        ('--force',
         dict(default=False, action="store_true",
              help="Force regenerating fastqs")),
        ('--compression',
         dict(default=None, type='choice',
              choices=['gzip'],
              help='select compression type for fastq files')),
        ('--daf', dict(default=None, help='specify daf name')),
        ('--library-url',
         dict(default=None,
              help="specify an alternate source for library information")),
        # debugging
        ('--verbose',
         dict(default=False, action="store_true",
              help='verbose logging')),
        ('--debug',
         dict(default=False, action="store_true",
              help='debug logging')),
    ]

    option_parser = OptionParser()

    model_group = OptionGroup(option_parser, 'model')
    register(model_group, model_specs)
    option_parser.add_option_group(model_group)

    # commands
    command_group = OptionGroup(option_parser, 'commands')
    register(command_group, command_specs)
    option_parser.add_option_group(command_group)

    register(option_parser, general_specs)

    api.add_auth_options(option_parser)

    return option_parser
208
if __name__ == "__main__":
    # Script entry point: initialise Django first, then run the tool;
    # main() imports django.conf.settings.
    django.setup()
    main()