Generate manifest files for ENCODE3
[htsworkflow.git] / encode_submission / trackhub.py
1 """Create a track hub 
2 """
3
4 #!/usr/bin/env python
5 from ConfigParser import SafeConfigParser
6 import fnmatch
7 from glob import glob
8 import json
9 import logging
10 import netrc
11 from optparse import OptionParser, OptionGroup
12 import os
13 from pprint import pprint, pformat
14 import shlex
15 from StringIO import StringIO
16 import stat
17 import sys
18 import time
19 import types
20 import urllib
21 import urllib2
22 import urlparse
23 from zipfile import ZipFile
24
25 import RDF
26
# Fall back to the htsworkflow settings module when the environment does not
# already name one; a value supplied by the caller is left untouched.
# NOTE(review): this sits ahead of the htsworkflow imports, presumably because
# they read the setting at import time -- confirm before moving it.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings')
29
30 from htsworkflow.util import api
31 from htsworkflow.util.rdfhelp import \
32      dafTermOntology, \
33      fromTypedNode, \
34      get_model, \
35      get_serializer, \
36      load_into_model, \
37      sparql_query, \
38      submissionOntology
39 from htsworkflow.submission.daf import get_submission_uri
40 from htsworkflow.submission.results import ResultMap
41 from htsworkflow.submission.trackhub import TrackHubSubmission
42 from htsworkflow.submission.condorfastq import CondorFastqExtract
43
# Module-level logger, named after this module; main() configures the level.
logger = logging.getLogger(__name__)
45
def main(cmdline=None):
    """Command line entry point: build a track hub and/or manifest.

    The steps run in a fixed order, each only when its option was given:
    load RDF into the model, read result maps from the positional arguments,
    build link/copy trees, generate fastq extraction scripts, scan the
    submission directories, write the hub, write the manifest, run a SPARQL
    query and finally print the model.

    :param cmdline: list of argument strings; when None optparse falls back
        to ``sys.argv[1:]``.

    Commands that need a submission (--load-rdf, --scan-submission,
    --make-hub, --make-manifest) end with ``parser.error`` (usage message
    and exit) when no submission name was supplied.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)
    submission_uri = None

    if opts.debug:
        log_level = logging.DEBUG
    elif opts.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    # The return value is not needed in this script; the call is kept
    # because it may validate the authentication options -- TODO confirm.
    api.make_auth_from_opts(opts, parser)

    model = get_model(opts.model, opts.db_path)
    mapper = None
    if opts.name:
        mapper = TrackHubSubmission(opts.name, model, host=opts.host)
        submission_uri = get_submission_uri(opts.name)

    if opts.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)

    results = ResultMap()
    for a in args:
        if os.path.exists(a):
            results.add_results_from_file(a)
        else:
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning("File %s doesn't exist.", a)

    if opts.make_link_tree_from is not None:
        results.make_tree_from(opts.make_link_tree_from, link=True)

    if opts.copy_tree_from is not None:
        results.make_tree_from(opts.copy_tree_from, link=False)

    if opts.fastq:
        logger.info("Building fastq extraction scripts")
        flowcells = os.path.join(opts.sequence, 'flowcells')
        extractor = CondorFastqExtract(opts.host, flowcells,
                                       model=opts.model,
                                       force=opts.force)
        extractor.create_scripts(results)

    # The three commands below all go through the mapper, which only exists
    # when a non-empty --name was given.  Test the mapper itself so that a
    # missing *or* empty name produces a usage error instead of an
    # AttributeError on None.
    if opts.scan_submission:
        if mapper is None:
            parser.error("Please define a submission name")
        mapper.scan_submission_dirs(results)

    if opts.make_hub:
        if mapper is None:
            parser.error("Please define a submission name")
        make_hub(mapper, results, opts.make_hub)

    if opts.make_manifest:
        if mapper is None:
            parser.error("Please define a submission name")
        make_manifest(mapper, results, opts.make_manifest)

    if opts.sparql:
        sparql_query(model, opts.sparql)

    if opts.print_rdf:
        writer = get_serializer()
        # Single-argument parenthesised form: same output under the Python 2
        # print statement and valid syntax as the Python 3 function.
        print(writer.serialize_model_to_string(model))
110
111
def make_hub(mapper, results, filename=None):
    """Generate the track hub text and write it out.

    :param mapper: object providing ``make_hub(results)``; whatever that
        call returns is written as text.
    :param results: passed through unchanged to ``mapper.make_hub``.
    :param filename: destination path; ``None`` or ``'-'`` sends the text to
        standard output instead of a file.
    """
    trackdb = mapper.make_hub(results)

    if filename is None or filename == '-':
        sys.stdout.write(trackdb)
    else:
        # Write to the name the caller asked for.  This previously ignored
        # ``filename`` and always created 'trackDb.txt' in the current
        # directory.
        with open(filename, 'w') as trackstream:
            trackstream.write(trackdb)
120
def make_manifest(mapper, results, filename=None):
    """Generate the manifest text and write it out.

    :param mapper: object providing ``make_manifest(results)``; whatever
        that call returns is written as text.
    :param results: passed through unchanged to ``mapper.make_manifest``.
    :param filename: destination path; ``None`` or ``'-'`` sends the text to
        standard output instead of a file.
    """
    text = mapper.make_manifest(results)

    to_stdout = filename is None or filename == '-'
    if to_stdout:
        sys.stdout.write(text)
        return

    with open(filename, 'w') as outstream:
        outstream.write(text)
129         
def make_parser():
    """Build the optparse parser for this script.

    Options are registered in three places: a 'model' group (submission name
    and RDF model handling), a 'commands' group (the actions main() can
    perform) and a few ungrouped options on the parser itself.  The parser is
    then handed to ``api.add_auth_options`` before being returned.

    NOTE(review): main() reads ``opts.host`` and ``opts.sequence``, which are
    not declared here -- presumably ``api.add_auth_options`` supplies them;
    confirm.
    """
    parser = OptionParser()

    # -- model handling ---------------------------------------------------
    model_group = OptionGroup(parser, 'model')
    model_group.add_option('--name', help="Set submission name")
    model_group.add_option(
        '--db-path', default=None, help="set rdf database path")
    model_group.add_option(
        '--model', default=None, help="Load model database")
    model_group.add_option(
        '--load-rdf', default=None, help="load rdf statements into model")
    model_group.add_option(
        '--sparql', default=None, help="execute sparql query")
    model_group.add_option(
        '--print-rdf', action="store_true", default=False,
        help="print ending model state")
    parser.add_option_group(model_group)

    # -- commands ---------------------------------------------------------
    command_group = OptionGroup(parser, 'commands')
    command_group.add_option(
        '--make-link-tree-from', default=None,
        help="create directories & link data files")
    command_group.add_option(
        '--copy-tree-from', default=None,
        help="create directories & copy data files")
    command_group.add_option(
        '--fastq', action="store_true", default=False,
        help="generate scripts for making fastq files")
    command_group.add_option(
        '--scan-submission', action="store_true", default=False,
        help="Import metadata for submission into our model")
    command_group.add_option(
        '--make-hub', default=None,
        help='name the hub file or - for stdout to create it')
    command_group.add_option(
        '--make-manifest', default=None,
        help='name the manifest file name or - for stdout to create it')
    parser.add_option_group(command_group)

    # -- ungrouped options ------------------------------------------------
    parser.add_option(
        '--force', action="store_true", default=False,
        help="Force regenerating fastqs")
    parser.add_option('--daf', default=None, help='specify daf name')
    parser.add_option(
        '--library-url', default=None,
        help="specify an alternate source for library information")

    # debugging switches share everything but their name and help text
    for flag, description in (('--verbose', 'verbose logging'),
                              ('--debug', 'debug logging')):
        parser.add_option(flag, action="store_true", default=False,
                          help=description)

    api.add_auth_options(parser)

    return parser
180
# Run the command line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()