This might actually generate a SOFT file with raw & supplemental data.
[htsworkflow.git] / encode_submission / geo_gather.py
1 #!/usr/bin/env python
2 from ConfigParser import SafeConfigParser
3 import fnmatch
4 from glob import glob
5 import json
6 import logging
7 import netrc
8 from optparse import OptionParser, OptionGroup
9 import os
10 from pprint import pprint, pformat
11 import shlex
12 from StringIO import StringIO
13 import stat
14 import sys
15 import time
16 import types
17 import urllib
18 import urllib2
19 import urlparse
20 from zipfile import ZipFile
21
22 import RDF
23
# Supply a default Django settings module before the htsworkflow imports
# that follow (presumably they rely on it -- confirm).  A value already
# present in the environment is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings')
26
27
28 from htsworkflow.util import api
29 from htsworkflow.util.rdfhelp import \
30      dafTermOntology, \
31      fromTypedNode, \
32      get_model, \
33      get_serializer, \
34      load_into_model, \
35      sparql_query, \
36      submissionOntology
37 from htsworkflow.submission.daf import get_submission_uri
38 from htsworkflow.submission.results import ResultMap
39 from htsworkflow.submission.geo import GEOSubmission
40 from htsworkflow.submission.condorfastq import CondorFastqExtract
41
# Module-level logger named after this module; the logging level is
# configured in main() through logging.basicConfig.
logger = logging.getLogger(__name__)
43
def main(cmdline=None):
    """Run the GEO submission gathering tool.

    Parses the command line, opens the RDF model, optionally loads RDF
    statements into it, collects result mappings from the positional
    arguments and then runs the requested commands in this order:
    --make-tree-from, --fastq, --scan-submission, --make-soft, --sparql,
    --print-rdf.

    :param cmdline: list of argument strings passed to
        ``OptionParser.parse_args``.  ``None`` lets optparse fall back to
        ``sys.argv[1:]``.

    Usage problems (a command that needs --name being requested without
    one) are reported through ``parser.error``, which prints the message
    and exits instead of returning.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    # --debug wins over --verbose; the default only shows warnings.
    if opts.debug:
        log_level = logging.DEBUG
    elif opts.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    # The return value is not used in this function.  The call itself is
    # kept because it receives the parser and so may report option errors
    # -- NOTE(review): confirm against htsworkflow.util.api.
    api.make_auth_from_opts(opts, parser)

    model = get_model(opts.model, opts.db_path)

    # The mapper and the submission URI only exist when a non-empty
    # submission name was given; commands that need them check below.
    mapper = None
    submission_uri = None
    if opts.name:
        mapper = GEOSubmission(opts.name, model, host=opts.host)
        submission_uri = get_submission_uri(opts.name)

    if opts.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', opts.load_rdf, submission_uri)

    results = ResultMap()
    for result_file in args:
        results.add_results_from_file(result_file)

    if opts.make_tree_from is not None:
        results.make_tree_from(opts.make_tree_from)

    if opts.fastq:
        logger.info("Building fastq extraction scripts")
        # NOTE(review): opts.sequence and opts.host are not declared in
        # make_parser(); presumably api.add_auth_options() adds them.
        flowcells = os.path.join(opts.sequence, 'flowcells')
        extractor = CondorFastqExtract(opts.host, flowcells,
                                       model=opts.model,
                                       force=opts.force)
        extractor.create_scripts(results)

    if opts.scan_submission:
        # Test the mapper rather than opts.name: an empty --name leaves
        # mapper unset even though opts.name is not None.
        if mapper is None:
            parser.error("Please define a submission name")
        mapper.scan_submission_dirs(results)

    if opts.make_soft:
        # Without this guard a missing --name crashed with an
        # AttributeError on None instead of a usage message.
        if mapper is None:
            parser.error("Please define a submission name")
        mapper.make_soft(results)

    if opts.sparql:
        sparql_query(model, opts.sparql)

    if opts.print_rdf:
        writer = get_serializer()
        # Parenthesised single argument: prints identically under the
        # Python 2 print statement this file otherwise targets.
        print(writer.serialize_model_to_string(model))
99
100
def make_parser():
    """Build and return the OptionParser used by main().

    Options are registered in three places, in this order: a 'model'
    option group, a 'commands' option group, and directly on the parser
    (--force, --daf, --library-url and the logging switches).  Finally
    the parser is handed to ``api.add_auth_options`` so that module can
    register its own options (which ones is not visible here).
    """
    # Keyword arguments shared by every boolean switch.
    switch = dict(action="store_true", default=False)

    parser = OptionParser()

    # Options describing the RDF model and what to do with it.
    model_group = OptionGroup(parser, 'model')
    model_specs = (
        ('--name', dict(help="Set submission name")),
        ('--db-path', dict(default=None, help="set rdf database path")),
        ('--model', dict(default=None, help="Load model database")),
        ('--load-rdf', dict(default=None,
                            help="load rdf statements into model")),
        ('--sparql', dict(default=None, help="execute sparql query")),
        ('--print-rdf', dict(switch, help="print ending model state")),
    )
    for flag, settings in model_specs:
        model_group.add_option(flag, **settings)
    parser.add_option_group(model_group)

    # Actions the script can be asked to perform.
    command_group = OptionGroup(parser, 'commands')
    command_specs = (
        ('--make-tree-from', dict(default=None,
                                  help="create directories & link data files")),
        ('--fastq', dict(switch,
                         help="generate scripts for making fastq files")),
        ('--scan-submission', dict(
            switch,
            help="Import metadata for submission into our model")),
        ('--make-soft', dict(switch, help='make the soft file')),
    )
    for flag, settings in command_specs:
        command_group.add_option(flag, **settings)
    parser.add_option_group(command_group)

    # Ungrouped options; the last two control logging verbosity.
    general_specs = (
        ('--force', dict(switch, help="Force regenerating fastqs")),
        ('--daf', dict(default=None, help='specify daf name')),
        ('--library-url', dict(
            default=None,
            help="specify an alternate source for library information")),
        ('--verbose', dict(switch, help='verbose logging')),
        ('--debug', dict(switch, help='debug logging')),
    )
    for flag, settings in general_specs:
        parser.add_option(flag, **settings)

    api.add_auth_options(parser)

    return parser
144
145
# Script entry point: main() is called without arguments, so cmdline is
# None when it reaches parser.parse_args().
if __name__ == "__main__":
    main()