1b7ae045281a07152623c34d5190ee48fd6a56bd
[htsworkflow.git] / encode_submission / encode3.py
1 #!/usr/bin/env python
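"""Prepare and upload an ENCODE submission from the command line.

Depending on the options given, this script loads RDF into the model,
builds link or copy trees of result files, generates fastq extraction
scripts, scans submission directories and uploads files.
"""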
4 from __future__ import print_function, unicode_literals
5
6 import argparse
7 import fnmatch
8 from glob import glob
9 import json
10 import logging
11 import netrc
12 import os
13 from pprint import pprint, pformat
14 import shlex
15 from six.moves import StringIO
16 import stat
17 import sys
18 import time
19 import types
20 from zipfile import ZipFile
21
22 import RDF
23
24 from htsworkflow.util import api
25 from htsworkflow.util.rdfhelp import \
26     dafTermOntology, \
27     fromTypedNode, \
28     get_model, \
29     get_serializer, \
30     load_into_model, \
31     sparql_query, \
32     submissionOntology
33 from htsworkflow.submission.daf import get_submission_uri
34 from htsworkflow.submission.submission import list_submissions
35 from htsworkflow.submission.results import ResultMap
36 from htsworkflow.submission.condorfastq import CondorFastqExtract
37 from htsworkflow.submission.aws_submission import AWSSubmission
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Join separator: two spaces followed by the platform line ending.
INDENTED = "  {}".format(os.linesep)
41
42 import django
# Use the local settings module unless the environment already names one.
# setdefault leaves an existing value untouched, exactly like the explicit
# "not in" check it replaces.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings.local')
45
def main(cmdline=None):
    """Run the submission tool.

    Parses the command line, sets the htsworkflow log level, initialises
    Django, opens the RDF model and then performs each requested step in
    this order: load RDF, read the library result maps, build link/copy
    trees, generate fastq extraction scripts, scan submission directories,
    upload, run a SPARQL query, print the model.

    :param cmdline: list of argument strings; ``None`` lets argparse read
        ``sys.argv[1:]``.

    Exits through ``parser.error`` (SystemExit) when the submission name
    is missing, unknown or ambiguous.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    from django.conf import settings

    # Adjust the configured log level before django.setup() runs.
    if args.debug:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'DEBUG'
    elif args.verbose:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'INFO'

    django.setup()

    model = get_model(args.model, args.db_path)
    name = _select_submission_name(
        parser, args.name, list(list_submissions(model)))

    submission_uri = None
    if name:
        submission_uri = get_submission_uri(name)
        logger.info('Submission URI: %s', submission_uri)

    mapper = AWSSubmission(name, model, encode_host=args.encoded, lims_host=args.host)

    if args.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', args.load_rdf, submission_uri)

    results = ResultMap()
    for library_map in args.libraries:
        if os.path.exists(library_map):
            results.add_results_from_file(library_map)
        else:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning("File %s doesn't exist.", library_map)

    if args.make_link_tree_from is not None:
        results.make_tree_from(args.make_link_tree_from, link=True)

    if args.copy_tree_from is not None:
        results.make_tree_from(args.copy_tree_from, link=False)

    if args.fastq:
        logger.info("Building fastq extraction scripts")
        flowcells = os.path.join(args.sequence, 'flowcells')
        extractor = CondorFastqExtract(args.host, flowcells,
                                       model=args.model,
                                       compression=args.compression,
                                       force=args.force)
        extractor.create_scripts(results)

    if args.scan_submission:
        if name is None:
            parser.error("Please define a submission name")
        mapper.scan_submission_dirs(results)

    if args.upload:
        mapper.upload(results, args.dry_run)

    if args.sparql:
        sparql_query(model, args.sparql)

    if args.print_rdf:
        writer = get_serializer()
        print(writer.serialize_model_to_string(model))


def _select_submission_name(parser, requested, available):
    """Return the submission name to use, or abort with a usage error.

    :param parser: argparse parser whose ``error`` method reports problems.
    :param requested: value of ``--name`` (``None`` when not given).
    :param available: list of submission names already in the model.

    Rules, checked in order:
      * no names in the model and none requested -> error
      * a requested name that is not among the model's names -> error
      * nothing requested but several names in the model -> error
      * exactly one name in the model -> that name
      * otherwise -> the requested name, unchanged
    """
    # One name per line, each indented by two spaces.
    listing = os.linesep.join("  " + known for known in available)

    if not available and requested is None:
        parser.error("Please name this submission")
    elif requested and available and requested not in available:
        parser.error("{} is not in this model. Choose from: {}{}".format(
            requested,
            os.linesep,
            listing))
    elif requested is None and len(available) > 1:
        parser.error("Please choose submission name from: {}{}".format(
            os.linesep,
            listing))
    elif len(available) == 1:
        return available[0]
    return requested
124
125
def make_parser():
    """Build the argparse parser for this script.

    Options are registered in three places: a 'model' group, a 'commands'
    group and the parser itself. Further options are added by
    ``api.add_auth_options``. The positional ``libraries`` argument takes
    one or more values.

    :returns: the configured ``argparse.ArgumentParser``.
    """
    def add_switch(target, flag, description):
        # Boolean option: False unless present on the command line.
        target.add_argument(flag, default=False, action="store_true",
                            help=description)

    def add_value(target, flag, description, default=None, **extra):
        # Option that takes a value; defaults to None unless stated.
        target.add_argument(flag, default=default, help=description, **extra)

    parser = argparse.ArgumentParser()

    model_group = parser.add_argument_group('model')
    add_value(model_group, '--name', "Set submission name")
    add_value(model_group, '--db-path', "set rdf database path")
    add_value(model_group, '--model', "Load model database")
    add_value(model_group, '--load-rdf', "load rdf statements into model")
    add_value(model_group, '--sparql', "execute sparql query")
    add_switch(model_group, '--print-rdf', "print ending model state")

    # commands
    command_group = parser.add_argument_group('commands')
    add_value(command_group, '--make-link-tree-from',
              "create directories & link data files")
    add_value(command_group, '--copy-tree-from',
              "create directories & copy data files")
    add_switch(command_group, '--fastq',
               "generate scripts for making fastq files")
    add_switch(command_group, '--scan-submission', "cache md5 sums")
    add_switch(command_group, '--upload', "Upload files")

    add_switch(parser, '--force', "Force regenerating fastqs")
    add_value(parser, '--compression',
              'select compression type for fastq files',
              choices=['gzip'])
    add_value(parser, '--library-url',
              "specify an alternate source for library information")
    add_value(parser, '--encoded', 'base url for talking to encode server',
              default='www.encodeproject.org')
    add_switch(parser, '--dry-run', 'avoid making changes to encoded')

    # debugging
    add_switch(parser, '--verbose', 'verbose logging')
    add_switch(parser, '--debug', 'debug logging')

    api.add_auth_options(parser)

    parser.add_argument('libraries', nargs='+',
                        help='mapping of library id to directory to be processed')

    return parser
180
if __name__ == "__main__":
    # Pass main()'s return value to sys.exit so it becomes the exit status.
    # main() currently returns None, which sys.exit treats as success.
    sys.exit(main())