Add mode to connect to the DCC and make sure the files with a .upload file are actual...
[htsworkflow.git] / encode_submission / encode3.py
1 #!/usr/bin/env python
"""Prepare an ENCODE3 submission and upload its files to the DCC
"""
4 from __future__ import print_function, unicode_literals
5
6 import argparse
7 import fnmatch
8 from glob import glob
9 import json
10 import logging
11 import netrc
12 import os
13 from pprint import pprint, pformat
14 import shlex
15 from six.moves import StringIO
16 import stat
17 import sys
18 import time
19 import types
20 from zipfile import ZipFile
21
22 import RDF
23
24 from htsworkflow.util import api
25 from htsworkflow.util.rdfhelp import \
26     dafTermOntology, \
27     fromTypedNode, \
28     get_model, \
29     get_serializer, \
30     load_into_model, \
31     sparql_query, \
32     submissionOntology
33 from htsworkflow.submission.daf import get_submission_uri
34 from htsworkflow.submission.submission import list_submissions
35 from htsworkflow.submission.results import ResultMap
36 from htsworkflow.submission.condorfastq import CondorFastqExtract
37 from htsworkflow.submission.aws_submission import AWSSubmission
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Two spaces followed by the platform line separator.
# NOTE(review): when this is used as a str.join() separator the spaces end up
# trailing each item rather than indenting the one that follows -- confirm
# that is the intended layout.
INDENTED = "".join(("  ", os.linesep))
41
42 import django
# Fall back to the local htsworkflow settings unless the caller has already
# exported DJANGO_SETTINGS_MODULE; an existing value is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings.local')
45
def main(cmdline=None):
    """Run the submission tool.

    Parses the command line, adjusts the htsworkflow log level, opens the
    RDF model, resolves the submission name and then runs every requested
    step in a fixed order: load RDF, read the library result maps, build
    link/copy trees, generate fastq extraction scripts, scan submission
    directories, upload, check uploads, run a SPARQL query and finally
    print the model.

    :param cmdline: list of argument strings to parse; None lets argparse
                    fall back to sys.argv[1:].

    Invalid or missing submission names are reported through
    parser.error(), which terminates the program.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    submission_uri = None

    from django.conf import settings

    # The log level has to be changed before django.setup() is called.
    if args.debug:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'DEBUG'
    elif args.verbose:
        settings.LOGGING['loggers']['htsworkflow']['level'] = 'INFO'

    django.setup()

    model = get_model(args.model, args.db_path)
    submission_names = list(list_submissions(model))
    name = args.name

    # Line break followed by two spaces, so that every listed submission
    # name starts on its own, indented line in the error messages below.
    indent = os.linesep + "  "

    if not submission_names and args.name is None:
        parser.error("Please name this submission")
    elif args.name and submission_names and args.name not in submission_names:
        parser.error("{} is not in this model. Choose from: {}{}".format(
            args.name,
            indent,
            indent.join(submission_names)))
    elif args.name is None and len(submission_names) > 1:
        parser.error("Please choose submission name from: {}{}".format(
            indent,
            indent.join(submission_names)))
    elif len(submission_names) == 1:
        # Only one submission is known to the model, so use it.
        name = submission_names[0]

    if name:
        submission_uri = get_submission_uri(name)
        logger.info('Submission URI: %s', submission_uri)

    # NOTE(review): args.host (and args.sequence below) are not defined in
    # make_parser itself; presumably api.add_auth_options() registers them --
    # confirm.
    mapper = AWSSubmission(name, model, encode_host=args.encoded, lims_host=args.host)

    if args.load_rdf is not None:
        if submission_uri is None:
            parser.error("Please specify the submission name")
        load_into_model(model, 'turtle', args.load_rdf, submission_uri)

    results = ResultMap()
    for a in args.libraries:
        if os.path.exists(a):
            results.add_results_from_file(a)
        else:
            # Logger.warn is a deprecated alias; use warning().
            logger.warning("File %s doesn't exist.", a)

    if args.make_link_tree_from is not None:
        results.make_tree_from(args.make_link_tree_from, link=True)

    if args.copy_tree_from is not None:
        results.make_tree_from(args.copy_tree_from, link=False)

    if args.fastq:
        logger.info("Building fastq extraction scripts")
        flowcells = os.path.join(args.sequence, 'flowcells')
        extractor = CondorFastqExtract(args.host, flowcells,
                                       model=args.model,
                                       compression=args.compression,
                                       force=args.force)
        extractor.create_scripts(results)

    if args.scan_submission:
        if name is None:
            parser.error("Please define a submission name")
        mapper.scan_submission_dirs(results)

    if args.upload:
        mapper.upload(results, args.dry_run)

    if args.check_upload:
        mapper.check_upload(results)

    if args.sparql:
        sparql_query(model, args.sparql)

    if args.print_rdf:
        writer = get_serializer()
        print(writer.serialize_model_to_string(model))
127
128
def make_parser():
    """Build the command line parser for the submission tool.

    Options are registered in this order: the 'model' group, the
    'commands' group, general options, logging flags, whatever
    api.add_auth_options() adds, and last the positional 'libraries'
    argument.

    :return: the configured argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser()

    # model options
    model_group = parser.add_argument_group('model')
    model_group.add_argument('--name', help="Set submission name")
    for option, description in (
            ('--db-path', "set rdf database path"),
            ('--model', "Load model database"),
            ('--load-rdf', "load rdf statements into model"),
            ('--sparql', "execute sparql query")):
        model_group.add_argument(option, default=None, help=description)
    model_group.add_argument(
        '--print-rdf',
        action="store_true",
        default=False,
        help="print ending model state")

    # commands
    command_group = parser.add_argument_group('commands')
    # commands that take a source directory
    for option, description in (
            ('--make-link-tree-from', "create directories & link data files"),
            ('--copy-tree-from', "create directories & copy data files")):
        command_group.add_argument(option, help=description, default=None)
    # commands that are simple on/off switches
    for option, description in (
            ('--fastq', "generate scripts for making fastq files"),
            ('--scan-submission', "cache md5 sums"),
            ('--upload', "Upload files"),
            ('--check-upload', 'check to see files are actually uploaded')):
        command_group.add_argument(
            option, default=False, action="store_true", help=description)

    # general options
    parser.add_argument(
        '--force',
        default=False,
        action="store_true",
        help="Force regenerating fastqs")
    parser.add_argument(
        '--compression',
        default=None,
        choices=['gzip'],
        help='select compression type for fastq files')
    parser.add_argument(
        '--library-url',
        default=None,
        help="specify an alternate source for library information")
    parser.add_argument(
        '--encoded',
        default='www.encodeproject.org',
        help='base url for talking to encode server')
    parser.add_argument(
        '--dry-run',
        default=False,
        action='store_true',
        help='avoid making changes to encoded')

    # debugging
    for option, description in (
            ('--verbose', 'verbose logging'),
            ('--debug', 'debug logging')):
        parser.add_argument(
            option, default=False, action="store_true", help=description)

    api.add_auth_options(parser)

    parser.add_argument(
        'libraries',
        nargs='+',
        help='mapping of library id to directory to be processed')

    return parser
185
# Run the command line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()