X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=blobdiff_plain;f=htsworkflow%2Fsubmission%2Fcondorfastq.py;h=37d60edf9572ff8c51e6328b88ecc53467496ae8;hp=b6c22348f581f5148eeaa33cacfafe65f6ebc963;hb=4db6d9f86b96b7b289dc9e6a15b1379418c865ea;hpb=3101ecf2c080bff9e424ad11f4e153546d6723b5 diff --git a/htsworkflow/submission/condorfastq.py b/htsworkflow/submission/condorfastq.py index b6c2234..37d60ed 100644 --- a/htsworkflow/submission/condorfastq.py +++ b/htsworkflow/submission/condorfastq.py @@ -27,11 +27,16 @@ import RDF LOGGER = logging.getLogger(__name__) +COMPRESSION_EXTENSIONS = { + None: '', + 'gzip': '.gz' +} class CondorFastqExtract(object): def __init__(self, host, sequences_path, log_path='log', model=None, + compression=None, force=False): """Extract fastqs from results archive @@ -40,16 +45,19 @@ class CondorFastqExtract(object): apidata (dict): id & key to post to the server sequences_path (str): root of the directory tree to scan for files log_path (str): where to put condor log files + compression (str): None (no compression) or 'gzip'; 'bzip2' has no entry in COMPRESSION_EXTENSIONS force (bool): do we force overwriting current files? 
""" self.host = host self.model = get_model(model) self.sequences_path = sequences_path self.log_path = log_path + self.compression=compression self.force = force LOGGER.info("CondorFastq host={0}".format(self.host)) LOGGER.info("CondorFastq sequences_path={0}".format(self.sequences_path)) LOGGER.info("CondorFastq log_path={0}".format(self.log_path)) + LOGGER.info("Compression {0}".format(self.compression)) def create_scripts(self, result_map ): """ @@ -231,6 +239,7 @@ WHERE { 'lane': seq.lane_number, 'read': seq.read, 'cycle': seq.cycle, + 'compression_extension': COMPRESSION_EXTENSIONS[self.compression], 'is_paired': seq.ispaired } @@ -291,9 +300,14 @@ WHERE { for source in sources: paths.append(source.path) paths.sort() + compression_argument = '' + if self.compression: + compression_argument = '--'+self.compression + return { 'pyscript': desplit_fastq.__file__, 'target': target_pathname, + 'compression': compression_argument, 'sources': paths, 'ispaired': sources[0].ispaired, }