if not args.quantifications:
parser.error("Please list files to extract quantifications from")
- output_headers, matrix = load_matrixes(geneid_map,
- args.quantifications,
+ output_headers, matrix = load_matrixes(args.quantifications,
args.column)
- write_merged_matrix(args.output, output_headers, matrix, args.no_zeros)
+ if args.output:
+ outstream = open(args.output, 'wt')
+ else:
+ outstream = sys.stdout
+ write_merged_matrix(outstream,
+ geneid_map,
+ output_headers,
+ matrix,
+ args.no_zeros)
-def load_matrixes(geneid_map, quantifications, column_name):
+ if args.output:
+ outstream.close()
+
+def load_matrixes(quantifications, column_name):
"""Load a quantification from a list of quantification files.
Rows are keyed by the gene id taken from the first column of each line.
Gene ids are mapped to gene names later, in write_merged_matrix; if a
gene name isn't found, the row is labelled with the gene id alone.
Arguments:
- geneid_map (dict): mapping between gene ids and gene names
quantifications (list): list of filenames to load from
+ column_name (str): what column we should be looking for
Returns:
output_headers (list): list of column headers for matrix
for line in instream:
columns = line.split('\t')
- key = geneid_map.get(columns[0], columns[0])
+ key = columns[0]
matrix.setdefault(key, []).append(columns[column_to_use])
logger.info("Loaded %d matrixes in %d seconds",
return output_headers, matrix
-def write_merged_matrix(output, headers, matrix, drop_zeros=False):
+def write_merged_matrix(outstream, geneid_map, headers, matrix,
+ drop_zeros=False):
"""Save matrix
Arguments:
- output (str): output filename or None for stdout
+ outstream (stream): output to write to
+ geneid_map (dict): gene id to gene name mapping
headers (list): list of matrix column headers
matrix (dict): gene id: list of values from the column of interest
drop_zeros (bool): should we drop rows that are all zero?
"""
logger.info("Writing matrix")
- if output:
- outstream = open(output, 'wt')
- else:
- outstream = sys.stdout
outstream.write('\t'.join(headers))
outstream.write(os.linesep)
break
else:
continue
+
+ label = []
+ gene_name = geneid_map.get(key, None)
+ if gene_name:
+ label.append(gene_name)
+ label.append(key)
- outstream.write(key)
+ outstream.write('-'.join(label))
outstream.write('\t')
outstream.write('\t'.join(matrix[key]))
outstream.write(os.linesep)
-
- if outstream != sys.stdout:
- outstream.close()
def make_parser():