def __init__(self, root_url, apidata):
    """Store the connection settings and build the pattern/view tables.

    :Args:
      root_url: kept as ``self.root_url``; passed to ``get_library_info``
        when library details are looked up.
      apidata: kept as ``self.apidata``; passed to ``get_library_info``
        together with ``root_url``.

    Attributes created:
      lib_cache: per-library cache of looked-up library information,
        initially empty.
      patterns: list of ``(glob pattern, view)`` pairs.  ``view`` is a view
        name, ``None`` for files that have no view, or a callable
        that is called with ``is_paired=...`` and returns a view name.
      views: maps each view name (or ``None``) to its attribute dictionary;
        every dictionary also carries its own name under ``'view'``.
    """
    self.root_url = root_url
    self.apidata = apidata

    self.lib_cache = {}
    # ma is "map algorithm"
    ma = 'TH1014'

    # More specific patterns are listed before the general ones they
    # overlap with (e.g. '*.splices.bam' before '*.bam') so that a
    # first-match scan of this table picks the specific view.
    self.patterns = [
        ('*.bai', None),
        ('*.splices.bam', 'Splices'),
        ('*.bam', self._guess_bam_view),
        ('*.jnct', 'Junctions'),
        ('*.plus.bigwig', 'PlusSignal'),
        ('*.minus.bigwig', 'MinusSignal'),
        ('*.bigwig', 'Signal'),
        ('*.tar.bz2', None),
        ('*.condor', None),
        ('*.daf', None),
        ('*.ddf', None),
        ('*denovo.genes.expr', 'GeneDeNovo'),
        ('*denovo.transcripts.expr', 'TranscriptDeNovo'),
        ('*novel.genes.expr', 'GeneDeNovo'),
        ('*novel.transcripts.expr', 'TranscriptDeNovo'),
        ('*genes.expr', 'GeneFPKM'),
        ('*transcripts.expr', 'TranscriptFPKM'),
        ('*transcript.expr', 'TranscriptFPKM'),
        ('*_r1.fastq', 'FastqRd1'),
        ('*_r2.fastq', 'FastqRd2'),
        ('*.fastq', 'Fastq'),
        ('*.gtf', 'GeneModel'),
        ('*.ini', None),
        ('*.log', None),
        ('*.stats.txt', 'InsLength'),
        ('*.srf', None),
        ('*.wig', None),
        ('*.zip', None),
    ]

    # One entry per view name that the patterns (or the callable view
    # for '*.bam') can produce.  "Align" is what _guess_bam_view returns
    # for libraries that are not paired, so it needs an entry here;
    # without it the lookup in find_attributes raises KeyError.
    self.views = {
        None: {"MapAlgorithm": "NA"},
        "Paired": {"MapAlgorithm": ma},
        "Align": {"MapAlgorithm": ma},
        "Single": {"MapAlgorithm": ma},
        "Splices": {"MapAlgorithm": ma},
        "Junctions": {"MapAlgorithm": ma},
        "PlusSignal": {"MapAlgorithm": ma},
        "MinusSignal": {"MapAlgorithm": ma},
        "Signal": {"MapAlgorithm": ma},
        "GeneDeNovo": {"MapAlgorithm": ma},
        "TranscriptDeNovo": {"MapAlgorithm": ma},
        "GeneFPKM": {"MapAlgorithm": ma},
        "TranscriptFPKM": {"MapAlgorithm": ma},
        "FastqRd1": {"MapAlgorithm": "NA", "type": "fastq"},
        "FastqRd2": {"MapAlgorithm": "NA", "type": "fastq"},
        "Fastq": {"MapAlgorithm": "NA", "type": "fastq"},
        "GeneModel": {"MapAlgorithm": ma},
        "InsLength": {"MapAlgorithm": ma},
    }
    # view name is one of the attributes
    for view_name, view_attributes in self.views.items():
        view_attributes['view'] = view_name
+
def find_attributes(self, pathname, lib_id):
    """Build the attribute dictionary describing one result file.

    Library information is looked up once per ``lib_id`` and cached in
    ``self.lib_cache``.  The file name is then matched against
    ``self.patterns``; the 'best' match is the longest matching pattern,
    so ``*.splices.bam`` wins over ``*.bam`` regardless of table order.
    When two matching patterns are equally long the one listed first wins.

    :Args:
      pathname (str): name of the file whose extension is examined
      lib_id: library identifier; used as the cache key and passed to
        ``get_library_info``

    :Returns:
      dict with the library attributes, the attributes of the selected
      view and the matched pattern under ``"extension"``; ``None`` when no
      pattern matches ``pathname``.

    :Raises:
      KeyError: if the selected view name has no entry in ``self.views``.
    """
    if lib_id not in self.lib_cache:
        self.lib_cache[lib_id] = get_library_info(self.root_url,
                                                  self.apidata, lib_id)

    lib_info = self.lib_cache[lib_id]
    if lib_info['cell_line'].lower() == 'unknown':
        logging.warning("Library %s missing cell_line", lib_id)
    attributes = {
        'cell': lib_info['cell_line'],
        'replicate': lib_info['replicate'],
    }
    is_paired = self._is_paired(lib_info)

    if is_paired:
        attributes.update(self.get_paired_attributes(lib_info))
    else:
        attributes.update(self.get_single_attributes(lib_info))

    matches = [(pattern, view) for pattern, view in self.patterns
               if fnmatch.fnmatch(pathname, pattern)]
    if not matches:
        return None

    # max() returns the first of several equally long patterns, so table
    # order still breaks ties.
    pattern, view = max(matches, key=lambda match: len(match[0]))
    if callable(view):
        # Some views depend on the library, e.g. the view for '*.bam'.
        view = view(is_paired=is_paired)

    attributes.update(self.views[view])
    attributes["extension"] = pattern
    return attributes
+
+
def _guess_bam_view(self, is_paired=True):
    """Choose the view name to report for a BAM file.

    :Args:
      is_paired (bool): truthy when the library is treated as paired

    :Returns:
      "Paired" when ``is_paired`` is truthy, otherwise "Align".
    """
    return "Paired" if is_paired else "Align"