1 /*==========================================================================
2 SeqAn - The Library for Sequence Analysis
4 ============================================================================
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 ============================================================================
18 $Id: file_format_fasta_align.h,v 1.1 2008/08/25 16:20:03 langmead Exp $
19 ==========================================================================*/
21 #ifndef SEQAN_HEADER_FILE_FASTA_ALIGN_H
22 #define SEQAN_HEADER_FILE_FASTA_ALIGN_H
24 namespace SEQAN_NAMESPACE_MAIN
27 //////////////////////////////////////////////////////////////////////////////
28 // File Formats - Fasta alignment format
29 //////////////////////////////////////////////////////////////////////////////
31 //forward declarations
38 //////////////////////////////////////////////////////////////////////////////
41 .Tag.File Format.tag.Fasta alignment:
42 FASTA alignment file format for sequences.
// Tag type for the FASTA alignment file format. The functions in this file
// take a FastaAlign value as their last parameter so that overloads are
// selected by file format.
44 struct TagFastaAlign_;
45 typedef Tag<TagFastaAlign_> const FastaAlign;
48 /////////////////////////////////////////////////////////////////////////
// Scans characters from `file` and updates `count` (passed by reference).
// Precondition (asserted): the stream is not at end-of-file on entry.
// NOTE(review): this listing omits lines of the body (the loop header, the
// statement guarded by the final `if`, closing braces); the comments below
// describe only what the visible lines establish.
50 template <typename TFile, typename TSize>
51 void _fasta_align_scan_line(TFile & file, TSize & count) {
54 SEQAN_ASSERT(!_streamEOF(file))
// Read one character from the stream.
57 typename Value<TFile>::Type c = _streamGet(file);
// Stop scanning at end of stream or at a line feed.
59 if (_streamEOF(file)) return;
60 if (c == '\n') return;
// Carriage returns and '-' (gap) characters are excluded here; presumably the
// guarded statement increments `count` for every other character -- TODO confirm.
62 if ((c != '\r') && (c!='-')) {
68 //////////////////////////////////////////////////////////////////////////////
70 //////////////////////////////////////////////////////////////////////////////
// Reads an alignment in FASTA alignment format from `file` into `align`.
// Precondition (asserted): the stream is not at end-of-file on entry.
// NOTE(review): this listing omits lines of the body (closing braces, some
// conditions, and short statements such as the declaration of `count`); the
// comments below describe only what the visible lines establish.
//
// Visible structure:
//  - A scanning phase records, for each sequence, a begin stream position, an
//    end stream position and a character count in `beg_end_length`
//    (three consecutive entries per sequence; see the division by 3 below).
//  - A filling phase then creates and sizes the source of every row, seeks
//    back to the recorded begin position and copies the characters.
//  - Finally the stream is repositioned to offset 0.
71 template <typename TFile, typename TSource, typename TSpec>
72 void read(TFile & file, Align<TSource, TSpec> & align, FastaAlign) {
75 SEQAN_ASSERT(!_streamEOF(file))
77 typedef typename Value<TSource>::Type TSourceValue;
78 typedef typename Size<TSourceValue>::Type TSize;
// Largest representable TSize value; its use is not visible in this listing.
79 TSize limit = supremumValue<TSize>();
81 //Determine begin position, end position and length of each sequence
82 String<TSize> beg_end_length;
84 typename Position<TFile>::Type begin_pos;
85 typename Position<TFile>::Type end_pos;
86 typename Value<TFile>::Type c;
89 while (!_streamEOF(file)) {
90 begin_pos = _streamTellG(file);
92 SEQAN_ASSERT(!_streamEOF(file))
// Scan a line and take the stream position after it as the begin position.
// Presumably this branch skips an id line (see the `else` comment below);
// the branch condition is not visible -- TODO confirm.
99 _fasta_align_scan_line(file, count);
100 begin_pos = _streamTellG(file);
102 } else { //If no id first letter belongs to sequence
108 _fasta_align_scan_line(file, count);
// NOTE(review): this declares a new `c`, which appears to shadow the `c`
// declared before the while loop -- confirm this is intended.
110 typename Value<TFile>::Type c = _streamGet(file);
// Presumably steps back over the character just read so that end_pos refers
// to the position before it -- TODO confirm the semantics of _streamSeek2G.
112 _streamSeek2G(file, -1);
113 end_pos = _streamTellG(file);
// At end of stream the current position is used as the end position.
116 if (_streamEOF(file)) {
117 end_pos = _streamTellG(file);
// Same character filter as in _fasta_align_scan_line: line breaks and '-'
// are excluded; the guarded statement is not visible in this listing.
120 if ((c != '\n') && (c != '\r') && (c!='-')) {
// Store the triple (begin position, end position, count).
128 append(beg_end_length, begin_pos);
129 append(beg_end_length, end_pos);
130 append(beg_end_length, count);
133 // Resize alignment data structure
// Three entries were stored per sequence, hence the division by 3.
134 TSize numRows=length(beg_end_length) / 3;
135 resize(rows(align), numRows); //rows
137 typedef Align<TSource, TSpec> TAlign;
138 typedef typename Row<TAlign>::Type TRow;
140 for(TSize i=0;i<numRows;++i) {
// Entry i*3 is the begin position and entry i*3+2 the count of row i.
141 TSize begin = beg_end_length[i*3];
142 // TSize end = beg_end_length[i*3+1];
143 count = beg_end_length[i*3+2];
// Create the row's source and size it to the recorded count.
147 createSource(row(align,i));
148 resize(source(row(align,i)),count);
// If the source ended up shorter than requested, clamp count to its length.
149 if (length(source(row(align,i))) < count) {
150 count = length(source(row(align,i)));
152 setSourceEndPosition(row(align,i),count);
// Seek back to the recorded begin position of this row's data.
155 _streamSeekG(file, begin);
157 typename Position<TSource>::Type pos;
// The loop header has no increment; `pos` is presumably advanced in a line
// not visible in this listing.
158 for (pos = 0; pos < count; ) {
159 c = _streamGet(file);
// Characters other than line breaks and '-' are stored in the source at pos.
160 if ((c != '\n') && (c != '\r') && (c != '-')) {
161 source(row(align,i))[pos] = c;
// A gap is inserted at the view position corresponding to pos; the condition
// selecting this statement is not visible (presumably c == '-' -- TODO confirm).
165 insertGap(row(align,i), toViewPosition(row(align,i), pos));
// Reposition the stream to offset 0 once reading is done.
170 _streamSeekG(file, 0);
173 //////////////////////////////////////////////////////////////////////////////
175 //////////////////////////////////////////////////////////////////////////////
// Reads sequence identifiers from `file` and appends them to `ids`.
// Precondition (asserted): the stream is not at end-of-file on entry.
// NOTE(review): this listing omits lines of the body (the declaration of
// `id`, the enclosing loop, closing braces); the comments below describe only
// what the visible lines establish.
177 template <typename TFile, typename TStringContainer>
178 void readIDs(TFile& file, TStringContainer& ids, FastaAlign) {
182 SEQAN_ASSERT(!_streamEOF(file))
184 typedef typename Value<TStringContainer>::Type TString;
185 typename Position<TFile>::Type start_pos;
186 typename Value<TFile>::Type c;
// Skip characters until a '>' is read or the stream ends.
191 c = _streamGet(file);
192 while ((!_streamEOF(file)) && (c != '>')) c = _streamGet(file);
193 if (!_streamEOF(file)) {
// Remember the position right after '>' and scan the rest of the line into
// `count` (see _fasta_align_scan_line for what is counted).
194 start_pos = _streamTellG(file);
195 typename Size<TString>::Type count = 0;
196 _fasta_align_scan_line(file, count);
// A zero count means there is nothing to copy: the id is cleared.
197 if (! count) clear(id);
// The statements guarded by this length check are not visible in this listing.
200 if (length(id) < count) {
// Seek back to the remembered position and copy `count` characters into id.
203 _streamSeekG(file, start_pos);
204 for (typename Position<TString>::Type pos = 0; pos<count; ++pos) {
205 id[pos] = _streamGet(file);
208 appendValue(ids, id);
// Reposition the stream to offset 0 once reading is done.
213 _streamSeekG(file, 0);
216 //////////////////////////////////////////////////////////////////////////////
218 //////////////////////////////////////////////////////////////////////////////
220 //Fasta file records have no meta data
// Meta-data reader for the FASTA alignment format; takes the stream and the
// meta-data object by reference.
// NOTE(review): the body is not visible in this listing; given the comment
// above it presumably leaves `meta` empty -- TODO confirm.
222 template <typename TFile, typename TMeta>
223 void readMeta(TFile & file, TMeta & meta, FastaAlign) {
229 //////////////////////////////////////////////////////////////////////////////
231 //////////////////////////////////////////////////////////////////////////////
// goNext overload for the FASTA alignment format.
// Precondition (asserted): the stream is not at end-of-file on entry.
// NOTE(review): the rest of the body is not visible in this listing, so what
// the function does after the assertion cannot be stated here.
232 template <typename TFile>
233 void goNext(TFile & file, FastaAlign) {
235 SEQAN_ASSERT(!_streamEOF(file))
241 //////////////////////////////////////////////////////////////////////////////
243 //////////////////////////////////////////////////////////////////////////////
// Writes every row of `align` to `file` in FASTA alignment format.
// For row i the output is '>' followed by getValue(ids, i) and a newline,
// then the row's columns, where a gap is written as gapValue<char>() and any
// other position as the corresponding source value.
// `ids` is indexed with every row index, so it must provide an entry for each
// row of the alignment.
// NOTE(review): this listing omits lines of the body (closing braces, the
// iterator increment, the condition of the in-loop newline); the comments
// below describe only what the visible lines establish.
245 template <typename TFile, typename TStringContainer, typename TSource, typename TSpec>
246 void _write_impl(TFile& file, Align<TSource, TSpec>& align, TStringContainer& ids, FastaAlign) {
249 typedef Align<TSource, TSpec> const TAlign;
250 typedef typename Row<TAlign>::Type TRow;
251 typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
252 typedef typename Position<TAlign>::Type TPosition;
253 TRowsPosition row_count = length(rows(align));
255 for(TRowsPosition i=0;i<row_count;++i) {
256 TRow & row_ = row(align, i);
// Iterate over the row between the begin and end positions of the
// alignment's columns.
258 typedef typename Iterator<typename Row<TAlign>::Type const, Standard>::Type TIter;
259 TIter begin_ = iter(row_, beginPosition(cols(align)));
260 TIter end_ = iter(row_, endPosition(cols(align)));
// Header line: '>' followed by the id of row i.
262 _streamPut(file, '>');
263 _streamWrite(file, getValue(ids,i));
264 _streamPut(file, '\n');
267 while(begin_ != end_) {
// Newline written inside the loop; presumably guarded by a line-length
// condition that is not visible in this listing -- TODO confirm.
269 _streamPut(file, '\n');
// Gap positions are written as the gap character, others as the source value.
272 if (isGap(begin_)) _streamPut(file, gapValue<char>());
273 else _streamPut(file, getValue(source(begin_)));
// Terminating newline (visible after the loop body).
277 _streamPut(file, '\n');
281 //____________________________________________________________________________
// Writes `align` to `file` in FASTA alignment format without caller-supplied
// ids, by forwarding to _write_impl with a default-constructed
// String<String<char> >.
// NOTE(review): _write_impl declares its ids parameter as a non-const
// reference (TStringContainer&), and the argument here is a temporary;
// standard C++ does not bind a temporary to a non-const lvalue reference, so
// this call likely compiles only with a compiler extension -- confirm.
// NOTE(review): the container passed is empty, while _write_impl calls
// getValue(ids, i) for every row -- confirm this is safe for non-empty
// alignments.
283 template <typename TFile, typename TSource, typename TSpec>
284 void write(TFile & file, Align<TSource, TSpec>& align, FastaAlign) {
286 _write_impl(file, align, String<String<char> >(), FastaAlign());
289 //____________________________________________________________________________
// Writes `align` to `file` in FASTA alignment format, using `ids` for the
// header lines. Forwards all arguments unchanged to _write_impl.
291 template <typename TFile, typename TStringContainer, typename TSource, typename TSpec>
292 void write(TFile & file, Align<TSource, TSpec> & align, TStringContainer& ids, FastaAlign) {
294 _write_impl(file, align, ids, FastaAlign());
298 //VisualC++ const array bug workaround
// Overload of write taking the alignment by pointer; forwards to _write_impl.
// NOTE(review): `align` is an Align<TSource, TSpec>* here, but _write_impl
// declares that parameter as Align<TSource, TSpec>&, so the pointer is passed
// where a reference is expected -- this looks like it cannot be instantiated
// as written; confirm against the upstream SeqAn source.
299 template <typename TFile, typename TStringContainer, typename TSource, typename TSpec>
300 void write(TFile & file, Align<TSource, TSpec>* align, TStringContainer & ids, FastaAlign) {
302 _write_impl(file, align, ids, FastaAlign());
305 //____________________________________________________________________________
// Overload of write that additionally accepts a meta-data argument.
// The TMeta parameter is unnamed and is not passed on: the call forwards only
// file, align and ids to _write_impl.
307 template <typename TFile, typename TStringContainer, typename TSource, typename TSpec, typename TMeta>
308 void write(TFile & file, Align<TSource, TSpec> & align, TStringContainer& ids, TMeta &, FastaAlign) {
310 _write_impl(file, align, ids, FastaAlign());
315 //////////////////////////////////////////////////////////////////////////////
316 } //namespace SEQAN_NAMESPACE_MAIN
318 //////////////////////////////////////////////////////////////////////////////
320 #endif //#ifndef SEQAN_HEADER_...