X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=blobdiff_plain;f=alg%2Fsequence.cpp;h=2e845c67bdfaa84d5843e0b05a5e3a766e363eaf;hp=05ec0a928dfc63390421f87927444b4179332063;hb=02adcab9918657891638e68580b8b88e6d935875;hpb=7d16486e23b3d94c49986b082b9de1b508fd6183 diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 05ec0a9..2e845c6 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -710,10 +710,97 @@ Sequence::save(fs::fstream &save_file) //save_file.close(); } -void -Sequence::load_museq(fs::path load_file_path, int seq_num) +//void +//Sequence::load_museq(fs::path load_file_path, int seq_num) +//{ +// fs::fstream load_file; +// std::string file_data_line; +// int seq_counter; +// //annot an_annot; +// int annot_begin; +// int annot_end; +// std::string annot_name; +// std::string annot_type; +// +// std::string::size_type space_split_i; +// std::string annot_value; +// +// annotation_list.reset(new SeqSpanRefList); +// +// load_file.open(load_file_path, std::ios::in); +// +// seq_counter = 0; +// // search for the seq_num-th sequence +// while ( (!load_file.eof()) && (seq_counter < seq_num) ) +// { +// getline(load_file,file_data_line); +// if (file_data_line == "") +// seq_counter++; +// } +// getline(load_file, file_data_line); +// // looks like the sequence is written as a single line +// set_filtered_sequence(file_data_line, reduced_dna_alphabet, 0, file_data_line.size(), SeqSpan::PlusStrand); +// getline(load_file, file_data_line); +// getline(load_file, file_data_line); +// if (file_data_line == "") +// { +// getline(load_file, file_data_line); +// species = file_data_line; +// while ( (!load_file.eof()) && (file_data_line != "") ) +// { +// getline(load_file,file_data_line); +// if ((file_data_line != "") && (file_data_line != "")) +// { +// // need to get 4 values...almost same code 4 times... +// // get annot start index +// space_split_i = file_data_line.find(" "); +// annot_value = file_data_line.substr(0,space_split_i); +// annot_begin = atoi (annot_value.c_str()); +// file_data_line = file_data_line.substr(space_split_i+1); +// // get annot end index +// space_split_i = file_data_line.find(" "); +// annot_value = file_data_line.substr(0,space_split_i); +// annot_end = atoi (annot_value.c_str()); +// +// if (space_split_i == std::string::npos) // no entry for type or name +// { +// std::cout << "seq, annots - no type or name\n"; +// annot_name = ""; +// annot_type = ""; +// } +// else // else get annot type +// { +// file_data_line = file_data_line.substr(space_split_i+1); +// space_split_i = file_data_line.find(" "); +// annot_value = file_data_line.substr(0,space_split_i); +// //an_annot.type = annot_value; +// annot_type = annot_value; +// if (space_split_i == std::string::npos) // no entry for name +// { +// std::cout << "seq, annots - no name\n"; +// annot_name = ""; +// } +// else // get annot name +// { +// file_data_line = file_data_line.substr(space_split_i+1); +// space_split_i = file_data_line.find(" "); +// annot_value = file_data_line.substr(0,space_split_i); +// // this seems like its wrong? +// annot_type = annot_value; +// } +// } +// add_annotation(annot_name, annot_type, annot_begin, annot_end); +// } +// //std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end +// // << "-->" << an_annot.type << "::" << an_annot.name << std::endl; +// } +// } +// load_file.close(); +//} + +SequenceRef Sequence::load_museq(boost::filesystem::fstream& load_file) { - fs::fstream load_file; + boost::shared_ptr seq(new Sequence); std::string file_data_line; int seq_counter; //annot an_annot; @@ -725,27 +812,34 @@ Sequence::load_museq(fs::path load_file_path, int seq_num) std::string::size_type space_split_i; std::string annot_value; - annotation_list.reset(new SeqSpanRefList); - - load_file.open(load_file_path, std::ios::in); + //seq->annotation_list.reset(new SeqSpanRefList); seq_counter = 0; - // search for the seq_num-th sequence + // search for the next sequence + int seq_num = 1; while ( (!load_file.eof()) && (seq_counter < seq_num) ) { getline(load_file,file_data_line); if (file_data_line == "") seq_counter++; } + + // Could not find next sequence + if (load_file.eof()) + { + seq.reset(); + return seq; + } + getline(load_file, file_data_line); // looks like the sequence is written as a single line - set_filtered_sequence(file_data_line, reduced_dna_alphabet, 0, file_data_line.size(), SeqSpan::PlusStrand); + seq->set_filtered_sequence(file_data_line, reduced_dna_alphabet, 0, file_data_line.size(), SeqSpan::PlusStrand); getline(load_file, file_data_line); getline(load_file, file_data_line); if (file_data_line == "") { getline(load_file, file_data_line); - species = file_data_line; + seq->set_species(file_data_line); while ( (!load_file.eof()) && (file_data_line != "") ) { getline(load_file,file_data_line); @@ -789,13 +883,14 @@ Sequence::load_museq(fs::path load_file_path, int seq_num) annot_type = annot_value; } } - add_annotation(annot_name, annot_type, annot_begin, annot_end); + seq->add_annotation(annot_name, annot_type, annot_begin, annot_end); } //std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end // << "-->" << an_annot.type << "::" << an_annot.name << std::endl; } } - load_file.close(); + //load_file.close(); + return seq; }