report filename for invalid fasta files
author Diane Trout <diane@caltech.edu>
Mon, 30 Oct 2006 20:25:21 +0000 (20:25 +0000)
committer Diane Trout <diane@caltech.edu>
Mon, 30 Oct 2006 20:25:21 +0000 (20:25 +0000)
ticket:158

alg/sequence.cpp
alg/test/test_sequence.cpp
examples/seq/broken.fa [new file with mode: 0644]

index 7538e4649cb2852d468e1549a53469d60a5c94b1..004f096a5c519ee55048db93bd9f661000d2484d 100644 (file)
@@ -193,6 +193,11 @@ void Sequence::load_fasta(fs::path file_path, alphabet_ref a,
       errormsg << file_path.native_file_string()
                << " did not have any fasta sequences" << std::endl;
       throw sequence_empty_file_error(errormsg.str());
+    } catch(sequence_invalid_load_error e) {
+      std::ostringstream msg;
+      msg << file_path.native_file_string();
+      msg << " " << e.what();
+      throw sequence_invalid_load_error(msg.str());
     }
   }
 }
@@ -210,6 +215,7 @@ Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
 {
   std::string file_data_line;
   int header_counter = 0;
+  size_t line_counter = 0;
   bool read_seq = true;
   std::string rev_comp;
   std::string sequence_raw;
@@ -224,6 +230,7 @@ Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
   while ( (!data_file.eof()) && (header_counter < seq_num) )
   {
     multiplatform_getline(data_file, file_data_line);
+    ++line_counter;
     if (file_data_line.substr(0,1) == ">")
       header_counter++;
   }
@@ -235,6 +242,7 @@ Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
 
     while ( !data_file.eof() && read_seq ) {
       multiplatform_getline(data_file,file_data_line);
+      ++line_counter;
       if (file_data_line.substr(0,1) == ">")
         read_seq = false;
       else {
@@ -245,7 +253,10 @@ Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
            if(alpha.exists(*line_i)) {
              sequence_raw += *line_i;
            } else {
-            throw sequence_invalid_load_error("Unrecognized characters in fasta sequence");
+            std::ostringstream msg;
+            msg << "Unrecognized characters in fasta sequence at line ";
+            msg << line_counter;
+            throw sequence_invalid_load_error(msg.str());
            }
          }
       }
index a3c63b6d87d8c4710a4cb76777a0e9b6775b711f..26b5be981ec1078a001f3eb01361fc8a22ba1efa 100644 (file)
@@ -178,6 +178,27 @@ BOOST_AUTO_TEST_CASE( sequence_load )
                                     "5' flank");
 }
 
+BOOST_AUTO_TEST_CASE( sequence_load_error )  // load failure must name the offending file
+{
+  fs::path seq_path(fs::path(EXAMPLE_DIR, fs::native)/"seq");
+  seq_path /= "broken.fa";  // fixture added by this change; its sequence line ends in '*'
+  bool exception_thrown = false;
+  std::string exception_filename;
+  Sequence s;
+  try {
+    s.load_fasta(seq_path);
+  } catch(const sequence_invalid_load_error& e) {  // by const reference: no copy, no slicing
+    exception_thrown = true;
+    size_t native_string_size = seq_path.native_file_string().size();
+    std::string estr(e.what());
+    BOOST_REQUIRE(estr.size() > native_string_size);  // guards the iterator arithmetic below
+    std::copy(estr.begin(), estr.begin()+native_string_size,  // keep only the path-length prefix
+              std::back_inserter(exception_filename));
+  }
+  BOOST_CHECK_EQUAL(exception_thrown, true);
+  BOOST_CHECK_EQUAL(seq_path.native_file_string(), exception_filename);
+}
+
 BOOST_AUTO_TEST_CASE( sequence_load_dna_reduced )
 {
   std::string reduced_dna_fasta_string(">foo\nAAGGCCTTNN\n");
diff --git a/examples/seq/broken.fa b/examples/seq/broken.fa
new file mode 100644 (file)
index 0000000..d26ccea
--- /dev/null
@@ -0,0 +1,3 @@
+>This fasta file is invalid
+AAAGGGCCTTT*
+