Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
read_fasta.hpp
1 
2 #ifndef READ_FASTA_HPP
3 #define READ_FASTA_HPP
4 
5 #include <iostream>
6 #include <fstream>
7 #include <string>
8 
9 #include "utils/exceptions.hpp"
10 #include "utils/scaffolding_typedefs.hpp"
11 
12 
13 namespace scaffold{ namespace io {
14 
15  // read a fasta file into an associative name_to_seq mapping a contig name to its sequence
16  void read_fasta_file(std::istream& input, SequenceMap& name_to_seq, const std::string& acceptable_bases = BASES)
17  {
18  SequenceMap::iterator current_entry;
19  bool success;
20  unsigned line_no = 0;
21  std::string line; // input buffer
22 
23  name_to_seq.clear();
24  while(std::getline(input, line).good()){
25  ++line_no;
26  if(!line.empty()){
27  if(line[0] == '>'){
28  // if the line starts with '>' it's a contig name
29  // step 1: remove leading and trailing whitespaces of the contig name
30  std::string trimmed_line(trim(line.erase(0, 1)));
31  if(trimmed_line.empty()) throw except::bad_syntax(line_no, "empty contig name");
32  // step 2: insert into the unordered_map
33  boost::tie(current_entry, success) = name_to_seq.DEEP_EMPLACE(trim(line), "");
34  if(!success) throw except::bad_syntax(line_no, (std::string)"repeated contig name: " + line);
35  } else {
36  // if the line is not empty and does not start with '>', then it's part of the contig sequence
37  if(current_entry == name_to_seq.end()) throw except::bad_syntax(line_no, "missing contig name");
38  // sanity check
39  if(line.find_first_not_of(acceptable_bases) != std::string::npos)
40  throw except::bad_syntax(line_no, (std::string)"contains a base that's not in " + acceptable_bases);
41  // append the line to the current sequence
42  current_entry->second.sequence += line;
43  }// if
44  }// if line not empty
45  }// while file contains data
46  }// function
47 }}// namespace
48 
49 #endif
an exception for the case that a given file does not conform to expected syntax
Definition: exceptions.hpp:56
Definition: read_adj_list.hpp:22
Definition: read_phytree.hpp:10
std::string trim(const std::string &str, const std::string &to_remove=WHITESPACES)
remove leading & trailing chars (whitespaces by default) from str
Definition: string_utils.hpp:41