Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
read_adj_list.hpp
1 /*
2  Reads graphs in adjacency-list format (lines starting with "&" are comments).
3  This works, but could use some polishing.
4 */
5 
6 /* ----------------------------------------------------------------- */
7 
8 #ifndef BOOST_GRAPH_READ_ADJ_LIST_HPP
9 #define BOOST_GRAPH_READ_ADJ_LIST_HPP
10 
11 #include <vector>
12 #include <iostream>
13 #include <cstdio>
14 #include <cstring>
15 #include <cstdlib>
16 
17 #include "utils/string_utils.hpp"
18 #include "utils/exceptions.hpp"
19 #include "utils/string_utils.hpp"
20 #include "utils/instance.hpp"
21 
22 namespace scaffold { namespace io {
23 
24  // read the graph g from the instream "in"
25  Instance* read_adj_list(std::istream& in, const unsigned num_paths, const unsigned num_cycles, const unsigned num_objects){
26  unsigned num_vertices = 0;
27  unsigned no_lines = 0;
28  std::string in_line; // buffer for the next line
29  Instance* I = new Instance(num_paths, num_cycles, num_objects);
30 
31  std::vector<ScafVertex> verts;
32 
33  try{
34  while(std::getline(in, in_line)) {
35  ++no_lines;
36 
37  if(num_vertices == 0){
38  // read number of vertices
39  if(std::sscanf(in_line.c_str(), "%u", &num_vertices) != 1) throw except::bad_syntax(no_lines, "failed to read #vertices");
40  verts.reserve(num_vertices);
41  for(unsigned vi = 0; vi < num_vertices; ++vi) verts.push_back(I->add_vertex(std::to_string(vi), vi));
42  // if v is even, then add a matching edge to v+1
43  for(unsigned vi = 0; vi < num_vertices; vi+=2)
44  I->add_matching_edge(verts[vi], verts[vi+1]);
45  } else {
46  switch (in_line[0]) {
47  case '&': /* skip lines with comments */
48  case '\n': /* skip empty lines */
49  case '\0': /* skip empty lines at the end of file */
50  break;
51  default: // format: <vertex id>[ \t]*<space separated adj list of <vertex>,<weight>>
52  try{
53  unsigned u, weight;
54  const unsigned v = read_single_number(in_line);
55  while(!in_line.empty()) {
56  // skip all delimeters
57  skip_to(in_line, "0123456789");
58  // if we read everything we could, we're good for the next line, otherwise, skip to the next non-delimeter
59  if(in_line.empty()) break;
60  // read the next vertex
61  u = read_single_number(in_line);
62  // check vertex bounds
63  if(u < num_vertices){
64  // if next is a comma, read the edge weight
65  if(in_line[0] == ','){
66  // skip to the next weight
67  skip_to(in_line, "0123456789");
68  if(in_line.empty()) throw except::bad_syntax(no_lines, "indicated weight not found");
69  // read the weight
70  weight = read_single_number(in_line);
71  } else weight = 0; // per default the weight of an edge is 0
72  } else throw except::bad_syntax(no_lines, "vertex number out of bounds");
73  I->add_edge(verts[u], verts[v], ScafEdgeProperty(weight, 0, 1, ""));
74  } // while
75  } catch(std::invalid_argument ex){
76  // first characted has to be a digit to read a vertex
77  throw except::bad_syntax(no_lines, "unexpected format, expected vertex");
78  }
79  } // switch
80  } // if
81  } // while
82  if ( in.eof() == 0 ) throw except::bad_syntax(no_lines, "expected EOF");
83  if ( no_lines == 0 ) throw except::read_error(no_lines, "input stream is empty");
84  } catch(except::bad_syntax ex){
85  std::cout << "Syntax error in line "<<ex.line_no<<": "<<ex.what()<<std::endl;
86  delete I;
87  return NULL;
88  } catch(except::read_error ex){
89  std::cout << "Error reading after "<<ex.line_no<<" lines: "<<ex.what()<<std::endl;
90  delete I;
91  return NULL;
92  }
93 #warning TODO: support insert size and other information in DIMACS format
94  /* Thank God! all is done */
95  return I;
96  } // function
97 }} // namespace
98 
99 #endif
an exception for the case that a given file does not conform to expected syntax
Definition: exceptions.hpp:56
an exception for the case that a given file could not be read on a low level
Definition: exceptions.hpp:46
Definition: read_adj_list.hpp:22
Definition: read_phytree.hpp:10
long read_single_number(std::string &s)
consume an integer from the beginning of s and return it
Definition: string_utils.hpp:31
void skip_to(std::string &s, const std::string &skip_to)
remove all characters not in 'skip_to' from the beginning of s
Definition: string_utils.hpp:17
const unsigned line_no
store the line number on which the exception occurred
Definition: exceptions.hpp:47