Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
read_dimacs_graph.hpp
1 /*
2  Reads coloring problem in DIMACS format.
3  This works, but could use some polishing.
4 */
5 
6 /* ----------------------------------------------------------------- */
7 
8 #ifndef BOOST_GRAPH_READ_DIMACS_GRAPH_HPP
9 #define BOOST_GRAPH_READ_DIMACS_GRAPH_HPP
10 
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>


#include <boost/graph/graph_traits.hpp>

#include "utils/exceptions.hpp"
#include "utils/instance.hpp"
22 
23 namespace scaffold{ namespace io {
24 
25  // read the graph g from the instream "in"
26  Instance* read_dimacs_graph(std::istream& in, const unsigned num_paths, const unsigned num_cycles, const unsigned num_objects){
27  const uint P_FIELDS = 2; // number of fields expected in a "p" line
28  const uint E_FIELDS = 2; // number of fields expected in an "e" line
29  unsigned no_lines = 0; // number of lines read
30  unsigned no_elines = 0; // number of lines defining edges
31  unsigned num_vertices, num_edges; // number of vertices & edges in the graph
32  std::string in_line; // buffer for the next line
33  Instance* I = new Instance(num_paths, num_cycles, num_objects);
34 
35  std::vector<ScafVertex> verts;
36 
37  try{
38  while(std::getline(in, in_line)) {
39  ++no_lines;
40  switch (in_line[0]) {
41  case 'c': /* skip lines with comments */
42  case '\n': /* skip empty lines */
43  case '\0': /* skip empty lines at the end of file */
44  break;
45  case 'p': /* contains "edge NUMBER_NODES NUMBER_EDGES */
46  if(std::sscanf(in_line.c_str(), "%*c %*s %u %u", &num_vertices, &num_edges) != P_FIELDS)
47  throw except::bad_syntax(no_lines, "unexpected p-line format");
48  verts.reserve(num_vertices);
49  for(unsigned vi = 0; vi < num_vertices; ++vi) verts.push_back(I->add_vertex(std::to_string(vi), vi));
50  break;
51  case 'e': /* edge description */
52  unsigned u,v; // store the indices of the incident vertices
53  if(no_elines >= num_edges) throw except::bad_syntax(no_lines,"too many edges specified or no p line found");
54  if(std::sscanf(in_line.c_str(),"%*c %u %u", &u, &v) != E_FIELDS)
55  throw except::bad_syntax(no_lines, "unexpected e-line format");
56  --u; // index from 0, not 1
57  --v;
58  if((u < num_vertices) && (v < num_vertices)){
59  ScafEdge e;
60  bool success;
61  boost::tie(e, success) = I->add_edge(verts[u], verts[v]);
62  assert(success);
63  } else throw except::bad_syntax(no_lines, "vertex index out of bounds");
64  ++no_elines;
65  break;
66  default: /* unknown type of line */
67  throw except::bad_syntax(no_lines, "unrecognized line type");
68  } // switch
69  } // while
70  if ( in.eof() == 0 ) throw except::bad_syntax(no_lines, "read error");
71  if ( no_lines == 0 ) throw except::read_error(no_lines, "input stream is empty");
72  if ( no_elines < num_edges ) throw except::bad_syntax(no_lines, "did not read enough edges");
73  } catch(except::bad_syntax ex){
74  std::cout << "Syntax error in line "<<ex.line_no<<": "<<ex.what()<<std::endl;
75  delete I;
76  return NULL;
77  } catch(except::read_error ex){
78  std::cout << "Error reading after "<<ex.line_no<<" lines: "<<ex.what()<<std::endl;
79  delete I;
80  return NULL;
81  }
82 #warning TODO: support insert size and other information in DIMACS format
83  /* Thank God! all is done */
84  return I;
85  } // function
86 }} // namespace
87 
88 #endif
an exception for the case that a given file does not conform to expected syntax
Definition: exceptions.hpp:56
an exception for the case that a given file could not be read on a low level
Definition: exceptions.hpp:46
Definition: read_adj_list.hpp:22
Definition: read_phytree.hpp:10
const unsigned line_no
store the line number on which the exception occurred
Definition: exceptions.hpp:47