Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
dp_solution.hpp
1 
2 #ifndef DP_SOLUTION_HPP
3 #define DP_SOLUTION_HPP
4 
5 #include <vector>
6 #include <list>
7 #include <iostream>
8 
9 #include "utils/graph_typedefs.hpp"
10 #include "utils/min_matrix.hpp"
11 
12 namespace scaffold{ namespace solv{ namespace DP{
13 
14 
15  // a solution is a matrix of weights: S[x,y] = minimum weight of a set X of edges such that G - X is a collection of x paths & y cycles
16  // each entry S[x,y] also has a list of pairs of vertex names attached to it, which indicates which edges to delete to get to the solution
17  // on top, there is a global modifier for weights and edge lists that applies to all entries
18  template<class Compare = std::less<size_t> >
19  class DynProgSolution : public PCMinMatrix<Compare> {
20  protected:
21  // global weight & edges
22  MatrixEntry global;
23  public:
24 
25  // copy
27  DynProgSolution(const DynProgSolution& S): PCMinMatrix<Compare>(S), global(S.global) {}
28  // inherit constructors of PCMinMatrix
35  typedef typename PCMinMatrix<Compare>::entries_t entries_t;
36 
37  // return whether there are no solutions stored in the object
38  bool no_solutions() const
39  {
40  return entries->empty();
41  }
42  // invalidate all solutions stored in the object
43  void invalidate()
44  {
45  entries->clear();
46  }
47 
48  // combine two solutions to a solution to the disjoint union of the graphs
49  // NOTE: max_paths and max_cycles remain unchanged. Therefor, this operation is NOT SYMMETRIC
50  void combine_disjoint_union(const DynProgSolution& S)
51  {
52  // combine all entries of S with all our entries and store the minima in a new matrix
53  entries_t* old_entries = entries;
54  entries = new entries_t();
55  for(const auto& E : *old_entries){
56  const PathsAndCycles pc = E.first;
57  for(const auto& SE : *S.entries) put(pc + SE.first, *E.second, *SE.second);
58  }// for
59  // replace our matrix with the result
60  delete old_entries;
61  // finally, merge global modifiers
62  global += S.global;
63  } // function
64 
65  // combine two concurrent solutions by taking the min of the two for each entry.
66  // NOTE: this requires copying the global solution into each of the local solutions since the globals are likely not shared between the two solutions
67  void combine_min_of_two(const DynProgSolution& S){
68  DEBUG4(std::cout << "combining solutions"<<std::endl);
69  // Step 1: merge global modifier into entries if necessary
70  if(global.weight || !global.has_no_edges()){
71  for(auto& E : *entries){
72  const PathsAndCycles pc = E.first;
73  const typename entries_t::const_iterator SE = S.entries->find(pc);
74  // if S has an entry for pc and its better than our own, then replace our own by it, otherwise add global modifier to E
75  if((SE == S.entries->end()) || !put(pc, *SE->second, S.global)) *E.second += global;
76  }// for
77  // Step 2: clear global modifier
78  global.clear();
79  }
80  // Step 2: go through S and update our entries with theirs if necessary
81  for(const auto& SE : *(S.entries)) put(SE.first, *SE.second, S.global);
82  }// function
83 
84 
85  // append the global and all local solutions of S onto the global solution of us, clearing S' entries in the process
86  void destructive_append(DynProgSolution& S){
87  assert(entries->size() == 0);
88  // Step 1: kill our (empty) entries
89  delete entries;
90  // Step 2: add global modifier of S
91  global += S.global;
92  // Step 3: make the local entries of S our own & update best_index
93  entries = S.entries;
94  best_index = S.best_index;
95  // Step 4: give S new entries
96  S.entries = new entries_t();
97  }
98 
99  // add edge to solution
100  void add_edge_global(const VertexName& u, const VertexName& v, const size_t weight){
101  DEBUG4(std::cout << "globally adding ("<<u<<","<<v<<") to solution"<<std::endl);
102  global.append_edge(EdgeName(u,v), weight);
103  }
104 
105 
106  // << operator for solutions outputting a table of weights
107  friend std::ostream& operator<<(std::ostream& os, const DynProgSolution<Compare>& S){
108  return os << "globally +"<<S.global.weight<<std::endl << (PCMinMatrix<Compare>)(S);
109  } // function
110 
111 
112  };
113 
114  // given an instance that is connected and deg-2, add it as path solution or add the cycle and the cheapest subpath as solutions
 // I:         the instance; its graph, its num_paths / num_cycles and its bridge infos are read
 // S:         the solution object that receives the entries (via put / put_all)
 // is_better: the comparison used to tell minimization from maximization (see is_better(0,1) below)
 // NOTE(review): the default Compare here is std::less<unsigned>, while DynProgSolution defaults to
 //               std::less<size_t>; Compare is normally deduced from S, so this presumably never matters - confirm
115  template<class Compare = std::less<unsigned> >
116  void treat_deg_two(Instance& I, DynProgSolution<Compare>* const S, const Compare& is_better = Compare())
117  {
118  // get the number of vertices and edges of the graph of I
119  const RawScaffoldGraph& g = I.get_graph();
120  const unsigned n = boost::num_vertices(g);
121  const unsigned m = boost::num_edges(g);
 // the graph is treated as a cycle iff it has as many edges as vertices (relies on the connected & deg-2 precondition)
122  const bool is_cycle = (n == m);
123  DEBUG4(std::cout << "instance has "<<n<<" vertices, "<<m<<" edges. Requested: "<<I.num_paths<<" & "<<I.num_cycles<<std::endl);
124  // if it's a cycle, add the cycle solution & find the cheapest subpath to also add
125  if(is_cycle){
126  if(I.num_cycles) S->put_all(PathsAndCycles(0,1), g);
127  if(I.num_paths){
128  // delete an edge of the cycle to turn it into a path
129  const std::pair<ScafVertexPair, unsigned> nb = I.get_infos().bridges.get_cheapest_nonbridge();
130  const EdgeName nb_name(g[nb.first.first].name, g[nb.first.second].name);
131 
 // is_better(0,1) is true iff smaller values count as better, i.e. iff we minimize
132  if(is_better(0,1)){
133  // if we are looking for a MINIMUM solution, then put the cheapest nonbridge & its weight at (1,0)
134  S->put(PathsAndCycles(1,0), nb_name, nb.second);
135  } else {
136  // if we are looking for a MAXIMUM solution, then put everything but the non-bridge
137  MatrixEntry* ME = new MatrixEntry(0);
 // NOTE(review): the statement that declares and advances 'r' (original line 138, presumably a loop
 //               over the edges of I) is missing from this listing - restore it from the original file
 // NOTE(review): every edge is appended with nb.second, the weight of the non-bridge, rather than
 //               with a weight of its own - confirm that this is intended
139  if(!is_equal(nb.first, *r, g))
140  ME->append_edge(I.get_edge_name(*r), nb.second);
 // ME is deleted here if put() returns false, i.e. presumably if put() did not take it over
141  if(!S->put(PathsAndCycles(1,0), ME)) delete ME;
142  }
143  }// if
 // NOTE(review): there is no 'else' between '}' and 'if', so the put_all below is also executed for
 //               cycles, on top of the (1,0) entry handled above; the header comment ("path solution
 //               OR cycle and cheapest subpath") suggests that 'else if' was intended - confirm
144  } if(I.num_paths) S->put_all(PathsAndCycles(1,0), g);
145  }// function
146 
147 
148 }}} // namespace
149 
150 
151 #endif
Definition: min_matrix.hpp:191
Definition: read_adj_list.hpp:22
unsigned num_cycles
max number of cycles in solutions
Definition: instance.hpp:33
an instance is a ScaffoldGraph with given path- & cycle- numbers and a solution to keep track of dele...
Definition: instance.hpp:27
an edge iterator that skips over all edges for which the predicate evaluates to false ...
Definition: adv_edge_iters.hpp:19
Definition: dp_solution.hpp:19
const Graph & get_graph() const
get a const reference to the underlying graph
Definition: scaffold_graph.hpp:227
EdgeName get_edge_name(const VertexPair< Graph > &uv) const
get a copy of the name of the edge uv; independent of its presence
Definition: scaffold_graph.hpp:421
unsigned num_paths
max number of paths in solutions
Definition: instance.hpp:32
Definition: min_matrix.hpp:36
const GraphInfos< Graph > & get_infos() const
get a const reference to the graph infos
Definition: scaffold_graph.hpp:221
Definition: graph_typedefs.hpp:26
Predicated_EdgeIter< Graph, Predicate, EdgeIter< Graph > > get_edges(const Predicate &pred=Predicate()) const
get all edges satisfying a predicate
Definition: scaffold_graph.hpp:468