Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
trees.hpp
1 
2 #ifndef TREES_HPP
3 #define TREES_HPP
4 
5 #include <queue>
6 #include "utils/dp_solution.hpp"
7 
8 using namespace boost;
9 
10 namespace scaffold{ namespace solv { namespace trees {
11 
12 
13 
14  template<class Graph>
15  Edge<Graph>* get_edge_to_first_unvisited_neighbor(const Vertex<Graph>& u, const unordered_set<Vertex<Graph> >& visited, const Graph& g)
16  {
17  for(OEdgeIterRange<Graph> r = out_edges(u, g); r.first != r.second; ++r.first)
18  if(!contains(visited, target(*r.first, g)))
19  return new Edge<Graph>(*r.first);
20  return NULL;
21  }
22 
23 
24 
25  template<class Compare = std::less<size_t> >
26  struct TreeDPinfos {
27  // the table is indexed by \alpha and p
28  typedef PathsAndCycles IndexType;
30  // the DP values for 0/1 edges incident with v in S
31  entries_t *table;
32  // a reminder of whether the matching edge has already been seen
33  size_t childs_done;
34  const size_t max_paths;
35  bool seen_matching;
36  Compare is_better;
37 
38  TreeDPinfos(const size_t _max_paths): table(new entries_t(1, _max_paths)), childs_done(0), max_paths(_max_paths), seen_matching(false) {
39  table->put_empty(IndexType(0,0));
40  }
41  ~TreeDPinfos(){ delete table; }
42 
43  // combine our entry with the one of our child as described in the paper
44  void combine_with_child(const TreeDPinfos& child, const bool is_matching_edge, const EdgeName& uv, const size_t uv_weight){
45  entries_t *result = new entries_t(table->max);
46  for(auto E_i = table->get_entries(); E_i.first != E_i.second; ++E_i.first){
47  const auto& E = *E_i.first;
48  const IndexType ap = E.first;
49  for(auto cE_i = child.table->get_entries(); cE_i.first != cE_i.second; ++cE_i.first){
50  const auto& cE = *cE_i.first;
51  const IndexType cap = cE.first;
52  DEBUG5(std::cout << "combining index ("<<(int)ap.c<<","<<ap.p<<") with ("<<(int)cap.c<<","<<cap.p<<") and the matching flag is "<<is_matching_edge<<" weight: "<<uv_weight<<std::endl);
53  // if the subtree of u has 0 paths, then it cannot have an edge incident to u; likewise for the subtree s_v[j]
54  assert(ap.c <= ap.p);
55  assert(cap.c <= cap.p);
56 
57  if(is_matching_edge){
58  // uv in M
59  seen_matching = true;
60  const size_t sum_p = 1 + (ap.p - ap.c) + (cap.p - cap.c);
61  if(sum_p <= max_paths) result->put(IndexType(sum_p, ap.c), *E.second, *cE.second);
62  } else {
63  // uv is not in M
64  // first, we don't delete uv: then, ap.c and cap.c have to be 0
65  if((ap.c == 0) && (cap.c == 0)){
66  // combine [..] + [0,...]
67  const size_t sum_p = ap.p + cap.p - (seen_matching ? 1 : 0);
68  if(sum_p <= max_paths) {
69  MatrixEntry* ME = new MatrixEntry(*E.second);
70  *ME += *cE.second;
71  // if we're looking for a MAXIMUM solution, then add the non-deleted edge
72  if(is_better(1,0)){
73  ME->append_edge(uv);
74  ME->weight += uv_weight;
75  }
76  if(!result->put(IndexType(sum_p, 1), ME)) delete ME;
77  }
78  }
79  // second, we delete uv
80  const size_t sum_p = ap.p + cap.p;
81  if(sum_p <= max_paths){
82  MatrixEntry* ME = new MatrixEntry(*E.second);
83  *ME += *cE.second;
84  // if we're looking for a MINIMUM solution, then add the deleted edge
85  if(is_better(0,1)){
86  ME->append_edge(uv);
87  ME->weight += uv_weight;
88  }
89  if(!result->put(IndexType(sum_p, ap.c), ME)) delete ME;
90  }// if
91  }// else
92  } // for
93  }// for
94  delete table;
95  table = result;
96  }// function
97 
98  // << operator for solutions outputting a table of weights
99  friend std::ostream& operator<<(std::ostream& os, const TreeDPinfos& infos){
100  // write table
101  for(size_t p = 0; p <= infos.max_paths; ++p){
102  os << p<<":\t";
103  try{ os << infos.table->at(IndexType(p,0)).weight << "\t"; } catch(...) { os << "\t"; };
104  try{ os << infos.table->at(IndexType(p,1)).weight << "\t"; } catch(...) { os << "\t"; };
105  os << std::endl;
106  } // for
107  return os;
108  } // function
109 
110  };
111 
112 
113  // get a queue of leaves in I and init their values in the DPmap
114  template<class Graph, class Compare = std::less<size_t> >
115  void init_leaves(const Instance<Graph>& I, std::queue<Vertex<Graph> >& leaves, unordered_map<Vertex<Graph>, TreeDPinfos<Compare> >& DPmap){
116  DEBUG4(std::cout << "initializing leaves"<<std::endl);
117  for(VertexIterRange<Graph> r = vertices(*I.g); r.first != r.second; ++r.first)
118  if(degree(*r.first, *I.g) == 1){
119  // construct the entry for *r.first initializing the leaf data in the TreeDPinfos
120  DPmap.emplace(std::piecewise_construct, std::make_tuple(*r.first), std::make_tuple(I.num_paths));
121  leaves.push(*r.first);
122  }//if
123  DEBUG4(std::cout << "initialized "<<leaves.size()<<" leaves"<<std::endl);
124  }//function
125 
126  // solve an instance of scaffolding on a tree
127  template<class Graph, class Compare = std::less<size_t> >
128  void solve_on_tree(Instance<Graph>& I, DynProgSolution<Compare>* const S){
129  assert(I.is_acyclic());
130 
131  unordered_map<Vertex<Graph>, TreeDPinfos<Compare> > DPmap;
132  unordered_set<Vertex<Graph> > visited;
133 
134  // Step 1: get the leaves
135  std::queue<Vertex<Graph> > leaves;
136  init_leaves(I, leaves, DPmap);
137 
138  // Step 2: for each leaf, update the infos upwards
139  Vertex<Graph> u;
140  typename unordered_map<Vertex<Graph>, TreeDPinfos<Compare> >::const_iterator u_infos;
141  while(!leaves.empty()){
142  DEBUG4(std::cout << "queue has "<<leaves.size()<<" items, next one is "<<VertexAndGraph<Graph>(leaves.front(), *I.g)<<std::endl);
143  u = leaves.front(); leaves.pop();
144  // get the DP infos
145  u_infos = DPmap.find(u);
146  assert(u_infos != DPmap.end());
147  DEBUG4(std::cout << "with DP entry: "<<std::endl<<u_infos->second<<std::endl);
148  // u should have a single neighbor that is unvisited
149  Edge<Graph>* to_parent = get_edge_to_first_unvisited_neighbor(u, visited, *I.g);
150  // if u does not have a parent, then u is the root, so break here
151  if(!to_parent) break;
152  const Vertex<Graph> parent = target(*to_parent, *I.g);
153  // get the parents DP infos, constructing them if necessary
154  // update the parent's table
155  const bool is_matching_edge = (I.matched.at(u) == parent);
156  const EdgeName uv_name = std::make_pair(get(vertex_name, *I.g, u), get(vertex_name, *I.g, parent));
157  const size_t uv_weight = get(edge_weight, *I.g, *to_parent);
158  // if the parent does not yet have a DP entry, create an empty one
159  TreeDPinfos<Compare>& p_infos = DPmap.emplace(parent, I.num_paths).first->second;
160 
161  p_infos.combine_with_child(u_infos->second, is_matching_edge, uv_name, uv_weight);
162  p_infos.childs_done++;
163  // if all children of the parent have been considered, then add the parent to the queue
164  if(p_infos.childs_done == degree(parent, *I.g) - 1) leaves.push(parent);
165  // finally, forget the infos of u & mark u visited
166  visited.insert(u);
167  DPmap.erase(u_infos);
168  delete to_parent;
169  }// while
170  // now, u is the root, so construct a solution from it
171  for(auto E = u_infos->second.table->get_entries(); E.first != E.second; ++E.first)
172  S->put(PathsAndCycles(E.first->first.p,0), E.first->second);
173  }// function
174 
175 
176 
177 
178 }}}// namespace
179 
180 
181 #endif
182 
Definition: graph_utils.hpp:18
Definition: min_matrix.hpp:191
Definition: read_adj_list.hpp:22
Definition: trees.hpp:26
unsigned num_paths
max number of paths in solutions
Definition: instance.hpp:32
bool contains(const Set &s, const Element &el)
a more readable containment check
Definition: utils.hpp:171
Definition: min_matrix.hpp:36
Definition: graph_typedefs.hpp:26