Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
tw_dp.hpp
1 
2 #ifndef TW_DP_HPP
3 #define TW_DP_HPP
4 
#include <memory>
#include <queue>
#include <stack>
#include <vector>
#include <boost/graph/subgraph.hpp>
9 
10 #include "TOL/GreedyFillIn.h"
11 #include "TOL/TreeDecomposition.h"
12 
13 #include "utils/utils.hpp"
14 #include "utils/graph_typedefs.hpp"
15 #include "utils/solution.hpp"
16 #include "utils/instance.hpp"
17 #include "utils/convert.hpp"
18 #include "utils/td_subgraph.hpp"
19 #include "utils/scaffolding_utils.hpp"
20 
21 
22 #include "preprocess/pp_pendant_match.hpp"
23 #include "solv/dp_table.hpp"
24 #include "solv/trees.hpp"
25 
26 using namespace boost;
27 
28 namespace scaffold{ namespace solv { namespace DP{
29 
30  using scaffold::operator<<;
31 
32  // get a TOL tree decomposition of *I.g
33  template<class Graph>
34  TOL::TreeDecomposition* get_tree_decomp(const Graph& g, TOL::VTranslateBoostTOL<Graph>* vtranslate = NULL)
35  {
36  // step 1: prepare the TOL graph & the elimination ordering
37  TOL::Graph TOLg(0, num_vertices(g));
38  convert(g, TOLg, vtranslate);
39  // Step 2: run the greedy fill-in
40  TOL::EliminationOrdering eo(TOLg);
41  TOL::GreedyFillIn gfi(eo);
42  gfi.run(TOLg);
43  // Step 3: return the result
44  return new TOL::TreeDecomposition(TOLg, eo);
45  }
46 
47 
48 
49 
50  // process an edge of the tree decomposition
51  template<class Graph>
52  void DP_treat_edge(DP_Table<Graph>& child_table, DP_Table<Graph>& parent_table, EdgeSet<Graph>& already_introduced, const Graph& g)
53  {
54  DEBUG2(std::cout << "treating edge from child with bag ";
55  for(auto& i: child_table.index->left) std::cout << VertexAndGraph<Graph>(i.first, g)<<" ";
56  std::cout << " to parent with bag ";
57  for(auto& i: parent_table.index->left) std::cout << VertexAndGraph<Graph>(i.first, g)<<" ";
58  std::cout << std::endl);
59  // Step 0: get all vertices that will be forgotten (i.e. all vertices in child that are not in parent)
60  VertexSet<Graph> to_forget;
61  for(auto& vi: child_table.index->left){
62  const Vertex<Graph> v = vi.first;
63  const auto& parent_vset = parent_table.index->left;
64  if(!contains(parent_vset, v)) to_forget.emplace(v);
65  }
66 
67  // Step 1: introduce all edges incident to vertices that will be forgotten
68  for(const Vertex<Graph>& v : to_forget){
69  DEBUG3(std::cout << "introducing edges incident with "<<VertexAndGraph<Graph>(v, g)<<":"<<std::endl);
70  for(OEdgeIterRange<Graph> r = out_edges(v, g); r.first != r.second; ++r.first)
71  // introduce the edge only if it is not already introduced and if the target is also in the bag
72  if(!contains(already_introduced, *r.first) && (contains(child_table.index->left, target(*r.first, g)))){
73  child_table.intro_edge(*r.first);
74  already_introduced.insert(*r.first);
75  already_introduced.insert(edge(target(*r.first, g), source(*r.first, g), g).first);
76  }
77  }
78 
79  // Step 2: forget all vertices in child that are not in parent
80  child_table.forget_vertices(to_forget);
81 
82  // Step 3: introduce all vertices in parent that are not in child
83  VertexSet<Graph> to_introduce;
84  for(auto& vi: parent_table.index->left){
85  const Vertex<Graph> v = vi.first;
86  const auto& child_vset = child_table.index->left;
87  if(!contains(child_vset, v)) to_introduce.emplace(v);
88  }
89  child_table.intro_vertices(to_introduce);
90 
91  // Step 4: merge with the parent
92  DEBUG3(std::cout << "joining tables..."<<std::endl);
93  parent_table.join(child_table);
94 
95  DEBUG5(std::cout << "final table of parent:"<<std::endl<<parent_table<<std::endl);
96 
97  DEBUG2(
98  size_t size = parent_table.size();
99  if(size > 1000000) std::cout << "table size: "<<parent_table.size()<<" entries"<<std::endl;
100  )
101  }
102 
103 
104  // an instance comparator based on number of vertices, by default, smaller instances are considered "<"
105  template<class Graph, class Compare = std::less<size_t> >
107  {
108  const Compare is_better;
109  bool operator()(const Instance<Graph>* I1, const Instance<Graph>* I2)
110  {
111  return is_better(num_vertices(*I1->g), num_vertices(*I2->g));
112  }
113  };
114 
115  // solv an instance of scaffolding using the dynamic programming on tree decompositions
116  template<class Graph>
117  void TW_solv(Instance<Graph>& I, DynProgSolution<std::greater<size_t> >& S){
118  // Step 0: apply preprocessing
119  DEBUG3(std::cout << "running preprocessing..."<< std::endl);
120  preprocess::pp_matching_pendant(I);
121  DEBUG3(std::cout << "after preprocessing: "<<boost::num_vertices(*I.g)<<" vertices & "<<boost::num_edges(*I.g)<<" edges"<<std::endl);
122  // Step 1: run for each component separately
123  std::priority_queue<Instance<Graph>*, std::vector<Instance<Graph>*>, Compare_Instance_Size<Graph, std::greater<size_t> > > components;
124  while(true){
125  Instance<Graph> *J = I.split_off_instance();
126  if(J) components.push(J); else break;
127  }
128  DEBUG3(std::cout << "split off "<<components.size()<< " connected components for independent solving"<< std::endl);
129  if(!components.empty()){
130  // add I as final compoenent
131  components.push(&I);
132  DEBUG2(std::cout << "instance has "<<components.size()<<" components"<<std::endl);
133  // prepare S for merging the component solutions
134  S.put_empty(PathsAndCycles(0,0));
135  // keep track of how many paths any cycles got used in previous components
136  PathsAndCycles used_up(0,0);
137  // treat components in ascending order of size
138  while(!components.empty()){
139  Instance<Graph>* J = components.top(); components.pop();
140  J->num_paths -= used_up.p;
141  J->num_cycles -= used_up.c;
142 
143  DEBUG3(std::cout << "next component has "<<num_vertices(*J->g)<<" vertices & we're looking for "<<J->num_paths<<" paths & "<<J->num_cycles<<" cycles"<<std::endl);
144 
145  DynProgSolution<std::greater<size_t> > solJ(*J);
146  TW_solv(*J, solJ);
147 
148  if(J != &I) delete J;
149 
150  if(!solJ.no_solutions()){
151  DEBUG4(std::cout << " joining solution" << solJ<<std::endl);
152  S.combine_disjoint_union(solJ);
153  // update the number of paths and cycles used
154  used_up = S.smallest_index();
155  DEBUG4(std::cout << " S now has "<<S.size()<<" entries, using up "<<used_up<<" paths & cycles "<<std::endl);
156  } else {
157  // if either component does not have a solution, return failure
158  S.invalidate();
159  return;
160  }
161  }
162  return;
163  }
164  // solve base cases
165  if(I.max_deg_two()) {
166  DEBUG3(std::cout << "it's max-deg 2"<<std::endl);
167  treat_deg_two(I, &S);
168  return;
169  }
170  if(I.is_acyclic()){
171  DEBUG3(std::cout << "it's a tree"<<std::endl);
172  solve_on_tree(I, &S);
173  return;
174  }
175 
176 
177  // Step 2: get a tree decomposition
178  TOL::VTranslateBoostTOL<Graph>* vtranslate = new TOL::VTranslateBoostTOL<Graph>();
179  const TOL::TreeDecomposition *TD = get_tree_decomp(*I.g, vtranslate);
180  const TOL::Tree t(TD->tree());
181  DEBUG2(std::cout << "using tree decomposition of width "<<TD->width()<<std::endl);
182  // Step 3: setup data structures
183  // Step 3b: stack of child iterators, together with the table pointer for this node
184  std::stack<std::pair<std::pair<TOL::Tree::const_child_iterator, TOL::Tree::const_child_iterator>, DP_Table<Graph>*> > child_stack;
185  // Step 3c: note who has been visited
186  unordered_set<TOL::Tree::Node> visited;
187  // Step 3d: note set of introduced edges
188  EdgeSet<Graph> introduced_edges(default_buckets, *I.g);
189 
190  // make sure the root has children (if not, the graph is a clique)
191  const TOL::Tree::Node r = t.root();
192  assert(!t.is_leaf(r));
193 
194  // Step 5: go through the queue in-order to minimize the # of tables we have to keep in memory at the same time
195  child_stack.emplace(std::make_pair(t.all_children(r), new DP_Table<Graph>(I, TD->subset(r), vtranslate)));
196  visited.insert(r);
197  do{
198  const TOL::Tree::Node& u = *(child_stack.top().first.first);
199 
200  // if we haven't seen u before, then push the children on the stack and dive into the subtree
201  if(!contains(visited, u)){
202  child_stack.emplace(make_pair(t.all_children(u), new DP_Table<Graph>(I, TD->subset(u), vtranslate)));
203  // if u is a leaf then initialize its DP table as leaf table
204  if(t.is_leaf(u)) child_stack.top().second->init_as_leaf();
205 
206  visited.insert(u);
207  DEBUG1(
208  std::cout << "adding to stack: "<<u<<" with VSet:\t";
209  std::list<Vertex<Graph> > l;
210  child_stack.top().second->get_VSet(l);
211  std::cout << VertexListAndGraph<Graph>(l, *I.g)<<"\t\t children: ";
212  for(auto ra = child_stack.top().first.first; ra != child_stack.top().first.second; ++ra) std::cout << *ra<< " ";
213  std::cout << std::endl;
214  );
215 
216  } else ++child_stack.top().first.first; // otherwise, we came back from a dive into the subtree, so prepare to dive to the next child
217 
218  // if all children have been considered, merge our table with the parent table
219  std::pair<TOL::Tree::const_child_iterator, TOL::Tree::const_child_iterator>& childs = child_stack.top().first;
220  if(childs.first == childs.second){
221  // if there is only the root entry on the stack, then break the while-loop
222  if(child_stack.size() > 1){
223  DP_Table<Graph>* child_table = child_stack.top().second;
224  child_stack.pop();
225  DP_Table<Graph>* parent_table = child_stack.top().second;
226  // combine the DP tables
227  DP_treat_edge(*child_table, *parent_table, introduced_edges, *I.g);
228  // afterwards, delete the child's table since we won't need it anymore
229  delete child_table;
230  // NOTE: if the table we produced is, in fact, empty, then there is no solution!
231  if(parent_table->empty()){
232  while(!child_stack.empty()){
233  DP_Table<Graph>* current_table = child_stack.top().second;
234  current_table->clear();
235  delete current_table;
236  child_stack.pop();
237  }
238  S.invalidate();
239  return;
240  }
241  } else break;
242  }// if
243  } while(true);
244  // finally read the result from the only remaining table on the stack (should be the root table)
245  // to this end, merge with the empty bag
246  DP_Table<Graph> root_table(I, VertexSet<Graph>());
247  DP_Table<Graph>* last_table = child_stack.top().second;
248  DP_treat_edge(*last_table, root_table, introduced_edges, *I.g);
249  delete last_table;
250  root_table.read_solution(S);
251  }// function
252 
253 }}}// namespace
254 
255 
256 #endif
257 
Definition: graph_utils.hpp:18
Definition: read_adj_list.hpp:22
unsigned num_cycles
max number of cycles in solutions
Definition: instance.hpp:33
Definition: dp_solution.hpp:19
unsigned num_paths
max number of paths in solutions
Definition: instance.hpp:32
bool contains(const Set &s, const Element &el)
a more readable containment check
Definition: utils.hpp:171
Definition: graph_typedefs.hpp:26