Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
FES_branching.hpp
1 
2 
3 #ifndef FES_BRANCHING_HPP
4 #define FES_BRANCHING_HPP
5 
6 #include <pthread.h>
7 
8 #include "utils/graph_typedefs.hpp"
9 #include "utils/graph_utils.hpp"
11 #include "utils/instance.hpp"
12 #include "utils/scaffolding_utils.hpp"
13 #include "degree_branching.hpp"
14 
15 // branch on a bridge only if the FES it splits away is bigger than this and smaller than (total FES - this)
16 #define BRIDGE_THRESHOLD 1
17 // apply preprocessing only at recursion depths that are a multiple of this
18 #define PP_EACH_DEPTH 2
19 
20 #define MIN_FES_TO_PARALLELIZE 8
21 #define MAX_NUM_THREADS 16
22 
23 
24 namespace scaffold{
25  namespace solv{
// Mutex guarding num_threads. It is statically initialized so that it is a valid
// mutex before the first pthread_mutex_lock on it.
// NOTE(review): if some other file also calls pthread_mutex_init on this mutex, one of
// the two initializations should be dropped - confirm against the callers.
// NOTE(review): this and num_threads are non-inline definitions inside a header; including
// the header from more than one translation unit would define them twice - confirm that
// it is only included once.
pthread_mutex_t thread_lock = PTHREAD_MUTEX_INITIALIZER;
// Number of worker threads that currently hold a slot; compared against MAX_NUM_THREADS
// before a new thread is spawned and decremented by each worker when it finishes.
size_t num_threads = 0;
28 
29  template<class Graph>
30  void FES_solve_recursively(Instance<Graph>&, DynProgSolution<>*&, size_t depth = 0 );
31 
32  // pthread stuff
33  template<class Graph>
35  Instance<Graph>* I;
36  DynProgSolution<>* S;
37  size_t depth;
38 
39  pthread_data_block(Instance<Graph>* const _I, DynProgSolution<>* const _S, const size_t _depth) : I(_I), S(_S), depth(_depth) {}
40  };
41  // parallel version of FES_solve_recursively
42  template<class Graph>
43  void* FES_solve_rec_parallel(void* _data){
45  DEBUG1(std::cout << "thread "<<pthread_self()<<" starting..."<<std::endl);
46  FES_solve_recursively(*data->I, data->S, data->depth);
47  DEBUG1(std::cout << "thread "<<pthread_self()<<" finishing..."<<std::endl);
48  // update number of threads
49  pthread_mutex_lock(&thread_lock);
50  --num_threads;
51  pthread_mutex_unlock(&thread_lock);
52  return NULL;
53  }
54 
55 
56 
57 
58  // store all branches around v in a list
59  template<class FGraph, class Graph>
60  void get_FES_branches_for_vertex(const Instance<Graph>& I, const FGraph& fg, const Vertex<FGraph>& v, std::vector<Branch<Graph> >& branches){
61  // reserve space for #branches = degree(v)
62  branches.clear();
63  branches.reserve(degree(v, fg));
64  // add the "delete-all" branch
65  branches.emplace_back(DeleteBranch);
66  // for each edge uv incident to v, create the branch that takes uv and add uv to the "delete-all" branch
67  for(AdjIterRange<FGraph> r = adjacent_vertices(v, fg); r.first != r.second; ++r.first)
68  if(v != I.matched_with(*r.first)) {
69  const VertexPair<Graph> p(v, *r.first);
70  // construct a new branch at the end of branches
71  branches.emplace_back(TakeBranch, p);
72  // add the edge to the "delete all"-branch at the front
73  branches.front().edges.emplace_back(p);
74  }// if
75  }// function
76 
77  // get the vertex of maximum degree in fg, breaking ties with the degree in g
78  template<class FGraph, class Graph>
79  Vertex<Graph> get_best_branching_vertex(const FGraph& fg, const Graph& g){
80  VertexIterRange<Graph> r = vertices(g);
81  // assume we have vertices
82  assert(r.first != r.second);
83  VertexIter<Graph> max_v = r.first;
84  size_t max_deg_fg = degree(*max_v, fg);
85  size_t max_deg_g = degree(*max_v, g);
86  ++r.first;
87  for(; r.first != r.second; ++r.first){
88  const Vertex<Graph>& v = *r.first;
89  const size_t deg_fg = degree(v, fg);
90  if(deg_fg >= max_deg_fg){
91  const size_t deg_g = degree(v, g);
92  if((deg_fg == max_deg_fg) ? (deg_g > max_deg_g) : true){
93  max_deg_fg = deg_fg;
94  max_deg_g = deg_g;
95  max_v = r.first;
96  }// if
97  }// if
98  }// for
99  return *max_v;
100  }// function
101 
102  // outsource the branching
103  template<class Graph>
104  void branch_to_get_min_sol(Instance<Graph>& I, std::vector<Branch<Graph> >& branches, DynProgSolution<>& min_sol, const size_t depth){
105  const size_t bnum = branches.size();
106  Instance<Graph>* instances[bnum];
107  DynProgSolution<>* solutions[bnum];
108  pthread_data_block<Graph>* pthread_data_blocks[bnum];
109  pthread_t threads[bnum];
110  // to create joinable threads
111  pthread_attr_t attr;
112  pthread_attr_init(&attr);
113  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
114  size_t threads_created = 0;
115 
116  // Step 1: apply the branches to the graph, creating a list of graphs
117  for(size_t i = 0; i < bnum; ++i){
118  // Step 1a: get a copy H of I to which the branch has been applied
119  solutions[i] = new DynProgSolution<>(I);
120  instances[i] = copy_and_apply_branch(I, branches[i], solutions[i]);
121  }// for
122  // Step 2: recurse for each created instance
123  for(size_t i = 0; i < bnum; ++i){
124  // prepare data block
125  DEBUG3(std::cout << "depth "<<depth<<":"<<std::endl<<"branching: "<< BranchAndGraph<Graph>(branches[i], *I.g)<<std::endl);
126 
127  // don't go parallel if the FES is too low.
128  bool create_parallel = false;
129  if(I.infos.FES() > MIN_FES_TO_PARALLELIZE){
130  // see whether we have enough slots to branch in parallel
131  pthread_mutex_lock(&thread_lock);
132  create_parallel = (num_threads < MAX_NUM_THREADS);
133  if(create_parallel) ++num_threads;
134  pthread_mutex_unlock(&thread_lock);
135  if(!create_parallel) std::cout << "not creating new thread since we have already "<<num_threads<<" of them"<<std::endl;
136  }// if
137  if(create_parallel){
138  // prepare the data block
139  pthread_data_blocks[threads_created] = new pthread_data_block<Graph>(instances[i], solutions[i], depth + 1);
140  // create the thread
141  int pt_err = pthread_create(&threads[threads_created], &attr, FES_solve_rec_parallel<Graph>, (void*)pthread_data_blocks[threads_created]);
142  if(pt_err){
143  std::cerr << "error "<<pt_err<<" while creating thread"<<std::endl;
144  exit(-1);
145  }
146  ++threads_created;
147  } else FES_solve_recursively(*(instances[i]), solutions[i], depth + 1);
148 
149  DEBUG1(if(depth == 0) std::cout << (i+1)*100/bnum <<"% done" << std::endl);
150  }// for
151  // wait for all threads to finish
152  if(threads_created){
153  pthread_attr_destroy(&attr);
154  for(size_t i = 0; i < threads_created; ++i){
155  int pt_err = pthread_join(threads[i], NULL);
156  if(pt_err) {
157  std::cerr << "error "<<pt_err<<" while joining threads"<<std::endl;
158  exit(-1);
159  }// if
160  delete pthread_data_blocks[i];
161  }// for
162  }// if
163  // Step 3: update solution
164  for(size_t i = 0; i < bnum; ++i){
165  if(!solutions[i]->no_solutions()) min_sol.combine_min_of_two(*(solutions[i]));
166  delete instances[i];
167  delete solutions[i];
168  }// for
169  // clean up
170  }// function
171 
172  /*
173  // outsource the branching
174  template<class Graph>
175  void branch_to_get_min_sol(const Instance<Graph>& I, std::vector<Branch<Graph> >& branches, DynProgSolution<>& min_sol, const size_t depth, size_t& searchtree_nodes){
176  const size_t bnum = branches.size();
177  std::vector<Instance<Graph>*> instances(bnum, NULL);
178  std::vector<DynProgSolution<>*> solutions(bnum, NULL);
179  // Step 1: apply the branches to the graph, creating a list of graphs
180  for(size_t i = 0; i < bnum; ++i){
181  // Step 1a: get a copy H of I to which the branch has been applied
182  solutions[i] = new DynProgSolution<>(I);
183  instances[i] = copy_and_apply_branch(I, branches[i], solutions[i]);
184  }// for
185  // Step 2: recurse for each created instance
186 // #pragma omp parallel for if (depth < 5) num_threads(8)
187  for(size_t i = 0; i < bnum; ++i){
188  size_t tmp = 0;
189  DEBUG3(std::cout << "depth "<<depth<<":"<<std::endl<<"branching: "<< BranchAndGraph<Graph>(branches[i], *I.g)<<std::endl);
190  FES_solve_recursively(*(instances[i]), solutions[i], depth + 1, tmp);
191  DEBUG4(std::cout << "depth "<<depth<<": returned with solution"<<std::endl);
192  DEBUG1(if(depth == 0) std::cout << (i+1)*100/bnum <<"% done" << std::endl);
193  delete instances[i];
194  }
195  // Step 3: update solution
196  for(size_t i = 0; i < bnum; ++i){
197  if(!solutions[i]->no_solutions()) min_sol.combine_min_of_two(*(solutions[i]));
198  delete solutions[i];
199  }// for
200  }// function
201  */
202 
203  // recursively transform I into a solution, TODO: add upper and lower bounds
204  template<class Graph>
205  void FES_solve_recursively(Instance<Graph>& I, DynProgSolution<>*& S, size_t depth = 0){
206  DEBUG3(std::cout << "depth "<< depth<<": solving instance with " << num_vertices(*I.g) << " vertices and "<<num_edges(*I.g)<<" edges." << std::endl);
207  DEBUG5(write_adj_list_graph(std::cout, *I.g); std::cout << std::endl);
208 
209  // Step 0: preprocess
210  if(depth % PP_EACH_DEPTH == 0)
211  while(preprocess::pp_matching_pendant(I, S)) {};
212 
213  // Step 1: check for connected components
214  Instance<Graph> *J = I.split_off_instance();
215  if(J){
216  // Step 1c: if a component has been split off, then recurse for the split-off instance
217  DEBUG3(std::cout << "splitting into components of size "<<num_vertices(*I.g)<<" and "<<num_vertices(*J->g)<<std::endl);
218  DynProgSolution<>* solJ = new DynProgSolution<>(*J);
219  FES_solve_recursively(I, S, depth);
220  if(!S->no_solutions()){
221  FES_solve_recursively(*J, solJ, depth);
222  delete J;
223 
224  DEBUG3(std::cout << "depth "<<depth<<": rejoining solutions for components"<<std::endl);
225 
226  if(!solJ->no_solutions()){
227  // if both components have solutions, unite them and find a solution subset
228  S->combine_disjoint_union(*solJ);
229  delete solJ;
230  return;
231  }
232  }
233  if(S->no_solutions() || solJ->no_solutions()){
234  // if either component does not have a solution, return failure
235  S->invalidate();
236  delete solJ;
237  return;
238  }
239  }
240  // Step 2: base cases
241  if(I.max_deg_two()) {
242  DEBUG3(std::cout << "it's already max-deg 2, getting all sub-solutions"<<std::endl);
243  treat_deg_two(I, S);
244  return;
245  }
246  if(I.is_acyclic()){
247  DEBUG3(std::cout << "it's a tree, getting all sub-solutions"<<std::endl);
248  solve_on_tree(I, S);
249  return;
250  }
251  // Step 4: find a good branching
252  // Step 4a: try the bridges
253  std::vector<Branch<Graph> > branches;
254  const size_t I_FES = I.infos.FES();
255  typename BridgeMap<Graph>::const_iterator best_b = I.get_best_bridge();
256  // if there is a bridge splitting the FES nicely...
257  if( (best_b != I.infos.bridges.end()) ? ((best_b->second > BRIDGE_THRESHOLD) && (best_b->second < I_FES - BRIDGE_THRESHOLD)) : false){
258  // ... then take it
259  std::pair<Edge<Graph>,bool> e = edge(best_b->first.first, best_b->first.second, *I.g);
260  assert(e.second);
261  DEBUG4(std::cout << "branching on bridge: "<< VertexPairAndGraph<Graph>(best_b->first, *I.g)<<" splitting away an FES of "<<best_b->second<<"/"<<I_FES<<std::endl);
262  branches.emplace_back(TakeBranch, e.first, *I.g);
263  branches.emplace_back(DeleteBranch, e.first, *I.g);
264  } else {
265  // otherwise, get the maximum degree vertex v in fg, and
266  // Step 4b: create a filtered graph, edge filtered by "not in bridges" (invert the "is bridges"-filter)
267  typedef VP_double_contained_filter<Graph, BridgeMap<Graph>, Matching<Graph> > BridgeFilter;
268  typedef filtered_graph<Graph, BridgeFilter> FGraph;
269  BridgeFilter bfilter(&I.infos.bridges, &I.matched, I.g, true);
270  FGraph fg(*I.g, bfilter);
271 
272  const Vertex<Graph> v = get_best_branching_vertex(fg, *I.g);
273  DEBUG4(std::cout << "branching on vertex: "<< VertexAndGraph<Graph>(v, *I.g)<<std::endl);
274  // get all branches for v
275  get_FES_branches_for_vertex(I, fg, v, branches);
276  }
277 
278  DEBUG1(if(depth == 0) std::cout << "starting with solution "<<std::endl<<*S<<std::endl);
279  // Step 5: for each branch, make a copy of I, apply the branching, and recurse
280  DynProgSolution<> min_sol(I);
281  branch_to_get_min_sol(I, branches, min_sol, depth);
282  // Step 6: evaluate solution
283  if(min_sol.no_solutions()) S->invalidate(); else S->destructive_append(min_sol);
284  DEBUG3(std::cout << "returning from depth "<<depth<<" with "; if(S->no_solutions()) std::cout << "no solutions"<<std::endl; else std::cout << "solutions: "<<*S<<std::endl;);
285  } // function
286 
287  } // namespace
288 } // namespace
289 
290 #endif
Definition: read_adj_list.hpp:22
Definition: FES_branching.hpp:34