Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
ilp_model_multi_manygraph.hpp
1 
2 
3 #ifndef ILP_MODEL_MULTI_HPP
4 #define ILP_MODEL_MULTI_HPP
5 
6 #include "boost/multi_array.hpp"
7 
8 #include <string>
9 #include <vector>
10 
11 #include "ilp_common.hpp"
12 #include "utils/profiling.hpp"
13 
14 // support keeping track of the subgraph number, the layer, and the corresponding vertex in the original graph
15 namespace boost{
16  enum vertex_subgraph_nr_t { vertex_subgraph_nr = 201 };
17  BOOST_INSTALL_PROPERTY(vertex, subgraph_nr);
18 
19  enum vertex_layer_t { vertex_layer = 202 };
20  BOOST_INSTALL_PROPERTY(vertex, layer);
21 };
22 
23 
24 namespace scaffold{
25 
26  // auxiliary graph for callback construction
27  typedef boost::adjacency_list<
28  boost::hash_setS,
29  boost::listS,
30  boost::bidirectionalS,
31  boost::property<boost::vertex_subgraph_nr_t, size_t,
32  boost::property<boost::vertex_layer_t, size_t>
33  >,
34  boost::no_property
35  > AuxGraphs;
36 
37  // the y variable map has an entry for each pairs of subgraphs
38  template<class Graph>
39  using EdgeVarMapMatrix = boost::multi_array<EdgeVarMap<Graph>, 2>;
40 
41  // the vertex-translator needs to have an entry for each (subgraph)x(layer)
42  template<class Graph>
43  using TranslateMatrix = boost::multi_array<TranslateBiMap<Graph, AuxGraphs>, 2>;
44 
45 
46  template<class Graph>
47  struct var_collection {
48  const Instance<Graph>& I;
49  const size_t max_multi;
50 
51  EdgeVarMap<Graph> x,z;
52  EdgeVarMapMatrix<Graph> y;
53  std::vector<vertex_var_map<Graph> > s, tp, tc;
54 
55  var_collection(const Instance<Graph>& _I):
56  I(_I),
57  max_multi(_I.get_max_multiplicity_no_update()),
58  y(boost::extents[max_multi][max_multi]),
59  s(max_multi),
60  tp(max_multi),
61  tc(max_multi)
62  {}
63  };
64 
65 
66  // block isolated cycles as lazy constraint callback
67  template<class Graph>
68  class LazyCyclesBlockCallback: public IloCplex::LazyConstraintCallbackI{
69  const var_collection<Graph>& vars;
70  callback_statistics& stats;
71 
72  public:
73 
74  void build_aux_graph(AuxGraphs& aux, TranslateMatrix<Graph>& translate);
75  void cleanup_aux_graph(AuxGraphs& aux, TranslateMatrix<Graph>& translate);
76  void main();
77 
78  IloCplex::CallbackI* duplicateCallback() const{
79  return (new (getEnv()) LazyCyclesBlockCallback(getEnv(), vars, stats));
80  }
81 
82  LazyCyclesBlockCallback(IloEnv env, const var_collection<Graph>& _vars, callback_statistics& _stats):
83  IloCplex::LazyConstraintCallbackI(env),
84  vars(_vars),
85  stats(_stats)
86  {}
87 
88  }; // class
89 
90  // callback function
91  template<class Graph>
92  IloCplex::Callback GetLazyCyclesBlock(IloEnv env, const var_collection<Graph>& vars, callback_statistics& stats) {
93  return (new (env) LazyCyclesBlockCallback<Graph>(env, vars, stats));
94  }
95 
96  // build the auxiliary graph using the var collection vars
97  template<class Graph>
98  void LazyCyclesBlockCallback<Graph>::build_aux_graph(AuxGraphs& aux, TranslateMatrix<Graph>& translate){
99  const Instance<Graph>& I = vars.I;
100 
101  // construct the auxiliary graph from the current solution
102  for(size_t i = 0; i < vars.max_multi; ++i){
103  for(size_t j = 0; j < vars.max_multi; ++j){
104  for(const auto& y : vars.y[i][j]){
105  const VertexPair<Graph>& uv = y.first;
106  const Vertex<Graph>& u = uv.first;
107  const Vertex<Graph>& v = uv.second;
108  const IloNumVar& y_val = y.second;
109  const unsigned layer = I.is_matching_edge(uv) ? 0 : 1;
110 
111  if(is_true(getValue(y_val))){
112  // add vertices: first see if they're already there & if they are not there, add them to aux
113 
114  auto u_it = translate[i][layer].left.find(u);
115  if(u_it == translate[i][layer].left.end()){
116  const Vertex<AuxGraphs> au = boost::add_vertex(aux);
117  boost::put(vertex_subgraph_nr, aux, au, i);
118  boost::put(vertex_layer, aux, au, layer);
119  u_it = translate[i][layer].left.insert(u_it, typename TranslateBiMap<Graph, AuxGraphs>::left_map::value_type(u, au));
120  }
121 
122  auto v_it = translate[j][1-layer].left.find(v);
123  if(v_it == translate[j][1-layer].left.end()){
124  const Vertex<AuxGraphs> av = boost::add_vertex(aux);
125  boost::put(vertex_subgraph_nr, aux, av, j);
126  boost::put(vertex_layer, aux, av, 1-layer);
127  v_it = translate[j][1-layer].left.insert(v_it, typename TranslateBiMap<Graph, AuxGraphs>::left_map::value_type(v, av));
128  }
129 
130  // add edge
131  const Vertex<AuxGraphs>& aux_u = u_it->second;
132  const Vertex<AuxGraphs>& aux_v = v_it->second;
133  boost::add_edge(aux_u, aux_v, aux);
134  } // if
135  }// for
136  }// for
137  }// for
138  }// function
139 
140 
141  // remove all paths that are incident with s (that is, incident with a vertex in pending)
142  template<class Graph>
143  void LazyCyclesBlockCallback<Graph>::cleanup_aux_graph(AuxGraphs& aux, TranslateMatrix<Graph>& translate){
144  // construct the set of vertices in layer 1 that are reached from s
145  VertexSet<AuxGraphs>* pending = new VertexSet<Graph>();
146  DEBUG5(std::cout << "reachable from s: ");
147  for(size_t i = 0; i < vars.max_multi; ++i)
148  for(const auto& y_su : vars.s[i])
149  if(is_true(getValue(y_su.second))) {
150  const Vertex<Graph>& u = y_su.first;
151  DEBUG5(std::cout << VertexAndGraph<Graph>(u, *vars.I.g) <<" ");
152  // CPLEX might give us a solution with s->u->t_p paths; if, in this case, u doesn't have an edge in aux, we can't translate
153  auto u_iter = translate[i][0].left.find(u);
154  if(u_iter != translate[i][0].left.end()) pending->insert(u_iter->second);
155  }// if
156  DEBUG5(std::cout << "."<< std::endl);
157 
158  // parse & remove all paths & cycles incident with pending
159  unsigned current_layer = 0;
160  while(!pending->empty()){
161  VertexSet<AuxGraphs>* neighbors = new VertexSet<AuxGraphs>();
162  for(const Vertex<AuxGraphs>& aux_u : *pending){
163  const size_t subgraph_nr = boost::get(vertex_subgraph_nr, aux, aux_u);
164  const size_t layer = boost::get(vertex_layer, aux, aux_u);
165  // step 1: add all neighbors of u to the list of next pending vertices
166  for(OEdgeIterRange<AuxGraphs> r = boost::out_edges(aux_u, aux); r.first != r.second; ++r.first)
167  neighbors->insert(boost::target(*r.first, aux));
168  // step 2: remove u from the translate map
169  translate[subgraph_nr][layer].right.erase(aux_u);
170  // step 3: remove u from the aux graph
171  boost::clear_vertex(aux_u, aux);
172  boost::remove_vertex(aux_u, aux);
173  }// for
174  current_layer = 1 - current_layer;
175  delete pending;
176  pending = neighbors;
177  }// while
178  delete pending;
179  }// function
180 
181  template<class Graph>
182  void LazyCyclesBlockCallback<Graph>::main(){
183  stats.time_spent.resume();
184  DEBUG5(std::cout << "callback starts"<<std::endl);
185 
186  const Graph& g = *vars.I.g;
187  AuxGraphs aux;
188  // a translate map for each 2-layered "subgraph"
189  TranslateMatrix<Graph> translate(boost::extents[vars.max_multi][2]);
190 
191  // Step 1: build the auxiliary graph & remove everything that is reachable from s
192  DEBUG5(std::cout << "building auxiliary graph..."<<std::endl);
193  build_aux_graph(aux, translate);
194  DEBUG5(std::cout << "built aux graph with "<<boost::num_vertices(aux)<<" vertices & "<<boost::num_edges(aux)<<" edges"<<std::endl);
195 
196  DEBUG5(std::cout << "removing valid paths/cycles from auxiliary graph..."<<std::endl);
197  cleanup_aux_graph(aux, translate);
198  DEBUG5(std::cout << "after cleanup, "<<boost::num_vertices(aux)<<" vertices & "<<boost::num_edges(aux)<<" edges remain"<<std::endl);
199 
200  // whatever remains of aux are cycles that are not attached to s, that is, invalid cycles;
201  // Step 2: forbid them by adding a constraint forcing at least one arc going out of the cycle
202 
203  unsigned num_new_cuts = 0;
204 #ifdef LAZY_ADD_ALL_CYCLES
205  while(boost::num_edges(aux)){
206 #else
207  if(boost::num_edges(aux)){
208 #endif
209  IloExpr expr(getEnv()), rev_expr(getEnv());
210  VertexSet<AuxGraphs> S;
211 
212  DEBUG5(std::cout << "forbidding cyclic structure: ");
213  // find a set S of vertices in aux such that aux[S] is a cycle; write down the maximum multiplicity within S
214  size_t max_multiplicity = 1;
215  const Edge<AuxGraphs> e = *(boost::edges(aux).first);
216  const Vertex<AuxGraphs> x = boost::source(e, aux);
217  VertexQueue<AuxGraphs> pending;
218  pending.push(x);
219  S.insert(x);
220  while(!pending.empty()){
221  const Vertex<AuxGraphs> aux_u = pending.front(); pending.pop();
222  const unsigned u_subgraph_nr = boost::get(vertex_subgraph_nr, aux, aux_u);
223  const unsigned u_layer = boost::get(vertex_layer, aux, aux_u);
224  const Vertex<Graph>& u = translate[u_subgraph_nr][u_layer].right.at(aux_u);
225  for(OEdgeIterRange<AuxGraphs> r = boost::out_edges(aux_u, aux); r.first != r.second;){
226  const Edge<AuxGraphs> aux_uv = *r.first;
227  const Vertex<AuxGraphs> aux_v = boost::target(aux_uv, aux);
228  const unsigned v_subgraph_nr = boost::get(vertex_subgraph_nr, aux, aux_v);
229  const unsigned v_layer = boost::get(vertex_layer, aux, aux_v);
230 
231  const Vertex<Graph>& v = translate[v_subgraph_nr][v_layer].right.at(aux_v);
232  const VertexPair<Graph> uv = VertexPair<Graph>(u,v);
233  const VertexPair<Graph> vu = VertexPair<Graph>(v,u);
234 
235  const Edge<Graph> e = boost::edge(u, v, g).first;
236  max_multiplicity = std::max(max_multiplicity, boost::get(edge_multiplicity, g, e));
237 
238  DEBUG5(std::cout << " " << VertexPairAndGraph<Graph>(uv, g));
239 
240  // modify the expressions for the edge uv
241  expr -= vars.y[u_layer][v_layer].at(uv);
242  rev_expr -= vars.y[v_layer][u_layer].at(vu);
243 
244  // delete the offending edge from aux
245  ++r.first;
246  boost::remove_edge(aux_uv, aux);
247 
248  // add aux_v to the queue & mark it seen
249  if(!contains(S, aux_v)) {
250  pending.push(aux_v);
251  S.insert(aux_v);
252  }// if
253  }// for
254  }// while (pending is not empty)
255  DEBUG5(std::cout << std::endl);
256  const size_t S_size = S.size();
257  // add the exiting arcs to the expressions
258  for(const Vertex<AuxGraphs>& aux_u : S){
259  const unsigned u_subgraph = boost::get(vertex_subgraph_nr, aux, aux_u);
260  const unsigned u_layer = boost::get(vertex_layer, aux, aux_u);
261  const Vertex<Graph>& u = translate[u_subgraph][u_layer].right.at(aux_u);
262  // note: if the layer of u is 0, then uv is a matching edge and there are no edges from u leaving S (except to t_c)
263  if(u_layer == 1){
264  for(AdjIterRange<Graph> r = boost::adjacent_vertices(u, g); r.first != r.second; ++r.first){
265  const Vertex<Graph>& v = *r.first;
266  for(size_t v_layer = 0; v_layer < vars.max_multi; ++v_layer){
267  const auto aux_v_it = translate[v_layer][0].left.find(v);
268  // if the aux-version of uv is not within SxS, then modify the expressions using uv
269  if( (aux_v_it == translate[v_layer][0].left.end()) || !contains(S, aux_v_it->second)){
270  const VertexPair<Graph> uv = VertexPair<Graph>(u, v);
271  const VertexPair<Graph> vu = VertexPair<Graph>(v, u);
272 
273  expr += (double)(S_size * max_multiplicity) * vars.y[u_layer][v_layer].at(uv);
274  rev_expr += (double)(S_size * max_multiplicity) * vars.y[v_layer][u_layer].at(vu);
275  }// if
276  }// for
277  }// for
278  } else {
279  // if the layer of u is 0, then add u -> t_c
280  expr += (double)(S_size * max_multiplicity) * vars.tc[u_subgraph].at(u);
281  rev_expr += (double)(S_size * max_multiplicity) * vars.tc[u_subgraph].at(u);
282 
283  }// if layer = 1 / else
284  }// for S
285  // add the expressions to the model
286  add(expr >= 0);
287  add(rev_expr >= 0);
288  num_new_cuts += 2;
289 #ifdef LAZY_ADD_ALL_CYCLES
290  }// while aux has edges
291 #else
292  } // if aux has edges
293 #endif
294  // Step 3: collect statistical information
295  DEBUG2(std::cout << "======= callback called, added "<<num_new_cuts<<" new cuts ======"<<std::endl);
296  stats.times_called++;
297  stats.cuts_added += num_new_cuts;
298  stats.time_spent.pause();
299  } // main()
300 
301 
302 #warning TODO: use each edges multiplicity to restrict the domain of its variable instead of adding a constraint
303  // initialize the variables for the ILP formulation
304  template<class Graph>
305  void populate_variables(IloEnv& env, var_collection<Graph>& vars){
306  const Instance<Graph>& I = vars.I;
307  const Graph& g = *I.g;
308  const size_t max_multi = vars.max_multi;
309 
310  DEBUG5(std::cout << "creating z_{u,v}/x_{u,v}/y_uv"<<std::endl);
311  for(EdgeIterRange<Graph> e = boost::edges(g); e.first != e.second; ++e.first){
312  const Vertex<Graph>& u = source(*e.first, g);
313  const Vertex<Graph>& v = target(*e.first, g);
314  const std::string uname = std::to_string(boost::get(boost::vertex_name, g, u));
315  const std::string vname = std::to_string(boost::get(boost::vertex_name, g, v));
316 
317  // Step 1: add binary variables z_e = 1 <=> e belogs to a solution path/cycle
318  const std::string zname("z_" + uname + "," + vname);
319  vars.z.emplace(std::piecewise_construct, std::make_tuple(u,v), std::make_tuple(env, 0, 1, IloNumVar::Bool, zname.c_str()));
320  const std::string xname("x_" + uname + "," + vname);
321  vars.x.emplace(std::piecewise_construct, std::make_tuple(u,v), std::make_tuple(env, 0, max_multi, IloNumVar::Int, xname.c_str()));
322  // Step 2: add variables y^k_ij
323  const std::string yname1("y_" + uname + "," + vname);
324  const std::string yname2("y_" + vname + "," + uname);
325  for(size_t i = 0; i < max_multi; ++i)
326  for(size_t j = 0; j < max_multi; ++j){
327  vars.y[i][j].emplace(std::piecewise_construct, std::make_tuple(u,v), std::make_tuple(env, 0, 1, IloNumVar::Bool, yname1.c_str()));
328  vars.y[i][j].emplace(std::piecewise_construct, std::make_tuple(v,u), std::make_tuple(env, 0, 1, IloNumVar::Bool, yname2.c_str()));
329  }
330  }
331  DEBUG5(std::cout << "creating s/tp/tc interaction variables"<<std::endl);
332  // Step 3: add variables y_s,i and y_i,tp and y_i,tc
333  for(VertexIterRange<Graph> v = boost::vertices(g); v.first != v.second; ++v.first){
334  const Vertex<Graph>& u = *v.first;
335  const std::string name(std::to_string(boost::get(boost::vertex_name, g, u)));
336  for(size_t i = 0; i < max_multi; ++i){
337  // on layer 0, come up from s
338  vars.s[i].emplace(std::piecewise_construct, std::make_tuple(u), std::make_tuple(env, 0, 1, IloNumVar::Bool, ("y_s," + name).c_str()));
339  // on layer 1, go up to tp
340  vars.tp[i].emplace(std::piecewise_construct, std::make_tuple(u), std::make_tuple(env, 0, 1, IloNumVar::Bool, ("y_" + name + ",tp").c_str()));
341  // on layer 0, go up to tc
342  vars.tc[i].emplace(std::piecewise_construct, std::make_tuple(u), std::make_tuple(env, 0, 1, IloNumVar::Bool, ("y_" + name + ",tc").c_str()));
343  }// for
344  }// for
345  }
346 
347  // initialize the constraints for the ILP formulation
348  template<class Graph>
349  void populate_constraints(IloEnv& env, IloRangeArray& c, const var_collection<Graph>& vars){
350  const Instance<Graph>& I = vars.I;
351  const Graph& g(*I.g);
352 
353  // constraint (16): x_e = 0 --> z_e = 0
354  DEBUG5(std::cout << "adding (16): x_e = 0 --> z_e = 0"<<std::endl);
355  for(auto x : vars.x){
356  const VertexPair<Graph> uv = x.first;
357 
358  IloExpr expr(env);
359  expr += x.second;
360  expr -= vars.z.at(uv);
361  c.add(expr >= 0);
362  }
363 
364  // constraint (17): of all the representations of ij, exactly x_ij should be chosen!
365  DEBUG5(std::cout << "adding (17): of all the representations of e, exactly x_e should be chosen"<<std::endl);
366  for(auto x : vars.x){
367  const VertexPair<Graph> uv = x.first;
368  const VertexPair<Graph> vu(uv.second, uv.first);
369 
370  IloExpr expr(env);
371  for(size_t i = 0; i < vars.max_multi; ++i){
372  for(size_t j = 0; j < vars.max_multi; ++j){
373  expr += vars.y[i][j].at(uv);
374  expr += vars.y[i][j].at(vu);
375  }
376  }
377  expr -= x.second;
378  c.add(expr == 0);
379  }
380 
381  // constraint (2): all matching-edges should be chosen!
382  DEBUG5(std::cout << "adding (2'): any matching-edge e should be chosen between 1 and m(e) times"<<std::endl);
383  for(auto x : vars.x)
384  if(I.is_matching_edge(x.first)) {
385  const Edge<Graph> e = boost::edge(x.first.first, x.first.second, g).first;
386  c.add(x.second >= 1);
387  c.add(x.second <= (double)boost::get(boost::edge_multiplicity, g, e));
388  DEBUG3(std::cout<<"allowing "<<EdgeAndGraph<Graph>(e, g)<<" with multi "<<(double)boost::get(boost::edge_multiplicity, g, e)<<std::endl);
389  }
390 
391  // constraint (4),(6): there should be at most sigma_p paths & sigma_c cycles
392  DEBUG5(std::cout << "adding (4)&(6): there should be at most sigma_p paths & sigma_c cycles"<<std::endl);
393  {
394  IloExpr p_expr(env), c_expr(env);
395  for(size_t i = 0; i < vars.max_multi; ++i) {
396  for(auto y : vars.tp[i]) p_expr += y.second;
397  for(auto y : vars.tc[i]) c_expr += y.second;
398  }
399  c.add(p_expr <= (double)I.num_paths);
400  c.add(c_expr <= (double)I.num_cycles);
401  }
402 
403  // constraint (3): everything that comes into layer l should leave layer l
404  DEBUG5(std::cout << "adding (18): flow conservation"<<std::endl);
405  for(VertexIterRange<Graph> u = boost::vertices(g); u.first != u.second; ++u.first){
406  for(size_t i = 0; i < vars.max_multi; ++i){
407  for(size_t layer = 0; layer < 2; ++layer){
408  IloExpr expr(env);
409  for(AdjIterRange<Graph> nh = boost::adjacent_vertices(*u.first, g); nh.first != nh.second; ++nh.first){
410  const VertexPair<Graph> uv(*u.first, *nh.first);
411  const VertexPair<Graph> vu(*nh.first, *u.first);
412  // remember: layer 0 -> 1 contains only matching edges
413  if(I.is_matching_edge(uv) == (layer == 0)){
414  for(size_t j = 0; j < vars.max_multi; ++j) expr += vars.y[i][j].at(uv);
415  DEBUG5(std::cout << " + y[" << i << "][*]_" << VertexPairAndGraph<Graph>(uv, g));
416  } else {
417  for(size_t j = 0; j < vars.max_multi; ++j) expr -= vars.y[j][i].at(vu);
418  DEBUG5(std::cout << " - y[*]["<<i<<"]_"<<VertexPairAndGraph<Graph>(vu, g));
419  }// if
420  }// for
421  if(layer == 0){
422  // on layer 0, p-flow can come up from s
423  DEBUG5(std::cout << " - y_s["<<i<<"],"<<VertexAndGraph<Graph>(*u.first, g));
424  expr -= vars.s[i].at(*u.first);
425  // on layer 0, p-flow can go up to tc
426  DEBUG5(std::cout << " + y_"<<VertexAndGraph<Graph>(*u.first, g)<<",tc["<<i<<"]");
427  expr += vars.tc[i].at(*u.first);
428  } else {
429  // on layer 1, p-flow can go up to tp
430  DEBUG5(std::cout << " + y_"<<VertexAndGraph<Graph>(*u.first, g)<<",tp["<<i<<"]");
431  expr += vars.tp[i].at(*u.first);
432  }// if
433  c.add(expr == 0);
434  }// for
435  }// for
436  }// for
437  // constraint (7): everything that goes to tc has to come in from s
438  DEBUG5(std::cout << "adding (7): cycle conservation"<<std::endl);
439  for(VertexIterRange<Graph> u = vertices(g); u.first != u.second; ++u.first){
440  IloExpr expr(env);
441  for(size_t i = 0; i < vars.max_multi; ++i) {
442  expr += vars.s[i].at(*u.first);
443  expr -= vars.tc[i].at(*u.first);
444  }
445  c.add(expr >= 0);
446  }
447  // constraint (5): each subset S of vertices should contain at most |S|-1 endpoints of arcs
448  //DEBUG5(std::cout << "adding (5): each subset S of vertices should contain at most |S|-1 endpoints of arcs"<<std::endl);
449  // done via callback!
450  }
451 
452 
453  // compute the ILP formulation for a graph g
454  template<class Graph>
455  void populate_model(IloModel& model, IloEnv& env, var_collection<Graph>& vars){
456  const Graph& g(*vars.I.g);
457  IloRangeArray c(env);
458  // construct variables, keeping note of the x_ij, y^l_ij and z^l_ij variables
459  DEBUG3(std::cout << "creating variables"<<std::endl);
460  populate_variables(env, vars);
461  // add optimization function
462  DEBUG3(std::cout << "creating optimization expression"<<std::endl);
463  IloExpr opt_exp(env);
464  for(EdgeIterRange<Graph> e = boost::edges(g); e.first != e.second; ++e.first){
465  const Vertex<Graph>& u = boost::source(*e.first, g);
466  const Vertex<Graph>& v = boost::target(*e.first, g);
467  // get the appropriate x_ij variable
468  auto uv = vars.z.find(std::make_pair(u,v));
469  if(uv == vars.z.end()) uv = vars.z.find(std::make_pair(v,u));
470  assert(uv != vars.z.end());
471  // add the term weight(e)*x_e
472  opt_exp += (double)boost::get(boost::edge_weight, g, *e.first) * uv->second;
473  }
474  model.add(IloMaximize(env, opt_exp));
475  // construct constraints
476  DEBUG3(std::cout << "creating constraints"<<std::endl);
477  populate_constraints(env, c, vars);
478  model.add(c);
479  }
480 
481 }
482 
483 #endif
484 
Definition: graph_utils.hpp:18
Definition: read_adj_list.hpp:22