Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
ilp_model_multi.hpp
1 
2 
3 #ifndef ILP_MODEL_MULTI_HPP
4 #define ILP_MODEL_MULTI_HPP
5 
6 #include <string>
7 #include <vector>
8 
9 #include "utils/utils.hpp"
10 #include "utils/set_queue.hpp"
11 #include "utils/profiling.hpp"
12 #include "solv/ilp_common.hpp"
13 #include "solv/ilp_contig_jumps.hpp"
14 
15 
16 namespace scaffold{ namespace solv { namespace ilp {
17 
18 
19 
20  // block isolated cycles as lazy constraint callback
21  class LazyCyclesBlockCallback: public IloCplex::LazyConstraintCallbackI{
22 
23  struct VertexProp {
24  unsigned layer;
25  const ScafVertex& orig;
26 
27  VertexProp(const ScafVertex& _orig, const unsigned _layer = 0): layer(_layer), orig(_orig) {}
28  };
29  typedef AuxiliaryGraph<VertexProp, no_property> AuxGraph;
30  typedef std::vector<VTranslateMap<RawScaffoldGraph, AuxGraph> > TranslateMatrix;
31 
32  const var_collection_multi& vars;
33  callback_statistics& stats;
34 
35  public:
36 
37  void build_auxgraph(const EdgeVarMap& var_map, AuxGraph& aux, TranslateMatrix& translate_layer);
38  void compute_startpoints(const TranslateMatrix& translate_layer, VertexSet<AuxGraph>& startpoints);
39  unsigned forbid_cycles_in_y();
40  unsigned forbid_invalid_jumps();
41  void main();
42 
43  IloCplex::CallbackI* duplicateCallback() const{
44  return (new (getEnv()) LazyCyclesBlockCallback(getEnv(), vars, stats));
45  }
46 
47  LazyCyclesBlockCallback(IloEnv env, const var_collection_multi& _vars, callback_statistics& _stats):
48  IloCplex::LazyConstraintCallbackI(env),
49  vars(_vars),
50  stats(_stats)
51  {}
52 
53  }; // class
54 
55  // callback function
56  IloCplex::Callback GetLazyCyclesBlock(IloEnv env, const var_collection_multi& vars, callback_statistics& stats) {
57  return (new (env) LazyCyclesBlockCallback(env, vars, stats));
58  }
59 
60  // build the auxiliary graph using the var collection vars
61  void LazyCyclesBlockCallback::build_auxgraph(const EdgeVarMap& var_map, AuxGraph& aux, TranslateMatrix& translate_layer)
62  {
63  const Instance& I = vars.I;
64 
65  // construct the auxiliary graph from the current solution
66  for(const auto& y : var_map) if(is_true(getValue(y.second))){
67  const ScafVertexPair& uv = y.first;
68  const ScafVertex& u = uv.first;
69  const ScafVertex& v = uv.second;
70  const unsigned layer = (I.get_matching().at(u) == v) ? 0 : 1;
71 
72 
73  // add vertices: first see if they're already there & if they are not there, add them to aux
74  auto u_it = translate_layer.at(layer).find(u);
75  if(u_it == translate_layer.at(layer).end()){
76  const Vertex<AuxGraph> au = boost::add_vertex(VertexProp(u, layer), aux);
77  u_it = translate_layer.at(layer).insert(u_it, typename VTranslateMap<RawScaffoldGraph, AuxGraph>::value_type(u, au));
78  }
79 
80  auto v_it = translate_layer.at(1-layer).find(v);
81  if(v_it == translate_layer.at(1-layer).end()){
82  const Vertex<AuxGraph> av = boost::add_vertex(VertexProp(v, 1-layer), aux);
83  v_it = translate_layer.at(1-layer).insert(v_it, typename VTranslateMap<RawScaffoldGraph, AuxGraph>::value_type(v, av));
84  }
85 
86  // add edge
87  const Vertex<AuxGraph>& aux_u = u_it->second;
88  const Vertex<AuxGraph>& aux_v = v_it->second;
89  boost::add_edge(aux_u, aux_v, aux);
90  }// for
91  }// function
92 
93  // return the startpoints of al paths & cycles
94  void LazyCyclesBlockCallback::compute_startpoints(const TranslateMatrix& translate_layer, VertexSet<AuxGraph>& startpoints)
95  {
96  DEBUG5(std::cout << "reachable from s: ");
97  for(const auto& y_su : vars.from_source) if(is_true(getValue(y_su.second))) {
98  const ScafVertex& u = y_su.first;
99  DEBUG5(std::cout << vars.I[u].name <<" ");
100  // note that CPLEX might give us a solution that has some s->u->t_c paths; if, in this case, u does not have an edge in aux, we can't translate
101  auto u_iter = translate_layer.at(0).find(u);
102  if(u_iter != translate_layer.at(0).end()) startpoints.insert(u_iter->second);
103  }// if
104  DEBUG5(std::cout << "."<< std::endl);
105  }
106 
107  unsigned LazyCyclesBlockCallback::forbid_cycles_in_y()
108  {
109  const Instance& I = vars.I;
110  const RawScaffoldGraph& g = I.get_graph();
111  AuxGraph aux;
112  TranslateMatrix translate(2);
113  VertexSet<AuxGraph> startpoints;
114 
115  // Step 1: build the auxiliary graph & remove everything that is reachable from s
116  DEBUG5(std::cout << "building auxiliary graph..."<<std::endl);
117  build_auxgraph(vars.times_used, aux, translate);
118  DEBUG5(std::cout << "built aux graph with "<<boost::num_vertices(aux)<<" vertices & "<<boost::num_edges(aux)<<" edges"<<std::endl);
119 
120  DEBUG5(std::cout << "removing valid paths/cycles from auxiliary graph..."<<std::endl);
121  // parse & remove all paths & cycles incident with pending
122  compute_startpoints(translate, startpoints);
123  remove_reachable(aux, startpoints, dir_fwd);
124  DEBUG5(std::cout << "after cleanup, "<<boost::num_vertices(aux)<<" vertices & "<<boost::num_edges(aux)<<" edges remain"<<std::endl);
125 
126  // whatever remains of aux are cycles that are not attached to s, that is, invalid cycles;
127  // Step 2: forbid them by adding a constraint forcing at least one arc going out of the cycle
128 
129  unsigned num_new_cuts = 0;
130  while(boost::num_edges(aux) != 0){
131  IloExpr expr(getEnv()), rev_expr(getEnv());
132  VertexSet<AuxGraph> S;
133 
134  DEBUG5(std::cout << "forbidding cyclic structure: ");
135  // find a set S of vertices in aux such that aux[S] is a cycle; write down the maximum multiplicity within S
136  unsigned max_multiplicity = 1;
137  const Edge<AuxGraph> e = *(boost::edges(aux).first);
138  const Vertex<AuxGraph> x = boost::source(e, aux);
139  VertexQueue<AuxGraph> pending;
140  pending.push(x);
141  S.insert(x);
142  while(! pending.empty()){
143  const Vertex<AuxGraph> aux_u = pending.front(); pending.pop();
144  const VertexProp& u_prop = aux[aux_u];
145  const ScafVertex& u = u_prop.orig;
146  for(OEdgeIterRange<AuxGraph> r = boost::out_edges(aux_u, aux); r.first != r.second;){
147  const Edge<AuxGraph> aux_uv = *r.first;
148  const Vertex<AuxGraph> aux_v = boost::target(aux_uv, aux);
149  const ScafVertex& v = aux[aux_v].orig;
150  const ScafVertexPair uv(u,v);
151  const ScafVertexPair vu(v,u);
152  const ScafEdge e = boost::edge(u, v, g).first;
153  max_multiplicity = std::max(max_multiplicity, g[e].multiplicity);
154 
155  DEBUG5(std::cout << " " << vars.I.get_edge_name(uv));
156 
157  // modify the expressions for the edge uv
158  expr -= vars.times_used.at(uv);
159  rev_expr -= vars.times_used.at(vu);
160 
161  // delete the offending edge from aux
162  ++r.first;
163  boost::remove_edge(aux_uv, aux);
164 
165  // add aux_v to the queue & mark it seen
166  if(!contains(S, aux_v)) {
167  pending.push(aux_v);
168  S.insert(aux_v);
169  }// if
170  }// for
171  }// while (pending is not empty)
172  DEBUG5(std::cout << "." << std::endl);
173  const size_t S_size = S.size();
174  // add the exiting arcs to the expressions
175  for(const Vertex<AuxGraph>& aux_u : S){
176  const VertexProp& u_prop = aux[aux_u];
177  const ScafVertex& u = u_prop.orig;
178  // note: if layer_of_u is 0, then uv is a matching edge and there are no edges from u leaving S (except to t_c)
179  if(u_prop.layer == 1){
180  for(ScafAdjIterRange r = boost::adjacent_vertices(u, g); r.first != r.second; ++r.first){
181  const ScafVertex& v = *r.first;
182  const auto aux_v_it = translate.at(0).find(v);
183  // if the aux-version of uv is not within SxS, then modify the expressions using uv
184  if( (aux_v_it == translate.at(0).end()) || !contains(S, aux_v_it->second)){
185  const ScafVertexPair uv(u, v);
186  const ScafVertexPair vu(v, u);
187 
188  expr += (double)(S_size * max_multiplicity) * vars.times_used.at(uv);
189  rev_expr += (double)(S_size * max_multiplicity) * vars.times_used.at(vu);
190  }// if
191  }// for
192  } else {
193  if(I.num_cycles > 0){
194  // if the layer of u is 0, then add u -> t_c
195  expr += (double)(S_size * max_multiplicity) * vars.to_cycle_sink.at(u);
196  rev_expr += (double)(S_size * max_multiplicity) * vars.to_cycle_sink.at(u);
197  }
198  }// if layer = 1 / else
199  }// for S
200  // add the expressions to the model
201  add(expr >= 0);
202  add(rev_expr >= 0);
203  num_new_cuts += 2;
204  }// while aux has edges
205  return num_new_cuts;
206  }
207 
208 
209  unsigned LazyCyclesBlockCallback::forbid_invalid_jumps()
210  {
211 #warning TODO: writeme
212  return 0;
213  }// function
214 
215 
216  void LazyCyclesBlockCallback::main()
217  {
218  stats.time_spent.resume();
219  DEBUG5(std::cout << "callback starts"<<std::endl);
220 
221  // note: forbidding cycles in w takes a bit longer, so don't run the cycle-interdiction on w unless y is acyclic
222  unsigned new_cuts = forbid_cycles_in_y();
223  if(new_cuts == 0) new_cuts = forbid_invalid_jumps();
224 
225  // Step 3: collect statistical information
226  DEBUG2(std::cout << "======= callback called, added "<<new_cuts<<" new cuts ======"<<std::endl);
227  stats.times_called++;
228  stats.cuts_added += new_cuts;
229  stats.time_spent.pause();
230  } // main()
231 
232 
233 
234  // initialize the variables for the ILP formulation
235  void populate_variables(IloModel& model,
236  const IloEnv& env,
237  var_collection_multi& vars,
238  const ScafJumpMap& jumps,
239  const ilp_options& opts,
240  const IloNum max_mult = IloInfinity)
241  {
242  const Instance& I = vars.I;
243  const RawScaffoldGraph& g = I.get_graph();
244  //IloRangeArray c(env);
245 
246  DEBUG5(std::cout << "creating z_{u,v}/x_{u,v}/y_uv"<<std::endl);
247  for(ScafEdgeIterRange er = boost::edges(g); er.first != er.second; ++er.first){
248  const ScafEdge e = *er.first;
249  const ScafVertex& u = boost::source(e, g);
250  const ScafVertex& v = boost::target(e, g);
251  const bool is_matching = I[e].is_matching_edge();
252  // note: matching edges have to be taken at least once and have a restricted upper bound
253  const IloNum uv_upper = I[e].multiplicity;
254  const IloNum uv_lower = is_matching ? 1 : 0;
255  const std::string uname = I[u].name;
256  const std::string vname = I[v].name;
257 
258  if(opts.multi_take_all){
259  // Step 1: add binary variables z_e = 1 <=> e belogs to a solution path/cycle
260  const std::string zname("z_" + uname + "," + vname);
261  vars.used_at_all.emplace(std::piecewise_construct, std::make_tuple(u,v), std::make_tuple(env, 0, 1, IloNumVar::Bool, zname.c_str()));
262  }
263  // Step 1a: add variables x_e = # of occurances of e
264  const std::string xname("x_" + uname + "," + vname);
265  //DEBUG5(std::cout << xname << ": [" << uv_lower << "," << uv_upper << "]"<<std::endl);
266  vars.times_used_undir.emplace(std::piecewise_construct, std::make_tuple(u,v), std::make_tuple(env, uv_lower, uv_upper, IloNumVar::Int, xname.c_str()));
267  // Step 2: add variables y^k_ij
268  const std::string yname1("y_" + uname + "," + vname);
269  const std::string yname2("y_" + vname + "," + uname);
270  //DEBUG5(std::cout << yname1 << ": [0," << uv_upper << "]"<<std::endl);
271  //DEBUG5(std::cout << yname2 << ": [0," << uv_upper << "]"<<std::endl);
272  vars.times_used.emplace(std::piecewise_construct, std::make_tuple(u,v), std::make_tuple(env, 0, uv_upper, IloNumVar::Int, yname1.c_str()));
273  vars.times_used.emplace(std::piecewise_construct, std::make_tuple(v,u), std::make_tuple(env, 0, uv_upper, IloNumVar::Int, yname2.c_str()));
274  }
275 
276  DEBUG5(std::cout << "creating s/tp/tc interaction variables"<<std::endl);
277  // Step 3: add variables y_s,i and y_i,tp and y_i,tc
278  for(ScafVIterRange v = boost::vertices(g); v.first != v.second; ++v.first){
279  const ScafVertex& u = *v.first;
280  const std::string name(g[u].name);
281  // on layer 0, come up from s
282  //DEBUG5(std::cout << "y_s," << name << ": [0," << max_mult << "]"<<std::endl);
283  vars.from_source[u] = IloNumVar(env, 0, max_mult, IloNumVar::Int, ("y_s," + name).c_str());
284  // on layer 1, go up to tp
285  if(I.num_paths > 0){
286  //DEBUG5(std::cout << "y_" << name << ",tp" << ": [0," << max_mult << "]"<<std::endl);
287  vars.to_path_sink[u] = IloNumVar(env, 0, max_mult, IloNumVar::Int, ("y_" + name + ",tp").c_str());
288  }
289  // on layer 0, go up to tc
290  if(I.num_cycles > 0){
291  //DEBUG5(std::cout <<"y_"<< name <<",tc"<< ": [0," << max_mult << "]"<<std::endl);
292  vars.to_cycle_sink[u] = IloNumVar(env, 0, max_mult, IloNumVar::Int, ("y_" + name + ",tc").c_str());
293  }
294  }
295 
296  if(!opts.ignore_contig_jumps){
297  DEBUG5(std::cout << "creating contig-jump variables & expressions"<<std::endl);
298  // Step 3: construct a contig jump network for uv
299  for(auto e_it = I.get_non_matching_edges(); e_it.is_valid(); ++e_it){
300  const ScafVertex& u = boost::source(*e_it, g);
301  const ScafVertex& v = boost::target(*e_it, g);
302  const int uv_length = I[*e_it].length;
303  for(const ScafVertexPair uv: {ScafVertexPair(u,v), ScafVertexPair(v,u)}){
304  const auto jump_set_it = jumps.find(uv);
305  if(jump_set_it != jumps.cend())
306  construct_jumps_for_VPair(model, env, vars, uv, uv_length, jump_set_it->second);
307  }
308  }
309  }// if we consider contig jumps
310  }
311 
312  // initialize the constraints for the ILP formulation
313  void populate_constraints(IloModel& model, const IloEnv& env, const var_collection_multi& vars, const ilp_options& opts)
314  {
315  const Instance& I = vars.I;
316  const RawScaffoldGraph& g = I.get_graph();
317  const ScafMatching& matching = I.get_matching();
318  IloRangeArray c(env);
319 
320  if(opts.multi_take_all){
321  // constraint (8): x_e = 0 --> z_e = 0
322  DEBUG5(std::cout << "adding (8): x_e = 0 --> z_e = 0"<<std::endl);
323  for(auto x : vars.times_used_undir){
324  const ScafVertexPair uv = x.first;
325 
326  IloExpr expr(env);
327  expr += x.second;
328  expr -= vars.used_at_all.at(uv);
329  c.add(expr >= 0);
330  }
331  }
332 
333  // constraint (1): of all the representations of ij, exactly x_ij should be chosen!
334  DEBUG5(std::cout << "adding (1): of all the representations of e, exactly x_e should be chosen"<<std::endl);
335  for(auto x : vars.times_used_undir){
336  const ScafVertexPair uv = x.first;
337  const ScafVertexPair vu(uv.second, uv.first);
338 
339  IloExpr expr(env);
340  expr -= x.second;
341  expr += vars.times_used.at(uv);
342  expr += vars.times_used.at(vu);
343  // add all the contig jump representations of uv and vu
344  if(!opts.ignore_contig_jumps)
345  if(!I.is_matching_edge(uv)){
346  const auto uv_iter = vars.contig_jumps.find(uv);
347  if(uv_iter != vars.contig_jumps.cend()) expr += uv_iter->second;
348  const auto vu_iter = vars.contig_jumps.find(vu);
349  if(vu_iter != vars.contig_jumps.cend()) expr += vu_iter->second;
350  }
351  c.add(expr == 0);
352  }
353 
354  /* NOTE: this is now implemented as domain restrictions on the x-variables
355  // constraint (2): all matching-edges should be chosen!
356  DEBUG5(std::cout << "adding (2'): any matching-edge e should be chosen between 1 and m(e) times"<<std::endl);
357  for(auto x : vars.times_used_undir)
358  if(I.is_matching_edge(x.first)) {
359  const ScafEdge e = boost::edge(x.first.first, x.first.second, g).first;
360  c.add(x.second >= 1);
361  c.add(x.second <= (double)boost::get(boost::edge_multiplicity, g, e));
362  DEBUG3(std::cout<<"allowing "<<EdgeAndGraph<ScaffoldGraph>(e, g)<<" with multi "<<(double)boost::get(boost::edge_multiplicity, g, e)<<std::endl);
363  }
364  */
365 
366  // constraint (4),(6): there should be at most sigma_p paths & sigma_c cycles
367  DEBUG5(std::cout << "adding (4)&(6): there should be at most sigma_p paths & sigma_c cycles & sigma objects in total"<<std::endl);
368  {
369  IloExpr p_expr(env), c_expr(env), o_expr(env);
370  for(auto y : vars.to_path_sink) p_expr += y.second;
371  c.add(p_expr <= (double)I.num_paths);
372 
373  for(auto y : vars.to_cycle_sink) c_expr += y.second;
374  c.add(c_expr <= (double)I.num_cycles);
375 
376  for(auto y : vars.from_source) o_expr += y.second;
377  c.add(o_expr <= (double)I.num_objects);
378  }
379 
380  // constraint (3): everything that comes into layer l should leave layer l
381  DEBUG5(std::cout << "adding (3): flow conservation"<<std::endl);
382  for(unsigned layer = 0; layer < 2; ++layer){
383  DEBUG5(std::cout << "==== layer "<<layer<<" ===="<<std::endl);
384  for(ScafVIterRange range = boost::vertices(g); range.first != range.second; ++range.first){
385  const ScafVertex& u = *range.first;
386  //DEBUG5(std::cout << "vertex "<<I[u].name<<": ");
387  IloExpr expr(env);
388  for(ScafAdjIterRange nh = adjacent_vertices(u, g); nh.first != nh.second; ++nh.first){
389  const ScafVertex& v = *nh.first;
390  const ScafVertexPair uv(u, v);
391  const ScafVertexPair vu(v, u);
392  // remember: layer 0 -> 1 contains only matching edges
393  if((matching.at(u) == v) == (layer == 0)){
394  // flow goes to next layer along uv
395  expr += vars.times_used.at(uv);
396  //DEBUG5(std::cout << " + y_" << I[u].name <<"->"<<I[v].name);
397  } else {
398  // flow comes from the other layer alog vu
399  expr -= vars.times_used.at(vu);
400  //DEBUG5(std::cout << " - y_"<< I[v].name <<"->"<<I[u].name);
401  }// if
402  }// for
403  if(layer == 0){
404  // on layer 0, p-flow can come up from s
405  //DEBUG5(std::cout << " - y_s,"<< I[u].name);
406  expr -= vars.from_source.at(u);
407  if(I.num_cycles > 0){
408  // on layer 0, p-flow can go up to tc
409  //DEBUG5(std::cout << " + y_"<< I[u].name<<",tc");
410  expr += vars.to_cycle_sink.at(u);
411  }
412  } else {
413  if(I.num_paths > 0){
414  // on layer 1, p-flow can go up to tp
415  //DEBUG5(std::cout << " + y_"<< I[u].name<<",tp");
416  expr += vars.to_path_sink.at(u);
417  }
418  }// if
419  c.add(expr == 0);
420  //DEBUG5(std::cout << " = 0" << std::endl);
421  }// for
422  }// for
423 
424  if(I.num_cycles > 0){
425  // constraint (7): everything that goes to tc has to come in from s
426  DEBUG5(std::cout << "adding (7): cycle conservation"<<std::endl);
427  for(ScafVIterRange u = boost::vertices(g); u.first != u.second; ++u.first){
428  IloExpr expr(env);
429  expr += vars.from_source.at(*u.first);
430  expr -= vars.to_cycle_sink.at(*u.first);
431  c.add(expr >= 0);
432  }
433  }
434 
435  // constraint (5): each subset S of vertices should contain at most |S|-1 endpoints of arcs
436  //DEBUG5(std::cout << "adding (5): each subset S of vertices should contain at most |S|-1 endpoints of arcs"<<std::endl);
437  // done via callback!
438  model.add(c);
439  }
440 
441  void create_optimization_target(IloModel& model, const IloEnv& env, var_collection_multi& vars, const ilp_options& opts)
442  {
443  const Instance& I = vars.I;
444  const RawScaffoldGraph& g = I.get_graph();
445 
446  IloExpr opt_exp(env);
447  for(ScafEdgeIterRange e = boost::edges(g); e.first != e.second; ++e.first){
448  const ScafEdgeProperty& uv_info = g[*e.first];
449  const ScafVertex& u = boost::source(*e.first, g);
450  const ScafVertex& v = boost::target(*e.first, g);
451  if(!uv_info.is_matching_edge()){
452  // get the appropriate x_ij variable
453  // note, if the solution weight does not scale with the multiplicity, we have boolean variables z indicating whether x > 0
454  const EdgeVarMap& x = opts.multi_take_all ? vars.used_at_all : vars.times_used_undir;
455 
456  auto uv = x.find(ScafVertexPair(u,v));
457  if(uv == x.cend()) uv = x.find(ScafVertexPair(v,u));
458  assert(uv != x.cend());
459  // add the term weight(e)*x_e
460  opt_exp += (double)(I[*e.first].weight) * uv->second;
461  }
462  }
463  model.add(IloMaximize(env, opt_exp));
464 
465  }
466 
467  // compute the ILP formulation for a graph g
468  void populate_model(IloModel& model, const IloEnv& env, var_collection_multi& vars, const ScafJumpMap& jumps, const ilp_options& opts)
469  {
470  // construct variables, keeping note of the x_ij, y^l_ij and z^l_ij variables
471  DEBUG3(std::cout << "creating variables"<<std::endl);
472  populate_variables(model, env, vars, jumps, opts);
473  // construct constraints
474  DEBUG3(std::cout << "creating constraints"<<std::endl);
475  populate_constraints(model, env, vars, opts);
476  // add optimization function
477  DEBUG3(std::cout << "creating optimization expression"<<std::endl);
478  create_optimization_target(model, env, vars, opts);
479  }
480 
481 
482 }}} // namespace
483 
484 #endif
485 
Definition: ilp_common.hpp:85
Definition: read_adj_list.hpp:22
void pause()
pause the timer (= stop())
Definition: profiling.hpp:41
an instance is a ScaffoldGraph with given path- & cycle- numbers and a solution to keep track of dele...
Definition: instance.hpp:27
void resume()
resume the timer by setting an artificial start_time
Definition: profiling.hpp:46
Definition: ilp_model_multi.hpp:21
EdgeName get_edge_name(const VertexPair< Graph > &uv) const
get a copy of the name of the edge uv; independent of its presence
Definition: scaffold_graph.hpp:421
bool contains(const Set &s, const Element &el)
a more readable containment check
Definition: utils.hpp:171
Definition: ilp_common.hpp:40
const Matching< Graph > & get_matching() const
get a const reference to the perfect matching in _the_graph
Definition: scaffold_graph.hpp:232