Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
jump_preprocess.hpp
1 
2 #ifndef JUMP_PREPROCESS_HPP
3 #define JUMP_PREPROCESS_HPP
4 
5 #include "utils/graph_typedefs.hpp"
6 #include "utils/scaffolding_typedefs.hpp"
7 
8 namespace scaffold{
9 
10 
// The jump preprocessing allows "jumps",
// that is, placing small contigs in the gap between two contigs.
// To achieve this, we might have to add some edges. For example, if
//
//   u-------v
//   |
//   \--x==y
//
// we have to add the edge yv to allow xy to be placed in uv.
// If we do this, however, we need to make sure that, whenever yv
// is in a solution, so is ux! So, this dependency is recorded in a map
// that is handed in by the caller.
//
// Also, map all possible jumps uv over xy to the arc xy.
22 
23 
24  // given a small contig xy, add edges to support it being placed in appropriate non-contigs
25  // keep interesting_contigs up to date by adding contigs that might have become interesting again by adding new non-contigs
26  void support_jump_directional(const ScafVertexPair& xy,
27  const int xy_length,
28  const int contig_multi,
29  ScaffoldGraph& sg,
30  std::queue<ScafEdge>& interesting_contigs,
31  ScafJumpMap& jump_map,
32  ScafVPairDependencyMap& dependencies)
33  {
34  const RawScaffoldGraph& g = sg.get_graph();
35  const ScafVertex& x = xy.first;
36  const ScafVertex& y = xy.second;
37  const ScafVertexPair yx(y, x);
38  assert(contig_multi > 0);
39 
40  // prepare a predicate for an iterator that ignores matching edges and edges to "x" or "y"
41  TargetContainedPredicate<RawScaffoldGraph> nor_x_nor_y_nor_match({x, y}, g, true, false);
42 
43  DEBUG5(std::cout << "checking small 3-paths involving "<< sg[x].name << "->"<< sg[y].name <<" (len = "<<xy_length<<")"<<std::endl);
44  // step 2: for each edge uv of a neighbor u of x such that
45  // xy fits in uv (and v != y) add the edge v--y
46  for(auto x_incident = sg.get_incident(x, nor_x_nor_y_nor_match); x_incident.is_valid(); ++x_incident){
47  const ScafEdge& xu_e = *x_incident;
48  const ScafVertex& u = boost::target(xu_e, g);
49  const ScafVertexPair ux(u, x);
50  const ScafVertexPair xu(x, u);
51  const int xu_length = sg[xu_e].length;
52  if(xu_length != NO_LENGTH){
53  for(auto u_incident = sg.get_incident(u, nor_x_nor_y_nor_match); u_incident.is_valid(); ++u_incident){
54  const ScafEdge& uv = *u_incident;
55  const ScafVertex& v = boost::target(uv, g);
56  const int uv_length = sg[uv].length;
57  if(uv_length != NO_LENGTH){
58  int vy_length = uv_length - xy_length - xu_length;
59  DEBUG5(std::cout << "next 3-path: "<< sg[x].name <<"->"<<sg[u].name << "->"<< sg[v].name <<" -- len: "<<xu_length+uv_length<<std::endl);
60  // from here, we have v---u---x===y
61  // now, add the edge v---y & register its dependence on x---u
62  std::pair<ScafEdge, bool> vy_in_sg(uv, false); // store the edge vy and whether it exists in sg
63  if(vy_length >= 0){
64  vy_in_sg = sg.add_edge(v, y, ScafEdgeProperty(0, vy_length, contig_multi));
65  if(vy_in_sg.second){
66  // vg has been added to g as a new edge
67  DEBUG5(std::cout << "inserted "<<sg[v].name<<"->"<<sg[y].name<<" of length "<<vy_length<<" = "<<uv_length<<" (len of "<<sg[u].name<<"->"<<sg[v].name<<") - "<<xy_length<<" (len of "<<sg[x].name<<"->"<<sg[y].name<<") - "<<xu_length<<" (len of "<<sg[x].name<<"->"<<sg[u].name<<")"<<std::endl);
68  // step A: register dependencies of the new edge on the old edge
69  dependencies[ScafVertexPair(y,v)].emplace_back(ux);
70  dependencies[ScafVertexPair(v,y)].emplace_back(xu);
71  // step B: register v's & y's incident contig as newly interesting
72  interesting_contigs.push(sg.incident_matching_edge(v));
73  interesting_contigs.push(sg.incident_matching_edge(y));
74  } else vy_in_sg.second = true; // indicate that vg already exists
75  } else vy_in_sg = sg.find_edge(v, y);
76  // in any case, if xy is shorter than x-u-v-y, then register the jump over xy
77  if(vy_in_sg.second){
78  vy_length = sg[vy_in_sg.first].length;
79  if((vy_length != NO_LENGTH) && xu_length + uv_length + vy_length >= xy_length){
80  jump_map.DEEP_EMPLACE(ScafVertexPair(u,v),).first->second.insert(xy);
81  jump_map.DEEP_EMPLACE(ScafVertexPair(v,u),).first->second.insert(yx);
82  }// if vy has a length and x-u-v-y is larger than xy
83  }// if vy exists in sg
84  }// if vw has a length
85  }// for all v-w
86  }// if xu has a length
87  }// for all x-v
88  }
89 
90  void support_jump(const ScafEdge& small_contig,
91  ScaffoldGraph& sg,
92  std::queue<ScafEdge>& interesting_contigs,
93  ScafJumpMap& jump_map,
94  ScafVPairDependencyMap& dependencies)
95  {
96  const RawScaffoldGraph& g = sg.get_graph();
97  const int contig_length = sg[small_contig].length;
98  const unsigned contig_multi = sg[small_contig].multiplicity;
99  // step 1: get left & right neighbors
100  const ScafVertex left = boost::source(small_contig, g);
101  const ScafVertex right = boost::target(small_contig, g);
102  const ScafVertexPair lr(left,right);
103  const ScafVertexPair rl(right,left);
104 
105  support_jump_directional(lr, contig_length, contig_multi, sg, interesting_contigs, jump_map, dependencies);
106  support_jump_directional(rl, contig_length, contig_multi, sg, interesting_contigs, jump_map, dependencies);
107  }
108 
109  void preprocess_jumps(ScaffoldGraph& sg, ScafJumpMap& jump_map, ScafVPairDependencyMap& dependencies)
110  {
111  const RawScaffoldGraph& g = sg.get_graph();
112 
113  std::queue<ScafEdge> interesting_contigs;
114  for(auto e_it = sg.get_matching_edges(); e_it.is_valid(); ++e_it)
115  interesting_contigs.push(*e_it);
116 
117  // extend the graph to support each small contig being placed in appropriate non-contigs
118  DEBUG5(std::cout << "edges of g: "<<std::endl);
119  DEBUG5(for(auto r = boost::edges(g); r.first != r.second; ++r.first) std::cout << sg.get_edge_name(*r.first)<<"\t["<<sg[*r.first].length<<"]" <<std::endl);
120  while(!interesting_contigs.empty()){
121  support_jump(interesting_contigs.front(), sg, interesting_contigs, jump_map, dependencies);
122  interesting_contigs.pop();
123  }
124  }// function
125 
126 }// namespace
127 
128 
129 #endif
130 
Definition: read_adj_list.hpp:22