Scaffolding  0.1
This program can assemble genome scaffolds using the pairing information in paired-end reads.
scaffolding_typedefs.hpp
1 
2 
3 #ifndef SCAFFOLDING_TYPEDEFS_HPP
4 #define SCAFFOLDING_TYPEDEFS_HPP
5 
6 #include <iostream>
7 #include <list>
8 #include <initializer_list>
9 #include <boost/graph/properties.hpp>
10 
11 #include "utils/string_utils.hpp"
12 #include "utils/graph_typedefs.hpp"
13 #include "utils/graph_infos.hpp"
14 #include "utils/adv_edge_iters.hpp"
15 #include "utils/scoring.hpp"
16 
// Sentinel weight: the default value of an edge property's weight; is_matching_edge()
// reports an edge as a matching edge exactly when its weight equals NO_WEIGHT.
// NOTE(review): UINT_MAX and INT_MIN come from <climits>, which this header does not
// include directly -- presumably pulled in by one of the utils headers; confirm.
17 #define NO_WEIGHT UINT_MAX
// Sentinel length: the default value of an edge property's length.
18 #define NO_LENGTH INT_MIN
// Textual markers for cyclic and reversed sequences.
// NOTE(review): presumably attached to sequence names on output; the usage is not visible here.
19 #define CYCLIC_SEQUENCE_INDICATOR "(c)"
20 #define REVERSE_SEQUENCE_INDICATOR "(rev)"
21 
// The 26 uppercase letters plus '*' and '-'.
// NOTE(review): presumably the alphabet of accepted sequence symbols; confirm against the users of BASES.
22 #define BASES "ABCDEFGHIJKLMNOPQRSTUVWXYZ*-"
23 
24 
25 // define bases such that, for every even i, COMPLEMENTARY_BASES[i] and COMPLEMENTARY_BASES[i+1] are complements of each other
26 #warning WARNING: by default, the complement of Adenine is Thymine, not Uracil! But both U & T will be complemented by A.
27 // to change the default complement, exchange U & T in the following
// Pairs of complementary bases stored back to back: "AT", "AU", "CG", "RY", "MK".
// The pointer itself is const so that this header-level definition has internal linkage;
// a non-const pointer would have external linkage and produce duplicate-symbol errors
// as soon as the header is included from more than one translation unit.
const char* const COMPLEMENTARY_BASES = "ATAUCGRYMK";
29 
30 namespace scaffold {
31 
32  // forward declaration of the reverse complementer
33  void reverse_complement_inplace(std::string&);
34 
35  // an oriented sequence is a sequence that starts at the vertex with the designated name
37  std::string sequence;
38  VertexName start_vertex;
39 
40  OrientedSequence() {}
42  sequence(os.sequence), start_vertex(os.start_vertex) {}
43  OrientedSequence(const std::string& _sequence, const std::string& _start = VertexName()):
44  sequence(_sequence), start_vertex(_start) {}
45 
46 
47  void reverse_complement(const VertexName& new_start_vertex = VertexName())
48  {
49  start_vertex = new_start_vertex;
50  reverse_complement_inplace(sequence);
51  }
52  };
53 
54  // an oriented sequence with a use counter
56  unsigned counter = 1;
57 
58  using OrientedSequence::OrientedSequence;
60  OrientedSequence(os) {}
61 
62  };
63 
64  // a SequenceMap assigns each contig name a sequence of base pairs
65  typedef boost::unordered_map<std::string, OrientedSequence> SequenceMap;
// a list of (name, sequence) string pairs
using NamedSequenceList = std::list<std::pair<std::string, std::string>>;
67 
69  unsigned index;
70  std::string name;
71 
72  // copy the vertex index as its new name
73  void index_to_name()
74  {
75  name = std::to_string(index);
76  }
77 
78  friend std::ostream& operator<<(std::ostream& os, const ScafVertexProperty& info)
79  {
80  if(info.name.empty())
81  return os << info.index;
82  else
83  return os << info.name;
84  }
85  };
87  unsigned weight = NO_WEIGHT;
88  int length = NO_LENGTH;
89  unsigned multiplicity = 1;
90  std::string contig_name;
91 
92  ScafEdgeProperty() {}
93  ScafEdgeProperty(const unsigned _weight, const int _length, const unsigned _multi, const std::string& _contig_name = ""):
94  weight(_weight), length(_length), multiplicity(_multi), contig_name(_contig_name) {}
96  weight(ep.weight), length(ep.length), multiplicity(ep.multiplicity), contig_name(ep.contig_name) {}
97 
98  // an edge is considered a matching edge if it does not have a weight assigned
99  bool is_matching_edge() const
100  {
101  return (weight == NO_WEIGHT);
102  }
103 
104  friend std::ostream& operator<<(std::ostream& os, const ScafEdgeProperty& info)
105  {
106  return os << "w: "<< info.weight << " l: " << info.length << " m: " << info.multiplicity << " c: '" << info.contig_name<<"'";
107  }
108 
109  };
111  unsigned insert_size;
112  unsigned standard_deviation;
113  };
114 
115 
116  typedef boost::adjacency_list<
117  boost::hash_setS, // OutEdgeList
118  boost::hash_setS, // VertexList
119  boost::undirectedS, // (bi)directed ?
123  > RawScaffoldGraph;
124 
125  typedef Vertex<RawScaffoldGraph> ScafVertex;
126  typedef VertexSet<RawScaffoldGraph> ScafVertexSet;
127  typedef VertexPair<RawScaffoldGraph> ScafVertexPair;
128  typedef VertexPairList<RawScaffoldGraph> ScafVertexPairList;
129  typedef VertexPairSet<RawScaffoldGraph> ScafVertexPairSet;
130  typedef VPairDependencyMap<RawScaffoldGraph> ScafVPairDependencyMap;
131  typedef VertexIter<RawScaffoldGraph> ScafVIter;
132  typedef VertexIterRange<RawScaffoldGraph> ScafVIterRange;
133  typedef AdjIter<RawScaffoldGraph> ScafAdjIter;
134  typedef AdjIterRange<RawScaffoldGraph> ScafAdjIterRange;
135 
136  typedef typename boost::property_map<RawScaffoldGraph, unsigned ScafVertexProperty::*>::type ScafVIndexMap;
137  typedef typename boost::property_map<RawScaffoldGraph, unsigned ScafVertexProperty::*>::const_type cScafVIndexMap;
138 
139  typedef Edge<RawScaffoldGraph> ScafEdge;
140  typedef EdgeSet<RawScaffoldGraph> ScafEdgeSet;
141  typedef EdgeList<RawScaffoldGraph> ScafEdgeList;
142  typedef EdgeIter<RawScaffoldGraph> ScafEdgeIter;
143  typedef EdgeIterRange<RawScaffoldGraph> ScafEdgeIterRange;
144  typedef OEdgeIter<RawScaffoldGraph> ScafOEdgeIter;
145  typedef OEdgeIterRange<RawScaffoldGraph> ScafOEdgeIterRange;
146  typedef IEdgeIter<RawScaffoldGraph> ScafIEdgeIter;
147  typedef IEdgeIterRange<RawScaffoldGraph> ScafIEdgeIterRange;
148 
149  typedef Matching<RawScaffoldGraph> ScafMatching;
150  typedef std::list<ScafEdge> AlternatingPath;
151  typedef boost::unordered_map<ScafVertexPair, ScafVertexPairSet> ScafJumpMap;
152 
153  // a contig jump is a subgraph consisting of a short (shorter than insert size) contig and a 3-path of non-contig edges,
154  // together forming a 4-cycle; this occurs if reads (corresponding to the middle edge of the 3-path) "span over" the short contig
155  struct contig_jump{
156  const ScafEdge contig;
157  std::list<ScafEdge> path;
158 
159  contig_jump(const ScafEdge& _contig, const std::initializer_list<ScafEdge>& L):
160  contig(_contig),
161  path(L)
162  {}
163 
164  };
165 
166 
167 } // namespace
168 
169 #endif
Definition: scaffolding_typedefs.hpp:55
Definition: read_adj_list.hpp:22
Definition: scaffolding_typedefs.hpp:36
Definition: scaffolding_typedefs.hpp:110
Definition: scaffolding_typedefs.hpp:68
Definition: scaffolding_typedefs.hpp:155
Definition: scaffolding_typedefs.hpp:86