7 #ifndef STRING_UTILS_HPP
8 #define STRING_UTILS_HPP
14 #define WHITESPACES " \t"
/// @brief Remove all characters not in @p skip_to from the beginning of @p s.
///
/// Everything before the first character of @p s that occurs in @p skip_to
/// is erased; that character itself is kept. If no such character exists,
/// @p s ends up empty.
///
/// @param[in,out] s        string that is consumed from the front
/// @param[in]     skip_to  set of characters at which erasing stops
inline void skip_to(std::string& s,
                    const std::string& skip_to)
{
  // find_first_of() yields npos when nothing matches, and erase(0, npos)
  // removes the whole string, so the "not found" case needs no extra branch.
  s.erase(0, s.find_first_of(skip_to));
}
/// @brief Remove all characters in @p to_skip from the beginning of @p s.
///
/// Erasing stops at the first character of @p s that does not occur in
/// @p to_skip. If @p s consists solely of such characters (or is empty),
/// @p s ends up empty.
///
/// @param[in,out] s        string that is consumed from the front
/// @param[in]     to_skip  set of characters to strip from the front
inline void skip_all(std::string& s,
                     const std::string& to_skip)
{
  // find_first_not_of() yields npos when every character is skippable, and
  // erase(0, npos) removes the whole string, covering that case as well.
  s.erase(0, s.find_first_not_of(to_skip));
}
// Parse the integer at the start of s.
// NOTE(review): std::stoi returns int, so although `result` is declared long
// the parsed value is limited to the int range; std::stol would match the
// declared type. std::stoi also accepts a std::string directly, so the
// c_str() round-trip builds an unnecessary temporary.
33 long result = std::stoi(s.c_str());
41 std::string
trim(
const std::string& str,
const std::string& to_remove = WHITESPACES)
43 const size_t first = str.find_first_not_of(to_remove);
44 if(first == std::string::npos)
return "";
45 const size_t last = str.find_last_not_of(to_remove);
46 return str.substr(first, last - first + 1);
// Lengths of the two inputs (size_t narrowed to unsigned).
57 unsigned s1len = s1.size();
58 unsigned s2len = s2.size();
// Both loops below are 1-based: index y (x) refers to character y-1 of s1
// (x-1 of s2).
59 unsigned column_start = 1;
// Working array with s1len + 1 entries.
// NOTE(review): raw new[] -- the matching delete[] is not visible in this
// excerpt; confirm it exists on every exit path.
60 unsigned* column =
new unsigned[s1len + 1];
// Fill column[1..s1len] with 1..s1len; column[0] is not written by this call.
61 std::iota(column + column_start, column + s1len + 1, column_start);
// One pass per character of s2.
63 for(
unsigned x = column_start; x <= s2len; x++) {
// Start value of the diagonal for this pass: x - 1.
65 unsigned last_diagonal = x - column_start;
// One step per character of s1.
66 for(
unsigned y = column_start; y <= s1len; y++) {
// Remember column[y] before it is overwritten; it becomes the diagonal
// value for the next y.
67 unsigned old_diagonal = column[y];
// Candidate values for the new column[y].
// NOTE(review): only the last candidate is visible in this excerpt.
68 auto possibilities = {
// Diagonal value plus 0 if the current characters match, plus 1 otherwise.
71 last_diagonal + (s1[y - 1] == s2[x - 1]? 0 : 1)
// Keep the smallest candidate.
73 column[y] = std::min(possibilities);
74 last_diagonal = old_diagonal;
// The value left in the last entry of the column is taken as the result.
77 unsigned result = column[s1len];
/// @brief Return the Hamming distance between the maximal prefixes of equal
///        length of @p s1 and @p s2.
///
/// Only the first min(s1.length(), s2.length()) positions are compared;
/// surplus characters of the longer string do not contribute.
///
/// @param s1  first string
/// @param s2  second string
/// @return number of compared positions at which the strings differ
inline unsigned hamming_distance(const std::string& s1,
                                 const std::string& s2)
{
  // Compare indices 0 .. common-1 only. Starting at index `common` would
  // pit the shorter string's terminator against a real character of the
  // longer one and over-count by one.
  const std::size_t common = std::min(s1.length(), s2.length());
  unsigned result = 0;
  for(std::size_t i = 0; i < common; ++i)
    result += (s1[i] != s2[i]);
  return result;
}
// For a non-negative `length`, visits indices length-1 down to 0 and adds 1 to
// `result` for every index at which s1 and s2 differ; `length` is consumed
// (decremented) by the loop.
// NOTE(review): s1[length] / s2[length] are unchecked -- presumably the caller
// guarantees that length does not exceed either string's length; confirm
// against the signature, which is not visible in this excerpt.
96 while(length--) result += (s1[length] != s2[length]);
/// @brief Merge two sequences @p seq1 and @p seq2 with overlap @p overlap,
///        using a manually managed buffer and logging every step to std::cout.
///
/// The result consists of @p seq1 without its last @p overlap characters,
/// followed by all of @p seq2.
///
/// @param seq1     first sequence
/// @param seq2     second sequence
/// @param overlap  number of trailing characters of @p seq1 to drop
/// @pre overlap <= seq1.length() (checked with assert)
/// @return the merged string
inline std::string merge_strings_segfault(const std::string& seq1,
                                          const std::string& seq2,
                                          const unsigned overlap)
{
  assert(overlap <= seq1.length());
  const unsigned seq1_len = seq1.length() - overlap;
  const unsigned seq2_len = seq2.length();
  const unsigned new_len = seq1_len + seq2_len;

  // new_len payload bytes plus one byte for the terminator
  char* const new_seq = static_cast<char*>(malloc(new_len + 1));
  char* const seq1_start = new_seq;
  char* const seq2_start = new_seq + seq1_len;
  std::cout << "got "<<new_len+1<<" bytes @"<<(long)new_seq<<std::endl;

  std::cout << "writing "<<(long)(seq2_start - seq1_start)<<" chars of "<<seq1<<" to "<<(long)seq1_start<<std::endl;
  memcpy(seq1_start, seq1.c_str(), seq1_len);

  std::cout << "writing "<<seq2_len<<" chars of "<<seq2<<" to "<<(long)seq2_start<<std::endl;
  memcpy(seq2_start, seq2.c_str(), seq2_len);

  // The last valid index of the (new_len + 1)-byte buffer is new_len.
  // Writing to new_len + 1 would land one byte past the allocation and
  // leave the string unterminated.
  new_seq[new_len] = 0;

  // Pass the length explicitly so the copy does not depend on scanning for
  // the terminator (and embedded NUL characters survive).
  std::string result(new_seq, new_len);
  std::cout << "result string is "<< result <<" with c_str @"<<(long)(result.c_str())<<", freeing pointer @"<<(long)new_seq<<std::endl;
  free(new_seq);
  std::cout << "done!"<<std::endl;
  return result;
}
/// @brief Merge two sequences @p seq1 and @p seq2 with overlap @p overlap.
///
/// The result consists of @p seq1 without its last @p overlap characters,
/// followed by all of @p seq2.
///
/// @param seq1     first sequence
/// @param seq2     second sequence
/// @param overlap  number of trailing characters of @p seq1 to drop
/// @pre overlap <= seq1.length() (checked with assert, like
///      merge_strings_segfault); a larger value would make the unsigned
///      subtraction below wrap around and silently keep all of @p seq1
/// @return the merged string
inline std::string merge_strings(const std::string& seq1,
                                 const std::string& seq2,
                                 const unsigned overlap)
{
  assert(overlap <= seq1.length());
  std::string result = seq1.substr(0, seq1.length() - overlap);
  result += seq2;
  return result;
}
std::string merge_strings_segfault(const std::string &seq1, const std::string &seq2, const unsigned overlap)
merge two sequences seq1 and seq2 with overlap "overlap" according to their order ...
Definition: string_utils.hpp:104
std::string merge_strings(const std::string &seq1, const std::string &seq2, const unsigned overlap)
merge two sequences seq1 and seq2 with overlap "overlap" according to their order ...
Definition: string_utils.hpp:134
unsigned levenshtein_distance(const std::string &s1, const std::string &s2)
return the Levenshtein distance between two strings
Definition: string_utils.hpp:53
void skip_all(std::string &s, const std::string &to_skip)
remove all characters in 'to_skip' from the beginning of s
Definition: string_utils.hpp:24
long read_single_number(std::string &s)
consume an integer from the beginning of s and return it
Definition: string_utils.hpp:31
void skip_to(std::string &s, const std::string &skip_to)
remove all characters not in 'skip_to' from the beginning of s
Definition: string_utils.hpp:17
std::string trim(const std::string &str, const std::string &to_remove=WHITESPACES)
remove leading & trailing chars (whitespaces by default) from str
Definition: string_utils.hpp:41
unsigned hamming_distance(const std::string &s1, const std::string &s2)
returns the Hamming distance between the maximal prefixes of equal length
Definition: string_utils.hpp:83