Antkeeper  0.0.1
sequence.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2023 Christopher J. Howard
3  *
4  * This file is part of Antkeeper source code.
5  *
6  * Antkeeper source code is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Antkeeper source code is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP
21 #define ANTKEEPER_GENETICS_SEQUENCE_HPP
22 
23 #include <engine/genetics/base.hpp>
25 #include <algorithm>
26 #include <iterator>
27 #include <random>
28 
29 namespace genetics {
30 
32 namespace sequence {
33 
39 template <class Iterator>
40 struct orf
41 {
43  Iterator start;
44 
46  Iterator stop;
47 };
48 
57 template <class ForwardIt1, class ForwardIt2, class URBG>
58 ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g);
59 
68 template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
69 void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g);
70 
78 template <class ForwardIt>
79 orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const codon::table& table);
80 
89 template <class ForwardIt, class UnaryOperation, class URBG>
90 ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g);
91 
100 template <class ForwardIt, class Size, class UnaryOperation, class URBG>
101 void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g);
102 
111 template <class ForwardIt1, class ForwardIt2>
112 ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride);
113 
121 template <class InputIt, class OutputIt>
122 OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first);
123 
132 template <class InputIt, class OutputIt>
133 OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const codon::table& table);
134 
136 namespace dna
137 {
145  template <class InputIt, class OutputIt>
146  OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
147 }
148 
150 namespace rna
151 {
159  template <class InputIt, class OutputIt>
160  OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
161 }
162 
163 template <class ForwardIt1, class ForwardIt2, class URBG>
164 ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
165 {
166  typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
167  std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);
168  difference_t pos = distribution(g);
169  std::advance(first1, pos);
170  std::advance(first2, pos);
171  std::swap_ranges(first1, last1, first2);
172  return first2;
173 }
174 
175 template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
176 void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
177 {
178  typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
179 
180  std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);
181  ForwardIt1 crossover1, crossover2;
182 
183  while (count)
184  {
185  crossover1 = first1;
186  crossover2 = first2;
187 
188  difference_t pos = distribution(g);
189  std::advance(crossover1, pos);
190  std::advance(crossover2, pos);
191  std::swap_ranges(crossover1, last1, crossover2);
192 
193  --count;
194  }
195 }
196 
197 template <class ForwardIt>
198 orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const codon::table& table)
199 {
200  ForwardIt second;
201  ForwardIt third;
202  orf<ForwardIt> result;
203 
204  auto distance = std::distance(first, last);
205 
206  if (distance >= 3)
207  {
208  second = first;
209  ++second;
210  third = second;
211  ++third;
212 
213  do
214  {
215  if (codon::is_start(*first, *second, *third, table.starts))
216  {
217  result.start = first;
218  distance -= 3;
219  break;
220  }
221 
222  first = second;
223  second = third;
224  ++third;
225  --distance;
226  }
227  while (third != last);
228  }
229 
230  for (; distance >= 3; distance -= 3)
231  {
232  first = ++third;
233  second = ++third;
234  ++third;
235 
236  if (codon::is_stop(*first, *second, *third, table.aas))
237  {
238  result.stop = first;
239  return result;
240  }
241  }
242 
243  return {last, last};
244 }
245 
246 template <class ForwardIt, class UnaryOperation, class URBG>
247 ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
248 {
249  typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
250 
251  if (first == last)
252  return first;
253 
254  std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
255  std::advance(first, distribution(g));
256  *first = unary_op(*first);
257 
258  return first;
259 }
260 
261 template <class ForwardIt, class Size, class UnaryOperation, class URBG>
262 void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
263 {
264  typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
265 
266  if (first == last)
267  return first;
268 
269  std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
270  ForwardIt mutation;
271 
272  while (count)
273  {
274  mutation = first;
275  std::advance(mutation, distribution(g));
276  *mutation = unary_op(*mutation);
277  --count;
278  }
279 }
280 
281 template <class ForwardIt1, class ForwardIt2>
282 ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride)
283 {
284  for (auto distance = std::distance(first, last); distance > 0; distance -= stride)
285  {
286  ForwardIt1 it = first;
287  for (ForwardIt2 s_it = s_first; ; ++it, ++s_it)
288  {
289  if (s_it == s_last)
290  return first;
291 
292  if (it == last)
293  return last;
294 
295  if (!base::compare(*it, *s_it))
296  break;
297  }
298 
299  if (distance > stride)
300  std::advance(first, stride);
301  }
302 
303  return last;
304 }
305 
306 template <class InputIt, class OutputIt>
307 inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first)
308 {
309  return std::transform(first, last, d_first, base::transcribe);
310 }
311 
312 template <class InputIt, class OutputIt>
313 OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const codon::table& table)
314 {
315  auto length = std::distance(first, last);
316 
317  if (length >= 3)
318  {
319  InputIt second = first;
320  ++second;
321  InputIt third = second;
322  ++third;
323 
324  *(d_first++) = codon::translate(*first, *second, *third, table.starts);
325 
326  for (length -= 3; length >= 3; length -= 3)
327  {
328  first = ++third;
329  second = ++third;
330  ++third;
331 
332  *(d_first++) = codon::translate(*first, *second, *third, table.aas);
333  }
334  }
335 
336  return d_first;
337 }
338 
339 namespace dna
340 {
341  template <class InputIt, class OutputIt>
342  inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
343  {
344  return std::transform(first, last, d_first, base::dna::complement);
345  }
346 }
347 
348 namespace rna
349 {
350  template <class InputIt, class OutputIt>
351  inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
352  {
353  return std::transform(first, last, d_first, base::rna::complement);
354  }
355 }
356 
357 } // namespace sequence
358 } // namespace genetics
359 
360 #endif // ANTKEEPER_GENETICS_SEQUENCE_HPP
constexpr int count(T x) noexcept
Returns the number of set bits in a value, known as a population count or Hamming weight.
Definition: bit-math.hpp:211
char complement(char symbol)
Returns the DNA complement of an IUPAC degenerate base symbol.
Definition: base.cpp:82
char complement(char symbol)
Returns the RNA complement of an IUPAC degenerate base symbol.
Definition: base.cpp:91
int compare(char a, char b)
Returns the number of bases that are represented by both IUPAC degenerate base symbols.
Definition: base.cpp:65
char transcribe(char symbol)
Transcribes an IUPAC degenerate base symbol between DNA and RNA, swapping T for U or U for T.
Definition: base.cpp:75
char translate(char base1, char base2, char base3, const char *aas)
Translates a codon into an amino acid.
Definition: codon.cpp:65
bool is_stop(char base1, char base2, char base3, const char *aas)
Returns true if a codon is a stop codon.
Definition: codon.cpp:79
bool is_start(char base1, char base2, char base3, const char *starts)
Returns true if a codon is a start codon.
Definition: codon.cpp:73
OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols.
Definition: sequence.hpp:342
OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols.
Definition: sequence.hpp:351
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG &&g)
Exchanges elements between two ranges, starting at a random offset.
Definition: sequence.hpp:164
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first)
Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping T for U or U for T.
Definition: sequence.hpp:307
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const codon::table &table)
Translates a sequence of codons into amino acids.
Definition: sequence.hpp:313
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG &&g)
Applies the given function to a random selection of elements in a range.
Definition: sequence.hpp:262
orf< ForwardIt > find_orf(ForwardIt first, ForwardIt last, const codon::table &table)
Searches a sequence for an open reading frame (ORF).
Definition: sequence.hpp:198
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG &&g)
Applies the given function to a randomly selected element in a range.
Definition: sequence.hpp:247
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG &&g)
Exchanges elements between two ranges multiple times, starting at a random offset each time.
Definition: sequence.hpp:176
ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits< ForwardIt1 >::difference_type stride)
Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate ...
Definition: sequence.hpp:282
Genetic algorithms.
Definition: amino-acid.hpp:25
T length(const quaternion< T > &q)
Calculates the length of a quaternion.
Definition: quaternion.hpp:602
T distance(const vector< T, N > &p0, const vector< T, N > &p1)
Calculates the distance between two points.
Definition: vector.hpp:1106
Table for translating codons to amino acids.
Definition: codon.hpp:34
const char * aas
String of 64 IUPAC amino acid base symbols, in TCAG order.
Definition: codon.hpp:36
const char * starts
String of 64 IUPAC amino acid base symbols, in TCAG order, where symbols other than - and * indicate ...
Definition: codon.hpp:39
Open reading frame (ORF), defined by a start codon and stop codon, with the distance between divisibl...
Definition: sequence.hpp:41
Iterator start
Iterator to the first base of the start codon.
Definition: sequence.hpp:43
Iterator stop
Iterator to the first base of the stop codon.
Definition: sequence.hpp:46