Filename | /Users/ap13/perl5/lib/perl5/Bio/SeqUtils.pm |
Statements | Executed 105 statements in 5.48ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 896µs | 1.12ms | BEGIN@585 | Bio::SeqUtils::
2 | 2 | 1 | 115µs | 132µs | valid_aa | Bio::SeqUtils::
1 | 1 | 1 | 18µs | 37µs | BEGIN@189 | Bio::SeqUtils::
1 | 1 | 1 | 14µs | 63µs | BEGIN@191 | Bio::SeqUtils::
27 | 1 | 1 | 12µs | 12µs | CORE:match (opcode) | Bio::SeqUtils::
1 | 1 | 1 | 11µs | 19µs | BEGIN@190 | Bio::SeqUtils::
1 | 1 | 1 | 10µs | 45µs | BEGIN@192 | Bio::SeqUtils::
1 | 1 | 1 | 6µs | 6µs | CORE:sort (opcode) | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _coord_adjust | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _coord_adjust_deletion | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _coord_adjust_insertion | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _coord_revcom | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _feature_revcom | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _get_similarity | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _location_objects_from_coordinate_list | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _new_seq_from_old | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _new_seq_via_clone | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | _single_loc_object_from_collection | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | cat | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | delete | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | evolve | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | insert | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | ligate | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | mutate | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | revcom_with_features | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | seq3 | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | seq3in | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | translate_3frames | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | translate_6frames | Bio::SeqUtils::
0 | 0 | 0 | 0s | 0s | trunc_with_features | Bio::SeqUtils::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # BioPerl module for Bio::SeqUtils | ||||
2 | # | ||||
3 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
4 | # | ||||
5 | # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org> | ||||
6 | # | ||||
7 | # Copyright Heikki Lehvaslaiho | ||||
8 | # | ||||
9 | # You may distribute this module under the same terms as perl itself | ||||
10 | |||||
11 | # POD documentation - main docs before the code | ||||
12 | |||||
13 | =head1 NAME | ||||
14 | |||||
15 | Bio::SeqUtils - Additional methods for PrimarySeq objects | ||||
16 | |||||
17 | =head1 SYNOPSIS | ||||
18 | |||||
19 | use Bio::SeqUtils; | ||||
20 | # get a Bio::PrimarySeqI compliant object, $seq, somehow | ||||
21 | $util = Bio::SeqUtils->new(); | ||||
22 | $polypeptide_3char = $util->seq3($seq); | ||||
23 | # or | ||||
24 | $polypeptide_3char = Bio::SeqUtils->seq3($seq); | ||||
25 | |||||
26 | # set the sequence string (stored in one char code in the object) | ||||
27 | Bio::SeqUtils->seq3($seq, $polypeptide_3char); | ||||
28 | |||||
29 | # translate a sequence in all six frames | ||||
30 | @seqs = Bio::SeqUtils->translate_6frames($seq); | ||||
31 | |||||
32 | # inplace editing of the sequence | ||||
33 | Bio::SeqUtils->mutate($seq, | ||||
34 | Bio::LiveSeq::Mutation->new(-seq => 'c', | ||||
35 | -pos => 3 | ||||
36 | )); | ||||
37 | # mutate a sequence to desired similarity% | ||||
38 | $newseq = Bio::SeqUtils-> evolve | ||||
39 | ($seq, $similarity, $transition_transversion_rate); | ||||
40 | |||||
41 | # concatenate two or more sequences with annotations and features, | ||||
42 | # the first sequence will be modified | ||||
43 | Bio::SeqUtils->cat(@seqs); | ||||
44 | my $catseq=$seqs[0]; | ||||
45 | |||||
46 | # truncate a sequence, retaining features and adjusting their | ||||
47 | # coordinates if necessary | ||||
48 | my $truncseq = Bio::SeqUtils->trunc_with_features($seq, 100, 200); | ||||
49 | |||||
50 | # reverse complement a sequence and its features | ||||
51 | my $revcomseq = Bio::SeqUtils->revcom_with_features($seq); | ||||
52 | |||||
53 | # simulate cloning of a fragment into a vector. Cut the vector at | ||||
54 | # positions 1000 and 1100 (deleting postions 1001 to 1099) and | ||||
55 | # "ligate" a fragment into the sites. The fragment is | ||||
56 | # reverse-complemented in this example (option "flip"). | ||||
57 | # All features of the vector and fragment are preserved and | ||||
58 | # features that are affected by the deletion/insertion are | ||||
59 | # modified accordingly. | ||||
60 | # $vector and $fragment must be Bio::SeqI compliant objects | ||||
61 | my $new_molecule = Bio::Sequtils->ligate( | ||||
62 | -vector => $vector, | ||||
63 | -fragment => $fragment, | ||||
64 | -left => 1000, | ||||
65 | -right => 1100, | ||||
66 | -flip => 1 | ||||
67 | ); | ||||
68 | |||||
69 | # delete a segment of a sequence (from pos 1000 to 1100, inclusive), | ||||
70 | # again preserving features and annotations | ||||
71 | my $new_molecule = Bio::SeqUtils->cut( $seq, 1000, 1100 ); | ||||
72 | |||||
73 | # insert a fragment into a recipient between positions 1000 and | ||||
74 | # 1001. $recipient is a Bio::SeqI compliant object | ||||
75 | my $new_molecule = Bio::SeqUtils::PbrTools->insert( | ||||
76 | $recipient_seq, | ||||
77 | $fragment_seq, | ||||
78 | 1000 | ||||
79 | ); | ||||
80 | |||||
81 | =head1 DESCRIPTION | ||||
82 | |||||
83 | This class is a holder of methods that work on Bio::PrimarySeqI- | ||||
84 | compliant sequence objects, e.g. Bio::PrimarySeq and | ||||
85 | Bio::Seq. These methods are not part of the Bio::PrimarySeqI | ||||
86 | interface and should in general not be essential to the primary function | ||||
87 | of sequence objects. If you are thinking of adding essential | ||||
88 | functions, it might be better to create your own sequence class. | ||||
89 | See L<Bio::PrimarySeqI>, L<Bio::PrimarySeq>, and L<Bio::Seq> for more. | ||||
90 | |||||
91 | The methods take as their first argument a sequence object. It is | ||||
92 | possible to use methods without first creating a SeqUtils object, | ||||
93 | i.e. use it as an anonymous hash. | ||||
94 | |||||
95 | The first two methods, seq3() and seq3in(), give out or read in protein | ||||
96 | sequences coded in three letter IUPAC amino acid codes. | ||||
97 | |||||
98 | The next two methods, translate_3frames() and translate_6frames(), wrap | ||||
99 | around the standard translate method to give back an array of three | ||||
100 | forward or all six frame translations. | ||||
101 | |||||
102 | The mutate() method mutates the sequence string with a mutation | ||||
103 | description object. | ||||
104 | |||||
105 | The cat() method concatenates two or more sequences. The first sequence | ||||
106 | is modified by addition of the remaining sequences. All annotations and | ||||
107 | sequence features will be transferred. | ||||
108 | |||||
109 | The revcom_with_features() and trunc_with_features() methods are similar | ||||
110 | to the revcom() and trunc() methods from Bio::Seq, but also adjust any | ||||
111 | features associated with the sequence as appropriate. | ||||
112 | |||||
113 | There are also methods that simulate molecular cloning with rich | ||||
114 | sequence objects. | ||||
115 | The delete() method cuts a segment out of a sequence and re-joins the | ||||
116 | left and right fragments (like splicing or digesting and re-ligating a | ||||
117 | molecule). Positions (and types) of sequence features are adjusted | ||||
118 | accordingly: | ||||
119 | Features that span the deleted segment are converted to split featuress | ||||
120 | to indicate the disruption. (Sub)Features that extend into the deleted | ||||
121 | segment are truncated. | ||||
122 | A new molecule is created and returned. | ||||
123 | |||||
124 | The insert() method inserts a fragment (which can be a rich Bio::Seq | ||||
125 | object) into another sequence object adding all annotations and | ||||
126 | features to the final product. | ||||
127 | Features that span the insertion site are converted to split features | ||||
128 | to indicate the disruption. | ||||
129 | A new feature is added to indicate the inserted fragment itself. | ||||
130 | A new molecule is created and returned. | ||||
131 | |||||
132 | The ligate() method simulates digesting a recipient (vector) and | ||||
133 | ligating a fragment into it, which can also be flipped if needed. It | ||||
134 | is simply a combination of a deletion and an insertion step and | ||||
135 | returns a new molecule. The rules for modifying feature locations | ||||
136 | outlined above are also used here, e.g. features that span the cut | ||||
137 | sites are converted to split features with truncated sub-locations. | ||||
138 | |||||
139 | |||||
140 | =head1 FEEDBACK | ||||
141 | |||||
142 | =head2 Mailing Lists | ||||
143 | |||||
144 | User feedback is an integral part of the evolution of this and other | ||||
145 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
146 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
147 | |||||
148 | bioperl-l@bioperl.org - General discussion | ||||
149 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
150 | |||||
151 | =head2 Support | ||||
152 | |||||
153 | Please direct usage questions or support issues to the mailing list: | ||||
154 | |||||
155 | I<bioperl-l@bioperl.org> | ||||
156 | |||||
157 | rather than to the module maintainer directly. Many experienced and | ||||
158 | reponsive experts will be able look at the problem and quickly | ||||
159 | address it. Please include a thorough description of the problem | ||||
160 | with code and data examples if at all possible. | ||||
161 | |||||
162 | =head2 Reporting Bugs | ||||
163 | |||||
164 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
165 | the bugs and their resolution. Bug reports can be submitted via the | ||||
166 | web: | ||||
167 | |||||
168 | https://github.com/bioperl/bioperl-live/issues | ||||
169 | |||||
170 | =head1 AUTHOR - Heikki Lehvaslaiho | ||||
171 | |||||
172 | Email: heikki-at-bioperl-dot-org | ||||
173 | |||||
174 | =head1 CONTRIBUTORS | ||||
175 | |||||
176 | Roy R. Chaudhuri - roy.chaudhuri at gmail.com | ||||
177 | Frank Schwach - frank.schwach@sanger.ac.uk | ||||
178 | |||||
179 | =head1 APPENDIX | ||||
180 | |||||
181 | The rest of the documentation details each of the object | ||||
182 | methods. Internal methods are usually preceded with a _ | ||||
183 | |||||
184 | =cut | ||||
185 | |||||
186 | # Let the code begin... | ||||
187 | |||||
188 | package Bio::SeqUtils; | ||||
189 | 2 | 31µs | 2 | 55µs | # spent 37µs (18+19) within Bio::SeqUtils::BEGIN@189 which was called:
# once (18µs+19µs) by Bio::Tools::CodonTable::BEGIN@191 at line 189 # spent 37µs making 1 call to Bio::SeqUtils::BEGIN@189
# spent 19µs making 1 call to strict::import |
190 | 2 | 33µs | 2 | 27µs | # spent 19µs (11+8) within Bio::SeqUtils::BEGIN@190 which was called:
# once (11µs+8µs) by Bio::Tools::CodonTable::BEGIN@191 at line 190 # spent 19µs making 1 call to Bio::SeqUtils::BEGIN@190
# spent 8µs making 1 call to warnings::import |
191 | 2 | 31µs | 2 | 112µs | # spent 63µs (14+49) within Bio::SeqUtils::BEGIN@191 which was called:
# once (14µs+49µs) by Bio::Tools::CodonTable::BEGIN@191 at line 191 # spent 63µs making 1 call to Bio::SeqUtils::BEGIN@191
# spent 49µs making 1 call to Exporter::import |
192 | 2 | 1.25ms | 2 | 79µs | # spent 45µs (10+34) within Bio::SeqUtils::BEGIN@192 which was called:
# once (10µs+34µs) by Bio::Tools::CodonTable::BEGIN@191 at line 192 # spent 45µs making 1 call to Bio::SeqUtils::BEGIN@192
# spent 34µs making 1 call to parent::import |
193 | |||||
194 | # new inherited from RootI | ||||
195 | |||||
196 | 1 | 17µs | our %ONECODE = ( | ||
197 | 'Ala' => 'A', | ||||
198 | 'Asx' => 'B', | ||||
199 | 'Cys' => 'C', | ||||
200 | 'Asp' => 'D', | ||||
201 | 'Glu' => 'E', | ||||
202 | 'Phe' => 'F', | ||||
203 | 'Gly' => 'G', | ||||
204 | 'His' => 'H', | ||||
205 | 'Ile' => 'I', | ||||
206 | 'Lys' => 'K', | ||||
207 | 'Leu' => 'L', | ||||
208 | 'Met' => 'M', | ||||
209 | 'Asn' => 'N', | ||||
210 | 'Pro' => 'P', | ||||
211 | 'Gln' => 'Q', | ||||
212 | 'Arg' => 'R', | ||||
213 | 'Ser' => 'S', | ||||
214 | 'Thr' => 'T', | ||||
215 | 'Val' => 'V', | ||||
216 | 'Trp' => 'W', | ||||
217 | 'Xaa' => 'X', | ||||
218 | 'Tyr' => 'Y', | ||||
219 | 'Glx' => 'Z', | ||||
220 | 'Ter' => '*', | ||||
221 | 'Sec' => 'U', | ||||
222 | 'Pyl' => 'O', | ||||
223 | 'Xle' => 'J' | ||||
224 | ); | ||||
225 | |||||
226 | 1 | 10µs | our %THREECODE = ( | ||
227 | 'A' => 'Ala', | ||||
228 | 'B' => 'Asx', | ||||
229 | 'C' => 'Cys', | ||||
230 | 'D' => 'Asp', | ||||
231 | 'E' => 'Glu', | ||||
232 | 'F' => 'Phe', | ||||
233 | 'G' => 'Gly', | ||||
234 | 'H' => 'His', | ||||
235 | 'I' => 'Ile', | ||||
236 | 'K' => 'Lys', | ||||
237 | 'L' => 'Leu', | ||||
238 | 'M' => 'Met', | ||||
239 | 'N' => 'Asn', | ||||
240 | 'P' => 'Pro', | ||||
241 | 'Q' => 'Gln', | ||||
242 | 'R' => 'Arg', | ||||
243 | 'S' => 'Ser', | ||||
244 | 'T' => 'Thr', | ||||
245 | 'V' => 'Val', | ||||
246 | 'W' => 'Trp', | ||||
247 | 'Y' => 'Tyr', | ||||
248 | 'Z' => 'Glx', | ||||
249 | 'X' => 'Xaa', | ||||
250 | '*' => 'Ter', | ||||
251 | 'U' => 'Sec', | ||||
252 | 'O' => 'Pyl', | ||||
253 | 'J' => 'Xle' | ||||
254 | ); | ||||
255 | |||||
256 | =head2 seq3 | ||||
257 | |||||
258 | Title : seq3 | ||||
259 | Usage : $string = Bio::SeqUtils->seq3($seq) | ||||
260 | Function: Read only method that returns the amino acid sequence as a | ||||
261 | string of three letter codes. alphabet has to be | ||||
262 | 'protein'. Output follows the IUPAC standard plus 'Ter' for | ||||
263 | terminator. Any unknown character, including the default | ||||
264 | unknown character 'X', is changed into 'Xaa'. A noncoded | ||||
265 | aminoacid selenocystein is recognized (Sec, U). | ||||
266 | |||||
267 | Returns : A scalar | ||||
268 | Args : character used for stop in the protein sequence optional, | ||||
269 | defaults to '*' string used to separate the output amino | ||||
270 | acid codes, optional, defaults to '' | ||||
271 | |||||
272 | =cut | ||||
273 | |||||
274 | sub seq3 { | ||||
275 | my ( $self, $seq, $stop, $sep ) = @_; | ||||
276 | |||||
277 | $seq->isa('Bio::PrimarySeqI') | ||||
278 | || $self->throw('Not a Bio::PrimarySeqI object but [$self]'); | ||||
279 | $seq->alphabet eq 'protein' | ||||
280 | || $self->throw('Not a protein sequence'); | ||||
281 | |||||
282 | if ( defined $stop ) { | ||||
283 | length $stop != 1 | ||||
284 | and $self->throw('One character stop needed, not [$stop]'); | ||||
285 | $THREECODE{$stop} = "Ter"; | ||||
286 | } | ||||
287 | $sep ||= ''; | ||||
288 | |||||
289 | my $aa3s; | ||||
290 | foreach my $aa ( split //, uc $seq->seq ) { | ||||
291 | $THREECODE{$aa} and $aa3s .= $THREECODE{$aa} . $sep, next; | ||||
292 | $aa3s .= 'Xaa' . $sep; | ||||
293 | } | ||||
294 | $sep and substr( $aa3s, -( length $sep ), length $sep ) = ''; | ||||
295 | return $aa3s; | ||||
296 | } | ||||
297 | |||||
298 | =head2 seq3in | ||||
299 | |||||
300 | Title : seq3in | ||||
301 | Usage : $seq = Bio::SeqUtils->seq3in($seq, 'MetGlyTer') | ||||
302 | Function: Method for changing of the sequence of a | ||||
303 | Bio::PrimarySeqI sequence object. The three letter amino | ||||
304 | acid input string is converted into one letter code. Any | ||||
305 | unknown character triplet, including the default 'Xaa', is | ||||
306 | converted into 'X'. | ||||
307 | |||||
308 | Returns : Bio::PrimarySeq object | ||||
309 | Args : sequence string | ||||
310 | optional character to be used for stop in the protein sequence, | ||||
311 | defaults to '*' | ||||
312 | optional character to be used for unknown in the protein sequence, | ||||
313 | defaults to 'X' | ||||
314 | |||||
315 | =cut | ||||
316 | |||||
317 | sub seq3in { | ||||
318 | my ( $self, $seq, $string, $stop, $unknown ) = @_; | ||||
319 | |||||
320 | $seq->isa('Bio::PrimarySeqI') | ||||
321 | || $self->throw("Not a Bio::PrimarySeqI object but [$self]"); | ||||
322 | $seq->alphabet eq 'protein' | ||||
323 | || $self->throw('Not a protein sequence'); | ||||
324 | |||||
325 | if ( defined $stop ) { | ||||
326 | length $stop != 1 | ||||
327 | and $self->throw("One character stop needed, not [$stop]"); | ||||
328 | $ONECODE{'Ter'} = $stop; | ||||
329 | } | ||||
330 | if ( defined $unknown ) { | ||||
331 | length $unknown != 1 | ||||
332 | and $self->throw("One character stop needed, not [$unknown]"); | ||||
333 | $ONECODE{'Xaa'} = $unknown; | ||||
334 | } | ||||
335 | |||||
336 | my ( $aas, $aa3 ); | ||||
337 | my $length = ( length $string ) - 2; | ||||
338 | for ( my $i = 0 ; $i < $length ; $i += 3 ) { | ||||
339 | $aa3 = substr( $string, $i, 3 ); | ||||
340 | $aa3 = ucfirst( lc($aa3) ); | ||||
341 | $ONECODE{$aa3} and $aas .= $ONECODE{$aa3}, next; | ||||
342 | $aas .= $ONECODE{'Xaa'}; | ||||
343 | } | ||||
344 | $seq->seq($aas); | ||||
345 | return $seq; | ||||
346 | } | ||||
347 | |||||
348 | =head2 translate_3frames | ||||
349 | |||||
350 | Title : translate_3frames | ||||
351 | Usage : @prots = Bio::SeqUtils->translate_3frames($seq) | ||||
352 | Function: Translate a nucleotide sequence in three forward frames. | ||||
353 | The IDs of the sequences are appended with '-0F', '-1F', '-2F'. | ||||
354 | Returns : An array of seq objects | ||||
355 | Args : sequence object | ||||
356 | same arguments as to Bio::PrimarySeqI::translate | ||||
357 | |||||
358 | =cut | ||||
359 | |||||
360 | sub translate_3frames { | ||||
361 | my ( $self, $seq, @args ) = @_; | ||||
362 | |||||
363 | $self->throw( 'Object [$seq] ' | ||||
364 | . 'of class [' | ||||
365 | . ref($seq) | ||||
366 | . '] can not be translated.' ) | ||||
367 | unless $seq->can('translate'); | ||||
368 | |||||
369 | my ( $stop, $unknown, $frame, $tableid, $fullCDS, $throw ) = @args; | ||||
370 | my @seqs; | ||||
371 | my $f = 0; | ||||
372 | while ( $f != 3 ) { | ||||
373 | my $translation = | ||||
374 | $seq->translate( $stop, $unknown, $f, $tableid, $fullCDS, $throw ); | ||||
375 | $translation->id( $seq->id . "-" . $f . "F" ); | ||||
376 | push @seqs, $translation; | ||||
377 | $f++; | ||||
378 | } | ||||
379 | |||||
380 | return @seqs; | ||||
381 | } | ||||
382 | |||||
383 | =head2 translate_6frames | ||||
384 | |||||
385 | Title : translate_6frames | ||||
386 | Usage : @prots = Bio::SeqUtils->translate_6frames($seq) | ||||
387 | Function: translate a nucleotide sequence in all six frames | ||||
388 | The IDs of the sequences are appended with '-0F', '-1F', '-2F', | ||||
389 | '-0R', '-1R', '-2R'. | ||||
390 | Returns : An array of seq objects | ||||
391 | Args : sequence object | ||||
392 | same arguments as to Bio::PrimarySeqI::translate | ||||
393 | |||||
394 | =cut | ||||
395 | |||||
396 | sub translate_6frames { | ||||
397 | my ( $self, $seq, @args ) = @_; | ||||
398 | |||||
399 | my @seqs = $self->translate_3frames( $seq, @args ); | ||||
400 | my @seqs2 = $self->translate_3frames( $seq->revcom, @args ); | ||||
401 | foreach my $seq2 (@seqs2) { | ||||
402 | my ($tmp) = $seq2->id; | ||||
403 | $tmp =~ s/F$/R/g; | ||||
404 | $seq2->id($tmp); | ||||
405 | } | ||||
406 | return @seqs, @seqs2; | ||||
407 | } | ||||
408 | |||||
409 | =head2 valid_aa | ||||
410 | |||||
411 | Title : valid_aa | ||||
412 | Usage : my @aa = $table->valid_aa | ||||
413 | Function: Retrieves a list of the valid amino acid codes. | ||||
414 | The list is ordered so that first 21 codes are for unique | ||||
415 | amino acids. The rest are ['B', 'Z', 'X', '*']. | ||||
416 | Returns : array of all the valid amino acid codes | ||||
417 | Args : [optional] $code => [0 -> return list of 1 letter aa codes, | ||||
418 | 1 -> return list of 3 letter aa codes, | ||||
419 | 2 -> return associative array of both ] | ||||
420 | |||||
421 | =cut | ||||
422 | |||||
423 | # spent 132µs (115+17) within Bio::SeqUtils::valid_aa which was called 2 times, avg 66µs/call:
# once (64µs+18µs) by Bio::Tools::CodonTable::BEGIN@198 at line 296 of Bio/Tools/CodonTable.pm
# once (51µs+0s) by Bio::Tools::CodonTable::BEGIN@198 at line 295 of Bio/Tools/CodonTable.pm | ||||
424 | 92 | 123µs | my ( $self, $code ) = @_; | ||
425 | |||||
426 | if ( !$code ) { | ||||
427 | my @codes; | ||||
428 | 1 | 6µs | foreach my $c ( sort values %ONECODE ) { # spent 6µs making 1 call to Bio::SeqUtils::CORE:sort | ||
429 | 27 | 12µs | push @codes, $c unless ( $c =~ /[BZX\*]/ ); # spent 12µs making 27 calls to Bio::SeqUtils::CORE:match, avg 437ns/call | ||
430 | } | ||||
431 | push @codes, qw(B Z X *); # so they are in correct order ? | ||||
432 | return @codes; | ||||
433 | } | ||||
434 | elsif ( $code == 1 ) { | ||||
435 | my @codes; | ||||
436 | foreach my $c ( sort keys %ONECODE ) { | ||||
437 | push @codes, $c unless ( $c =~ /(Asx|Glx|Xaa|Ter)/ ); | ||||
438 | } | ||||
439 | push @codes, ( 'Asx', 'Glx', 'Xaa', 'Ter' ); | ||||
440 | return @codes; | ||||
441 | } | ||||
442 | elsif ( $code == 2 ) { | ||||
443 | my %codes = %ONECODE; | ||||
444 | foreach my $c ( keys %ONECODE ) { | ||||
445 | my $aa = $ONECODE{$c}; | ||||
446 | $codes{$aa} = $c; | ||||
447 | } | ||||
448 | return %codes; | ||||
449 | } | ||||
450 | else { | ||||
451 | $self->warn( | ||||
452 | "unrecognized code in " . ref($self) . " method valid_aa()" ); | ||||
453 | return (); | ||||
454 | } | ||||
455 | } | ||||
456 | |||||
457 | =head2 mutate | ||||
458 | |||||
459 | Title : mutate | ||||
460 | Usage : Bio::SeqUtils->mutate($seq,$mutation1, $mutation2); | ||||
461 | Function: Inplace editing of the sequence. | ||||
462 | |||||
463 | The second argument can be a Bio::LiveSeq::Mutation object | ||||
464 | or an array of them. The mutations are applied sequentially | ||||
465 | checking only that their position is within the current | ||||
466 | sequence. Insertions are inserted before the given | ||||
467 | position. | ||||
468 | |||||
469 | Returns : boolean | ||||
470 | Args : sequence object | ||||
471 | mutation, a Bio::LiveSeq::Mutation object, or an array of them | ||||
472 | |||||
473 | See L<Bio::LiveSeq::Mutation>. | ||||
474 | |||||
475 | =cut | ||||
476 | |||||
477 | sub mutate { | ||||
478 | my ( $self, $seq, @mutations ) = @_; | ||||
479 | |||||
480 | $self->throw( 'Object [$seq] ' | ||||
481 | . 'of class [' | ||||
482 | . ref($seq) | ||||
483 | . '] should be a Bio::PrimarySeqI ' ) | ||||
484 | unless $seq->isa('Bio::PrimarySeqI'); | ||||
485 | $self->throw( 'Object [$mutations[0]] ' | ||||
486 | . 'of class [' | ||||
487 | . ref( $mutations[0] ) | ||||
488 | . '] should be a Bio::LiveSeq::Mutation' ) | ||||
489 | unless $mutations[0]->isa('Bio::LiveSeq::Mutation'); | ||||
490 | |||||
491 | foreach my $mutation (@mutations) { | ||||
492 | $self->throw('Attempting to mutate sequence beyond its length') | ||||
493 | unless $mutation->pos - 1 <= $seq->length; | ||||
494 | |||||
495 | my $string = $seq->seq; | ||||
496 | substr $string, $mutation->pos - 1, $mutation->len, $mutation->seq; | ||||
497 | $seq->seq($string); | ||||
498 | } | ||||
499 | 1; | ||||
500 | } | ||||
501 | |||||
502 | =head2 cat | ||||
503 | |||||
504 | Title : cat | ||||
505 | Usage : Bio::SeqUtils->cat(@seqs); | ||||
506 | my $catseq=$seqs[0]; | ||||
507 | Function: Concatenates a list of Bio::Seq objects, adding them all on to the | ||||
508 | end of the first sequence. Annotations and sequence features are | ||||
509 | copied over from any additional objects, and the coordinates of any | ||||
510 | copied features are adjusted appropriately. | ||||
511 | Returns : a boolean | ||||
512 | Args : array of sequence objects | ||||
513 | |||||
514 | Note that annotations have no sequence locations. If you concatenate | ||||
515 | sequences with the same annotations they will all be added. | ||||
516 | |||||
517 | =cut | ||||
518 | |||||
519 | sub cat { | ||||
520 | my ( $self, $seq, @seqs ) = @_; | ||||
521 | $self->throw( 'Object [$seq] ' | ||||
522 | . 'of class [' | ||||
523 | . ref($seq) | ||||
524 | . '] should be a Bio::PrimarySeqI ' ) | ||||
525 | unless $seq->isa('Bio::PrimarySeqI'); | ||||
526 | |||||
527 | for my $catseq (@seqs) { | ||||
528 | $self->throw( 'Object [$catseq] ' | ||||
529 | . 'of class [' | ||||
530 | . ref($catseq) | ||||
531 | . '] should be a Bio::PrimarySeqI ' ) | ||||
532 | unless $catseq->isa('Bio::PrimarySeqI'); | ||||
533 | |||||
534 | $self->throw( | ||||
535 | 'Trying to concatenate sequences with different alphabets: ' | ||||
536 | . $seq->display_id . '(' | ||||
537 | . $seq->alphabet | ||||
538 | . ') and ' | ||||
539 | . $catseq->display_id . '(' | ||||
540 | . $catseq->alphabet | ||||
541 | . ')' ) | ||||
542 | unless $catseq->alphabet eq $seq->alphabet; | ||||
543 | |||||
544 | my $length = $seq->length; | ||||
545 | $seq->seq( $seq->seq . $catseq->seq ); | ||||
546 | |||||
547 | # move annotations | ||||
548 | if ( $seq->isa("Bio::AnnotatableI") | ||||
549 | and $catseq->isa("Bio::AnnotatableI") ) | ||||
550 | { | ||||
551 | foreach my $key ( $catseq->annotation->get_all_annotation_keys() ) { | ||||
552 | |||||
553 | foreach my $value ( $catseq->annotation->get_Annotations($key) ) | ||||
554 | { | ||||
555 | $seq->annotation->add_Annotation( $key, $value ); | ||||
556 | } | ||||
557 | } | ||||
558 | } | ||||
559 | |||||
560 | # move SeqFeatures | ||||
561 | if ( $seq->isa('Bio::SeqI') and $catseq->isa('Bio::SeqI') ) { | ||||
562 | for my $feat ( $catseq->get_SeqFeatures ) { | ||||
563 | $seq->add_SeqFeature( $self->_coord_adjust( $feat, $length ) ); | ||||
564 | } | ||||
565 | } | ||||
566 | |||||
567 | } | ||||
568 | 1; | ||||
569 | } | ||||
570 | |||||
571 | =head2 trunc_with_features | ||||
572 | |||||
573 | Title : trunc_with_features | ||||
574 | Usage : $trunc=Bio::SeqUtils->trunc_with_features($seq, $start, $end); | ||||
575 | Function: Like Bio::Seq::trunc, but keeps features (adjusting coordinates | ||||
576 | where necessary. Features that partially overlap the region have | ||||
577 | their location changed to a Bio::Location::Fuzzy. | ||||
578 | Returns : A new sequence object | ||||
579 | Args : A sequence object, start coordinate, end coordinate (inclusive) | ||||
580 | |||||
581 | |||||
582 | =cut | ||||
583 | |||||
584 | sub trunc_with_features { | ||||
585 | 2 | 3.96ms | 1 | 1.12ms | # spent 1.12ms (896µs+220µs) within Bio::SeqUtils::BEGIN@585 which was called:
# once (896µs+220µs) by Bio::Tools::CodonTable::BEGIN@191 at line 585 # spent 1.12ms making 1 call to Bio::SeqUtils::BEGIN@585 |
586 | my ( $self, $seq, $start, $end ) = @_; | ||||
587 | $self->throw( 'Object [$seq] ' | ||||
588 | . 'of class [' | ||||
589 | . ref($seq) | ||||
590 | . '] should be a Bio::SeqI ' ) | ||||
591 | unless $seq->isa('Bio::SeqI'); | ||||
592 | my $trunc = $seq->trunc( $start, $end ); | ||||
593 | my $truncrange = | ||||
594 | Bio::Range->new( -start => $start, -end => $end, -strand => 0 ); | ||||
595 | |||||
596 | # make sure that there is no annotation or features in $trunc | ||||
597 | # (->trunc() now clone objects except for Bio::Seq::LargePrimarySeq) | ||||
598 | $trunc->annotation->remove_Annotations; | ||||
599 | $trunc->remove_SeqFeatures; | ||||
600 | |||||
601 | # move annotations | ||||
602 | foreach my $key ( $seq->annotation->get_all_annotation_keys() ) { | ||||
603 | foreach my $value ( $seq->annotation->get_Annotations($key) ) { | ||||
604 | $trunc->annotation->add_Annotation( $key, $value ); | ||||
605 | } | ||||
606 | } | ||||
607 | |||||
608 | # move features | ||||
609 | foreach ( | ||||
610 | grep { | ||||
611 | $_ = $self->_coord_adjust( $_, 1 - $start, $end + 1 - $start ) | ||||
612 | if $_->overlaps($truncrange) | ||||
613 | } $seq->get_SeqFeatures | ||||
614 | ) | ||||
615 | { | ||||
616 | $trunc->add_SeqFeature($_); | ||||
617 | } | ||||
618 | return $trunc; | ||||
619 | } | ||||
620 | |||||
621 | =head2 delete | ||||
622 | |||||
623 | Title : delete | ||||
624 | Function: cuts a segment out of a sequence and re-joins the left and right fragments | ||||
625 | (like splicing or digesting and re-ligating a molecule). | ||||
626 | Positions (and types) of sequence features are adjusted accordingly: | ||||
627 | Features that span the cut site are converted to split featuress to | ||||
628 | indicate the disruption. | ||||
629 | Features that extend into the cut-out fragment are truncated. | ||||
630 | A new molecule is created and returned. | ||||
631 | Usage : my $cutseq = Bio::SeqUtils::PbrTools->cut( $seq, 1000, 1100 ); | ||||
632 | Args : a Bio::PrimarySeqI compliant object to cut, | ||||
633 | first nt of the segment to be deleted | ||||
634 | last nt of the segment to be deleted | ||||
635 | optional: | ||||
636 | hash-ref of options: | ||||
637 | clone_obj: if true, clone the input sequence object rather | ||||
638 | than calling "new" on the object's class | ||||
639 | |||||
640 | Returns : a new Bio::Seq object | ||||
641 | |||||
642 | =cut | ||||
643 | |||||
644 | sub delete { | ||||
645 | my $self = shift; | ||||
646 | my ( $seq, $left, $right, $opts_ref ) = @_; | ||||
647 | $self->throw( 'was expecting 3-4 paramters but got ' . @_ ) | ||||
648 | unless @_ == 3 || @_ == 4; | ||||
649 | |||||
650 | $self->throw( | ||||
651 | 'Object of class [' . ref($seq) . '] should be a Bio::PrimarySeqI ' ) | ||||
652 | unless blessed($seq) && $seq->isa('Bio::PrimarySeqI'); | ||||
653 | |||||
654 | $self->throw("Left coordinate ($left) must be >= 1") if $left < 1; | ||||
655 | if ( $right > $seq->length ) { | ||||
656 | $self->throw( "Right coordinate ($right) must be less than " | ||||
657 | . 'sequence length (' | ||||
658 | . $seq->length | ||||
659 | . ')' ); | ||||
660 | } | ||||
661 | |||||
662 | # piece together the sequence string of the remaining fragments | ||||
663 | my $left_seq = $seq->subseq( 1, $left - 1 ); | ||||
664 | my $right_seq = $seq->subseq( $right + 1, $seq->length ); | ||||
665 | if ( !$left_seq || !$right_seq ) { | ||||
666 | $self->throw( | ||||
667 | 'could not assemble sequences. At least one of the fragments is empty' | ||||
668 | ); | ||||
669 | } | ||||
670 | my $seq_str = $left_seq . $right_seq; | ||||
671 | |||||
672 | # create the new seq object with the same class as the recipient | ||||
673 | # or (if requested), make a clone of the existing object. In the | ||||
674 | # latter case we need to remove sequence features from the cloned | ||||
675 | # object instead of copying them | ||||
676 | my $product; | ||||
677 | if ( $opts_ref->{clone_obj} ) { | ||||
678 | $product = $self->_new_seq_via_clone( $seq, $seq_str ); | ||||
679 | } | ||||
680 | else { | ||||
681 | $product = $self->_new_seq_from_old( $seq, { seq => $seq_str } ); | ||||
682 | } | ||||
683 | |||||
684 | # move sequence features | ||||
685 | if ( $product->isa('Bio::SeqI') && $seq->isa('Bio::SeqI') ) { | ||||
686 | for my $feat ( $seq->get_SeqFeatures ) { | ||||
687 | my $adjfeat = $self->_coord_adjust_deletion( $feat, $left, $right ); | ||||
688 | $product->add_SeqFeature($adjfeat) if $adjfeat; | ||||
689 | } | ||||
690 | } | ||||
691 | |||||
692 | # add a feature to annotatde the deletion | ||||
693 | my $deletion_feature = Bio::SeqFeature::Generic->new( | ||||
694 | -primary_tag => 'misc_feature', | ||||
695 | -tag => { note => 'deletion of ' . ( $right - $left + 1 ) . 'bp' }, | ||||
696 | -location => Bio::Location::Simple->new( | ||||
697 | -start => $left - 1, | ||||
698 | -end => $left, | ||||
699 | -location_type => 'IN-BETWEEN' | ||||
700 | ) | ||||
701 | ); | ||||
702 | $product->add_SeqFeature($deletion_feature); | ||||
703 | return $product; | ||||
704 | } | ||||
705 | |||||
706 | =head2 insert | ||||
707 | |||||
708 | Title : insert | ||||
709 | Function: inserts a fragment (a Bio::Seq object) into a nother sequence object | ||||
710 | adding all annotations and features to the final product. | ||||
711 | Features that span the insertion site are converted to split | ||||
712 | features to indicate the disruption. | ||||
713 | A new feature is added to indicate the inserted fragment itself. | ||||
714 | A new molecule is created and returned. | ||||
715 | Usage : # insert a fragment after pos 1000 | ||||
716 | my $insert_seq = Bio::SeqUtils::PbrTools->insert( | ||||
717 | $recipient_seq, | ||||
718 | $fragment_seq, | ||||
719 | 1000 | ||||
720 | ); | ||||
721 | Args : recipient sequence (a Bio::PrimarySeqI compliant object), | ||||
722 | a fragmetn to insert (Bio::PrimarySeqI compliant object), | ||||
723 | insertion position (fragment is inserted to the right of this pos) | ||||
724 | pos=0 will prepend the fragment to the recipient | ||||
725 | optional: | ||||
726 | hash-ref of options: | ||||
727 | clone_obj: if true, clone the input sequence object rather | ||||
728 | than calling "new" on the object's class | ||||
729 | Returns : a new Bio::Seq object | ||||
730 | |||||
731 | =cut | ||||
732 | |||||
733 | sub insert { | ||||
734 | my $self = shift; | ||||
735 | my ( $recipient, $fragment, $insert_pos, $opts_ref ) = @_; | ||||
736 | $self->throw( 'was expecting 3-4 paramters but got ' . @_ ) | ||||
737 | unless @_ == 3 || @_ == 4; | ||||
738 | |||||
739 | $self->throw( 'Recipient object of class [' | ||||
740 | . ref($recipient) | ||||
741 | . '] should be a Bio::PrimarySeqI ' ) | ||||
742 | unless blessed($recipient) && $recipient->isa('Bio::PrimarySeqI'); | ||||
743 | |||||
744 | $self->throw( 'Fragment object of class [' | ||||
745 | . ref($fragment) | ||||
746 | . '] should be a Bio::PrimarySeqI ' ) | ||||
747 | unless blessed($fragment) && $fragment->isa('Bio::PrimarySeqI'); | ||||
748 | |||||
749 | $self->throw( 'Can\'t concatenate sequences with different alphabets: ' | ||||
750 | . 'recipient is ' | ||||
751 | . $recipient->alphabet | ||||
752 | . ' and fragment is ' | ||||
753 | . $fragment->alphabet ) | ||||
754 | unless $recipient->alphabet eq $fragment->alphabet; | ||||
755 | |||||
756 | if ( $insert_pos < 0 or $insert_pos > $recipient->length ) { | ||||
757 | $self->throw( "insertion position ($insert_pos) must be between 0 and " | ||||
758 | . 'recipient sequence length (' | ||||
759 | . $recipient->length | ||||
760 | . ')' ); | ||||
761 | } | ||||
762 | |||||
763 | if ( $fragment->can('is_circular') && $fragment->is_circular ) { | ||||
764 | $self->throw('Can\'t insert circular fragments'); | ||||
765 | } | ||||
766 | |||||
767 | if ( !$recipient->seq ) { | ||||
768 | $self->throw( | ||||
769 | 'Recipient has no sequence, can not insert into this object'); | ||||
770 | } | ||||
771 | |||||
772 | # construct raw sequence of the new molecule | ||||
773 | my $left_seq = | ||||
774 | $insert_pos > 0 | ||||
775 | ? $recipient->subseq( 1, $insert_pos ) | ||||
776 | : ''; | ||||
777 | my $mid_seq = $fragment->seq; | ||||
778 | my $right_seq = | ||||
779 | $insert_pos < $recipient->length | ||||
780 | ? $recipient->subseq( $insert_pos + 1, $recipient->length ) | ||||
781 | : ''; | ||||
782 | my $seq_str = $left_seq . $mid_seq . $right_seq; | ||||
783 | |||||
784 | # create the new seq object with the same class as the recipient | ||||
785 | # or (if requested), make a clone of the existing object. In the | ||||
786 | # latter case we need to remove sequence features from the cloned | ||||
787 | # object instead of copying them | ||||
788 | my $product; | ||||
789 | if ( $opts_ref->{clone_obj} ) { | ||||
790 | $product = $self->_new_seq_via_clone( $recipient, $seq_str ); | ||||
791 | } | ||||
792 | else { | ||||
793 | my @desc; | ||||
794 | push @desc, 'Inserted fragment: ' . $fragment->desc | ||||
795 | if defined $fragment->desc; | ||||
796 | push @desc, 'Recipient: ' . $recipient->desc | ||||
797 | if defined $recipient->desc; | ||||
798 | $product = $self->_new_seq_from_old( | ||||
799 | $recipient, | ||||
800 | { | ||||
801 | seq => $seq_str, | ||||
802 | display_id => $recipient->display_id, | ||||
803 | accession_number => $recipient->accession_number || '', | ||||
804 | alphabet => $recipient->alphabet, | ||||
805 | desc => join( '; ', @desc ), | ||||
806 | verbose => $recipient->verbose || $fragment->verbose, | ||||
807 | is_circular => $recipient->is_circular || 0, | ||||
808 | } | ||||
809 | ); | ||||
810 | |||||
811 | } # if clone_obj | ||||
812 | |||||
813 | # move annotations from fragment to product | ||||
814 | if ( $product->isa("Bio::AnnotatableI") | ||||
815 | && $fragment->isa("Bio::AnnotatableI") ) | ||||
816 | { | ||||
817 | foreach my $key ( $fragment->annotation->get_all_annotation_keys ) { | ||||
818 | foreach my $value ( $fragment->annotation->get_Annotations($key) ) { | ||||
819 | $product->annotation->add_Annotation( $key, $value ); | ||||
820 | } | ||||
821 | } | ||||
822 | } | ||||
823 | |||||
824 | # move sequence features to product with adjusted coordinates | ||||
825 | if ( $product->isa('Bio::SeqI') ) { | ||||
826 | |||||
827 | # for the fragment, just shift the features to new position | ||||
828 | if ( $fragment->isa('Bio::SeqI') ) { | ||||
829 | for my $feat ( $fragment->get_SeqFeatures ) { | ||||
830 | my $adjfeat = $self->_coord_adjust( $feat, $insert_pos ); | ||||
831 | $product->add_SeqFeature($adjfeat) if $adjfeat; | ||||
832 | } | ||||
833 | } | ||||
834 | |||||
835 | # for recipient, shift and modify features according to insertion. | ||||
836 | if ( $recipient->isa('Bio::SeqI') ) { | ||||
837 | for my $feat ( $recipient->get_SeqFeatures ) { | ||||
838 | my $adjfeat = | ||||
839 | $self->_coord_adjust_insertion( $feat, $insert_pos, | ||||
840 | $fragment->length ); | ||||
841 | $product->add_SeqFeature($adjfeat) if $adjfeat; | ||||
842 | } | ||||
843 | } | ||||
844 | } | ||||
845 | |||||
846 | # add a feature to annotate the insertion | ||||
847 | my $insertion_feature = Bio::SeqFeature::Generic->new( | ||||
848 | -start => $insert_pos + 1, | ||||
849 | -end => $insert_pos + $fragment->length, | ||||
850 | -primary_tag => 'misc_feature', | ||||
851 | -tag => { note => 'inserted fragment' }, | ||||
852 | ); | ||||
853 | $product->add_SeqFeature($insertion_feature); | ||||
854 | |||||
855 | return $product; | ||||
856 | } | ||||
857 | |||||
858 | =head2 ligate | ||||
859 | |||||
860 | title : ligate | ||||
861 | function: pastes a fragment (which can also have features) into a recipient | ||||
862 | sequence between two "cut" sites, preserving features and adjusting | ||||
863 | their locations. | ||||
864 | This is a shortcut for deleting a segment from a sequence object followed | ||||
865 | by an insertion of a fragmnet and is supposed to be used to simulate | ||||
866 | in-vitro cloning where a recipient (a vector) is digested and a fragment | ||||
867 | is then ligated into the recipient molecule. The fragment can be flipped | ||||
868 | (reverse-complemented with all its features). | ||||
869 | A new sequence object is returned to represent the product of the reaction. | ||||
870 | Features and annotations are transferred from the insert to the product | ||||
871 | and features on the recipient are adjusted according to the methods | ||||
872 | L</"delete"> amd L</"insert">: | ||||
873 | Features spanning the insertion site will be split up into two sub-locations. | ||||
874 | (Sub-)features in the deleted region are themselves deleted. | ||||
875 | (Sub-)features that extend into the deleted region are truncated. | ||||
876 | The class of the product object depends on the class of the recipient (vector) | ||||
877 | sequence object. if it is not possible to instantiate a new | ||||
878 | object of that class, a Bio::Primaryseq object is created instead. | ||||
879 | usage : # insert the flipped fragment between positions 1000 and 1100 of the | ||||
880 | # vector, i.e. everything between these two positions is deleted and | ||||
881 | # replaced by the fragment | ||||
882 | my $new_molecule = Bio::Sequtils::Pbrtools->ligate( | ||||
883 | -recipient => $vector, | ||||
884 | -fragment => $fragment, | ||||
885 | -left => 1000, | ||||
886 | -right => 1100, | ||||
887 | -flip => 1, | ||||
888 | -clone_obj => 1 | ||||
889 | ); | ||||
890 | args : recipient: the recipient/vector molecule | ||||
891 | fragment: molecule that is to be ligated into the vector | ||||
892 | left: left cut site (fragment will be inserted to the right of | ||||
893 | this position) | ||||
894 | optional: | ||||
895 | right: right cut site (fragment will be inseterted to the | ||||
896 | left of this position). defaults to left+1 | ||||
897 | flip: boolean, if true, the fragment is reverse-complemented | ||||
898 | (including features) before inserting | ||||
899 | clone_obj: if true, clone the recipient object to create the product | ||||
900 | instead of calling "new" on its class | ||||
901 | returns : a new Bio::Seq object of the ligated fragments | ||||
902 | |||||
903 | =cut | ||||
904 | |||||
905 | sub ligate { | ||||
906 | my $self = shift; | ||||
907 | my ( $recipient, $fragment, $left, $right, $flip, $clone_obj ) = | ||||
908 | $self->_rearrange( [qw(RECIPIENT FRAGMENT LEFT RIGHT FLIP CLONE_OBJ )], | ||||
909 | @_ ); | ||||
910 | $self->throw("missing required parameter 'recipient'") unless $recipient; | ||||
911 | $self->throw("missing required parameter 'fragment'") unless $fragment; | ||||
912 | $self->throw("missing required parameter 'left'") unless defined $left; | ||||
913 | |||||
914 | $right ||= $left + 1; | ||||
915 | |||||
916 | $self->throw( | ||||
917 | "Fragment must be a Bio::PrimarySeqI compliant object but it is a " | ||||
918 | . ref($fragment) ) | ||||
919 | unless blessed($fragment) && $fragment->isa('Bio::PrimarySeqI'); | ||||
920 | |||||
921 | $fragment = $self->revcom_with_features($fragment) if $flip; | ||||
922 | |||||
923 | my $opts_ref = {}; | ||||
924 | $opts_ref->{clone_obj} = 1 if $clone_obj; | ||||
925 | |||||
926 | # clone in two steps: first delete between the insertion sites, | ||||
927 | # then insert the fragment. Step 1 is skipped if insert positions | ||||
928 | # are adjacent (no deletion) | ||||
929 | my ( $product1, $product2 ); | ||||
930 | eval { | ||||
931 | if ( $right == $left + 1 ) { | ||||
932 | $product1 = $recipient; | ||||
933 | } | ||||
934 | else { | ||||
935 | $product1 = | ||||
936 | $self->delete( $recipient, $left + 1, $right - 1, $opts_ref ); | ||||
937 | } | ||||
938 | }; | ||||
939 | $self->throw( "Failed in step 1 (cut recipient): " . $@ ) if $@; | ||||
940 | eval { $product2 = $self->insert( $product1, $fragment, $left, $opts_ref ) }; | ||||
941 | $self->throw( "Failed in step 2 (insert fragment): " . $@ ) if $@; | ||||
942 | |||||
943 | return $product2; | ||||
944 | |||||
945 | } | ||||
946 | |||||
947 | =head2 _coord_adjust_deletion | ||||
948 | |||||
949 | title : _coord_adjust_deletion | ||||
950 | function: recursively adjusts coordinates of seqfeatures on a molecule | ||||
951 | where a segment has been deleted. | ||||
952 | (sub)features that span the deletion site become split features. | ||||
953 | (sub)features that extend into the deletion site are truncated. | ||||
954 | A note is added to the feature to inform about the size and | ||||
955 | position of the deletion. | ||||
956 | usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_deletion( | ||||
957 | $feature, | ||||
958 | $start, | ||||
959 | $end | ||||
960 | ); | ||||
961 | args : a Bio::SeqFeatureI compliant object, | ||||
962 | start (inclusive) position of the deletion site, | ||||
963 | end (inclusive) position of the deletion site | ||||
964 | returns : a Bio::SeqFeatureI compliant object | ||||
965 | |||||
966 | =cut | ||||
967 | |||||
968 | sub _coord_adjust_deletion { | ||||
969 | my ( $self, $feat, $left, $right ) = @_; | ||||
970 | |||||
971 | $self->throw( 'object [$feat] ' | ||||
972 | . 'of class [' | ||||
973 | . ref($feat) | ||||
974 | . '] should be a Bio::SeqFeatureI ' ) | ||||
975 | unless $feat->isa('Bio::SeqFeatureI'); | ||||
976 | $self->throw('missing coordinates: need a left and a right position') | ||||
977 | unless defined $left && defined $right; | ||||
978 | |||||
979 | if ( $left > $right ) { | ||||
980 | if ( $feat->can('is_circular') && $feat->is_circular ) { | ||||
981 | |||||
982 | # todo handle circular molecules | ||||
983 | $self->throw( | ||||
984 | 'can not yet handle deletions in circular molecules if deletion spans origin' | ||||
985 | ); | ||||
986 | } | ||||
987 | else { | ||||
988 | $self->throw( | ||||
989 | "left coordinate ($left) must be less than right ($right)" | ||||
990 | . " but it was greater" ); | ||||
991 | } | ||||
992 | } | ||||
993 | my $deletion = Bio::Location::Simple->new( | ||||
994 | -start => $left, | ||||
995 | -end => $right, | ||||
996 | ); | ||||
997 | my $del_length = $right - $left + 1; | ||||
998 | |||||
999 | my @adjsubfeat; | ||||
1000 | for my $subfeat ( $feat->get_SeqFeatures ) { | ||||
1001 | my $adjsubfeat = | ||||
1002 | $self->_coord_adjust_deletion( $subfeat, $left, $right ); | ||||
1003 | push @adjsubfeat, $adjsubfeat if $adjsubfeat; | ||||
1004 | } | ||||
1005 | |||||
1006 | my @loc; | ||||
1007 | my $note; | ||||
1008 | for ( $feat->location->each_Location ) { | ||||
1009 | next if $deletion->contains($_); # this location will be deleted; | ||||
1010 | my $strand = $_->strand; | ||||
1011 | my $type = $_->location_type; | ||||
1012 | my $start = $_->start; | ||||
1013 | my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef; | ||||
1014 | my $end = $_->end; | ||||
1015 | my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef; | ||||
1016 | my @newcoords = (); | ||||
1017 | if ( $start < $deletion->start && $end > $deletion->end ) | ||||
1018 | { # split the feature | ||||
1019 | @newcoords = ( | ||||
1020 | [ $start, ( $deletion->start - 1 ), $start_type, $end_type ], | ||||
1021 | [ | ||||
1022 | ( $deletion->start ), $end - $del_length, | ||||
1023 | $start_type, $end_type | ||||
1024 | ] | ||||
1025 | ); | ||||
1026 | $note = | ||||
1027 | $del_length | ||||
1028 | . 'bp internal deletion between pos ' | ||||
1029 | . ( $deletion->start - 1 ) . ' and ' | ||||
1030 | . $deletion->start; | ||||
1031 | } | ||||
1032 | elsif ( $_->start < $deletion->start && $_->end >= $deletion->start ) | ||||
1033 | { # truncate feature end | ||||
1034 | @newcoords = | ||||
1035 | ( [ $start, ( $deletion->start - 1 ), $start_type, $end_type ] ); | ||||
1036 | $note = | ||||
1037 | ( $end - $deletion->start + 1 ) . 'bp deleted from feature '; | ||||
1038 | if ( $feat->strand ) { | ||||
1039 | $note .= $feat->strand == 1 ? "3' " : "5' "; | ||||
1040 | } | ||||
1041 | $note .= 'end'; | ||||
1042 | } | ||||
1043 | elsif ( $_->start <= $deletion->end && $_->end > $deletion->end ) | ||||
1044 | { # truncate feature start and shift end | ||||
1045 | @newcoords = ( | ||||
1046 | [ | ||||
1047 | ( $deletion->start ), $end - $del_length, | ||||
1048 | $start_type, $end_type | ||||
1049 | ] | ||||
1050 | ); | ||||
1051 | $note = | ||||
1052 | ( $deletion->end - $start + 1 ) . 'bp deleted from feature '; | ||||
1053 | if ( $feat->strand ) { | ||||
1054 | $note .= $feat->strand == 1 ? "5' end" : "3' end"; | ||||
1055 | } | ||||
1056 | else { | ||||
1057 | $note .= 'start'; | ||||
1058 | } | ||||
1059 | } | ||||
1060 | elsif ( $start >= $deletion->end ) { # just shift entire location | ||||
1061 | @newcoords = ( | ||||
1062 | [ | ||||
1063 | $start - $del_length, $end - $del_length, | ||||
1064 | $start_type, $end_type | ||||
1065 | ] | ||||
1066 | ); | ||||
1067 | } | ||||
1068 | else { # not affected by deletion | ||||
1069 | @newcoords = ( [ $start, $end, $start_type, $end_type ] ); | ||||
1070 | } | ||||
1071 | |||||
1072 | # if we have no coordinates, we return nothing | ||||
1073 | # the feature is deleted | ||||
1074 | return unless @newcoords; | ||||
1075 | |||||
1076 | my @subloc = | ||||
1077 | $self->_location_objects_from_coordinate_list( \@newcoords, $strand, | ||||
1078 | $type ); | ||||
1079 | push @loc, $self->_single_loc_object_from_collection(@subloc); | ||||
1080 | } # each location | ||||
1081 | |||||
1082 | # create new feature based on original one and move annotation across | ||||
1083 | my $newfeat = | ||||
1084 | Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); | ||||
1085 | foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { | ||||
1086 | foreach my $value ( $feat->annotation->get_Annotations($key) ) { | ||||
1087 | $newfeat->annotation->add_Annotation( $key, $value ); | ||||
1088 | } | ||||
1089 | } | ||||
1090 | foreach my $key ( $feat->get_all_tags() ) { | ||||
1091 | $newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); | ||||
1092 | } | ||||
1093 | |||||
1094 | # If we have a note about the deleted bases, add it | ||||
1095 | if ($note) { | ||||
1096 | $newfeat->add_tag_value( 'note', $note ); | ||||
1097 | } | ||||
1098 | |||||
1099 | # set modified location(s) for the new feature and | ||||
1100 | # add its subfeatures if any | ||||
1101 | my $loc = $self->_single_loc_object_from_collection(@loc); | ||||
1102 | $loc ? $newfeat->location($loc) : return; | ||||
1103 | $newfeat->add_SeqFeature($_) for @adjsubfeat; | ||||
1104 | |||||
1105 | return $newfeat; | ||||
1106 | |||||
1107 | } | ||||
1108 | |||||
1109 | =head2 _coord_adjust_insertion | ||||
1110 | |||||
1111 | title : _coord_adjust_insertion | ||||
1112 | function: recursively adjusts coordinates of seqfeatures on a molecule | ||||
1113 | where another sequence has been inserted. | ||||
1114 | (sub)features that span the insertion site become split features | ||||
1115 | and a note is added about the size and positin of the insertion. | ||||
1116 | Features with an IN-BETWEEN location at the insertion site | ||||
1117 | are lost (such features can only exist between adjacent bases) | ||||
1118 | usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_insertion( | ||||
1119 | $feature, | ||||
1120 | $insert_pos, | ||||
1121 | $insert_length | ||||
1122 | ); | ||||
1123 | args : a Bio::SeqFeatureI compliant object, | ||||
1124 | insertion position (insert to the right of this position) | ||||
1125 | length of inserted fragment | ||||
1126 | returns : a Bio::SeqFeatureI compliant object | ||||
1127 | |||||
1128 | =cut | ||||
1129 | |||||
1130 | sub _coord_adjust_insertion { | ||||
1131 | my ( $self, $feat, $insert_pos, $insert_len ) = @_; | ||||
1132 | |||||
1133 | $self->throw( 'object [$feat] ' | ||||
1134 | . 'of class [' | ||||
1135 | . ref($feat) | ||||
1136 | . '] should be a Bio::SeqFeatureI ' ) | ||||
1137 | unless $feat->isa('Bio::SeqFeatureI'); | ||||
1138 | $self->throw('missing insert position') unless defined $insert_pos; | ||||
1139 | $self->throw('missing insert length') unless defined $insert_len; | ||||
1140 | |||||
1141 | my @adjsubfeat; | ||||
1142 | for my $subfeat ( $feat->get_SeqFeatures ) { | ||||
1143 | push @adjsubfeat, | ||||
1144 | $self->_coord_adjust_insertion( $subfeat, $insert_pos, $insert_len ); | ||||
1145 | } | ||||
1146 | |||||
1147 | my @loc; | ||||
1148 | my $note; | ||||
1149 | for ( $feat->location->each_Location ) { | ||||
1150 | |||||
1151 | # loose IN-BETWEEN features at the insertion site | ||||
1152 | next | ||||
1153 | if ( $_->location_type eq 'IN-BETWEEN' && $_->start == $insert_pos ); | ||||
1154 | my $strand = $_->strand; | ||||
1155 | my $type = $_->location_type; | ||||
1156 | my $start = $_->start; | ||||
1157 | my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef; | ||||
1158 | my $end = $_->end; | ||||
1159 | my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef; | ||||
1160 | my @newcoords = (); | ||||
1161 | if ( $start <= $insert_pos && $end > $insert_pos ) { # split the feature | ||||
1162 | @newcoords = ( | ||||
1163 | [ $start, $insert_pos, $start_type, $end_type ], | ||||
1164 | [ | ||||
1165 | ( $insert_pos + 1 + $insert_len ), $end + $insert_len, | ||||
1166 | $start_type, $end_type | ||||
1167 | ] | ||||
1168 | ); | ||||
1169 | $note = | ||||
1170 | $insert_len | ||||
1171 | . 'bp internal insertion between pos ' | ||||
1172 | . $insert_pos . ' and ' | ||||
1173 | . ( $insert_pos + $insert_len + 1 ); | ||||
1174 | |||||
1175 | } | ||||
1176 | elsif ( $start > $insert_pos ) { # just shift entire location | ||||
1177 | @newcoords = ( | ||||
1178 | [ | ||||
1179 | $start + $insert_len, $end + $insert_len, | ||||
1180 | $start_type, $end_type | ||||
1181 | ] | ||||
1182 | ); | ||||
1183 | } | ||||
1184 | else { # not affected | ||||
1185 | @newcoords = ( [ $start, $end, $start_type, $end_type ] ); | ||||
1186 | } | ||||
1187 | |||||
1188 | # if we have deleted all coordinates, return nothing | ||||
1189 | # (possible if all locations are IN-BETWEEN) | ||||
1190 | return unless @newcoords; | ||||
1191 | |||||
1192 | my @subloc = | ||||
1193 | $self->_location_objects_from_coordinate_list( \@newcoords, $strand, | ||||
1194 | $type ); | ||||
1195 | |||||
1196 | # put together final location which could be a split now | ||||
1197 | push @loc, $self->_single_loc_object_from_collection(@subloc); | ||||
1198 | } # each location | ||||
1199 | |||||
1200 | # create new feature based on original one and move annotation across | ||||
1201 | my $newfeat = | ||||
1202 | Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); | ||||
1203 | foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { | ||||
1204 | foreach my $value ( $feat->annotation->get_Annotations($key) ) { | ||||
1205 | $newfeat->annotation->add_Annotation( $key, $value ); | ||||
1206 | } | ||||
1207 | } | ||||
1208 | foreach my $key ( $feat->get_all_tags() ) { | ||||
1209 | $newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); | ||||
1210 | } | ||||
1211 | |||||
1212 | # If we have a note about the inserted bases, add it | ||||
1213 | if ($note) { | ||||
1214 | $newfeat->add_tag_value( 'note', $note ); | ||||
1215 | } | ||||
1216 | |||||
1217 | # set modified location(s) for the new feature and | ||||
1218 | # add its subfeatures if any | ||||
1219 | my $loc = $self->_single_loc_object_from_collection(@loc); | ||||
1220 | $loc ? $newfeat->location($loc) : return; | ||||
1221 | $newfeat->add_SeqFeature($_) for @adjsubfeat; | ||||
1222 | |||||
1223 | return $newfeat; | ||||
1224 | |||||
1225 | } | ||||
1226 | |||||
1227 | =head2 _single_loc_object_from_collection | ||||
1228 | |||||
1229 | Title : _single_loc_object_from_collection | ||||
1230 | Function: takes an array of location objects. Returns either a split | ||||
1231 | location object if there are more than one locations in the | ||||
1232 | array or returns the single location if there is only one | ||||
1233 | Usage : my $loc = _single_loc_object_from_collection( @sublocs ); | ||||
1234 | Args : array of Bio::Location objects | ||||
1235 | Returns : a single Bio:;Location object containing all locations | ||||
1236 | |||||
1237 | =cut | ||||
1238 | |||||
1239 | sub _single_loc_object_from_collection { | ||||
1240 | my ( $self, @locs ) = @_; | ||||
1241 | my $loc; | ||||
1242 | if ( @locs > 1 ) { | ||||
1243 | $loc = Bio::Location::Split->new; | ||||
1244 | $loc->add_sub_Location(@locs); | ||||
1245 | } | ||||
1246 | elsif ( @locs == 1 ) { | ||||
1247 | $loc = shift @locs; | ||||
1248 | } | ||||
1249 | return $loc; | ||||
1250 | } # _single_loc_object_from_collection | ||||
1251 | |||||
1252 | =head2 _location_objects_from_coordinate_list | ||||
1253 | |||||
1254 | Title : _location_objects_from_coordinate_list | ||||
1255 | Function: takes an array-ref of start/end coordinates, a strand and a | ||||
1256 | type and returns a list of Bio::Location objects (Fuzzy by | ||||
1257 | default, Simple in case of in-between coordinates). | ||||
1258 | If location type is not "IN-BETWEEN", individual types may be | ||||
1259 | passed in for start and end location as per Bio::Location::Fuzzy | ||||
1260 | documentation. | ||||
1261 | Usage : my @loc_objs = $self->_location_objects_from_coordinate_list( | ||||
1262 | \@coords, | ||||
1263 | $strand, | ||||
1264 | $type | ||||
1265 | ); | ||||
1266 | Args : array-ref of array-refs each containing: | ||||
1267 | start, end [, start-type, end-type] | ||||
1268 | where types are optional. If given, must be | ||||
1269 | a one of ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') | ||||
1270 | strand (all locations must be on same strand) | ||||
1271 | location-type (EXACT, IN-BETWEEN etc) | ||||
1272 | Returns : list of Bio::Location objects | ||||
1273 | |||||
1274 | =cut | ||||
1275 | |||||
1276 | sub _location_objects_from_coordinate_list { | ||||
1277 | my $self = shift; | ||||
1278 | my ( $coords_ref, $strand, $type ) = @_; | ||||
1279 | $self->throw( 'expected 3 parameters but got ' . @_ ) unless @_ == 3; | ||||
1280 | $self->throw('first argument must be an ARRAY reference#') | ||||
1281 | unless ref($coords_ref) eq 'ARRAY'; | ||||
1282 | |||||
1283 | my @loc; | ||||
1284 | foreach my $coords_set (@$coords_ref) { | ||||
1285 | my ( $start, $end, $start_type, $end_type ) = @$coords_set; | ||||
1286 | |||||
1287 | # taken from Bio::SeqUtils::_coord_adjust | ||||
1288 | if ( $type ne 'IN-BETWEEN' ) { | ||||
1289 | my $loc = Bio::Location::Fuzzy->new( | ||||
1290 | -start => $start, | ||||
1291 | -end => $end, | ||||
1292 | -strand => $strand, | ||||
1293 | -location_type => $type | ||||
1294 | ); | ||||
1295 | $loc->start_pos_type($start_type) if $start_type; | ||||
1296 | $loc->end_pos_type($end_type) if $end_type; | ||||
1297 | push @loc, $loc; | ||||
1298 | } | ||||
1299 | else { | ||||
1300 | push @loc, | ||||
1301 | Bio::Location::Simple->new( | ||||
1302 | -start => $start, | ||||
1303 | -end => $end, | ||||
1304 | -strand => $strand, | ||||
1305 | -location_type => $type | ||||
1306 | ); | ||||
1307 | } | ||||
1308 | } # each coords_set | ||||
1309 | return @loc; | ||||
1310 | } # _location_objects_from_coordinate_list | ||||
1311 | |||||
1312 | =head2 _new_seq_via_clone | ||||
1313 | |||||
1314 | Title : _new_seq_via_clone | ||||
1315 | Function: clone a sequence object using Bio::Root::Root::clone and set the new sequence string | ||||
1316 | sequence features are removed. | ||||
1317 | Usage : my $new_seq = $self->_new_seq_via_clone( $seq_obj, $seq_str ); | ||||
1318 | Args : original seq object [, new sequence string] | ||||
1319 | Returns : a clone of the original sequence object, optionally with new sequence string | ||||
1320 | |||||
1321 | =cut | ||||
1322 | |||||
1323 | sub _new_seq_via_clone { | ||||
1324 | my ( $self, $in_seq_obj, $seq_str ) = @_; | ||||
1325 | my $out_seq_obj = $in_seq_obj->clone; | ||||
1326 | $out_seq_obj->remove_SeqFeatures if $out_seq_obj->can('remove_SeqFeatures'); | ||||
1327 | if ( blessed $out_seq_obj->seq | ||||
1328 | && $out_seq_obj->seq->isa('Bio::PrimarySeq') ) | ||||
1329 | { | ||||
1330 | $out_seq_obj->seq->seq($seq_str); | ||||
1331 | } | ||||
1332 | else { | ||||
1333 | $out_seq_obj->seq($seq_str); | ||||
1334 | } | ||||
1335 | return $out_seq_obj; | ||||
1336 | |||||
1337 | } # _new_seq_via_clone | ||||
1338 | |||||
1339 | =head2 _new_seq_from_old | ||||
1340 | |||||
1341 | Title : _new_seq_from_old | ||||
1342 | Function: creates a new sequence obejct, if possible of the same class as the old and adds | ||||
1343 | attributes to it. Also copies annotation across to the new object. | ||||
1344 | Usage : my $new_seq = $self->_new_seq_from_old( $seq_obj, { seq => $seq_str, display_id => 'some_ID'}); | ||||
1345 | Args : old sequence object | ||||
1346 | hashref of attributes for the new sequence (sequence string etc.) | ||||
1347 | Returns : a new Bio::Seq object | ||||
1348 | |||||
1349 | =cut | ||||
1350 | |||||
1351 | sub _new_seq_from_old { | ||||
1352 | my ( $self, $in_seq_obj, $attr ) = @_; | ||||
1353 | $self->throw('attributes must be a hashref') | ||||
1354 | if $attr && ref($attr) ne 'HASH'; | ||||
1355 | |||||
1356 | my $seqclass; | ||||
1357 | if ( $in_seq_obj->can_call_new ) { | ||||
1358 | $seqclass = ref($in_seq_obj); | ||||
1359 | } | ||||
1360 | else { | ||||
1361 | $seqclass = 'Bio::Primaryseq'; | ||||
1362 | $self->_attempt_to_load_seq; | ||||
1363 | } | ||||
1364 | |||||
1365 | my $out_seq_obj = $seqclass->new( | ||||
1366 | -seq => $attr->{seq} || $in_seq_obj->seq, | ||||
1367 | -display_id => $attr->{display_id} || $in_seq_obj->display_id, | ||||
1368 | -accession_number => $attr->{accession_number} | ||||
1369 | || $in_seq_obj->accession_number | ||||
1370 | || '', | ||||
1371 | -alphabet => $in_seq_obj->alphabet, | ||||
1372 | -desc => $attr->{desc} || $in_seq_obj->desc, | ||||
1373 | -verbose => $attr->{verbose} || $in_seq_obj->verbose, | ||||
1374 | -is_circular => $attr->{is_circular} || $in_seq_obj->is_circular || 0, | ||||
1375 | ); | ||||
1376 | |||||
1377 | # move the annotation across to the product | ||||
1378 | if ( $out_seq_obj->isa("Bio::AnnotatableI") | ||||
1379 | && $in_seq_obj->isa("Bio::AnnotatableI") ) | ||||
1380 | { | ||||
1381 | foreach my $key ( $in_seq_obj->annotation->get_all_annotation_keys ) { | ||||
1382 | foreach my $value ( $in_seq_obj->annotation->get_Annotations($key) ) | ||||
1383 | { | ||||
1384 | $out_seq_obj->annotation->add_Annotation( $key, $value ); | ||||
1385 | } | ||||
1386 | } | ||||
1387 | } | ||||
1388 | return $out_seq_obj; | ||||
1389 | } # _new_seq_from_old | ||||
1390 | |||||
1391 | =head2 _coord_adjust | ||||
1392 | |||||
1393 | Title : _coord_adjust | ||||
1394 | Usage : my $newfeat=Bio::SeqUtils->_coord_adjust($feature, 100, $seq->length); | ||||
1395 | Function: Recursive subroutine to adjust the coordinates of a feature | ||||
1396 | and all its subfeatures. If a sequence length is specified, then | ||||
1397 | any adjusted features that have locations beyond the boundaries | ||||
1398 | of the sequence are converted to Bio::Location::Fuzzy objects. | ||||
1399 | |||||
1400 | Returns : A Bio::SeqFeatureI compliant object. | ||||
1401 | Args : A Bio::SeqFeatureI compliant object, | ||||
1402 | the number of bases to add to the coordinates | ||||
1403 | (optional) the length of the parent sequence | ||||
1404 | |||||
1405 | |||||
1406 | =cut | ||||
1407 | |||||
1408 | sub _coord_adjust { | ||||
1409 | my ( $self, $feat, $add, $length ) = @_; | ||||
1410 | $self->throw( 'Object [$feat] ' | ||||
1411 | . 'of class [' | ||||
1412 | . ref($feat) | ||||
1413 | . '] should be a Bio::SeqFeatureI ' ) | ||||
1414 | unless $feat->isa('Bio::SeqFeatureI'); | ||||
1415 | my @adjsubfeat; | ||||
1416 | for my $subfeat ( $feat->get_SeqFeatures ) { | ||||
1417 | push @adjsubfeat, $self->_coord_adjust( $subfeat, $add, $length ); | ||||
1418 | } | ||||
1419 | my @loc; | ||||
1420 | for ( $feat->location->each_Location ) { | ||||
1421 | my @coords = ( $_->start, $_->end ); | ||||
1422 | my $strand = $_->strand; | ||||
1423 | my $type = $_->location_type; | ||||
1424 | foreach (@coords) { | ||||
1425 | $self->throw("can not handle negative feature positions (got: $_)") | ||||
1426 | if $_ < 0; | ||||
1427 | if ( $add + $_ < 1 ) { | ||||
1428 | $_ = '<1'; | ||||
1429 | } | ||||
1430 | elsif ( defined $length and $add + $_ > $length ) { | ||||
1431 | $_ = ">$length"; | ||||
1432 | } | ||||
1433 | else { | ||||
1434 | $_ = $add + $_; | ||||
1435 | } | ||||
1436 | } | ||||
1437 | push @loc, | ||||
1438 | $self->_location_objects_from_coordinate_list( [ \@coords ], | ||||
1439 | $strand, $type ); | ||||
1440 | } | ||||
1441 | my $newfeat = | ||||
1442 | Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); | ||||
1443 | foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { | ||||
1444 | foreach my $value ( $feat->annotation->get_Annotations($key) ) { | ||||
1445 | $newfeat->annotation->add_Annotation( $key, $value ); | ||||
1446 | } | ||||
1447 | } | ||||
1448 | foreach my $key ( $feat->get_all_tags() ) { | ||||
1449 | $newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); | ||||
1450 | } | ||||
1451 | my $loc = $self->_single_loc_object_from_collection(@loc); | ||||
1452 | $loc ? $newfeat->location($loc) : return; | ||||
1453 | $newfeat->add_SeqFeature($_) for @adjsubfeat; | ||||
1454 | return $newfeat; | ||||
1455 | } | ||||
1456 | |||||
1457 | =head2 revcom_with_features | ||||
1458 | |||||
1459 | Title : revcom_with_features | ||||
1460 | Usage : $revcom=Bio::SeqUtils->revcom_with_features($seq); | ||||
1461 | Function: Like Bio::Seq::revcom, but keeps features (adjusting coordinates | ||||
1462 | as appropriate. | ||||
1463 | Returns : A new sequence object | ||||
1464 | Args : A sequence object | ||||
1465 | |||||
1466 | |||||
1467 | =cut | ||||
1468 | |||||
1469 | sub revcom_with_features { | ||||
1470 | my ( $self, $seq ) = @_; | ||||
1471 | $self->throw( 'Object [$seq] ' | ||||
1472 | . 'of class [' | ||||
1473 | . ref($seq) | ||||
1474 | . '] should be a Bio::SeqI ' ) | ||||
1475 | unless $seq->isa('Bio::SeqI'); | ||||
1476 | my $revcom = $seq->revcom; | ||||
1477 | |||||
1478 | # make sure that there is no annotation or features in $trunc | ||||
1479 | # (->revcom() now clone objects except for Bio::Seq::LargePrimarySeq) | ||||
1480 | $revcom->annotation->remove_Annotations; | ||||
1481 | $revcom->remove_SeqFeatures; | ||||
1482 | |||||
1483 | #move annotations | ||||
1484 | foreach my $key ( $seq->annotation->get_all_annotation_keys() ) { | ||||
1485 | foreach my $value ( $seq->annotation->get_Annotations($key) ) { | ||||
1486 | $revcom->annotation->add_Annotation( $key, $value ); | ||||
1487 | } | ||||
1488 | } | ||||
1489 | |||||
1490 | #move features | ||||
1491 | for ( map { $self->_feature_revcom( $_, $seq->length ) } | ||||
1492 | reverse $seq->get_SeqFeatures ) | ||||
1493 | { | ||||
1494 | $revcom->add_SeqFeature($_); | ||||
1495 | } | ||||
1496 | return $revcom; | ||||
1497 | } | ||||
1498 | |||||
1499 | =head2 _feature_revcom | ||||
1500 | |||||
1501 | Title : _feature_revcom | ||||
1502 | Usage : my $newfeat=Bio::SeqUtils->_feature_revcom($feature, $seq->length); | ||||
1503 | Function: Recursive subroutine to reverse complement a feature and | ||||
1504 | all its subfeatures. The length of the parent sequence must be | ||||
1505 | specified. | ||||
1506 | |||||
1507 | Returns : A Bio::SeqFeatureI compliant object. | ||||
1508 | Args : A Bio::SeqFeatureI compliant object, | ||||
1509 | the length of the parent sequence | ||||
1510 | |||||
1511 | |||||
1512 | =cut | ||||
1513 | |||||
1514 | sub _feature_revcom { | ||||
1515 | my ( $self, $feat, $length ) = @_; | ||||
1516 | $self->throw( 'Object [$feat] ' | ||||
1517 | . 'of class [' | ||||
1518 | . ref($feat) | ||||
1519 | . '] should be a Bio::SeqFeatureI ' ) | ||||
1520 | unless $feat->isa('Bio::SeqFeatureI'); | ||||
1521 | my @adjsubfeat; | ||||
1522 | for my $subfeat ( $feat->get_SeqFeatures ) { | ||||
1523 | push @adjsubfeat, $self->_feature_revcom( $subfeat, $length ); | ||||
1524 | } | ||||
1525 | my @loc; | ||||
1526 | for ( $feat->location->each_Location ) { | ||||
1527 | my $type = $_->location_type; | ||||
1528 | my $strand; | ||||
1529 | if ( $_->strand == -1 ) { $strand = 1 } | ||||
1530 | elsif ( $_->strand == 1 ) { $strand = -1 } | ||||
1531 | else { $strand = $_->strand } | ||||
1532 | my $newend = | ||||
1533 | $self->_coord_revcom( $_->start, $_->start_pos_type, $length ); | ||||
1534 | my $newstart = | ||||
1535 | $self->_coord_revcom( $_->end, $_->end_pos_type, $length ); | ||||
1536 | my $newstart_type = $_->end_pos_type; | ||||
1537 | $newstart_type = 'BEFORE' if $_->end_pos_type eq 'AFTER'; | ||||
1538 | $newstart_type = 'AFTER' if $_->end_pos_type eq 'BEFORE'; | ||||
1539 | my $newend_type = $_->start_pos_type; | ||||
1540 | $newend_type = 'BEFORE' if $_->start_pos_type eq 'AFTER'; | ||||
1541 | $newend_type = 'AFTER' if $_->start_pos_type eq 'BEFORE'; | ||||
1542 | push @loc, | ||||
1543 | $self->_location_objects_from_coordinate_list( | ||||
1544 | [ [ $newstart, $newend, $newstart_type, $newend_type ] ], | ||||
1545 | $strand, $type ); | ||||
1546 | } | ||||
1547 | my $newfeat = | ||||
1548 | Bio::SeqFeature::Generic->new( -primary => $feat->primary_tag ); | ||||
1549 | foreach my $key ( $feat->annotation->get_all_annotation_keys() ) { | ||||
1550 | foreach my $value ( $feat->annotation->get_Annotations($key) ) { | ||||
1551 | $newfeat->annotation->add_Annotation( $key, $value ); | ||||
1552 | } | ||||
1553 | } | ||||
1554 | foreach my $key ( $feat->get_all_tags() ) { | ||||
1555 | $newfeat->add_tag_value( $key, $feat->get_tag_values($key) ); | ||||
1556 | } | ||||
1557 | |||||
1558 | my $loc = $self->_single_loc_object_from_collection(@loc); | ||||
1559 | $loc ? $newfeat->location($loc) : return; | ||||
1560 | |||||
1561 | $newfeat->add_SeqFeature($_) for @adjsubfeat; | ||||
1562 | return $newfeat; | ||||
1563 | } | ||||
1564 | |||||
1565 | sub _coord_revcom { | ||||
1566 | my ( $self, $coord, $type, $length ) = @_; | ||||
1567 | if ( $type eq 'BETWEEN' or $type eq 'WITHIN' ) { | ||||
1568 | $coord =~ s/(\d+)(\D*)(\d+)/$length+1-$3.$2.$length+1-$1/ge; | ||||
1569 | } | ||||
1570 | else { | ||||
1571 | $coord =~ s/(\d+)/$length+1-$1/ge; | ||||
1572 | $coord =~ tr/<>/></; | ||||
1573 | $coord = '>' . $coord | ||||
1574 | if $type eq 'BEFORE' and substr( $coord, 0, 1 ) ne '>'; | ||||
1575 | $coord = '<' . $coord | ||||
1576 | if $type eq 'AFTER' and substr( $coord, 0, 1 ) ne '<'; | ||||
1577 | } | ||||
1578 | return $coord; | ||||
1579 | } | ||||
1580 | |||||
1581 | =head2 evolve | ||||
1582 | |||||
1583 | Title : evolve | ||||
1584 | Usage : my $newseq = Bio::SeqUtils-> | ||||
1585 | evolve($seq, $similarity, $transition_transversion_rate); | ||||
1586 | Function: Mutates the sequence by point mutations until the similarity of | ||||
1587 | the new sequence has decreased to the required level. | ||||
1588 | Transition/transversion rate is adjustable. | ||||
1589 | Returns : A new Bio::PrimarySeq object | ||||
1590 | Args : sequence object | ||||
1591 | percentage similarity (e.g. 80) | ||||
1592 | tr/tv rate, optional, defaults to 1 (= 1:1) | ||||
1593 | |||||
1594 | Set the verbosity of the Bio::SeqUtils object to positive integer to | ||||
1595 | see the mutations as they happen. | ||||
1596 | |||||
1597 | This method works only on nucleotide sequences. It prints a warning if | ||||
1598 | you set the target similarity to be less than 25%. | ||||
1599 | |||||
1600 | Transition/transversion ratio is an observed attribute of an sequence | ||||
1601 | comparison. We are dealing here with the transition/transversion rate | ||||
1602 | that we set for our model of sequence evolution. | ||||
1603 | |||||
1604 | =cut | ||||
1605 | |||||
1606 | sub evolve { | ||||
1607 | my ( $self, $seq, $sim, $rate ) = @_; | ||||
1608 | $rate ||= 1; | ||||
1609 | |||||
1610 | $self->throw( 'Object [$seq] ' | ||||
1611 | . 'of class [' | ||||
1612 | . ref($seq) | ||||
1613 | . '] should be a Bio::PrimarySeqI ' ) | ||||
1614 | unless $seq->isa('Bio::PrimarySeqI'); | ||||
1615 | |||||
1616 | $self->throw( | ||||
1617 | "[$sim] " . ' should be a positive integer or float under 100' ) | ||||
1618 | unless $sim =~ /^[+\d.]+$/ and $sim <= 100; | ||||
1619 | |||||
1620 | $self->warn( | ||||
1621 | "Nucleotide sequences are 25% similar by chance. | ||||
1622 | Do you really want to set similarity to [$sim]%?\n" | ||||
1623 | ) unless $sim > 25; | ||||
1624 | |||||
1625 | $self->throw('Only nucleotide sequences are supported') | ||||
1626 | if $seq->alphabet eq 'protein'; | ||||
1627 | |||||
1628 | # arrays of possible changes have transitions as first items | ||||
1629 | my %changes; | ||||
1630 | $changes{'a'} = [ 't', 'c', 'g' ]; | ||||
1631 | $changes{'t'} = [ 'a', 'c', 'g' ]; | ||||
1632 | $changes{'c'} = [ 'g', 'a', 't' ]; | ||||
1633 | $changes{'g'} = [ 'c', 'a', 't' ]; | ||||
1634 | |||||
1635 | # given the desired rate, find out where cut off points need to be | ||||
1636 | # when random numbers are generated from 0 to 100 | ||||
1637 | # we are ignoring identical mutations (e.g. A->A) to speed things up | ||||
1638 | my $bin_size = 100 / ( $rate + 2 ); | ||||
1639 | my $transition = 100 - ( 2 * $bin_size ); | ||||
1640 | my $first_transversion = $transition + $bin_size; | ||||
1641 | |||||
1642 | # unify the look of sequence strings | ||||
1643 | my $string = lc $seq->seq; # lower case | ||||
1644 | $string =~ | ||||
1645 | s/u/t/; # simplyfy our life; modules should deal with the change anyway | ||||
1646 | # store the original sequence string | ||||
1647 | my $oristring = $string; | ||||
1648 | my $length = $seq->length; | ||||
1649 | |||||
1650 | # stop evolving if the limit has been reached | ||||
1651 | until ( $self->_get_similarity( $oristring, $string ) <= $sim ) { | ||||
1652 | |||||
1653 | # find the location in the string to change | ||||
1654 | my $loc = int( rand $length ) + 1; | ||||
1655 | |||||
1656 | # nucleotide to change | ||||
1657 | my $oldnuc = substr $string, $loc - 1, 1; | ||||
1658 | my $newnuc; | ||||
1659 | |||||
1660 | # nucleotide it is changed to | ||||
1661 | my $choose = rand(100); | ||||
1662 | if ( $choose < $transition ) { | ||||
1663 | $newnuc = $changes{$oldnuc}[0]; | ||||
1664 | } | ||||
1665 | elsif ( $choose < $first_transversion ) { | ||||
1666 | $newnuc = $changes{$oldnuc}[1]; | ||||
1667 | } | ||||
1668 | else { | ||||
1669 | $newnuc = $changes{$oldnuc}[2]; | ||||
1670 | } | ||||
1671 | |||||
1672 | # do the change | ||||
1673 | substr $string, $loc - 1, 1, $newnuc; | ||||
1674 | |||||
1675 | $self->debug("$loc$oldnuc>$newnuc\n"); | ||||
1676 | } | ||||
1677 | |||||
1678 | return new Bio::PrimarySeq( | ||||
1679 | -id => $seq->id . "-$sim", | ||||
1680 | -description => $seq->description, | ||||
1681 | -seq => $string | ||||
1682 | ); | ||||
1683 | } | ||||
1684 | |||||
1685 | sub _get_similarity { | ||||
1686 | my ( $self, $oriseq, $seq ) = @_; | ||||
1687 | |||||
1688 | my $len = length($oriseq); | ||||
1689 | my $c; | ||||
1690 | |||||
1691 | for ( my $i = 0 ; $i < $len ; $i++ ) { | ||||
1692 | $c++ if substr( $oriseq, $i, 1 ) eq substr( $seq, $i, 1 ); | ||||
1693 | } | ||||
1694 | return 100 * $c / $len; | ||||
1695 | } | ||||
1696 | |||||
1697 | 1 | 23µs | 1; | ||
# spent 12µs within Bio::SeqUtils::CORE:match which was called 27 times, avg 437ns/call:
# 27 times (12µs+0s) by Bio::SeqUtils::valid_aa at line 429, avg 437ns/call | |||||
# spent 6µs within Bio::SeqUtils::CORE:sort which was called:
# once (6µs+0s) by Bio::SeqUtils::valid_aa at line 428 |