Filename | /Users/ap13/perl5/lib/perl5/Bio/Tools/IUPAC.pm |
Statements | Executed 14 statements in 1.67ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 64µs | 64µs | BEGIN@172 | Bio::Tools::IUPAC::
1 | 1 | 1 | 15µs | 29µs | BEGIN@168 | Bio::Tools::IUPAC::
1 | 1 | 1 | 9µs | 83µs | BEGIN@169 | Bio::Tools::IUPAC::
1 | 1 | 1 | 9µs | 95µs | BEGIN@170 | Bio::Tools::IUPAC::
1 | 1 | 1 | 8µs | 8µs | iupac_iup | Bio::Tools::IUPAC::
1 | 1 | 1 | 5µs | 5µs | iupac_iub | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | AUTOLOAD | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | _initialize | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | count | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | iupac | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | iupac_amb | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | iupac_iub_amb | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | iupac_iup_amb | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | iupac_rev_iub | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | new | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | next_seq | Bio::Tools::IUPAC::
0 | 0 | 0 | 0s | 0s | regexp | Bio::Tools::IUPAC::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # | ||||
2 | # BioPerl module for IUPAC | ||||
3 | # | ||||
4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
5 | # | ||||
6 | # Cared for by Aaron Mackey <amackey@virginia.edu> | ||||
7 | # | ||||
8 | # Copyright Aaron Mackey | ||||
9 | # | ||||
10 | # You may distribute this module under the same terms as perl itself | ||||
11 | |||||
12 | # POD documentation - main docs before the code | ||||
13 | |||||
14 | =head1 NAME | ||||
15 | |||||
16 | Bio::Tools::IUPAC - Generates unique sequence objects or regular expressions from | ||||
17 | an ambiguous IUPAC sequence | ||||
18 | |||||
19 | =head1 SYNOPSIS | ||||
20 | |||||
21 | use Bio::PrimarySeq; | ||||
22 | use Bio::Tools::IUPAC; | ||||
23 | |||||
24 | # Get the IUPAC code for proteins | ||||
25 | my %iupac_prot = Bio::Tools::IUPAC->new->iupac_iup; | ||||
26 | |||||
27 | # Create a sequence with degenerate residues | ||||
28 | my $ambiseq = Bio::PrimarySeq->new(-seq => 'ARTCGUTGN', -alphabet => 'dna'); | ||||
29 | |||||
30 | # Create all possible non-degenerate sequences | ||||
31 | my $iupac = Bio::Tools::IUPAC->new(-seq => $ambiseq); | ||||
32 | while ($uniqueseq = $iupac->next_seq()) { | ||||
33 | # process the unique Bio::Seq object. | ||||
34 | } | ||||
35 | |||||
36 | # Get a regular expression that matches all possible sequences | ||||
37 | my $regexp = $iupac->regexp(); | ||||
38 | |||||
39 | =head1 DESCRIPTION | ||||
40 | |||||
41 | Bio::Tools::IUPAC is a tool that manipulates sequences with ambiguous residues | ||||
42 | following the IUPAC conventions. Non-standard characters have the meaning | ||||
43 | described below: | ||||
44 | |||||
45 | IUPAC-IUB SYMBOLS FOR NUCLEOTIDE (DNA OR RNA) NOMENCLATURE: | ||||
46 | Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030 | ||||
47 | |||||
48 | --------------------------------------------------------------- | ||||
49 | Symbol Meaning Nucleic Acid | ||||
50 | --------------------------------------------------------------- | ||||
51 | A A Adenine | ||||
52 | C C Cytosine | ||||
53 | G G Guanine | ||||
54 | T T Thymine | ||||
55 | U U Uracil | ||||
56 | M A or C aMino | ||||
57 | R A or G puRine | ||||
58 | W A or T Weak | ||||
59 | S C or G Strong | ||||
60 | Y C or T pYrimidine | ||||
61 | K G or T Keto | ||||
62 | V A or C or G not T (closest unused char after T) | ||||
63 | H A or C or T not G (closest unused char after G) | ||||
64 | D A or G or T not C (closest unused char after C) | ||||
65 | B C or G or T not A (closest unused char after A) | ||||
66 | X G or A or T or C Unknown (very rarely used) | ||||
67 | N G or A or T or C Unknown (commonly used) | ||||
68 | |||||
69 | |||||
70 | IUPAC-IUP AMINO ACID SYMBOLS: | ||||
71 | Biochem J. 1984 Apr 15; 219(2): 345-373 | ||||
72 | Eur J Biochem. 1993 Apr 1; 213(1): 2 | ||||
73 | |||||
74 | ------------------------------------------ | ||||
75 | Symbol Meaning | ||||
76 | ------------------------------------------ | ||||
77 | A Alanine | ||||
78 | B Aspartic Acid, Asparagine | ||||
79 | C Cysteine | ||||
80 | D Aspartic Acid | ||||
81 | E Glutamic Acid | ||||
82 | F Phenylalanine | ||||
83 | G Glycine | ||||
84 | H Histidine | ||||
85 | I Isoleucine | ||||
86 | J Isoleucine/Leucine | ||||
87 | K Lysine | ||||
88 | L Leucine | ||||
89 | M Methionine | ||||
90 | N Asparagine | ||||
91 | O Pyrrolysine | ||||
92 | P Proline | ||||
93 | Q Glutamine | ||||
94 | R Arginine | ||||
95 | S Serine | ||||
96 | T Threonine | ||||
97 | U Selenocysteine | ||||
98 | V Valine | ||||
99 | W Tryptophan | ||||
100 | X Unknown | ||||
101 | Y Tyrosine | ||||
102 | Z Glutamic Acid, Glutamine | ||||
103 | * Terminator | ||||
104 | |||||
105 | There are a few things Bio::Tools::IUPAC can do for you: | ||||
106 | |||||
107 | =over | ||||
108 | |||||
109 | =item * | ||||
110 | |||||
111 | report the IUPAC mapping between ambiguous and non-ambiguous residues | ||||
112 | |||||
113 | =item * | ||||
114 | |||||
115 | produce a stream of all possible corresponding unambiguous Bio::Seq objects given | ||||
116 | an ambiguous sequence object | ||||
117 | |||||
118 | =item * | ||||
119 | |||||
120 | convert an ambiguous sequence object to a corresponding regular expression | ||||
121 | |||||
122 | =back | ||||
123 | |||||
124 | =head1 FEEDBACK | ||||
125 | |||||
126 | =head2 Mailing Lists | ||||
127 | |||||
128 | User feedback is an integral part of the evolution of this and other | ||||
129 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
130 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
131 | |||||
132 | bioperl-l@bioperl.org - General discussion | ||||
133 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
134 | |||||
135 | =head2 Support | ||||
136 | |||||
137 | Please direct usage questions or support issues to the mailing list: | ||||
138 | |||||
139 | I<bioperl-l@bioperl.org> | ||||
140 | |||||
141 | rather than to the module maintainer directly. Many experienced and | ||||
142 | reponsive experts will be able look at the problem and quickly | ||||
143 | address it. Please include a thorough description of the problem | ||||
144 | with code and data examples if at all possible. | ||||
145 | |||||
146 | =head2 Reporting Bugs | ||||
147 | |||||
148 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
149 | the bugs and their resolution. Bug reports can be submitted via the | ||||
150 | web: | ||||
151 | |||||
152 | https://github.com/bioperl/bioperl-live/issues | ||||
153 | |||||
154 | =head1 AUTHOR - Aaron Mackey | ||||
155 | |||||
156 | Email amackey-at-virginia.edu | ||||
157 | |||||
158 | =head1 APPENDIX | ||||
159 | |||||
160 | The rest of the documentation details each of the object | ||||
161 | methods. Internal methods are usually preceded with a _ | ||||
162 | |||||
163 | =cut | ||||
164 | |||||
165 | |||||
166 | package Bio::Tools::IUPAC; | ||||
167 | |||||
168 | 2 | 30µs | 2 | 44µs | # spent 29µs (15+15) within Bio::Tools::IUPAC::BEGIN@168 which was called:
# once (15µs+15µs) by Bio::Tools::CodonTable::BEGIN@190 at line 168 # spent 29µs making 1 call to Bio::Tools::IUPAC::BEGIN@168
# spent 15µs making 1 call to strict::import |
169 | 2 | 33µs | 2 | 83µs | # spent 83µs (9+74) within Bio::Tools::IUPAC::BEGIN@169 which was called:
# once (9µs+74µs) by Bio::Tools::CodonTable::BEGIN@190 at line 169 # spent 83µs making 1 call to Bio::Tools::IUPAC::BEGIN@169
# spent 74µs making 1 call to base::import, recursion: max depth 2, sum of overlapping time 74µs |
170 | 2 | 289µs | 2 | 182µs | # spent 95µs (9+87) within Bio::Tools::IUPAC::BEGIN@170 which was called:
# once (9µs+87µs) by Bio::Tools::CodonTable::BEGIN@190 at line 170 # spent 95µs making 1 call to Bio::Tools::IUPAC::BEGIN@170
# spent 87µs making 1 call to vars::import |
171 | |||||
172 | # spent 64µs within Bio::Tools::IUPAC::BEGIN@172 which was called:
# once (64µs+0s) by Bio::Tools::CodonTable::BEGIN@190 at line 267 | ||||
173 | # Ambiguous nucleic residues are matched to unambiguous residues | ||||
174 | 1 | 24µs | %IUB = ( | ||
175 | A => [qw(A)], | ||||
176 | C => [qw(C)], | ||||
177 | G => [qw(G)], | ||||
178 | T => [qw(T)], | ||||
179 | U => [qw(U)], | ||||
180 | M => [qw(A C)], | ||||
181 | R => [qw(A G)], | ||||
182 | S => [qw(C G)], | ||||
183 | W => [qw(A T)], | ||||
184 | Y => [qw(C T)], | ||||
185 | K => [qw(G T)], | ||||
186 | V => [qw(A C G)], | ||||
187 | H => [qw(A C T)], | ||||
188 | D => [qw(A G T)], | ||||
189 | B => [qw(C G T)], | ||||
190 | N => [qw(A C G T)], | ||||
191 | X => [qw(A C G T)], | ||||
192 | ); | ||||
193 | |||||
194 | # Same as %IUB but ambiguous residues are matched to ambiguous residues only | ||||
195 | 1 | 10µs | %IUB_AMB = ( | ||
196 | M => [qw(M)], | ||||
197 | R => [qw(R)], | ||||
198 | W => [qw(W)], | ||||
199 | S => [qw(S)], | ||||
200 | Y => [qw(Y)], | ||||
201 | K => [qw(K)], | ||||
202 | V => [qw(M R S V)], | ||||
203 | H => [qw(H M W Y)], | ||||
204 | D => [qw(D K R W)], | ||||
205 | B => [qw(B K S Y)], | ||||
206 | N => [qw(B D H K M N R S V W Y)], | ||||
207 | ); | ||||
208 | |||||
209 | # The inverse of %IUB | ||||
210 | 1 | 8µs | %REV_IUB = ( | ||
211 | A => 'A', | ||||
212 | T => 'T', | ||||
213 | U => 'U', | ||||
214 | C => 'C', | ||||
215 | G => 'G', | ||||
216 | AC => 'M', | ||||
217 | AG => 'R', | ||||
218 | AT => 'W', | ||||
219 | CG => 'S', | ||||
220 | CT => 'Y', | ||||
221 | GT => 'K', | ||||
222 | ACG => 'V', | ||||
223 | ACT => 'H', | ||||
224 | AGT => 'D', | ||||
225 | CGT => 'B', | ||||
226 | ACGT => 'N', | ||||
227 | N => 'N' | ||||
228 | ); | ||||
229 | |||||
230 | # Same thing with proteins now | ||||
231 | 1 | 15µs | %IUP = ( | ||
232 | A => [qw(A)], | ||||
233 | B => [qw(D N)], | ||||
234 | C => [qw(C)], | ||||
235 | D => [qw(D)], | ||||
236 | E => [qw(E)], | ||||
237 | F => [qw(F)], | ||||
238 | G => [qw(G)], | ||||
239 | H => [qw(H)], | ||||
240 | I => [qw(I)], | ||||
241 | J => [qw(I L)], | ||||
242 | K => [qw(K)], | ||||
243 | L => [qw(L)], | ||||
244 | M => [qw(M)], | ||||
245 | N => [qw(N)], | ||||
246 | O => [qw(O)], | ||||
247 | P => [qw(P)], | ||||
248 | Q => [qw(Q)], | ||||
249 | R => [qw(R)], | ||||
250 | S => [qw(S)], | ||||
251 | T => [qw(T)], | ||||
252 | U => [qw(U)], | ||||
253 | V => [qw(V)], | ||||
254 | W => [qw(W)], | ||||
255 | X => [qw(X)], | ||||
256 | Y => [qw(Y)], | ||||
257 | Z => [qw(E Q)], | ||||
258 | '*' => [qw(*)], | ||||
259 | ); | ||||
260 | |||||
261 | 1 | 7µs | %IUP_AMB = ( | ||
262 | B => [qw(B)], | ||||
263 | J => [qw(J)], | ||||
264 | Z => [qw(Z)], | ||||
265 | ); | ||||
266 | |||||
267 | 1 | 1.23ms | 1 | 64µs | } # spent 64µs making 1 call to Bio::Tools::IUPAC::BEGIN@172 |
268 | |||||
269 | |||||
270 | =head2 new | ||||
271 | |||||
272 | Title : new | ||||
273 | Usage : Bio::Tools::IUPAC->new($seq); | ||||
274 | Function: Create a new IUPAC object, which acts as a sequence stream (akin to | ||||
275 | SeqIO) | ||||
276 | Args : an ambiguously coded sequence object that has a specified 'alphabet' | ||||
277 | Returns : a Bio::Tools::IUPAC object. | ||||
278 | |||||
279 | =cut | ||||
280 | |||||
281 | sub new { | ||||
282 | my ($class,@args) = @_; | ||||
283 | my $self = $class->SUPER::new(@args); | ||||
284 | my ($seq) = $self->_rearrange([qw(SEQ)],@args); | ||||
285 | |||||
286 | if ( (not defined $seq) && @args && ref($args[0]) ) { | ||||
287 | # parameter not passed as named parameter? | ||||
288 | $seq = $args[0]; | ||||
289 | } | ||||
290 | |||||
291 | if (defined $seq) { | ||||
292 | if (not $seq->isa('Bio::PrimarySeqI')) { | ||||
293 | $self->throw('Must supply a sequence object'); | ||||
294 | } | ||||
295 | if (length $seq->seq == 0) { | ||||
296 | $self->throw('Sequence had zero-length'); | ||||
297 | } | ||||
298 | $self->{'_seq'} = $seq; | ||||
299 | } | ||||
300 | |||||
301 | return $self; | ||||
302 | } | ||||
303 | |||||
304 | |||||
305 | sub _initialize { | ||||
306 | my ($self) = @_; | ||||
307 | my %iupac = $self->iupac; | ||||
308 | $self->{'_alpha'} = [ map { $iupac{uc $_} } split('', $self->{'_seq'}->seq) ]; | ||||
309 | $self->{'_string'} = [(0) x length($self->{'_seq'}->seq())]; | ||||
310 | $self->{'_string'}->[0] = -1; | ||||
311 | } | ||||
312 | |||||
313 | |||||
314 | =head2 next_seq | ||||
315 | |||||
316 | Title : next_seq | ||||
317 | Usage : $iupac->next_seq(); | ||||
318 | Function: returns the next unique sequence object | ||||
319 | Args : none. | ||||
320 | Returns : a Bio::Seq object | ||||
321 | |||||
322 | =cut | ||||
323 | |||||
324 | sub next_seq { | ||||
325 | my ($self) = @_; | ||||
326 | |||||
327 | if (not exists $self->{'_string'}) { | ||||
328 | $self->_initialize(); | ||||
329 | } | ||||
330 | |||||
331 | for my $i ( 0 .. $#{$self->{'_string'}} ) { | ||||
332 | next unless $self->{'_string'}->[$i] || @{$self->{'_alpha'}->[$i]} > 1; | ||||
333 | if ( $self->{'_string'}->[$i] == $#{$self->{'_alpha'}->[$i]} ) { # rollover | ||||
334 | if ( $i == $#{$self->{'_string'}} ) { # end of possibilities | ||||
335 | return; | ||||
336 | } else { | ||||
337 | $self->{'_string'}->[$i] = 0; | ||||
338 | next; | ||||
339 | } | ||||
340 | } else { | ||||
341 | $self->{'_string'}->[$i]++; | ||||
342 | my $j = -1; | ||||
343 | my $seqstr = join('', map { $j++; $self->{'_alpha'}->[$j]->[$_]; } @{$self->{'_string'}}); | ||||
344 | my $desc = $self->{'_seq'}->desc() || ''; | ||||
345 | $self->{'_num'}++; | ||||
346 | 1 while $self->{'_num'} =~ s/(\d)(\d\d\d)(?!\d)/$1,$2/; | ||||
347 | $desc =~ s/( \[Bio::Tools::IUPAC-generated\sunique sequence # [^\]]*\])|$/ \[Bio::Tools::IUPAC-generated unique sequence # $self->{'_num'}\]/; | ||||
348 | $self->{'_num'} =~ s/,//g; | ||||
349 | |||||
350 | # Return a fresh sequence object | ||||
351 | return Bio::PrimarySeq->new(-seq => $seqstr, -desc => $desc); | ||||
352 | } | ||||
353 | } | ||||
354 | } | ||||
355 | |||||
356 | |||||
357 | =head2 iupac | ||||
358 | |||||
359 | Title : iupac | ||||
360 | Usage : my %symbols = $iupac->iupac; | ||||
361 | Function: Returns a hash of symbols -> symbol components of the right type | ||||
362 | for the given sequence, i.e. it is the same as iupac_iup() if | ||||
363 | Bio::Tools::IUPAC was given a proteic sequence, or iupac_iub() if the | ||||
364 | sequence was nucleic. For example, the key 'M' has the value ['A', 'C']. | ||||
365 | Args : none | ||||
366 | Returns : Hash | ||||
367 | |||||
368 | =cut | ||||
369 | |||||
370 | sub iupac { | ||||
371 | my ($self) = @_; | ||||
372 | my $alphabet = lc( $self->{'_seq'}->alphabet() ); | ||||
373 | if ( ($alphabet eq 'dna') or ($alphabet eq 'rna') ) { | ||||
374 | return %IUB; # nucleic | ||||
375 | } elsif ( $alphabet eq 'protein' ) { | ||||
376 | return %IUP; # proteic | ||||
377 | } else { | ||||
378 | $self->throw("The input sequence had the unknown alphabet '$alphabet'\n"); | ||||
379 | } | ||||
380 | } | ||||
381 | |||||
- - | |||||
384 | =head2 iupac_amb | ||||
385 | |||||
386 | Title : iupac_amb | ||||
387 | Usage : my %symbols = $iupac->iupac_amb; | ||||
388 | Function: Same as iupac() but only contains a mapping between ambiguous residues | ||||
389 | and the ambiguous residues they map to. For example, the key 'N' has | ||||
390 | the value ['R', 'Y', 'K', 'M', 'S', 'W', 'B', 'D', 'H', 'V', 'N'], | ||||
391 | i.e. it matches all other ambiguous residues. | ||||
392 | Args : none | ||||
393 | Returns : Hash | ||||
394 | |||||
395 | =cut | ||||
396 | |||||
397 | sub iupac_amb { | ||||
398 | my ($self) = @_; | ||||
399 | my $alphabet = lc( $self->{'_seq'}->alphabet() ); | ||||
400 | if ( ($alphabet eq 'dna') or ($alphabet eq 'rna') ) { | ||||
401 | return %IUB_AMB; # nucleic | ||||
402 | } elsif ( $alphabet eq 'protein' ) { | ||||
403 | return %IUP_AMB; # proteic | ||||
404 | } else { | ||||
405 | $self->throw("The input sequence had the unknown alphabet '$alphabet'\n"); | ||||
406 | } | ||||
407 | } | ||||
408 | |||||
409 | |||||
410 | =head2 iupac_iup | ||||
411 | |||||
412 | Title : iupac_iup | ||||
413 | Usage : my %aasymbols = $iupac->iupac_iup; | ||||
414 | Function: Returns a hash of PROTEIN symbols -> non-ambiguous symbol components | ||||
415 | Args : none | ||||
416 | Returns : Hash | ||||
417 | |||||
418 | =cut | ||||
419 | |||||
420 | # spent 8µs within Bio::Tools::IUPAC::iupac_iup which was called:
# once (8µs+0s) by Bio::Tools::CodonTable::BEGIN@198 at line 294 of Bio/Tools/CodonTable.pm | ||||
421 | 1 | 12µs | return %IUP; | ||
422 | } | ||||
423 | |||||
424 | |||||
425 | =head2 iupac_iup_amb | ||||
426 | |||||
427 | Title : iupac_iup_amb | ||||
428 | Usage : my %aasymbols = $iupac->iupac_iup_amb; | ||||
429 | Function: Returns a hash of PROTEIN symbols -> ambiguous symbol components | ||||
430 | Args : none | ||||
431 | Returns : Hash | ||||
432 | |||||
433 | =cut | ||||
434 | |||||
435 | sub iupac_iup_amb { | ||||
436 | return %IUP_AMB; | ||||
437 | } | ||||
438 | |||||
439 | |||||
440 | =head2 iupac_iub | ||||
441 | |||||
442 | Title : iupac_iub | ||||
443 | Usage : my %dnasymbols = $iupac->iupac_iub; | ||||
444 | Function: Returns a hash of DNA symbols -> non-ambiguous symbol components | ||||
445 | Args : none | ||||
446 | Returns : Hash | ||||
447 | |||||
448 | =cut | ||||
449 | |||||
450 | # spent 5µs within Bio::Tools::IUPAC::iupac_iub which was called:
# once (5µs+0s) by Bio::Tools::CodonTable::BEGIN@198 at line 293 of Bio/Tools/CodonTable.pm | ||||
451 | 1 | 9µs | return %IUB; | ||
452 | } | ||||
453 | |||||
454 | |||||
455 | =head2 iupac_iub_amb | ||||
456 | |||||
457 | Title : iupac_iub_amb | ||||
458 | Usage : my %dnasymbols = $iupac->iupac_iub; | ||||
459 | Function: Returns a hash of DNA symbols -> ambiguous symbol components | ||||
460 | Args : none | ||||
461 | Returns : Hash | ||||
462 | |||||
463 | =cut | ||||
464 | |||||
465 | sub iupac_iub_amb { | ||||
466 | return %IUB_AMB; | ||||
467 | } | ||||
468 | |||||
469 | |||||
470 | =head2 iupac_rev_iub | ||||
471 | |||||
472 | Title : iupac_rev_iub | ||||
473 | Usage : my %dnasymbols = $iupac->iupac_rev_iub; | ||||
474 | Function: Returns a hash of nucleotide combinations -> IUPAC code | ||||
475 | (a reverse of the iupac_iub hash). | ||||
476 | Args : none | ||||
477 | Returns : Hash | ||||
478 | |||||
479 | =cut | ||||
480 | |||||
481 | sub iupac_rev_iub { | ||||
482 | return %REV_IUB; | ||||
483 | } | ||||
484 | |||||
485 | |||||
486 | =head2 count | ||||
487 | |||||
488 | Title : count | ||||
489 | Usage : my $total = $iupac->count(); | ||||
490 | Function: Calculates the number of unique, unambiguous sequences that | ||||
491 | this ambiguous sequence could generate | ||||
492 | Args : none | ||||
493 | Return : int | ||||
494 | |||||
495 | =cut | ||||
496 | |||||
497 | sub count { | ||||
498 | my ($self) = @_; | ||||
499 | if (not exists $self->{'_string'}) { | ||||
500 | $self->_initialize(); | ||||
501 | } | ||||
502 | my $count = 1; | ||||
503 | $count *= scalar(@$_) for (@{$self->{'_alpha'}}); | ||||
504 | return $count; | ||||
505 | } | ||||
506 | |||||
507 | |||||
508 | =head2 regexp | ||||
509 | |||||
510 | Title : regexp | ||||
511 | Usage : my $re = $iupac->regexp(); | ||||
512 | Function: Converts the ambiguous sequence into a regular expression that | ||||
513 | matches all of the corresponding ambiguous and non-ambiguous sequences. | ||||
514 | You can further manipulate the resulting regular expression with the | ||||
515 | Bio::Tools::SeqPattern module. After you are done building your | ||||
516 | regular expression, you might want to compile it and make it case- | ||||
517 | insensitive: | ||||
518 | $re = qr/$re/i; | ||||
519 | Args : 1 to match RNA: T and U characters will match interchangeably | ||||
520 | Return : regular expression | ||||
521 | |||||
522 | =cut | ||||
523 | |||||
524 | sub regexp { | ||||
525 | my ($self, $match_rna) = @_; | ||||
526 | my $re; | ||||
527 | my $seq = $self->{'_seq'}->seq; | ||||
528 | my %iupac = $self->iupac; | ||||
529 | my %iupac_amb = $self->iupac_amb; | ||||
530 | for my $pos (0 .. length($seq)-1) { | ||||
531 | my $res = substr $seq, $pos, 1; | ||||
532 | my $iupacs = $iupac{$res}; | ||||
533 | my $iupacs_amb = $iupac_amb{$res} || []; | ||||
534 | if (not defined $iupacs) { | ||||
535 | $self->throw("Primer sequence '$seq' is not a valid IUPAC sequence.". | ||||
536 | " Offending character was '$res'.\n"); | ||||
537 | } | ||||
538 | my $part = join '', (@$iupacs, @$iupacs_amb); | ||||
539 | if ($match_rna) { | ||||
540 | $part =~ s/T/TU/i || $part =~ s/U/TU/i; | ||||
541 | } | ||||
542 | if (length $part > 1) { | ||||
543 | $part = '['.$part.']'; | ||||
544 | } | ||||
545 | $re .= $part; | ||||
546 | } | ||||
547 | return $re; | ||||
548 | } | ||||
549 | |||||
550 | |||||
551 | sub AUTOLOAD { | ||||
552 | my $self = shift @_; | ||||
553 | my $method = $AUTOLOAD; | ||||
554 | $method =~ s/.*:://; | ||||
555 | return $self->{'_seq'}->$method(@_) | ||||
556 | unless $method eq 'DESTROY'; | ||||
557 | } | ||||
558 | |||||
559 | 1 | 3µs | 1; | ||
560 |