Filename | /Users/ap13/perl5/lib/perl5/Bio/Tools/CodonTable.pm |
Statements | Executed 303 statements in 2.75ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 5.35ms | 6.63ms | BEGIN@191 | Bio::Tools::CodonTable::
1 | 1 | 1 | 1.76ms | 2.03ms | BEGIN@190 | Bio::Tools::CodonTable::
1 | 1 | 1 | 215µs | 361µs | BEGIN@198 | Bio::Tools::CodonTable::
1 | 1 | 1 | 15µs | 212µs | BEGIN@185 | Bio::Tools::CodonTable::
1 | 1 | 1 | 12µs | 86µs | BEGIN@193 | Bio::Tools::CodonTable::
1 | 1 | 1 | 10µs | 64µs | BEGIN@199 | Bio::Tools::CodonTable::
1 | 1 | 1 | 10µs | 25µs | BEGIN@187 | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | _codon_is | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | _make_iupac_string | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | _translate_ambiguous_codon | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | _unambiquous_codons | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | add_table | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | id | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | is_start_codon | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | is_ter_codon | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | is_unknown_codon | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | name | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | new | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | reverse_translate_all | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | reverse_translate_best | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | revtranslate | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | tables | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | translate | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | translate_strict | Bio::Tools::CodonTable::
0 | 0 | 0 | 0s | 0s | unambiguous_codons | Bio::Tools::CodonTable::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # | ||||
2 | # bioperl module for Bio::Tools::CodonTable | ||||
3 | # | ||||
4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
5 | # | ||||
6 | # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org> | ||||
7 | # | ||||
8 | # Copyright Heikki Lehvaslaiho | ||||
9 | # | ||||
10 | # You may distribute this module under the same terms as perl itself | ||||
11 | |||||
12 | # POD documentation - main docs before the code | ||||
13 | |||||
14 | =head1 NAME | ||||
15 | |||||
16 | Bio::Tools::CodonTable - Codon table object | ||||
17 | |||||
18 | =head1 SYNOPSIS | ||||
19 | |||||
20 | # This is a read-only class for all known codon tables. The IDs are | ||||
21 | # the ones used by nucleotide sequence databases. All common IUPAC | ||||
22 | # ambiguity codes for DNA, RNA and amino acids are recognized. | ||||
23 | |||||
24 | use Bio::Tools::CodonTable; | ||||
25 | |||||
26 | # defaults to ID 1 "Standard" | ||||
27 | $myCodonTable = Bio::Tools::CodonTable->new(); | ||||
28 | $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 ); | ||||
29 | |||||
30 | # change codon table | ||||
31 | $myCodonTable->id(5); | ||||
32 | |||||
33 | # examine codon table | ||||
34 | print join (' ', "The name of the codon table no.", $myCodonTable->id(4), | ||||
35 | "is:", $myCodonTable->name(), "\n"); | ||||
36 | |||||
37 | # print possible codon tables | ||||
38 | $tables = Bio::Tools::CodonTable->tables; | ||||
39 | while ( ($id,$name) = each %{$tables} ) { | ||||
40 | print "$id = $name\n"; | ||||
41 | } | ||||
42 | |||||
43 | # translate a codon | ||||
44 | $aa = $myCodonTable->translate('ACU'); | ||||
45 | $aa = $myCodonTable->translate('act'); | ||||
46 | $aa = $myCodonTable->translate('ytr'); | ||||
47 | |||||
48 | # reverse translate an amino acid | ||||
49 | @codons = $myCodonTable->revtranslate('A'); | ||||
50 | @codons = $myCodonTable->revtranslate('Ser'); | ||||
51 | @codons = $myCodonTable->revtranslate('Glx'); | ||||
52 | @codons = $myCodonTable->revtranslate('cYS', 'rna'); | ||||
53 | |||||
54 | # reverse translate an entire amino acid sequence into a IUPAC | ||||
55 | # nucleotide string | ||||
56 | |||||
57 | my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL'); | ||||
58 | my $iupac_str = $myCodonTable->reverse_translate_all($seqobj); | ||||
59 | |||||
60 | # boolean tests | ||||
61 | print "Is a start\n" if $myCodonTable->is_start_codon('ATG'); | ||||
62 | print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar'); | ||||
63 | print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG'); | ||||
64 | |||||
65 | =head1 DESCRIPTION | ||||
66 | |||||
67 | Codon tables are also called translation tables or genetic codes | ||||
68 | since that is what they represent. A bit more complete picture | ||||
69 | of the full complexity of codon usage in various taxonomic groups | ||||
70 | is presented at the NCBI Genetic Codes Home page. | ||||
71 | |||||
72 | CodonTable is a BioPerl class that knows all current translation | ||||
73 | tables that are used by primary nucleotide sequence databases | ||||
74 | (GenBank, EMBL and DDBJ). It provides methods to output information | ||||
75 | about tables and relationships between codons and amino acids. | ||||
76 | |||||
77 | This class and its methods recognized all common IUPAC ambiguity codes | ||||
78 | for DNA, RNA and animo acids. The translation method follows the | ||||
79 | conventions in EMBL and TREMBL databases. | ||||
80 | |||||
81 | It is a nuisance to separate RNA and cDNA representations of nucleic | ||||
82 | acid transcripts. The CodonTable object accepts codons of both type as | ||||
83 | input and allows the user to set the mode for output when reverse | ||||
84 | translating. Its default for output is DNA. | ||||
85 | |||||
86 | Note: | ||||
87 | |||||
88 | This class deals primarily with individual codons and amino | ||||
89 | acids. However in the interest of speed you can L<translate> | ||||
90 | longer sequence, too. The full complexity of protein translation | ||||
91 | is tackled by L<Bio::PrimarySeqI::translate>. | ||||
92 | |||||
93 | |||||
94 | The amino acid codes are IUPAC recommendations for common amino acids: | ||||
95 | |||||
96 | A Ala Alanine | ||||
97 | R Arg Arginine | ||||
98 | N Asn Asparagine | ||||
99 | D Asp Aspartic acid | ||||
100 | C Cys Cysteine | ||||
101 | Q Gln Glutamine | ||||
102 | E Glu Glutamic acid | ||||
103 | G Gly Glycine | ||||
104 | H His Histidine | ||||
105 | I Ile Isoleucine | ||||
106 | L Leu Leucine | ||||
107 | K Lys Lysine | ||||
108 | M Met Methionine | ||||
109 | F Phe Phenylalanine | ||||
110 | P Pro Proline | ||||
111 | O Pyl Pyrrolysine (22nd amino acid) | ||||
112 | U Sec Selenocysteine (21st amino acid) | ||||
113 | S Ser Serine | ||||
114 | T Thr Threonine | ||||
115 | W Trp Tryptophan | ||||
116 | Y Tyr Tyrosine | ||||
117 | V Val Valine | ||||
118 | B Asx Aspartic acid or Asparagine | ||||
119 | Z Glx Glutamine or Glutamic acid | ||||
120 | J Xle Isoleucine or Valine (mass spec ambiguity) | ||||
121 | X Xaa Any or unknown amino acid | ||||
122 | |||||
123 | |||||
124 | It is worth noting that, "Bacterial" codon table no. 11 produces an | ||||
125 | polypeptide that is, confusingly, identical to the standard one. The | ||||
126 | only differences are in available initiator codons. | ||||
127 | |||||
128 | |||||
129 | NCBI Genetic Codes home page: | ||||
130 | http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c | ||||
131 | |||||
132 | EBI Translation Table Viewer: | ||||
133 | http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi | ||||
134 | |||||
135 | Amended ASN.1 version with ids 16 and 21 is at: | ||||
136 | ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/ | ||||
137 | |||||
138 | Thanks to Matteo diTomasso for the original Perl implementation | ||||
139 | of these tables. | ||||
140 | |||||
141 | =head1 FEEDBACK | ||||
142 | |||||
143 | =head2 Mailing Lists | ||||
144 | |||||
145 | User feedback is an integral part of the evolution of this and other | ||||
146 | Bioperl modules. Send your comments and suggestions preferably to the | ||||
147 | Bioperl mailing lists Your participation is much appreciated. | ||||
148 | |||||
149 | bioperl-l@bioperl.org - General discussion | ||||
150 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
151 | |||||
152 | =head2 Support | ||||
153 | |||||
154 | Please direct usage questions or support issues to the mailing list: | ||||
155 | |||||
156 | I<bioperl-l@bioperl.org> | ||||
157 | |||||
158 | rather than to the module maintainer directly. Many experienced and | ||||
159 | reponsive experts will be able look at the problem and quickly | ||||
160 | address it. Please include a thorough description of the problem | ||||
161 | with code and data examples if at all possible. | ||||
162 | |||||
163 | =head2 Reporting Bugs | ||||
164 | |||||
165 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
166 | the bugs and their resolution. Bug reports can be submitted via the | ||||
167 | web: | ||||
168 | |||||
169 | https://github.com/bioperl/bioperl-live/issues | ||||
170 | |||||
171 | =head1 AUTHOR - Heikki Lehvaslaiho | ||||
172 | |||||
173 | Email: heikki-at-bioperl-dot-org | ||||
174 | |||||
175 | =head1 APPENDIX | ||||
176 | |||||
177 | The rest of the documentation details each of the object | ||||
178 | methods. Internal methods are usually preceded with a _ | ||||
179 | |||||
180 | =cut | ||||
181 | |||||
182 | # Let the code begin... | ||||
183 | |||||
184 | package Bio::Tools::CodonTable; | ||||
185 | 2 | 7µs | # spent 212µs (15+196) within Bio::Tools::CodonTable::BEGIN@185 which was called:
# once (15µs+196µs) by Bio::PrimarySeqI::BEGIN@124 at line 186 | ||
186 | 1 | 29µs | 2 | 408µs | %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR); # spent 212µs making 1 call to Bio::Tools::CodonTable::BEGIN@185
# spent 196µs making 1 call to vars::import |
187 | 2 | 28µs | 2 | 40µs | # spent 25µs (10+15) within Bio::Tools::CodonTable::BEGIN@187 which was called:
# once (10µs+15µs) by Bio::PrimarySeqI::BEGIN@124 at line 187 # spent 25µs making 1 call to Bio::Tools::CodonTable::BEGIN@187
# spent 15µs making 1 call to strict::import |
188 | |||||
189 | # Object preamble - inherits from Bio::Root::Root | ||||
190 | 2 | 221µs | 1 | 2.03ms | # spent 2.03ms (1.76+271µs) within Bio::Tools::CodonTable::BEGIN@190 which was called:
# once (1.76ms+271µs) by Bio::PrimarySeqI::BEGIN@124 at line 190 # spent 2.03ms making 1 call to Bio::Tools::CodonTable::BEGIN@190 |
191 | 2 | 205µs | 1 | 6.63ms | # spent 6.63ms (5.35+1.28) within Bio::Tools::CodonTable::BEGIN@191 which was called:
# once (5.35ms+1.28ms) by Bio::PrimarySeqI::BEGIN@124 at line 191 # spent 6.63ms making 1 call to Bio::Tools::CodonTable::BEGIN@191 |
192 | |||||
193 | 2 | 33µs | 2 | 86µs | # spent 86µs (12+73) within Bio::Tools::CodonTable::BEGIN@193 which was called:
# once (12µs+73µs) by Bio::PrimarySeqI::BEGIN@124 at line 193 # spent 86µs making 1 call to Bio::Tools::CodonTable::BEGIN@193
# spent 73µs making 1 call to base::import, recursion: max depth 2, sum of overlapping time 73µs |
194 | |||||
195 | |||||
196 | # first set internal values for all translation tables | ||||
197 | |||||
198 | # spent 361µs (215+146) within Bio::Tools::CodonTable::BEGIN@198 which was called:
# once (215µs+146µs) by Bio::PrimarySeqI::BEGIN@124 at line 298 | ||||
199 | 2 | 264µs | 2 | 116µs | # spent 64µs (10+53) within Bio::Tools::CodonTable::BEGIN@199 which was called:
# once (10µs+53µs) by Bio::PrimarySeqI::BEGIN@124 at line 199 # spent 64µs making 1 call to Bio::Tools::CodonTable::BEGIN@199
# spent 53µs making 1 call to constant::import |
200 | 290 | 164µs | $GAP = '-'; | ||
201 | $CODONGAP = $GAP x CODONSIZE; | ||||
202 | |||||
203 | @NAMES = #id | ||||
204 | ( | ||||
205 | 'Standard', #1 | ||||
206 | 'Vertebrate Mitochondrial',#2 | ||||
207 | 'Yeast Mitochondrial',# 3 | ||||
208 | 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4 | ||||
209 | 'Invertebrate Mitochondrial',#5 | ||||
210 | 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 | ||||
211 | '', '', | ||||
212 | 'Echinoderm Mitochondrial',#9 | ||||
213 | 'Euplotid Nuclear',#10 | ||||
214 | '"Bacterial"',# 11 | ||||
215 | 'Alternative Yeast Nuclear',# 12 | ||||
216 | 'Ascidian Mitochondrial',# 13 | ||||
217 | 'Flatworm Mitochondrial',# 14 | ||||
218 | 'Blepharisma Nuclear',# 15 | ||||
219 | 'Chlorophycean Mitochondrial',# 16 | ||||
220 | '', '', '', '', | ||||
221 | 'Trematode Mitochondrial',# 21 | ||||
222 | 'Scenedesmus obliquus Mitochondrial', #22 | ||||
223 | 'Thraustochytrium Mitochondrial', #23 | ||||
224 | 'Strict', #24, option for only ATG start | ||||
225 | ); | ||||
226 | |||||
227 | @TABLES = | ||||
228 | qw( | ||||
229 | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
230 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG | ||||
231 | FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
232 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
233 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG | ||||
234 | FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
235 | '' '' | ||||
236 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ||||
237 | FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
238 | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
239 | FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
240 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG | ||||
241 | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ||||
242 | FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
243 | FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
244 | '' '' '' '' | ||||
245 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ||||
246 | FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
247 | FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
248 | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
249 | ); | ||||
250 | |||||
251 | # (bases used for these tables, for reference) | ||||
252 | # 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG | ||||
253 | # 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG | ||||
254 | # 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG | ||||
255 | |||||
256 | @STARTS = | ||||
257 | qw( | ||||
258 | ---M---------------M---------------M---------------------------- | ||||
259 | --------------------------------MMMM---------------M------------ | ||||
260 | ----------------------------------MM---------------------------- | ||||
261 | --MM---------------M------------MMMM---------------M------------ | ||||
262 | ---M----------------------------MMMM---------------M------------ | ||||
263 | -----------------------------------M---------------------------- | ||||
264 | '' '' | ||||
265 | -----------------------------------M---------------------------- | ||||
266 | -----------------------------------M---------------------------- | ||||
267 | ---M---------------M------------MMMM---------------M------------ | ||||
268 | -------------------M---------------M---------------------------- | ||||
269 | -----------------------------------M---------------------------- | ||||
270 | -----------------------------------M---------------------------- | ||||
271 | -----------------------------------M---------------------------- | ||||
272 | -----------------------------------M---------------------------- | ||||
273 | '' '' '' '' | ||||
274 | -----------------------------------M---------------M------------ | ||||
275 | -----------------------------------M---------------------------- | ||||
276 | --------------------------------M--M---------------M------------ | ||||
277 | -----------------------------------M---------------------------- | ||||
278 | ); | ||||
279 | |||||
280 | my @nucs = qw(t c a g); | ||||
281 | my $x = 0; | ||||
282 | ($CODONS, $TRCOL) = ({}, {}); | ||||
283 | for my $i (@nucs) { | ||||
284 | for my $j (@nucs) { | ||||
285 | for my $k (@nucs) { | ||||
286 | my $codon = "$i$j$k"; | ||||
287 | $CODONS->{$codon} = $x; | ||||
288 | $TRCOL->{$x} = $codon; | ||||
289 | $x++; | ||||
290 | } | ||||
291 | } | ||||
292 | } | ||||
293 | 1 | 5µs | %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub(); # spent 5µs making 1 call to Bio::Tools::IUPAC::iupac_iub | ||
294 | 1 | 8µs | %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup(); # spent 8µs making 1 call to Bio::Tools::IUPAC::iupac_iup | ||
295 | 1 | 51µs | %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2); # spent 51µs making 1 call to Bio::SeqUtils::valid_aa | ||
296 | 1 | 82µs | $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']'; # spent 82µs making 1 call to Bio::SeqUtils::valid_aa | ||
297 | $TERMINATOR = '*'; | ||||
298 | 1 | 1.80ms | 1 | 361µs | } # spent 361µs making 1 call to Bio::Tools::CodonTable::BEGIN@198 |
299 | |||||
300 | sub new { | ||||
301 | my($class,@args) = @_; | ||||
302 | my $self = $class->SUPER::new(@args); | ||||
303 | |||||
304 | my($id) = | ||||
305 | $self->_rearrange([qw(ID | ||||
306 | )], | ||||
307 | @args); | ||||
308 | |||||
309 | $id = 1 if ( ! $id ); | ||||
310 | $id && $self->id($id); | ||||
311 | return $self; # success - we hope! | ||||
312 | } | ||||
313 | |||||
314 | =head2 id | ||||
315 | |||||
316 | Title : id | ||||
317 | Usage : $obj->id(3); $id_integer = $obj->id(); | ||||
318 | Function: Sets or returns the id of the translation table. IDs are | ||||
319 | integers from 1 to 15, excluding 7 and 8 which have been | ||||
320 | removed as redundant. If an invalid ID is given the method | ||||
321 | returns 0, false. | ||||
322 | Example : | ||||
323 | Returns : value of id, a scalar, 0 if not a valid | ||||
324 | Args : newvalue (optional) | ||||
325 | |||||
326 | =cut | ||||
327 | |||||
328 | sub id{ | ||||
329 | my ($self,$value) = @_; | ||||
330 | if( defined $value) { | ||||
331 | if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') { | ||||
332 | $self->warn("Not a valid codon table ID [$value] "); | ||||
333 | $value = 0; | ||||
334 | } | ||||
335 | $self->{'id'} = $value; | ||||
336 | } | ||||
337 | return $self->{'id'}; | ||||
338 | } | ||||
339 | |||||
340 | =head2 name | ||||
341 | |||||
342 | Title : name | ||||
343 | Usage : $obj->name() | ||||
344 | Function: returns the descriptive name of the translation table | ||||
345 | Example : | ||||
346 | Returns : A string | ||||
347 | Args : None | ||||
348 | |||||
349 | |||||
350 | =cut | ||||
351 | |||||
352 | sub name{ | ||||
353 | my ($self) = @_; | ||||
354 | |||||
355 | my ($id) = $self->{'id'}; | ||||
356 | return $NAMES[$id-1]; | ||||
357 | } | ||||
358 | |||||
359 | =head2 tables | ||||
360 | |||||
361 | Title : tables | ||||
362 | Usage : $obj->tables() or Bio::Tools::CodonTable->tables() | ||||
363 | Function: returns a hash reference where each key is a valid codon | ||||
364 | table id() number, and each value is the corresponding | ||||
365 | codon table name() string | ||||
366 | Example : | ||||
367 | Returns : A hashref | ||||
368 | Args : None | ||||
369 | |||||
370 | |||||
371 | =cut | ||||
372 | |||||
373 | sub tables{ | ||||
374 | my %tables; | ||||
375 | for my $id (1 .. @NAMES) { | ||||
376 | my $name = $NAMES[$id-1]; | ||||
377 | $tables{$id} = $name if $name; | ||||
378 | } | ||||
379 | return \%tables; | ||||
380 | } | ||||
381 | |||||
382 | =head2 translate | ||||
383 | |||||
384 | Title : translate | ||||
385 | Usage : $obj->translate('YTR') | ||||
386 | Function: Returns a string of one letter amino acid codes from | ||||
387 | nucleotide sequence input. The imput can be of any length. | ||||
388 | |||||
389 | Returns 'X' for unknown codons and codons that code for | ||||
390 | more than one amino acid. Returns an empty string if input | ||||
391 | is not three characters long. Exceptions for these are: | ||||
392 | |||||
393 | - IUPAC amino acid code B for Aspartic Acid and | ||||
394 | Asparagine, is used. | ||||
395 | - IUPAC amino acid code Z for Glutamic Acid, Glutamine is | ||||
396 | used. | ||||
397 | - if the codon is two nucleotides long and if by adding | ||||
398 | an a third character 'N', it codes for a single amino | ||||
399 | acid (with exceptions above), return that, otherwise | ||||
400 | return empty string. | ||||
401 | |||||
402 | Returns empty string for other input strings that are not | ||||
403 | three characters long. | ||||
404 | |||||
405 | Example : | ||||
406 | Returns : a string of one letter ambiguous IUPAC amino acid codes | ||||
407 | Args : ambiguous IUPAC nucleotide string | ||||
408 | |||||
409 | |||||
410 | =cut | ||||
411 | |||||
412 | sub translate { | ||||
413 | my ($self, $seq, $complete_codon) = @_; | ||||
414 | $self->throw("Calling translate without a seq argument!") unless defined $seq; | ||||
415 | return '' unless $seq; | ||||
416 | |||||
417 | my $id = $self->id; | ||||
418 | my ($partial) = 0; | ||||
419 | $partial = 2 if length($seq) % CODONSIZE == 2; | ||||
420 | |||||
421 | $seq = lc $seq; | ||||
422 | $seq =~ tr/u/t/; | ||||
423 | my $protein = ""; | ||||
424 | if ($seq =~ /[^actg]/ ) { #ambiguous chars | ||||
425 | for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) { | ||||
426 | my $triplet = substr($seq, $i, CODONSIZE); | ||||
427 | if( $triplet eq $CODONGAP ) { | ||||
428 | $protein .= $GAP; | ||||
429 | } elsif (exists $CODONS->{$triplet}) { | ||||
430 | $protein .= substr($TABLES[$id-1], | ||||
431 | $CODONS->{$triplet},1); | ||||
432 | } else { | ||||
433 | $protein .= $self->_translate_ambiguous_codon($triplet); | ||||
434 | } | ||||
435 | } | ||||
436 | } else { # simple, strict translation | ||||
437 | for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) { | ||||
438 | my $triplet = substr($seq, $i, CODONSIZE); | ||||
439 | if( $triplet eq $CODONGAP ) { | ||||
440 | $protein .= $GAP; | ||||
441 | } if (exists $CODONS->{$triplet}) { | ||||
442 | $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1); | ||||
443 | } else { | ||||
444 | $protein .= 'X'; | ||||
445 | } | ||||
446 | } | ||||
447 | } | ||||
448 | if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides | ||||
449 | my $triplet = substr($seq, ($partial -4)). "n"; | ||||
450 | if( $triplet eq $CODONGAP ) { | ||||
451 | $protein .= $GAP; | ||||
452 | } elsif (exists $CODONS->{$triplet}) { | ||||
453 | my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1); | ||||
454 | $protein .= $aa; | ||||
455 | } else { | ||||
456 | $protein .= $self->_translate_ambiguous_codon($triplet, $partial); | ||||
457 | } | ||||
458 | } | ||||
459 | return $protein; | ||||
460 | } | ||||
461 | |||||
462 | sub _translate_ambiguous_codon { | ||||
463 | my ($self, $triplet, $partial) = @_; | ||||
464 | $partial ||= 0; | ||||
465 | my $id = $self->id; | ||||
466 | my $aa; | ||||
467 | my @codons = $self->unambiguous_codons($triplet); | ||||
468 | my %aas =(); | ||||
469 | foreach my $codon (@codons) { | ||||
470 | $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1; | ||||
471 | } | ||||
472 | my $count = scalar keys %aas; | ||||
473 | if ( $count == 1 ) { | ||||
474 | $aa = (keys %aas)[0]; | ||||
475 | } | ||||
476 | elsif ( $count == 2 ) { | ||||
477 | if ($aas{'D'} and $aas{'N'}) { | ||||
478 | $aa = 'B'; | ||||
479 | } | ||||
480 | elsif ($aas{'E'} and $aas{'Q'}) { | ||||
481 | $aa = 'Z'; | ||||
482 | } else { | ||||
483 | $partial ? ($aa = '') : ($aa = 'X'); | ||||
484 | } | ||||
485 | } else { | ||||
486 | $partial ? ($aa = '') : ($aa = 'X'); | ||||
487 | } | ||||
488 | return $aa; | ||||
489 | } | ||||
490 | |||||
491 | =head2 translate_strict | ||||
492 | |||||
493 | Title : translate_strict | ||||
494 | Usage : $obj->translate_strict('ACT') | ||||
495 | Function: returns one letter amino acid code for a codon input | ||||
496 | |||||
497 | Fast and simple translation. User is responsible to resolve | ||||
498 | ambiguous nucleotide codes before calling this | ||||
499 | method. Returns 'X' for unknown codons and an empty string | ||||
500 | for input strings that are not three characters long. | ||||
501 | |||||
502 | It is not recommended to use this method in a production | ||||
503 | environment. Use method translate, instead. | ||||
504 | |||||
505 | Example : | ||||
506 | Returns : A string | ||||
507 | Args : a codon = a three nucleotide character string | ||||
508 | |||||
509 | |||||
510 | =cut | ||||
511 | |||||
512 | sub translate_strict{ | ||||
513 | my ($self, $value) = @_; | ||||
514 | my $id = $self->{'id'}; | ||||
515 | |||||
516 | $value = lc $value; | ||||
517 | $value =~ tr/u/t/; | ||||
518 | |||||
519 | return '' unless length $value == 3; | ||||
520 | |||||
521 | return 'X' unless defined $CODONS->{$value}; | ||||
522 | |||||
523 | return substr( $TABLES[$id-1], $CODONS->{$value}, 1 ); | ||||
524 | } | ||||
525 | |||||
526 | =head2 revtranslate | ||||
527 | |||||
528 | Title : revtranslate | ||||
529 | Usage : $obj->revtranslate('G') | ||||
530 | Function: returns codons for an amino acid | ||||
531 | |||||
532 | Returns an empty string for unknown amino acid | ||||
533 | codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and | ||||
534 | Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three | ||||
535 | letter amino acid codes are accepted. '*' and 'Ter' are | ||||
536 | used for terminator. | ||||
537 | |||||
538 | By default, the output codons are shown in DNA. If the | ||||
539 | output is needed in RNA (tr/t/u/), add a second argument | ||||
540 | 'RNA'. | ||||
541 | |||||
542 | Example : $obj->revtranslate('Gly', 'RNA') | ||||
543 | Returns : An array of three lower case letter strings i.e. codons | ||||
544 | Args : amino acid, 'RNA' | ||||
545 | |||||
546 | =cut | ||||
547 | |||||
548 | sub revtranslate { | ||||
549 | my ($self, $value, $coding) = @_; | ||||
550 | my @codons; | ||||
551 | |||||
552 | if (length($value) == 3 ) { | ||||
553 | $value = lc $value; | ||||
554 | $value = ucfirst $value; | ||||
555 | $value = $THREELETTERSYMBOLS{$value}; | ||||
556 | } | ||||
557 | if ( defined $value and $value =~ /$VALID_PROTEIN/ | ||||
558 | and length($value) == 1 ) { | ||||
559 | my $id = $self->{'id'}; | ||||
560 | |||||
561 | $value = uc $value; | ||||
562 | my @aas = @{$IUPAC_AA{$value}}; | ||||
563 | foreach my $aa (@aas) { | ||||
564 | #print $aa, " -2\n"; | ||||
565 | $aa = '\*' if $aa eq '*'; | ||||
566 | while ($TABLES[$id-1] =~ m/$aa/g) { | ||||
567 | my $p = pos $TABLES[$id-1]; | ||||
568 | push (@codons, $TRCOL->{--$p}); | ||||
569 | } | ||||
570 | } | ||||
571 | } | ||||
572 | |||||
573 | if ($coding and uc ($coding) eq 'RNA') { | ||||
574 | for my $i (0..$#codons) { | ||||
575 | $codons[$i] =~ tr/t/u/; | ||||
576 | } | ||||
577 | } | ||||
578 | |||||
579 | return @codons; | ||||
580 | } | ||||
581 | |||||
582 | =head2 reverse_translate_all | ||||
583 | |||||
584 | Title : reverse_translate_all | ||||
585 | Usage : my $iup_str = $cttable->reverse_translate_all($seq_object) | ||||
586 | my $iup_str = $cttable->reverse_translate_all($seq_object, | ||||
587 | $cutable, | ||||
588 | 15); | ||||
589 | Function: reverse translates a protein sequence into IUPAC nucleotide | ||||
590 | sequence. An 'X' in the protein sequence is converted to 'NNN' | ||||
591 | in the nucleotide sequence. | ||||
592 | Returns : a string | ||||
593 | Args : a Bio::PrimarySeqI compatible object (mandatory) | ||||
594 | a Bio::CodonUsage::Table object and a threshold if only | ||||
595 | codons with a relative frequency above the threshold are | ||||
596 | to be considered. | ||||
597 | =cut | ||||
598 | |||||
599 | sub reverse_translate_all { | ||||
600 | |||||
601 | my ($self, $obj, $cut, $threshold) = @_; | ||||
602 | |||||
603 | ## check args are OK | ||||
604 | |||||
605 | if (!$obj || !$obj->isa('Bio::PrimarySeqI')){ | ||||
606 | $self->throw(" I need a Bio::PrimarySeqI object, not a [". | ||||
607 | ref($obj) . "]"); | ||||
608 | } | ||||
609 | if($obj->alphabet ne 'protein') { | ||||
610 | $self->throw("Cannot reverse translate, need an amino acid sequence .". | ||||
611 | "This sequence is of type [" . $obj->alphabet ."]"); | ||||
612 | } | ||||
613 | my @data; | ||||
614 | my @seq = split '', $obj->seq; | ||||
615 | |||||
616 | ## if we're not supplying a codon usage table... | ||||
617 | if( !$cut && !$threshold) { | ||||
618 | ## get lists of possible codons for each aa. | ||||
619 | for my $aa (@seq) { | ||||
620 | if ($aa =~ /x/i) { | ||||
621 | push @data, (['NNN']); | ||||
622 | }else { | ||||
623 | my @cods = $self->revtranslate($aa); | ||||
624 | push @data, \@cods; | ||||
625 | } | ||||
626 | } | ||||
627 | }else{ | ||||
628 | #else we are supplying a codon usage table, we just want common codons | ||||
629 | #check args first. | ||||
630 | if(!$cut->isa('Bio::CodonUsage::Table')) { | ||||
631 | $self->throw("I need a Bio::CodonUsage::Table object, not a [". | ||||
632 | ref($cut). "]."); | ||||
633 | } | ||||
634 | my $cod_ref = $cut->probable_codons($threshold); | ||||
635 | for my $aa (@seq) { | ||||
636 | if ($aa =~ /x/i) { | ||||
637 | push @data, (['NNN']); | ||||
638 | next; | ||||
639 | } | ||||
640 | push @data, $cod_ref->{$aa}; | ||||
641 | } | ||||
642 | } | ||||
643 | |||||
644 | return $self->_make_iupac_string(\@data); | ||||
645 | |||||
646 | } | ||||
647 | |||||
648 | =head2 reverse_translate_best | ||||
649 | |||||
650 | Title : reverse_translate_best | ||||
651 | Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable); | ||||
652 | Function: Reverse translates a protein sequence into plain nucleotide | ||||
653 | sequence (GATC), uses the most common codon for each amino acid | ||||
654 | Returns : A string | ||||
655 | Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object | ||||
656 | |||||
657 | =cut | ||||
658 | |||||
659 | sub reverse_translate_best { | ||||
660 | |||||
661 | my ($self, $obj, $cut) = @_; | ||||
662 | |||||
663 | if (!$obj || !$obj->isa('Bio::PrimarySeqI')){ | ||||
664 | $self->throw(" I need a Bio::PrimarySeqI object, not a [". | ||||
665 | ref($obj) . "]"); | ||||
666 | } | ||||
667 | if ($obj->alphabet ne 'protein') { | ||||
668 | $self->throw("Cannot reverse translate, need an amino acid sequence .". | ||||
669 | "This sequence is of type [" . $obj->alphabet ."]"); | ||||
670 | } | ||||
671 | if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) { | ||||
672 | $self->throw("I need a Bio::CodonUsage::Table object, not a [". | ||||
673 | ref($cut). "]."); | ||||
674 | } | ||||
675 | |||||
676 | my $str = ''; | ||||
677 | my @seq = split '', $obj->seq; | ||||
678 | |||||
679 | my $cod_ref = $cut->most_common_codons(); | ||||
680 | |||||
681 | for my $aa ( @seq ) { | ||||
682 | if ($aa =~ /x/i) { | ||||
683 | $str .= 'NNN'; | ||||
684 | next; | ||||
685 | } | ||||
686 | if ( defined $cod_ref->{$aa} ) { | ||||
687 | $str .= $cod_ref->{$aa}; | ||||
688 | } else { | ||||
689 | $self->throw("Input sequence contains invalid character: $aa"); | ||||
690 | } | ||||
691 | } | ||||
692 | $str; | ||||
693 | } | ||||
694 | |||||
695 | =head2 is_start_codon | ||||
696 | |||||
697 | Title : is_start_codon | ||||
698 | Usage : $obj->is_start_codon('ATG') | ||||
699 | Function: returns true (1) for all codons that can be used as a | ||||
700 | translation start, false (0) for others. | ||||
701 | Example : $myCodonTable->is_start_codon('ATG') | ||||
702 | Returns : boolean | ||||
703 | Args : codon | ||||
704 | |||||
705 | =cut | ||||
706 | |||||
707 | sub is_start_codon{ | ||||
708 | shift->_codon_is( shift, \@STARTS, 'M' ); | ||||
709 | } | ||||
710 | |||||
711 | =head2 is_ter_codon | ||||
712 | |||||
713 | Title : is_ter_codon | ||||
714 | Usage : $obj->is_ter_codon('GAA') | ||||
715 | Function: returns true (1) for all codons that can be used as a | ||||
716 | translation tarminator, false (0) for others. | ||||
717 | Example : $myCodonTable->is_ter_codon('ATG') | ||||
718 | Returns : boolean | ||||
719 | Args : codon | ||||
720 | |||||
721 | =cut | ||||
722 | |||||
723 | sub is_ter_codon{ | ||||
724 | shift->_codon_is( shift, \@TABLES, $TERMINATOR ); | ||||
725 | } | ||||
726 | |||||
727 | # desc: compares the passed value with a single entry in the given | ||||
728 | # codon table | ||||
729 | # args: a value (typically a three-char string like 'atg'), | ||||
730 | # a reference to the appropriate set of codon tables, | ||||
731 | # a single-character value to check for at the position in the | ||||
732 | # given codon table | ||||
733 | # ret: boolean, true if the given codon table contains the $key at the | ||||
734 | # position corresponding to $value | ||||
735 | sub _codon_is { | ||||
736 | my ($self, $value, $table, $key ) = @_; | ||||
737 | |||||
738 | return 0 unless length $value == 3; | ||||
739 | |||||
740 | $value = lc $value; | ||||
741 | $value =~ tr/u/t/; | ||||
742 | |||||
743 | my $id = $self->{'id'}; | ||||
744 | for my $c ( $self->unambiguous_codons($value) ) { | ||||
745 | my $m = substr( $table->[$id-1], $CODONS->{$c}, 1 ); | ||||
746 | return 0 unless $m eq $key; | ||||
747 | } | ||||
748 | return 1; | ||||
749 | } | ||||
750 | |||||
751 | =head2 is_unknown_codon | ||||
752 | |||||
753 | Title : is_unknown_codon | ||||
754 | Usage : $obj->is_unknown_codon('GAJ') | ||||
755 | Function: returns false (0) for all codons that are valid, | ||||
756 | true (1) for others. | ||||
757 | Example : $myCodonTable->is_unknown_codon('NTG') | ||||
758 | Returns : boolean | ||||
759 | Args : codon | ||||
760 | |||||
761 | |||||
762 | =cut | ||||
763 | |||||
764 | sub is_unknown_codon{ | ||||
765 | my ($self, $value) = @_; | ||||
766 | $value = lc $value; | ||||
767 | $value =~ tr/u/t/; | ||||
768 | return 1 unless $self->unambiguous_codons($value); | ||||
769 | return 0; | ||||
770 | } | ||||
771 | |||||
772 | =head2 unambiguous_codons | ||||
773 | |||||
774 | Title : unambiguous_codons | ||||
775 | Usage : @codons = $self->unambiguous_codons('ACN') | ||||
776 | Returns : array of strings (one-letter unambiguous amino acid codes) | ||||
777 | Args : a codon = a three IUPAC nucleotide character string | ||||
778 | |||||
779 | =cut | ||||
780 | |||||
781 | sub unambiguous_codons{ | ||||
782 | my ($self,$value) = @_; | ||||
783 | my @nts = map { $IUPAC_DNA{uc $_} } split(//, $value); | ||||
784 | |||||
785 | my @codons; | ||||
786 | for my $i ( @{$nts[0]} ) { | ||||
787 | for my $j ( @{$nts[1]} ) { | ||||
788 | for my $k ( @{$nts[2]} ) { | ||||
789 | push @codons, lc "$i$j$k"; | ||||
790 | }}} | ||||
791 | return @codons; | ||||
792 | } | ||||
793 | |||||
794 | =head2 _unambiquous_codons | ||||
795 | |||||
796 | deprecated, now an alias for unambiguous_codons | ||||
797 | |||||
798 | =cut | ||||
799 | |||||
800 | sub _unambiquous_codons { | ||||
801 | unambiguous_codons( undef, @_ ); | ||||
802 | } | ||||
803 | |||||
804 | =head2 add_table | ||||
805 | |||||
806 | Title : add_table | ||||
807 | Usage : $newid = $ct->add_table($name, $table, $starts) | ||||
808 | Function: Add a custom Codon Table into the object. | ||||
809 | Know what you are doing, only the length of | ||||
810 | the argument strings is checked! | ||||
811 | Returns : the id of the new codon table | ||||
812 | Args : name, a string, optional (can be empty) | ||||
813 | table, a string of 64 characters | ||||
814 | startcodons, a string of 64 characters, defaults to standard | ||||
815 | |||||
816 | =cut | ||||
817 | |||||
818 | sub add_table { | ||||
819 | my ($self, $name, $table, $starts) = @_; | ||||
820 | |||||
821 | $name ||= 'Custom'. scalar @NAMES + 1; | ||||
822 | $starts ||= $STARTS[0]; | ||||
823 | $self->throw('Suspect input!') | ||||
824 | unless length($table) == 64 and length($starts) == 64; | ||||
825 | |||||
826 | push @NAMES, $name; | ||||
827 | push @TABLES, $table; | ||||
828 | push @STARTS, $starts; | ||||
829 | |||||
830 | return scalar @NAMES; | ||||
831 | |||||
832 | } | ||||
833 | |||||
834 | sub _make_iupac_string { | ||||
835 | |||||
836 | my ($self, $cod_ref) = @_; | ||||
837 | if(ref($cod_ref) ne 'ARRAY') { | ||||
838 | $self->throw(" I need a reference to a list of references to codons, ". | ||||
839 | " not a [". ref($cod_ref) . "]."); | ||||
840 | } | ||||
841 | my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub(); | ||||
842 | my $iupac_string = ''; ## the string to be returned | ||||
843 | for my $aa (@$cod_ref) { | ||||
844 | |||||
845 | ## scan through codon positions, record the differing values, | ||||
846 | # then look up in the iub hash | ||||
847 | for my $index(0..2) { | ||||
848 | my %h; | ||||
849 | map { my $k = substr($_,$index,1); | ||||
850 | $h{$k} = undef;} @$aa; | ||||
851 | my $lookup_key = join '', sort{$a cmp $b}keys %h; | ||||
852 | |||||
853 | ## extend string | ||||
854 | $iupac_string .= $iupac_hash{uc$lookup_key}; | ||||
855 | } | ||||
856 | } | ||||
857 | return $iupac_string; | ||||
858 | |||||
859 | } | ||||
860 | |||||
861 | |||||
862 | 1 | 3µs | 1; |