Filename | /Users/ap13/perl5/lib/perl5/Bio/Seq.pm |
Statements | Executed 12 statements in 1.41ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 12µs | 24µs | BEGIN@459 | Bio::Seq::
1 | 1 | 1 | 10µs | 10µs | BEGIN@462 | Bio::Seq::
1 | 1 | 1 | 8µs | 1.65ms | BEGIN@464 | Bio::Seq::
1 | 1 | 1 | 5µs | 5µs | BEGIN@461 | Bio::Seq::
0 | 0 | 0 | 0s | 0s | DESTROY | Bio::Seq::
0 | 0 | 0 | 0s | 0s | accession | Bio::Seq::
0 | 0 | 0 | 0s | 0s | accession_number | Bio::Seq::
0 | 0 | 0 | 0s | 0s | add_Annotation | Bio::Seq::
0 | 0 | 0 | 0s | 0s | add_SeqFeature | Bio::Seq::
0 | 0 | 0 | 0s | 0s | all_SeqFeatures | Bio::Seq::
0 | 0 | 0 | 0s | 0s | alphabet | Bio::Seq::
0 | 0 | 0 | 0s | 0s | annotation | Bio::Seq::
0 | 0 | 0 | 0s | 0s | authority | Bio::Seq::
0 | 0 | 0 | 0s | 0s | can_call_new | Bio::Seq::
0 | 0 | 0 | 0s | 0s | desc | Bio::Seq::
0 | 0 | 0 | 0s | 0s | description | Bio::Seq::
0 | 0 | 0 | 0s | 0s | display_id | Bio::Seq::
0 | 0 | 0 | 0s | 0s | display_name | Bio::Seq::
0 | 0 | 0 | 0s | 0s | feature_count | Bio::Seq::
0 | 0 | 0 | 0s | 0s | get_Annotations | Bio::Seq::
0 | 0 | 0 | 0s | 0s | get_SeqFeatures | Bio::Seq::
0 | 0 | 0 | 0s | 0s | get_num_of_annotations | Bio::Seq::
0 | 0 | 0 | 0s | 0s | id | Bio::Seq::
0 | 0 | 0 | 0s | 0s | is_circular | Bio::Seq::
0 | 0 | 0 | 0s | 0s | length | Bio::Seq::
0 | 0 | 0 | 0s | 0s | namespace | Bio::Seq::
0 | 0 | 0 | 0s | 0s | new | Bio::Seq::
0 | 0 | 0 | 0s | 0s | num_Annotations | Bio::Seq::
0 | 0 | 0 | 0s | 0s | object_id | Bio::Seq::
0 | 0 | 0 | 0s | 0s | primary_id | Bio::Seq::
0 | 0 | 0 | 0s | 0s | primary_seq | Bio::Seq::
0 | 0 | 0 | 0s | 0s | remove_Annotations | Bio::Seq::
0 | 0 | 0 | 0s | 0s | remove_SeqFeatures | Bio::Seq::
0 | 0 | 0 | 0s | 0s | seq | Bio::Seq::
0 | 0 | 0 | 0s | 0s | species | Bio::Seq::
0 | 0 | 0 | 0s | 0s | subseq | Bio::Seq::
0 | 0 | 0 | 0s | 0s | validate_seq | Bio::Seq::
0 | 0 | 0 | 0s | 0s | version | Bio::Seq::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # | ||||
2 | # BioPerl module for Bio::Seq | ||||
3 | # | ||||
4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
5 | # | ||||
6 | # Cared for by Ewan Birney <birney@ebi.ac.uk> | ||||
7 | # | ||||
8 | # Copyright Ewan Birney | ||||
9 | # | ||||
10 | # You may distribute this module under the same terms as perl itself | ||||
11 | |||||
12 | # POD documentation - main docs before the code | ||||
13 | |||||
14 | =head1 NAME | ||||
15 | |||||
16 | Bio::Seq - Sequence object, with features | ||||
17 | |||||
18 | =head1 SYNOPSIS | ||||
19 | |||||
20 | # This is the main sequence object in Bioperl | ||||
21 | |||||
22 | # gets a sequence from a file | ||||
23 | $seqio = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat'); | ||||
24 | $seqobj = $seqio->next_seq(); | ||||
25 | |||||
26 | # SeqIO can both read and write sequences; see Bio::SeqIO | ||||
27 | # for more information and examples | ||||
28 | |||||
29 | # get from database | ||||
30 | $db = Bio::DB::GenBank->new(); | ||||
31 | $seqobj = $db->get_Seq_by_acc('X78121'); | ||||
32 | |||||
33 | # make from strings in script | ||||
34 | $seqobj = Bio::Seq->new( -display_id => 'my_id', | ||||
35 | -seq => $sequence_as_string); | ||||
36 | |||||
37 | # gets sequence as a string from sequence object | ||||
38 | $seqstr = $seqobj->seq(); # actual sequence as a string | ||||
39 | $seqstr = $seqobj->subseq(10,50); # slice in biological coordinates | ||||
40 | |||||
41 | # retrieves information from the sequence | ||||
42 | # features must implement Bio::SeqFeatureI interface | ||||
43 | |||||
44 | @features = $seqobj->get_SeqFeatures(); # just top level | ||||
45 | foreach my $feat ( @features ) { | ||||
46 | print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ", | ||||
47 | $feat->end," strand ",$feat->strand,"\n"; | ||||
48 | |||||
49 | # features retain link to underlying sequence object | ||||
50 | print "Feature sequence is ",$feat->seq->seq(),"\n" | ||||
51 | } | ||||
52 | |||||
53 | # sequences may have a species | ||||
54 | |||||
55 | if( defined( my $species = $seqobj->species ) ) { | ||||
56 | print "Sequence is from ",$species->binomial," [",$species->common_name,"]\n"; | ||||
57 | } | ||||
58 | |||||
59 | # annotation objects are Bio::AnnotationCollectionI's | ||||
60 | $ann = $seqobj->annotation(); # annotation object | ||||
61 | |||||
62 | # references is one type of annotations to get. Also get | ||||
63 | # comment and dblink. Look at Bio::AnnotationCollection for | ||||
64 | # more information | ||||
65 | |||||
66 | foreach my $ref ( $ann->get_Annotations('reference') ) { | ||||
67 | print "Reference ",$ref->title,"\n"; | ||||
68 | } | ||||
69 | |||||
70 | # you can get truncations, translations and reverse complements, these | ||||
71 | # all give back Bio::Seq objects themselves, though currently with no | ||||
72 | # features transferred | ||||
73 | |||||
74 | my $trunc = $seqobj->trunc(100,200); | ||||
75 | my $rev = $seqobj->revcom(); | ||||
76 | |||||
77 | # there are many options to translate - check out the docs | ||||
78 | my $trans = $seqobj->translate(); | ||||
79 | |||||
80 | # these functions can be chained together | ||||
81 | |||||
82 | my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate(); | ||||
83 | |||||
- - | |||||
86 | =head1 DESCRIPTION | ||||
87 | |||||
88 | A Seq object is a sequence with sequence features placed on it. The | ||||
89 | Seq object contains a PrimarySeq object for the actual sequence and | ||||
90 | also implements its interface. | ||||
91 | |||||
92 | In Bioperl we have 3 main players that people are going to use frequently | ||||
93 | |||||
94 | Bio::PrimarySeq - just the sequence and its names, nothing else. | ||||
95 | Bio::SeqFeatureI - a feature on a sequence, potentially with a sequence | ||||
96 | and a location and annotation. | ||||
97 | Bio::Seq - A sequence and a collection of sequence features | ||||
98 | (an aggregate) with its own annotation. | ||||
99 | |||||
100 | Although Bioperl is not tied heavily to file formats these distinctions do | ||||
101 | map to file formats sensibly and for some bioinformaticians this might help | ||||
102 | |||||
103 | Bio::PrimarySeq - Fasta file of a sequence | ||||
104 | Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table | ||||
105 | Bio::Seq - A single EMBL/GenBank/DDBJ entry | ||||
106 | |||||
107 | By having this split we avoid a lot of nasty circular references | ||||
108 | (sequence features can hold a reference to a sequence without the sequence | ||||
109 | holding a reference to the sequence feature). See L<Bio::PrimarySeq> and | ||||
110 | L<Bio::SeqFeatureI> for more information. | ||||
111 | |||||
112 | Ian Korf really helped in the design of the Seq and SeqFeature system. | ||||
113 | |||||
114 | =head2 Examples | ||||
115 | |||||
116 | A simple and fundamental block of code: | ||||
117 | |||||
118 | use Bio::SeqIO; | ||||
119 | |||||
120 | my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object | ||||
121 | my $seqobj = $seqIOobj->next_seq; # get a Seq object | ||||
122 | |||||
123 | With the Seq object in hand one has access to a powerful set of Bioperl | ||||
124 | methods and related Bioperl objects. This next script will take a file of sequences | ||||
125 | in EMBL format and create a file of the reverse-complemented sequences | ||||
126 | in Fasta format using Seq objects. It also prints out details about the | ||||
127 | exons it finds as sequence features in General Feature Format (GFF). | ||||
128 | |||||
129 | use Bio::Seq; | ||||
130 | use Bio::SeqIO; | ||||
131 | |||||
132 | $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | ||||
133 | $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | ||||
134 | |||||
135 | while((my $seqobj = $seqin->next_seq())) { | ||||
136 | print "Seen sequence ",$seqobj->display_id,", start of seq ", | ||||
137 | substr($seqobj->seq,1,10),"\n"; | ||||
138 | if( $seqobj->alphabet eq 'dna') { | ||||
139 | $rev = $seqobj->revcom; | ||||
140 | $id = $seqobj->display_id(); | ||||
141 | $id = "$id.rev"; | ||||
142 | $rev->display_id($id); | ||||
143 | $seqout->write_seq($rev); | ||||
144 | } | ||||
145 | |||||
146 | foreach $feat ( $seqobj->get_SeqFeatures() ) { | ||||
147 | if( $feat->primary_tag eq 'exon' ) { | ||||
148 | print STDOUT "Location ",$feat->start,":", | ||||
149 | $feat->end," GFF[",$feat->gff_string,"]\n"; | ||||
150 | } | ||||
151 | } | ||||
152 | } | ||||
153 | |||||
154 | Let's examine the script. The lines below import the Bioperl modules. | ||||
155 | Seq is the main Bioperl sequence object and SeqIO is the Bioperl support | ||||
156 | for reading sequences from files and writing them to files. | ||||
157 | |||||
158 | use Bio::Seq; | ||||
159 | use Bio::SeqIO; | ||||
160 | |||||
161 | These two lines create two SeqIO streams: one for reading in sequences | ||||
162 | and one for outputting sequences: | ||||
163 | |||||
164 | $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | ||||
165 | $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | ||||
166 | |||||
167 | Notice that in the "$seqout" case there is a greater-than sign, | ||||
168 | indicating the file is being opened for writing. | ||||
169 | |||||
170 | Using the | ||||
171 | |||||
172 | '-argument' => value | ||||
173 | |||||
174 | syntax is common in Bioperl. The file argument is like an argument | ||||
175 | to open() . You can also pass in filehandles or FileHandle objects by | ||||
176 | using the -fh argument (see L<Bio::SeqIO> documentation for details). | ||||
177 | Many formats in Bioperl are handled, including Fasta, EMBL, GenBank, | ||||
178 | Swissprot (swiss), PIR, and GCG. | ||||
179 | |||||
180 | $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | ||||
181 | $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | ||||
182 | |||||
183 | This is the main loop which will loop progressively through sequences | ||||
184 | in a file, and each call to $seqin-E<gt>next_seq() provides a new Seq | ||||
185 | object from the file: | ||||
186 | |||||
187 | while((my $seqobj = $seqin->next_seq())) { | ||||
188 | |||||
189 | This print line below accesses fields in the Seq object directly. The | ||||
190 | $seqobj-E<gt>display_id is the way to access the display_id attribute | ||||
191 | of the Seq object. The $seqobj-E<gt>seq method gets the actual | ||||
192 | sequence out as string. Then you can do manipulation of this if | ||||
193 | you want to (there are however easy ways of doing truncation, | ||||
194 | reverse-complement and translation). | ||||
195 | |||||
196 | print "Seen sequence ",$seqobj->display_id,", start of seq ", | ||||
197 | substr($seqobj->seq,1,10),"\n"; | ||||
198 | |||||
199 | Bioperl has to guess the alphabet of the sequence, being either 'dna', | ||||
200 | 'rna', or 'protein'. The alphabet attribute is one of these three | ||||
201 | possibilities. | ||||
202 | |||||
203 | if( $seqobj->alphabet eq 'dna') { | ||||
204 | |||||
205 | The $seqobj-E<gt>revcom method provides the reverse complement of the Seq | ||||
206 | object as another Seq object. Thus, the $rev variable is a reference to | ||||
207 | another Seq object. For example, one could repeat the above print line | ||||
208 | for this Seq object (putting $rev in place of $seqobj). In this | ||||
209 | case we are going to output the object into the file stream we built | ||||
210 | earlier on. | ||||
211 | |||||
212 | $rev = $seqobj->revcom; | ||||
213 | |||||
214 | When we output it, we want the id of the outputted object | ||||
215 | to be changed to "$id.rev", ie, with .rev on the end of the name. The | ||||
216 | following lines retrieve the id of the sequence object, add .rev | ||||
217 | to this and then set the display_id of the rev sequence object to | ||||
218 | this. Notice that to set the display_id attribute you just need to | ||||
219 | call the same method, display_id(), with the new value as an argument. | ||||
220 | Getting and setting values with the same method is common in Bioperl. | ||||
221 | |||||
222 | $id = $seqobj->display_id(); | ||||
223 | $id = "$id.rev"; | ||||
224 | $rev->display_id($id); | ||||
225 | |||||
226 | The write_seq method on the SeqIO output object, $seqout, writes the | ||||
227 | $rev object to the filestream we built at the top of the script. | ||||
228 | The filestream knows that it is outputting in fasta format, and | ||||
229 | so it provides fasta output. | ||||
230 | |||||
231 | $seqout->write_seq($rev); | ||||
232 | |||||
233 | This block of code loops over sequence features in the sequence | ||||
234 | object, trying to find ones that have been tagged as 'exon'. | ||||
235 | Features have start and end attributes and can be outputted | ||||
236 | in General Feature Format, GFF, a standardized format for sequence | ||||
237 | features. | ||||
238 | |||||
239 | foreach $feat ( $seqobj->get_SeqFeatures() ) { | ||||
240 | if( $feat->primary_tag eq 'exon' ) { | ||||
241 | print STDOUT "Location ",$feat->start,":", | ||||
242 | $feat->end," GFF[",$feat->gff_string,"]\n"; | ||||
243 | } | ||||
244 | } | ||||
245 | |||||
246 | The code above shows how a few Bio::Seq methods suffice to read, parse, | ||||
247 | reformat and analyze sequences from a file. A full list of methods | ||||
248 | available to Bio::Seq objects is shown below. Bear in mind that some of | ||||
249 | these methods come from PrimarySeq objects, which are simpler | ||||
250 | than Seq objects, stripped of features (see L<Bio::PrimarySeq> for | ||||
251 | more information). | ||||
252 | |||||
253 | # these methods return strings, and accept strings in some cases: | ||||
254 | |||||
255 | $seqobj->seq(); # string of sequence | ||||
256 | $seqobj->subseq(5,10); # part of the sequence as a string | ||||
257 | $seqobj->accession_number(); # when there, the accession number | ||||
258 | $seqobj->alphabet(); # one of 'dna','rna',or 'protein' | ||||
259 | $seqobj->version() # when there, the version | ||||
260 | $seqobj->keywords(); # when there, the Keywords line | ||||
261 | $seqobj->length() # length | ||||
262 | $seqobj->desc(); # description | ||||
263 | $seqobj->primary_id(); # a unique id for this sequence regardless | ||||
264 | # of its display_id or accession number | ||||
265 | $seqobj->display_id(); # the human readable id of the sequence | ||||
266 | |||||
267 | Some of these values map to fields in common formats. For example, the | ||||
268 | display_id() method returns the LOCUS name of a Genbank entry, | ||||
269 | the (\S+) following the E<gt> character in a Fasta file, the ID from | ||||
270 | a SwissProt file, and so on. The desc() method will return the DEFINITION | ||||
271 | line of a Genbank file, the description following the display_id in a | ||||
272 | Fasta file, and the DE field in a SwissProt file. | ||||
273 | |||||
274 | # the following methods return new Seq objects, but | ||||
275 | # do not transfer features across to the new object: | ||||
276 | |||||
277 | $seqobj->trunc(5,10) # truncation from 5 to 10 as new object | ||||
278 | $seqobj->revcom # reverse complements sequence | ||||
279 | $seqobj->translate # translation of the sequence | ||||
280 | |||||
281 | # if new() can be called this method returns 1, else 0 | ||||
282 | |||||
283 | $seqobj->can_call_new | ||||
284 | |||||
285 | # the following method determines if the given string will be accepted | ||||
286 | # by the seq() method - if the string is acceptable then validate_seq() | ||||
287 | # returns 1, or 0 if not | ||||
288 | |||||
289 | $seqobj->validate_seq($string) | ||||
290 | |||||
291 | # the following method returns or accepts a Species object: | ||||
292 | |||||
293 | $seqobj->species(); | ||||
294 | |||||
295 | Please see L<Bio::Species> for more information on this object. | ||||
296 | |||||
297 | # the following method returns or accepts an Annotation object | ||||
298 | # which in turn allows access to Annotation::Reference | ||||
299 | # and Annotation::Comment objects: | ||||
300 | |||||
301 | $seqobj->annotation(); | ||||
302 | |||||
303 | These annotations typically refer to entire sequences, unlike | ||||
304 | features. See L<Bio::AnnotationCollectionI>, | ||||
305 | L<Bio::Annotation::Collection>, L<Bio::Annotation::Reference>, and | ||||
306 | L<Bio::Annotation::Comment> for details. | ||||
307 | |||||
308 | It is also important to be able to describe defined portions of a | ||||
309 | sequence. The combination of some description and the corresponding | ||||
310 | sub-sequence is called a feature - an exon and its coordinates within | ||||
311 | a gene is an example of a feature, or a domain within a protein. | ||||
312 | |||||
313 | # the following methods return an array of SeqFeatureI objects: | ||||
314 | |||||
315 | $seqobj->get_SeqFeatures # The 'top level' sequence features | ||||
316 | $seqobj->get_all_SeqFeatures # All sequence features, including sub-seq | ||||
317 | # features, such as features in an exon | ||||
318 | |||||
319 | # to find out the number of features use: | ||||
320 | |||||
321 | $seqobj->feature_count | ||||
322 | |||||
323 | Here are just some of the methods available to SeqFeatureI objects: | ||||
324 | |||||
325 | # these methods return numbers: | ||||
326 | |||||
327 | $feat->start # start position (1 is the first base) | ||||
328 | $feat->end # end position (2 is the second base) | ||||
329 | $feat->strand # 1 means forward, -1 reverse, 0 not relevant | ||||
330 | |||||
331 | # these methods return or accept strings: | ||||
332 | |||||
333 | $feat->primary_tag # the name of the sequence feature, eg | ||||
334 | # 'exon', 'glycosylation site', 'TM domain' | ||||
335 | $feat->source_tag # where the feature comes from, eg, 'EMBL_GenBank', | ||||
336 | # or 'BLAST' | ||||
337 | |||||
338 | # this method returns the more austere PrimarySeq object, not a | ||||
339 | # Seq object - the main difference is that PrimarySeq objects do not | ||||
340 | # themselves contain sequence features | ||||
341 | |||||
342 | $feat->seq # the sequence between start,end on the | ||||
343 | # correct strand of the sequence | ||||
344 | |||||
345 | See L<Bio::PrimarySeq> for more details on PrimarySeq objects. | ||||
346 | |||||
347 | # useful methods for feature comparisons, for start/end points | ||||
348 | |||||
349 | $feat->overlaps($other) # do $feat and $other overlap? | ||||
350 | $feat->contains($other) # is $other completely within $feat? | ||||
351 | $feat->equals($other) # do $feat and $other completely agree? | ||||
352 | |||||
353 | # one can also add features | ||||
354 | |||||
355 | $seqobj->add_SeqFeature($feat) # returns 1 if successful | ||||
356 | |||||
357 | # sub features. For complex join() statements, the feature | ||||
358 | # is one sequence feature with many sub SeqFeatures | ||||
359 | |||||
360 | $feat->sub_SeqFeature # returns array of sub seq features | ||||
361 | |||||
362 | Please see L<Bio::SeqFeatureI> and L<Bio::SeqFeature::Generic>, | ||||
363 | for more information on sequence features. | ||||
364 | |||||
365 | It is worth mentioning that one can also retrieve the start and end | ||||
366 | positions of a feature using a Bio::LocationI object: | ||||
367 | |||||
368 | $location = $feat->location # $location is a Bio::LocationI object | ||||
369 | $location->start; # start position | ||||
370 | $location->end; # end position | ||||
371 | |||||
372 | This is useful because one needs a Bio::Location::SplitLocationI object | ||||
373 | in order to retrieve the coordinates inside the Genbank or EMBL join() | ||||
374 | statements (e.g. "CDS join(51..142,273..495,1346..1474)"): | ||||
375 | |||||
376 | if ( $feat->location->isa('Bio::Location::SplitLocationI') && | ||||
377 | $feat->primary_tag eq 'CDS' ) { | ||||
378 | foreach $loc ( $feat->location->sub_Location ) { | ||||
379 | print $loc->start . ".." . $loc->end . "\n"; | ||||
380 | } | ||||
381 | } | ||||
382 | |||||
383 | See L<Bio::LocationI> and L<Bio::Location::SplitLocationI> for more | ||||
384 | information. | ||||
385 | |||||
386 | =head1 Implemented Interfaces | ||||
387 | |||||
388 | This class implements the following interfaces. | ||||
389 | |||||
390 | =over 4 | ||||
391 | |||||
392 | =item Bio::SeqI | ||||
393 | |||||
394 | Note that this includes implementing Bio::PrimarySeqI. | ||||
395 | |||||
396 | =item Bio::IdentifiableI | ||||
397 | |||||
398 | =item Bio::DescribableI | ||||
399 | |||||
400 | =item Bio::AnnotatableI | ||||
401 | |||||
402 | =item Bio::FeatureHolderI | ||||
403 | |||||
404 | =back | ||||
405 | |||||
406 | =head1 FEEDBACK | ||||
407 | |||||
408 | |||||
409 | =head2 Mailing Lists | ||||
410 | |||||
411 | User feedback is an integral part of the evolution of this and other | ||||
412 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
413 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
414 | |||||
415 | bioperl-l@bioperl.org - General discussion | ||||
416 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
417 | |||||
418 | =head2 Support | ||||
419 | |||||
420 | Please direct usage questions or support issues to the mailing list: | ||||
421 | |||||
422 | I<bioperl-l@bioperl.org> | ||||
423 | |||||
424 | rather than to the module maintainer directly. Many experienced and | ||||
425 | responsive experts will be able to look at the problem and quickly | ||||
426 | address it. Please include a thorough description of the problem | ||||
427 | with code and data examples if at all possible. | ||||
428 | |||||
429 | =head2 Reporting Bugs | ||||
430 | |||||
431 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
432 | of the bugs and their resolution. Bug reports can be submitted via the | ||||
433 | web: | ||||
434 | |||||
435 | https://github.com/bioperl/bioperl-live/issues | ||||
436 | |||||
437 | =head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects | ||||
438 | |||||
439 | Email birney@ebi.ac.uk | ||||
440 | |||||
441 | =head1 CONTRIBUTORS | ||||
442 | |||||
443 | Jason Stajich E<lt>jason@bioperl.orgE<gt> | ||||
444 | Mark A. Jensen maj -at- fortinbras -dot- us | ||||
445 | |||||
446 | =head1 APPENDIX | ||||
447 | |||||
448 | |||||
449 | The rest of the documentation details each of the object | ||||
450 | methods. Internal methods are usually preceded with a "_". | ||||
451 | |||||
452 | =cut | ||||
453 | |||||
454 | #' | ||||
455 | # Let the code begin... | ||||
456 | |||||
457 | |||||
458 | package Bio::Seq; | ||||
459 | 2 | 22µs | 2 | 36µs | # spent 24µs (12+12) within Bio::Seq::BEGIN@459 which was called:
# once (12µs+12µs) by Bio::DB::InMemoryCache::BEGIN@75 at line 459 # spent 24µs making 1 call to Bio::Seq::BEGIN@459
# spent 12µs making 1 call to strict::import |
460 | |||||
461 | 2 | 20µs | 1 | 5µs | # spent 5µs within Bio::Seq::BEGIN@461 which was called:
# once (5µs+0s) by Bio::DB::InMemoryCache::BEGIN@75 at line 461 # spent 5µs making 1 call to Bio::Seq::BEGIN@461 |
462 | 2 | 31µs | 1 | 10µs | # spent 10µs within Bio::Seq::BEGIN@462 which was called:
# once (10µs+0s) by Bio::DB::InMemoryCache::BEGIN@75 at line 462 # spent 10µs making 1 call to Bio::Seq::BEGIN@462 |
463 | |||||
464 | 2 | 1.32ms | 2 | 1.65ms | # spent 1.65ms (8µs+1.64) within Bio::Seq::BEGIN@464 which was called:
# once (8µs+1.64ms) by Bio::DB::InMemoryCache::BEGIN@75 at line 464 # spent 1.65ms making 1 call to Bio::Seq::BEGIN@464
# spent 1.64ms making 1 call to base::import, recursion: max depth 1, sum of overlapping time 1.64ms |
465 | |||||
466 | =head2 new | ||||
467 | |||||
468 | Title : new | ||||
469 | Usage : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT', | ||||
470 | -id => 'human_id', | ||||
471 | -accession_number => 'AL000012', | ||||
472 | ); | ||||
473 | |||||
474 | Function: Returns a new Seq object from | ||||
475 | basic constructors, being a string for the sequence | ||||
476 | and strings for id and accession_number | ||||
477 | Returns : a new Bio::Seq object | ||||
478 | |||||
479 | =cut | ||||
480 | |||||
481 | sub new { | ||||
482 | my($caller,@args) = @_; | ||||
483 | |||||
484 | if( $caller ne 'Bio::Seq') { | ||||
485 | $caller = ref($caller) if ref($caller); | ||||
486 | } | ||||
487 | |||||
488 | # we know our inheritance hierarchy | ||||
489 | my $self = Bio::Root::Root->new(@args); | ||||
490 | bless $self,$caller; | ||||
491 | |||||
492 | # this is way too sneaky probably. We delegate the construction of | ||||
493 | # the Seq object onto PrimarySeq and then pop primary_seq into | ||||
494 | # our primary_seq slot | ||||
495 | |||||
496 | my $pseq = Bio::PrimarySeq->new(@args); | ||||
497 | |||||
498 | # as we have just made this, we know it is ok to set hash directly | ||||
499 | # rather than going through the method | ||||
500 | |||||
501 | $self->{'primary_seq'} = $pseq; | ||||
502 | |||||
503 | # setting this array is now delayed until the final | ||||
504 | # moment, again speed ups for non feature containing things | ||||
505 | # $self->{'_as_feat'} = []; | ||||
506 | |||||
507 | |||||
508 | my ($ann, $pid,$feat,$species) = &Bio::Root::RootI::_rearrange($self,[qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args); | ||||
509 | |||||
510 | # for a number of cases - reading fasta files - these are never set. This | ||||
511 | # gives a quick optimisation around testing things later on | ||||
512 | |||||
513 | if( defined $ann || defined $pid || defined $feat || defined $species ) { | ||||
514 | $pid && $self->primary_id($pid); | ||||
515 | $species && $self->species($species); | ||||
516 | $ann && $self->annotation($ann); | ||||
517 | |||||
518 | if( defined $feat ) { | ||||
519 | if( ref($feat) !~ /ARRAY/i ) { | ||||
520 | if( ref($feat) && $feat->isa('Bio::SeqFeatureI') ) { | ||||
521 | $self->add_SeqFeature($feat); | ||||
522 | } else { | ||||
523 | $self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self)); | ||||
524 | } | ||||
525 | } else { | ||||
526 | foreach my $feature ( @$feat ) { | ||||
527 | $self->add_SeqFeature($feature); | ||||
528 | } | ||||
529 | } | ||||
530 | } | ||||
531 | } | ||||
532 | |||||
533 | return $self; | ||||
534 | } | ||||
535 | |||||
536 | |||||
537 | =head1 PrimarySeq interface | ||||
538 | |||||
539 | |||||
540 | The PrimarySeq interface provides the basic sequence getting | ||||
541 | and setting methods for all sequences. | ||||
542 | |||||
543 | These methods implement the Bio::PrimarySeq interface by delegating | ||||
544 | to the primary_seq inside the object. This means that you | ||||
545 | can use a Seq object wherever there is a PrimarySeq, and | ||||
546 | of course, you are free to use these functions anyway. | ||||
547 | |||||
548 | =cut | ||||
549 | |||||
550 | =head2 seq | ||||
551 | |||||
552 | Title : seq | ||||
553 | Usage : $string = $obj->seq() | ||||
554 | Function: Get/Set the sequence as a string of letters. The | ||||
555 | case of the letters is left up to the implementer. | ||||
556 | Suggested cases are upper case for proteins and lower case for | ||||
557 | DNA sequence (IUPAC standard), | ||||
558 | but implementations are suggested to keep an open mind about | ||||
559 | case (some users... want mixed case!) | ||||
560 | Returns : A scalar | ||||
561 | Args : Optionally on set the new value (a string). An optional second | ||||
562 | argument presets the alphabet (otherwise it will be guessed). | ||||
563 | Both parameters may also be given in named parameter style | ||||
564 | with -seq and -alphabet being the names. | ||||
565 | |||||
566 | =cut | ||||
567 | |||||
568 | sub seq { | ||||
569 | return shift->primary_seq()->seq(@_); | ||||
570 | } | ||||
571 | |||||
572 | |||||
573 | =head2 validate_seq | ||||
574 | |||||
575 | Title : validate_seq | ||||
576 | Usage : if(! $seqobj->validate_seq($seq_str) ) { | ||||
577 | print "sequence $seq_str is not valid for an object of | ||||
578 | alphabet ",$seqobj->alphabet, "\n"; | ||||
579 | } | ||||
580 | Function: Test that the given sequence is valid, i.e. contains only valid | ||||
581 | characters. The allowed characters are all letters (A-Z) and '-','.', | ||||
582 | '*','?','=' and '~'. Spaces are not valid. Note that this | ||||
583 | implementation does not take alphabet() into account. | ||||
584 | Returns : 1 if the supplied sequence string is valid, 0 otherwise. | ||||
585 | Args : - Sequence string to be validated | ||||
586 | - Boolean to throw an error if the sequence is invalid | ||||
587 | |||||
588 | =cut | ||||
589 | |||||
590 | sub validate_seq { | ||||
591 | return shift->primary_seq()->validate_seq(@_); | ||||
592 | } | ||||
593 | |||||
594 | |||||
595 | =head2 length | ||||
596 | |||||
597 | Title : length | ||||
598 | Usage : $len = $seq->length() | ||||
599 | Function: | ||||
600 | Example : | ||||
601 | Returns : Integer representing the length of the sequence. | ||||
602 | Args : None | ||||
603 | |||||
604 | =cut | ||||
605 | |||||
606 | sub length { | ||||
607 | return shift->primary_seq()->length(@_); | ||||
608 | } | ||||
609 | |||||
610 | |||||
611 | =head1 Methods from the Bio::PrimarySeqI interface | ||||
612 | |||||
613 | =head2 subseq | ||||
614 | |||||
615 | Title : subseq | ||||
616 | Usage : $substring = $obj->subseq(10,40); | ||||
617 | Function: Returns the subseq from start to end, where the first base | ||||
618 | is 1 and the number is inclusive, ie 1-2 are the first two | ||||
619 | bases of the sequence | ||||
620 | |||||
621 | Start cannot be larger than end but can be equal | ||||
622 | |||||
623 | Returns : A string | ||||
624 | Args : 2 integers | ||||
625 | |||||
626 | |||||
627 | =cut | ||||
628 | |||||
629 | sub subseq { | ||||
630 | return shift->primary_seq()->subseq(@_); | ||||
631 | } | ||||
632 | |||||
633 | |||||
634 | =head2 display_id | ||||
635 | |||||
636 | Title : display_id | ||||
637 | Usage : $id = $obj->display_id or $obj->display_id($newid); | ||||
638 | Function: Gets or sets the display id, also known as the common name of | ||||
639 | the Seq object. | ||||
640 | |||||
641 | The semantics of this is that it is the most likely string | ||||
642 | to be used as an identifier of the sequence, and likely to | ||||
643 | have "human" readability. The id is equivalent to the LOCUS | ||||
644 | field of the GenBank/EMBL databanks and the ID field of the | ||||
645 | Swissprot/sptrembl database. In fasta format, the >(\S+) is | ||||
646 | presumed to be the id, though some people overload the id | ||||
647 | to embed other information. Bioperl does not use any | ||||
648 | embedded information in the ID field, and people are | ||||
649 | encouraged to use other mechanisms (accession field for | ||||
650 | example, or extending the sequence object) to solve this. | ||||
651 | |||||
652 | Notice that $seq->id() maps to this function, mainly for | ||||
653 | legacy/convenience issues. | ||||
654 | Returns : A string | ||||
655 | Args : None or a new id | ||||
656 | |||||
657 | =cut | ||||
658 | |||||
659 | sub display_id { | ||||
660 | return shift->primary_seq->display_id(@_); | ||||
661 | } | ||||
662 | |||||
663 | |||||
664 | =head2 accession_number | ||||
665 | |||||
666 | Title : accession_number | ||||
667 | Usage : $unique_biological_key = $obj->accession_number; | ||||
668 | Function: Returns the unique biological id for a sequence, commonly | ||||
669 | called the accession_number. For sequences from established | ||||
670 | databases, the implementors should try to use the correct | ||||
671 | accession number. Notice that primary_id() provides the | ||||
672 | unique id for the implementation, allowing multiple objects | ||||
673 | to have the same accession number in a particular implementation. | ||||
674 | |||||
675 | For sequences with no accession number, this method should return | ||||
676 | "unknown". | ||||
677 | |||||
678 | Can also be used to set the accession number. | ||||
679 | Example : $key = $seq->accession_number or $seq->accession_number($key) | ||||
680 | Returns : A string | ||||
681 | Args : None or an accession number | ||||
682 | |||||
683 | =cut | ||||
684 | |||||
sub accession_number {
    # Get/set accessor that is answered entirely by the object returned
    # from primary_seq(); arguments are passed through unchanged.
    my $self = shift;
    my $pseq = $self->primary_seq;
    return $pseq->accession_number(@_);
}
688 | |||||
689 | |||||
690 | =head2 desc | ||||
691 | |||||
692 | Title : desc | ||||
693 | Usage : $seqobj->desc($string) or $seqobj->desc() | ||||
694 | Function: Sets or gets the description of the sequence | ||||
695 | Example : | ||||
696 | Returns : The description | ||||
697 | Args : The description or none | ||||
698 | |||||
699 | =cut | ||||
700 | |||||
sub desc {
    # The description is held by the object returned from primary_seq();
    # this method only relays the call and its arguments.
    my $self = shift;
    my $pseq = $self->primary_seq;
    return $pseq->desc(@_);
}
704 | |||||
705 | |||||
706 | =head2 primary_id | ||||
707 | |||||
708 | Title : primary_id | ||||
709 | Usage : $unique_implementation_key = $obj->primary_id; | ||||
710 | Function: Returns the unique id for this object in this | ||||
711 | implementation. This allows implementations to manage | ||||
712 | their own object ids in a way the implementation can control; | ||||
713 | clients can expect one id to map to one object. | ||||
714 | |||||
715 | For sequences with no natural id, this method should return | ||||
716 | a stringified memory location. | ||||
717 | |||||
718 | Can also be used to set the primary_id (or unset to undef). | ||||
719 | |||||
720 | [Note this method name is likely to change in 1.3] | ||||
721 | |||||
722 | Example : $id = $seq->primary_id or $seq->primary_id($id) | ||||
723 | Returns : A string | ||||
724 | Args : None or an id, or undef to unset the primary id. | ||||
725 | |||||
726 | =cut | ||||
727 | |||||
sub primary_id {
    # Always delegate to the PrimarySeq object.
    #
    # An earlier version of this method did not delegate, which was bad
    # in subtle ways: if the object was created with a primary id given
    # to the constructor and the id was changed later, the PrimarySeq
    # object and this instance ended up holding different primary ids.
    #
    # Whatever advantage not delegating once had, its problems were judged
    # to far outweigh it. (Rationale recorded by HL, 2004/08/05.)
    my $self = shift;
    my $pseq = $self->primary_seq;
    return $pseq->primary_id(@_);
}
743 | |||||
744 | |||||
745 | =head2 can_call_new | ||||
746 | |||||
747 | Title : can_call_new | ||||
748 | Usage : if ( $obj->can_call_new ) { | ||||
749 | $newobj = $obj->new( %param ); | ||||
750 | } | ||||
751 | Function: can_call_new returns 1 or 0 depending | ||||
752 | on whether an implementation allows new | ||||
753 | constructor to be called. If a new constructor | ||||
754 | is allowed, then it should take the following hashed | ||||
755 | constructor list. | ||||
756 | |||||
757 | $myobject->new( -seq => $sequence_as_string, | ||||
758 | -display_id => $id, | ||||
759 | -accession_number => $accession, | ||||
760 | -alphabet => 'dna', | ||||
761 | ); | ||||
762 | Example : | ||||
763 | Returns : 1 or 0 | ||||
764 | Args : None | ||||
765 | |||||
766 | =cut | ||||
767 | |||||
sub can_call_new {
    # This implementation always reports that new() may be called.
    return 1;
}
771 | |||||
772 | |||||
773 | =head2 alphabet | ||||
774 | |||||
775 | Title : alphabet | ||||
776 | Usage : if ( $obj->alphabet eq 'dna' ) { /Do Something/ } | ||||
777 | Function: Get/Set the type of sequence being one of | ||||
778 | 'dna', 'rna' or 'protein'. This is case sensitive. | ||||
779 | |||||
780 | This is not called <type> because this would cause | ||||
781 | upgrade problems from the 0.5 and earlier Seq objects. | ||||
782 | |||||
783 | Returns : A string either 'dna','rna','protein'. NB - the object must | ||||
784 | make a call of the type - if there is no type specified it | ||||
785 | has to guess. | ||||
786 | Args : optional string to set : 'dna' | 'rna' | 'protein' | ||||
787 | |||||
788 | =cut | ||||
789 | |||||
sub alphabet {
    my $self = shift;
    my $pseq = $self->primary_seq;

    # A missing or undef first argument is a plain "get"; undef is never
    # forwarded to the underlying accessor.
    if ( !@_ || !defined $_[0] ) {
        return $pseq->alphabet();
    }

    # Otherwise forward every argument to the underlying accessor.
    return $pseq->alphabet(@_);
}
795 | |||||
796 | |||||
797 | =head2 is_circular | ||||
798 | |||||
799 | Title : is_circular | ||||
800 | Usage : if( $obj->is_circular) { /Do Something/ } | ||||
801 | Function: Returns true if the molecule is circular | ||||
802 | Returns : Boolean value | ||||
803 | Args : none | ||||
804 | |||||
805 | =cut | ||||
806 | |||||
sub is_circular {
    # Relay the call and any arguments to the is_circular method of the
    # object returned by primary_seq().
    my $self = shift;
    my $pseq = $self->primary_seq();
    return $pseq->is_circular(@_);
}
810 | |||||
811 | |||||
812 | =head1 Methods for Bio::IdentifiableI compliance | ||||
813 | |||||
814 | =head2 object_id | ||||
815 | |||||
816 | Title : object_id | ||||
817 | Usage : $string = $obj->object_id() | ||||
818 | Function: a string which represents the stable primary identifier | ||||
819 | in this namespace of this object. For DNA sequences this | ||||
820 | is its accession_number, similarly for protein sequences | ||||
821 | |||||
822 | This is aliased to accession_number(). | ||||
823 | Returns : A scalar | ||||
824 | |||||
825 | =cut | ||||
826 | |||||
sub object_id {
    # Alias: object_id is answered by accession_number() on this object.
    my $self = shift;
    return $self->accession_number(@_);
}
830 | |||||
831 | |||||
832 | =head2 version | ||||
833 | |||||
834 | Title : version | ||||
835 | Usage : $version = $obj->version() | ||||
836 | Function: a number which differentiates between versions of | ||||
837 | the same object. Higher numbers are considered to be | ||||
838 | later and more relevant, but a single object described by | ||||
839 | the same identifier should represent the same concept | ||||
840 | |||||
841 | Returns : A number | ||||
842 | |||||
843 | =cut | ||||
844 | |||||
sub version {
    # Relay the call and any arguments to the version method of the
    # object returned by primary_seq().
    my $self = shift;
    my $pseq = $self->primary_seq;
    return $pseq->version(@_);
}
848 | |||||
849 | |||||
850 | =head2 authority | ||||
851 | |||||
852 | Title : authority | ||||
853 | Usage : $authority = $obj->authority() | ||||
854 | Function: a string which represents the organisation which | ||||
855 | granted the namespace, written as the DNS name for | ||||
856 | organisation (eg, wormbase.org) | ||||
857 | |||||
858 | Returns : A scalar | ||||
859 | |||||
860 | =cut | ||||
861 | |||||
sub authority {
    # Relay the call and any arguments to the authority method of the
    # object returned by primary_seq().
    my $self = shift;
    my $pseq = $self->primary_seq();
    return $pseq->authority(@_);
}
865 | |||||
866 | |||||
867 | =head2 namespace | ||||
868 | |||||
869 | Title : namespace | ||||
870 | Usage : $string = $obj->namespace() | ||||
871 | Function: A string representing the name space this identifier | ||||
872 | is valid in, often the database name or the name | ||||
873 | describing the collection | ||||
874 | |||||
875 | Returns : A scalar | ||||
876 | |||||
877 | =cut | ||||
878 | |||||
sub namespace {
    # Relay the call and any arguments to the namespace method of the
    # object returned by primary_seq().
    my $self = shift;
    my $pseq = $self->primary_seq();
    return $pseq->namespace(@_);
}
882 | |||||
883 | |||||
884 | =head1 Methods for Bio::DescribableI compliance | ||||
885 | |||||
886 | =head2 display_name | ||||
887 | |||||
888 | Title : display_name | ||||
889 | Usage : $string = $obj->display_name() | ||||
890 | Function: A string which is what should be displayed to the user | ||||
891 | the string should have no spaces (ideally, though a cautious | ||||
892 | user of this interface would not assume this) and should be | ||||
893 | less than thirty characters (though again, double checking | ||||
894 | this is a good idea) | ||||
895 | |||||
896 | This is aliased to display_id(). | ||||
897 | Returns : A scalar | ||||
898 | |||||
899 | =cut | ||||
900 | |||||
sub display_name {
    # Alias: display_name is answered by display_id() on this object.
    my $self = shift;
    return $self->display_id(@_);
}
904 | |||||
905 | =head2 description | ||||
906 | |||||
907 | Title : description | ||||
908 | Usage : $string = $obj->description() | ||||
909 | Function: A text string suitable for displaying to the user a | ||||
910 | description. This string is likely to have spaces, but | ||||
911 | should not have any newlines or formatting - just plain | ||||
912 | text. The string should not be greater than 255 characters | ||||
913 | and clients can feel justified at truncating strings at 255 | ||||
914 | characters for the purposes of display | ||||
915 | |||||
916 | This is aliased to desc(). | ||||
917 | Returns : A scalar | ||||
918 | |||||
919 | =cut | ||||
920 | |||||
sub description {
    # Alias: description is answered by desc() on this object.
    my $self = shift;
    return $self->desc(@_);
}
924 | |||||
925 | |||||
926 | =head1 Methods for implementing Bio::AnnotatableI | ||||
927 | |||||
928 | =head2 annotation | ||||
929 | |||||
930 | Title : annotation | ||||
931 | Usage : $ann = $seq->annotation or | ||||
932 | $seq->annotation($ann) | ||||
933 | Function: Gets or sets the annotation | ||||
934 | Returns : Bio::AnnotationCollectionI object | ||||
935 | Args : None or Bio::AnnotationCollectionI object | ||||
936 | |||||
937 | See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection> | ||||
938 | for more information | ||||
939 | |||||
940 | =cut | ||||
941 | |||||
sub annotation {
    my ($obj, $value) = @_;

    if ( !defined $value ) {
        # Getter: create an empty collection the first time one is needed.
        if ( !defined $obj->{'_annotation'} ) {
            $obj->{'_annotation'} = Bio::Annotation::Collection->new();
        }
        return $obj->{'_annotation'};
    }

    # Setter: the value must implement Bio::AnnotationCollectionI.
    if ( !$value->isa("Bio::AnnotationCollectionI") ) {
        $obj->throw("object of class ".ref($value)." does not implement ".
                    "Bio::AnnotationCollectionI. Too bad.");
    }
    $obj->{'_annotation'} = $value;

    return $obj->{'_annotation'};
}
954 | |||||
955 | |||||
956 | =head1 Methods for delegating Bio::AnnotationCollectionI | ||||
957 | |||||
958 | =head2 get_Annotations() | ||||
959 | |||||
960 | Usage : my @annotations = $seq->get_Annotations('key') | ||||
961 | Function: Retrieves all the Bio::AnnotationI objects for a specific key | ||||
962 | for this object | ||||
963 | Returns : list of Bio::AnnotationI - empty if no objects stored for a key | ||||
964 | Args : string which is key for annotations | ||||
965 | |||||
966 | =cut | ||||
967 | |||||
sub get_Annotations {
    # Delegate to the collection returned by annotation(), forwarding
    # every argument.
    my $self       = shift;
    my $collection = $self->annotation;
    return $collection->get_Annotations(@_);
}
969 | |||||
970 | |||||
971 | =head2 add_Annotation() | ||||
972 | |||||
973 | Usage : $seq->add_Annotation('reference',$object); | ||||
974 | $seq->add_Annotation($object,'Bio::MyInterface::DiseaseI'); | ||||
975 | $seq->add_Annotation($object); | ||||
976 | $seq->add_Annotation('disease',$object,'Bio::MyInterface::DiseaseI'); | ||||
977 | Function: Adds an annotation for a specific key for this sequence object. | ||||
978 | |||||
979 | If the key is omitted, the object to be added must provide a value | ||||
980 | via its tagname(). | ||||
981 | |||||
982 | If the archetype is provided, this and future objects added under | ||||
983 | that tag have to comply with the archetype and will be rejected | ||||
984 | otherwise. | ||||
985 | |||||
986 | Returns : none | ||||
987 | Args : annotation key ('disease', 'dblink', ...) | ||||
988 | object to store (must be Bio::AnnotationI compliant) | ||||
989 | [optional] object archetype to map future storage of object | ||||
990 | of these types to | ||||
991 | |||||
992 | =cut | ||||
993 | |||||
sub add_Annotation {
    # Delegate to the collection returned by annotation(), forwarding
    # every argument; the collection's return value is passed back as is.
    my $self       = shift;
    my $collection = $self->annotation;
    return $collection->add_Annotation(@_);
}
995 | |||||
996 | |||||
997 | =head2 remove_Annotations() | ||||
998 | |||||
999 | Usage : $seq->remove_Annotations() | ||||
1000 | Function: Remove the annotations for the specified key from this sequence | ||||
1001 | object | ||||
1002 | Returns : a list of Bio::AnnotationI compliant objects which were stored | ||||
1003 | under the given key(s) for this sequence object | ||||
1004 | Args : the key(s) (tag name(s), one or more strings) for which to | ||||
1005 | remove annotations (optional; if none given, flushes all | ||||
1006 | annotations) | ||||
1007 | |||||
1008 | =cut | ||||
1009 | |||||
sub remove_Annotations {
    # Delegate to the collection returned by annotation(), forwarding
    # every argument.
    my $self       = shift;
    my $collection = $self->annotation;
    return $collection->remove_Annotations(@_);
}
1011 | |||||
1012 | |||||
1013 | =head2 get_num_of_annotations() | ||||
1014 | |||||
1015 | Usage : my $count = $seq->get_num_of_annotations() | ||||
1016 | Alias : num_Annotations | ||||
1017 | Function: Returns the count of all annotations stored for this sequence | ||||
1018 | object | ||||
1019 | Returns : integer | ||||
1020 | Args : none | ||||
1021 | |||||
1022 | =cut | ||||
1023 | |||||
sub get_num_of_annotations {
    # Delegate to the collection returned by annotation(), forwarding
    # every argument.
    my $self       = shift;
    my $collection = $self->annotation;
    return $collection->get_num_of_annotations(@_);
}
sub num_Annotations {
    # Convenience alias (DWYM) for get_num_of_annotations(); arguments
    # other than the invocant are not forwarded.
    my $self = shift;
    return $self->get_num_of_annotations;
}
1026 | |||||
1027 | |||||
1028 | =head1 Methods to implement Bio::FeatureHolderI | ||||
1029 | |||||
1030 | This includes methods for retrieving, adding, and removing features. | ||||
1031 | |||||
1032 | =cut | ||||
1033 | |||||
1034 | =head2 get_SeqFeatures | ||||
1035 | |||||
1036 | Title : get_SeqFeatures | ||||
1037 | Usage : | ||||
1038 | Function: Get the feature objects held by this feature holder. | ||||
1039 | |||||
1040 | Features which are not top-level are subfeatures of one or | ||||
1041 | more of the returned feature objects, which means that you | ||||
1042 | must traverse the subfeature arrays of each top-level | ||||
1043 | feature object in order to traverse all features associated | ||||
1044 | with this sequence. | ||||
1045 | |||||
1046 | Top-level features can be obtained by tag, specified in | ||||
1047 | the argument. | ||||
1048 | |||||
1049 | Use get_all_SeqFeatures() if you want the feature tree | ||||
1050 | flattened into one single array. | ||||
1051 | |||||
1052 | Example : | ||||
1053 | Returns : an array of Bio::SeqFeatureI implementing objects | ||||
1054 | Args : [optional] scalar string (feature tag) | ||||
1055 | |||||
1056 | =cut | ||||
1057 | |||||
sub get_SeqFeatures {
    # Return the top-level features held by this object.
    #
    # Args    : [optional] feature tag (string). When given, only features
    #           whose primary_tag equals the tag are returned.
    # Returns : list of the stored feature objects (all of them when no tag
    #           is given, or when the tag is undef or the empty string).
    my ($self, $tag) = @_;

    # Create the feature store on first access so it can be dereferenced.
    $self->{'_as_feat'} = [] unless defined $self->{'_as_feat'};

    # No usable tag: hand back every stored feature.
    return @{ $self->{'_as_feat'} } unless defined $tag && length $tag;

    # Test for a non-empty string rather than truthiness, so that a tag of
    # '0' filters the features instead of being mistaken for "no tag".
    return grep { $_->primary_tag eq $tag } @{ $self->{'_as_feat'} };
}
1072 | |||||
1073 | |||||
1074 | =head2 get_all_SeqFeatures | ||||
1075 | |||||
1076 | Title : get_all_SeqFeatures | ||||
1077 | Usage : @feat_ary = $seq->get_all_SeqFeatures(); | ||||
1078 | Function: Returns the tree of feature objects attached to this | ||||
1079 | sequence object flattened into one single array. Top-level | ||||
1080 | features will still contain their subfeature-arrays, which | ||||
1081 | means that you will encounter subfeatures twice if you | ||||
1082 | traverse the subfeature tree of the returned objects. | ||||
1083 | |||||
1084 | Use get_SeqFeatures() if you want the array to contain only | ||||
1085 | the top-level features. | ||||
1086 | |||||
1087 | Returns : An array of Bio::SeqFeatureI implementing objects. | ||||
1088 | Args : None | ||||
1089 | |||||
1090 | =cut | ||||
1091 | |||||
1092 | # this implementation is inherited from FeatureHolderI | ||||
1093 | |||||
1094 | =head2 feature_count | ||||
1095 | |||||
1096 | Title : feature_count | ||||
1097 | Usage : $seq->feature_count() | ||||
1098 | Function: Return the number of SeqFeatures attached to a sequence | ||||
1099 | Returns : integer representing the number of SeqFeatures | ||||
1100 | Args : None | ||||
1101 | |||||
1102 | =cut | ||||
1103 | |||||
sub feature_count {
    my $self  = shift;
    my $store = $self->{'_as_feat'};

    # No feature store has been created yet: nothing is attached.
    return 0 unless defined $store;

    # Otherwise the count is the number of elements in the store.
    return scalar @{$store};
}
1113 | |||||
1114 | |||||
1115 | =head2 add_SeqFeature | ||||
1116 | |||||
1117 | Title : add_SeqFeature | ||||
1118 | Usage : $seq->add_SeqFeature($feat); | ||||
1119 | Function: Adds the given feature object to the feature array of this | ||||
1120 | sequence. The object passed is required to implement the | ||||
1121 | Bio::SeqFeatureI interface. | ||||
1122 | The 'EXPAND' qualifier (see L<Bio::FeatureHolderI>) is supported, but | ||||
1123 | has no effect. | ||||
1124 | Returns : 1 on success | ||||
1125 | Args : A Bio::SeqFeatureI implementing object. | ||||
1126 | |||||
1127 | =cut | ||||
1128 | |||||
sub add_SeqFeature {
    # Append feature objects to this object's feature store.
    #
    # Args    : a Bio::SeqFeatureI implementing object. The string 'EXPAND'
    #           is accepted and skipped. Passing more than one feature is
    #           reported through deprecated().
    # Returns : 1 on success.
    # Throws  : via throw() if an argument is not a Bio::SeqFeatureI,
    #           including undef and unblessed references.
    my ($self, @feat) = @_;

    # Create the feature store on first use.
    $self->{'_as_feat'} = [] unless $self->{'_as_feat'};

    if (scalar @feat > 1) {
        $self->deprecated(
            -message => 'Providing an array of features to Bio::Seq add_SeqFeature()'.
                        ' is deprecated and will be removed in a future version. '.
                        'Add a single feature at a time instead.',
            -warn_version  => 1.007,
            -throw_version => 1.009,
        );
    }

    # Called without arguments primary_seq() is a plain getter, so one
    # lookup serves every feature in the loop.
    my $aseq = $self->primary_seq;

    for my $feat ( @feat ) {

        # Need to support it for FeatureHolderI compliance
        next if defined $feat && $feat eq 'EXPAND';

        # Run the isa() check inside eval so that undef or an unblessed
        # reference ends up in throw() below instead of dying with a
        # generic "Can't call method" error.
        my $is_feature = defined $feat
            && do { local $@; eval { $feat->isa("Bio::SeqFeatureI") } };
        if ( !$is_feature ) {
            my $got = defined $feat ? $feat : 'undef';
            $self->throw("Expected a Bio::SeqFeatureI object, but got a $got.");
        }

        # make sure we attach ourselves to the feature if the feature wants it
        $feat->attach_seq($aseq) if $aseq;

        push @{ $self->{'_as_feat'} }, $feat;
    }
    return 1;
}
1160 | |||||
1161 | |||||
1162 | =head2 remove_SeqFeatures | ||||
1163 | |||||
1164 | Title : remove_SeqFeatures | ||||
1165 | Usage : $seq->remove_SeqFeatures(); | ||||
1166 | Function: Flushes all attached SeqFeatureI objects. | ||||
1167 | |||||
1168 | To remove individual feature objects, delete those from the returned | ||||
1169 | array and re-add the rest. | ||||
1170 | Example : | ||||
1171 | Returns : The array of Bio::SeqFeatureI objects removed from this seq. | ||||
1172 | Args : None | ||||
1173 | |||||
1174 | =cut | ||||
1175 | |||||
sub remove_SeqFeatures {
    my ($self) = @_;

    # Nothing was ever stored: return the empty list.
    my $old_store = $self->{'_as_feat'};
    return () unless $old_store;

    # Install a fresh, empty store and hand back what the old one held.
    $self->{'_as_feat'} = [];
    return @{$old_store};
}
1184 | |||||
1185 | |||||
1186 | =head1 Methods provided in the Bio::PrimarySeqI interface | ||||
1187 | |||||
1188 | These methods are inherited from the PrimarySeq interface | ||||
1189 | and work as one expects, building new Bio::Seq objects | ||||
1190 | or other information as expected. See L<Bio::PrimarySeq> | ||||
1191 | for more information. | ||||
1192 | |||||
1193 | Sequence Features are B<not> transferred to the new objects. | ||||
1194 | This is possibly a mistake. Anyone who feels the urge in | ||||
1195 | dealing with this is welcome to give it a go. | ||||
1196 | |||||
1197 | =head2 revcom | ||||
1198 | |||||
1199 | Title : revcom | ||||
1200 | Usage : $rev = $seq->revcom() | ||||
1201 | Function: Produces a new Bio::Seq object which | ||||
1202 | is the reversed complement of the sequence. For protein | ||||
1203 | sequences this throws an exception of "Sequence is a protein. | ||||
1204 | Cannot revcom" | ||||
1205 | |||||
1206 | The id is the same id as the original sequence, and the | ||||
1207 | accession number is also identical. If someone wants to track | ||||
1208 | that this sequence has been reversed, it needs to define its own | ||||
1209 | extensions | ||||
1210 | |||||
1211 | To do an in-place edit of an object you can go: | ||||
1212 | |||||
1213 | $seq = $seq->revcom(); | ||||
1214 | |||||
1215 | This of course, causes Perl to handle the garbage collection of | ||||
1216 | the old object, but it is roughly speaking as efficient as an | ||||
1217 | in-place edit. | ||||
1218 | |||||
1219 | Returns : A new (fresh) Bio::Seq object | ||||
1220 | Args : None | ||||
1221 | |||||
1222 | =head2 trunc | ||||
1223 | |||||
1224 | Title : trunc | ||||
1225 | Usage : $subseq = $myseq->trunc(10,100); | ||||
1226 | Function: Provides a truncation of a sequence | ||||
1227 | |||||
1228 | Example : | ||||
1229 | Returns : A fresh Seq object | ||||
1230 | Args : Two integers denoting the first and last positions of the sub-sequence | ||||
1231 | |||||
1232 | =head2 id | ||||
1233 | |||||
1234 | Title : id | ||||
1235 | Usage : $id = $seq->id() | ||||
1236 | Function: This is mapped on display_id | ||||
1237 | Returns : value of display_id() | ||||
1238 | Args : [optional] value to update display_id | ||||
1239 | |||||
1240 | =cut | ||||
1241 | |||||
sub id {
    # Alias: id is answered by display_id() on this object.
    my $self = shift;
    return $self->display_id(@_);
}
1245 | |||||
1246 | |||||
1247 | =head1 Seq only methods | ||||
1248 | |||||
1249 | These methods are specific to the Bio::Seq object, and not | ||||
1250 | found on the Bio::PrimarySeq object | ||||
1251 | |||||
1252 | =head2 primary_seq | ||||
1253 | |||||
1254 | Title : primary_seq | ||||
1255 | Usage : $seq->primary_seq or $seq->primary_seq($newval) | ||||
1256 | Function: Get or set a PrimarySeq object | ||||
1257 | Example : | ||||
1258 | Returns : PrimarySeq object | ||||
1259 | Args : None or PrimarySeq object | ||||
1260 | |||||
1261 | =cut | ||||
1262 | |||||
sub primary_seq {
    my ($obj, $value) = @_;

    # Getter: nothing to validate or propagate.
    return $obj->{'primary_seq'} unless defined $value;

    # Setter: the value must be a reference that is a Bio::PrimarySeqI.
    unless ( ref $value && $value->isa('Bio::PrimarySeqI') ) {
        $obj->throw("$value is not a Bio::PrimarySeq compliant object");
    }

    $obj->{'primary_seq'} = $value;

    # Offer the new sequence to every feature returned by
    # get_SeqFeatures(), in case it wants an attached seq.
    for my $feature ( $obj->get_SeqFeatures() ) {
        $feature->attach_seq($value);
    }

    return $obj->{'primary_seq'};
}
1283 | |||||
1284 | |||||
1285 | =head2 species | ||||
1286 | |||||
1287 | Title : species | ||||
1288 | Usage : $species = $seq->species() or $seq->species($species) | ||||
1289 | Function: Gets or sets the species | ||||
1290 | Returns : L<Bio::Species> object | ||||
1291 | Args : None or L<Bio::Species> object | ||||
1292 | |||||
1293 | See L<Bio::Species> for more information | ||||
1294 | |||||
1295 | =cut | ||||
1296 | |||||
sub species {
    my ($self, $species) = @_;

    # No argument, or a false one, is a "get".
    return $self->{'species'} unless $species;

    # Otherwise store the new value; the stored value is returned.
    return $self->{'species'} = $species;
}
1305 | |||||
1306 | |||||
1307 | # Internal methods follow... | ||||
1308 | |||||
# Explicit no-op destructor; defined to keep AUTOLOAD happy.
sub DESTROY {
    return;
}
1311 | |||||
1312 | ############################################################################ | ||||
1313 | # aliases due to name changes or to compensate for our lack of consistency # | ||||
1314 | ############################################################################ | ||||
1315 | |||||
1316 | # in all other modules we use the object in the singular -- | ||||
1317 | # lack of consistency sucks | ||||
1318 | 1 | 2µs | *flush_SeqFeature = \&remove_SeqFeatures; | ||
1319 | 1 | 200ns | *flush_SeqFeatures = \&remove_SeqFeatures; | ||
1320 | |||||
1321 | # this is now get_SeqFeatures() (from FeatureHolderI) | ||||
1322 | 1 | 200ns | *top_SeqFeatures = \&get_SeqFeatures; | ||
1323 | |||||
1324 | # this is now get_all_SeqFeatures() in FeatureHolderI | ||||
sub all_SeqFeatures {
    # Older name for get_all_SeqFeatures(); forwards every argument.
    my $self = shift;
    return $self->get_all_SeqFeatures(@_);
}
1328 | |||||
sub accession {
    my ($self) = @_;
    shift;

    # Deprecated spelling: emit a warning naming the caller's class, then
    # behave exactly like accession_number().
    my $notice = ref($self)."::accession is deprecated, ".
                 "use accession_number() instead";
    $self->warn($notice);

    return $self->accession_number(@_);
}
1335 | |||||
1336 | 1 | 7µs | 1; |