← Index
NYTProf Performance Profile   « block view • line view • sub view »
For bin/pan_genome_post_analysis
  Run on Fri Mar 27 11:43:32 2015
Reported on Fri Mar 27 11:46:10 2015

Filename/Users/ap13/perl5/lib/perl5/Bio/SeqFeature/Generic.pm
StatementsExecuted 17 statements in 3.00ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
1111.91ms2.50msBio::SeqFeature::Generic::::BEGIN@147Bio::SeqFeature::Generic::BEGIN@147
1111.83ms3.95msBio::SeqFeature::Generic::::BEGIN@145Bio::SeqFeature::Generic::BEGIN@145
11118µs40µsBio::SeqFeature::Generic::::BEGIN@143Bio::SeqFeature::Generic::BEGIN@143
11110µs7.23msBio::SeqFeature::Generic::::BEGIN@151Bio::SeqFeature::Generic::BEGIN@151
1119µs9µsBio::SeqFeature::Generic::::BEGIN@148Bio::SeqFeature::Generic::BEGIN@148
1117µs7µsBio::SeqFeature::Generic::::BEGIN@146Bio::SeqFeature::Generic::BEGIN@146
0000s0sBio::SeqFeature::Generic::::_expand_regionBio::SeqFeature::Generic::_expand_region
0000s0sBio::SeqFeature::Generic::::_from_gff_stringBio::SeqFeature::Generic::_from_gff_string
0000s0sBio::SeqFeature::Generic::::_parseBio::SeqFeature::Generic::_parse
0000s0sBio::SeqFeature::Generic::::_tag_valueBio::SeqFeature::Generic::_tag_value
0000s0sBio::SeqFeature::Generic::::add_SeqFeatureBio::SeqFeature::Generic::add_SeqFeature
0000s0sBio::SeqFeature::Generic::::add_tag_valueBio::SeqFeature::Generic::add_tag_value
0000s0sBio::SeqFeature::Generic::::all_tagsBio::SeqFeature::Generic::all_tags
0000s0sBio::SeqFeature::Generic::::annotationBio::SeqFeature::Generic::annotation
0000s0sBio::SeqFeature::Generic::::attach_seqBio::SeqFeature::Generic::attach_seq
0000s0sBio::SeqFeature::Generic::::cleanup_genericBio::SeqFeature::Generic::cleanup_generic
0000s0sBio::SeqFeature::Generic::::direct_newBio::SeqFeature::Generic::direct_new
0000s0sBio::SeqFeature::Generic::::display_idBio::SeqFeature::Generic::display_id
0000s0sBio::SeqFeature::Generic::::display_nameBio::SeqFeature::Generic::display_name
0000s0sBio::SeqFeature::Generic::::each_tag_valueBio::SeqFeature::Generic::each_tag_value
0000s0sBio::SeqFeature::Generic::::endBio::SeqFeature::Generic::end
0000s0sBio::SeqFeature::Generic::::entire_seqBio::SeqFeature::Generic::entire_seq
0000s0sBio::SeqFeature::Generic::::frameBio::SeqFeature::Generic::frame
0000s0sBio::SeqFeature::Generic::::get_SeqFeaturesBio::SeqFeature::Generic::get_SeqFeatures
0000s0sBio::SeqFeature::Generic::::get_all_tagsBio::SeqFeature::Generic::get_all_tags
0000s0sBio::SeqFeature::Generic::::get_tag_valuesBio::SeqFeature::Generic::get_tag_values
0000s0sBio::SeqFeature::Generic::::gff_formatBio::SeqFeature::Generic::gff_format
0000s0sBio::SeqFeature::Generic::::gff_stringBio::SeqFeature::Generic::gff_string
0000s0sBio::SeqFeature::Generic::::has_tagBio::SeqFeature::Generic::has_tag
0000s0sBio::SeqFeature::Generic::::lengthBio::SeqFeature::Generic::length
0000s0sBio::SeqFeature::Generic::::locationBio::SeqFeature::Generic::location
0000s0sBio::SeqFeature::Generic::::newBio::SeqFeature::Generic::new
0000s0sBio::SeqFeature::Generic::::primary_tagBio::SeqFeature::Generic::primary_tag
0000s0sBio::SeqFeature::Generic::::remove_SeqFeaturesBio::SeqFeature::Generic::remove_SeqFeatures
0000s0sBio::SeqFeature::Generic::::remove_tagBio::SeqFeature::Generic::remove_tag
0000s0sBio::SeqFeature::Generic::::scoreBio::SeqFeature::Generic::score
0000s0sBio::SeqFeature::Generic::::seqBio::SeqFeature::Generic::seq
0000s0sBio::SeqFeature::Generic::::seq_idBio::SeqFeature::Generic::seq_id
0000s0sBio::SeqFeature::Generic::::seqnameBio::SeqFeature::Generic::seqname
0000s0sBio::SeqFeature::Generic::::set_attributesBio::SeqFeature::Generic::set_attributes
0000s0sBio::SeqFeature::Generic::::slurp_gff_fileBio::SeqFeature::Generic::slurp_gff_file
0000s0sBio::SeqFeature::Generic::::source_tagBio::SeqFeature::Generic::source_tag
0000s0sBio::SeqFeature::Generic::::startBio::SeqFeature::Generic::start
0000s0sBio::SeqFeature::Generic::::strandBio::SeqFeature::Generic::strand
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1#
2# BioPerl module for Bio::SeqFeature::Generic
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Ewan Birney <birney@sanger.ac.uk>
7#
8# Copyright Ewan Birney
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::SeqFeature::Generic - Generic SeqFeature
17
18=head1 SYNOPSIS
19
20 $feat = Bio::SeqFeature::Generic->new(
21 -start => 10,
22 -end => 100,
23 -strand => -1,
24 -primary => 'repeat', # -primary_tag is a synonym
25 -source_tag => 'repeatmasker',
26 -display_name => 'alu family',
27 -score => 1000,
28 -tag => { new => 1,
29 author => 'someone',
30 sillytag => 'this is silly!' } );
31
32 $feat = Bio::SeqFeature::Generic->new( -gff_string => $string );
33 # if you want explicitly GFF1
34 $feat = Bio::SeqFeature::Generic->new( -gff1_string => $string );
35
36 # add it to an annotated sequence
37
38 $annseq->add_SeqFeature($feat);
39
40=head1 DESCRIPTION
41
42Bio::SeqFeature::Generic is a generic implementation for the
43Bio::SeqFeatureI interface, providing a simple object to provide all
44the information for a feature on a sequence.
45
46For many Features, this is all you will need to use (for example, this
47is fine for Repeats in DNA sequence or Domains in protein
48sequence). For other features, which have more structure, this is a
49good base class to extend using inheritence to have new things: this
50is what is done in the L<Bio::SeqFeature::Gene>,
51L<Bio::SeqFeature::Transcript> and L<Bio::SeqFeature::Exon>, which provide
52well coordinated classes to represent genes on DNA sequence (for
53example, you can get the protein sequence out from a transcript
54class).
55
56For many Features, you want to add some piece of information, for
57example a common one is that this feature is 'new' whereas other
58features are 'old'. The tag system, which here is implemented using a
59hash can be used here. You can use the tag system to extend the
60L<Bio::SeqFeature::Generic> programmatically: that is, you know that you have
61read in more information into the tag 'mytag' which you can then
62retrieve. This means you do not need to know how to write inherited
63Perl to provide more complex information on a feature, and/or, if you
64do know but you do not want to write a new class every time you need
65some extra piece of information, you can use the tag system to easily
66store and then retrieve information.
67
68The tag system can be written in/out of GFF format, and also into EMBL
69format via the L<Bio::SeqIO> system
70
71=head1 Implemented Interfaces
72
73This class implements the following interfaces.
74
75=over 4
76
77=item L<Bio::SeqFeatureI>
78
79Note that this includes implementing Bio::RangeI.
80
81=item L<Bio::AnnotatableI>
82
83=item L<Bio::FeatureHolderI>
84
85Features held by a feature are essentially sub-features.
86
87=back
88
89=head1 FEEDBACK
90
91=head2 Mailing Lists
92
93User feedback is an integral part of the evolution of this and other
94Bioperl modules. Send your comments and suggestions preferably to one
95of the Bioperl mailing lists. Your participation is much appreciated.
96
97 bioperl-l@bioperl.org - General discussion
98 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
99
100=head2 Support
101
102Please direct usage questions or support issues to the mailing list:
103
104I<bioperl-l@bioperl.org>
105
106rather than to the module maintainer directly. Many experienced and
107reponsive experts will be able look at the problem and quickly
108address it. Please include a thorough description of the problem
109with code and data examples if at all possible.
110
111=head2 Reporting Bugs
112
113Report bugs to the Bioperl bug tracking system to help us keep track
114the bugs and their resolution. Bug reports can be submitted via
115the web:
116
117 https://github.com/bioperl/bioperl-live/issues
118
119=head1 AUTHOR - Ewan Birney
120
121Ewan Birney E<lt>birney@sanger.ac.ukE<gt>
122
123=head1 DEVELOPERS
124
125This class has been written with an eye out for inheritance. The fields
126the actual object hash are:
127
128 _gsf_tag_hash = reference to a hash for the tags
129 _gsf_sub_array = reference to an array for subfeatures
130
131=head1 APPENDIX
132
133The rest of the documentation details each of the object
134methods. Internal methods are usually preceded with a _
135
136=cut
137
138
139# Let the code begin...
140
141
142package Bio::SeqFeature::Generic;
143229µs262µs
# spent 40µs (18+22) within Bio::SeqFeature::Generic::BEGIN@143 which was called: # once (18µs+22µs) by Bio::Tools::GFF::BEGIN@152 at line 143
use strict;
# spent 40µs making 1 call to Bio::SeqFeature::Generic::BEGIN@143 # spent 22µs making 1 call to strict::import
144
1452156µs13.95ms
# spent 3.95ms (1.83+2.12) within Bio::SeqFeature::Generic::BEGIN@145 which was called: # once (1.83ms+2.12ms) by Bio::Tools::GFF::BEGIN@152 at line 145
use Bio::Annotation::Collection;
# spent 3.95ms making 1 call to Bio::SeqFeature::Generic::BEGIN@145
146223µs17µs
# spent 7µs within Bio::SeqFeature::Generic::BEGIN@146 which was called: # once (7µs+0s) by Bio::Tools::GFF::BEGIN@152 at line 146
use Bio::Location::Simple;
# spent 7µs making 1 call to Bio::SeqFeature::Generic::BEGIN@146
1472160µs12.50ms
# spent 2.50ms (1.91+593µs) within Bio::SeqFeature::Generic::BEGIN@147 which was called: # once (1.91ms+593µs) by Bio::Tools::GFF::BEGIN@152 at line 147
use Bio::Location::Split;
# spent 2.50ms making 1 call to Bio::SeqFeature::Generic::BEGIN@147
148229µs19µs
# spent 9µs within Bio::SeqFeature::Generic::BEGIN@148 which was called: # once (9µs+0s) by Bio::Tools::GFF::BEGIN@152 at line 148
use Bio::Tools::GFF;
# spent 9µs making 1 call to Bio::SeqFeature::Generic::BEGIN@148
149#use Tie::IxHash;
150
15122.59ms214.5ms
# spent 7.23ms (10µs+7.22) within Bio::SeqFeature::Generic::BEGIN@151 which was called: # once (10µs+7.22ms) by Bio::Tools::GFF::BEGIN@152 at line 151
use base qw(Bio::Root::Root Bio::SeqFeatureI Bio::FeatureHolderI Bio::AnnotatableI);
# spent 7.23ms making 1 call to Bio::SeqFeature::Generic::BEGIN@151 # spent 7.22ms making 1 call to base::import
152
153sub new {
154 my ( $caller, @args) = @_;
155 my ($self) = $caller->SUPER::new(@args);
156 $self->_register_for_cleanup(\&cleanup_generic);
157 $self->{'_parse_h'} = {};
158 $self->{'_gsf_tag_hash'} = {};
159
160 # bulk-set attributes
161 $self->set_attributes(@args);
162
163 # done - we hope
164 return $self;
165}
166
167=head2 set_attributes
168
169 Title : set_attributes
170 Usage :
171 Function: Sets a whole array of parameters at once.
172 Example :
173 Returns : none
174 Args : Named parameters, in the form as they would otherwise be passed
175 to new(). Currently recognized are:
176
177 -start start position
178 -end end position
179 -strand strand
180 -phase the phase of the feature (0..2)
181 -primary_tag primary tag
182 -primary (synonym for -primary_tag)
183 -source source tag
184 -frame frame
185 -score score value
186 -tag a reference to a tag/value hash
187 -gff_string GFF v.2 string to initialize from
188 -gff1_string GFF v.1 string to initialize from
189 -seq_id the display name of the sequence
190 -annotation the AnnotationCollectionI object
191 -location the LocationI object
192
193=cut
194
195sub set_attributes {
196 my ($self,@args) = @_;
197 my ($start, $end, $strand, $primary_tag, $source_tag, $primary,
198 $source, $frame, $score, $tag, $gff_string, $gff1_string,
199 $seqname, $seqid, $annot, $location,$display_name, $pid,$phase) =
200 $self->_rearrange([qw(START
201 END
202 STRAND
203 PRIMARY_TAG
204 SOURCE_TAG
205 PRIMARY
206 SOURCE
207 FRAME
208 SCORE
209 TAG
210 GFF_STRING
211 GFF1_STRING
212 SEQNAME
213 SEQ_ID
214 ANNOTATION
215 LOCATION
216 DISPLAY_NAME
217 PRIMARY_ID
218 PHASE
219 )], @args);
220 $location && $self->location($location);
221 $gff_string && $self->_from_gff_string($gff_string);
222 $gff1_string && do {
223 $self->gff_format(Bio::Tools::GFF->new('-gff_version' => 1));
224 $self->_from_gff_stream($gff1_string);
225 };
226
227 $pid && $self->primary_id($pid);
228 $primary_tag && $self->primary_tag($primary_tag);
229 $source_tag && $self->source_tag($source_tag);
230 $primary && $self->primary_tag($primary);
231 $source && $self->source_tag($source);
232 defined $start && $self->start($start);
233 defined $end && $self->end($end);
234 defined $strand && $self->strand($strand);
235 defined $frame && $self->frame($frame);
236 defined $display_name && $self->display_name($display_name);
237 defined $score && $self->score($score);
238 $annot && $self->annotation($annot);
239 if($seqname) {
240 $self->warn("-seqname is deprecated. Please use -seq_id instead.");
241 $seqid = $seqname unless $seqid;
242 }
243 $self->seq_id($seqid) if (defined($seqid));
244 $tag && do {
245 foreach my $t ( keys %$tag ) {
246 $self->add_tag_value($t, UNIVERSAL::isa($tag->{$t}, "ARRAY") ? @{$tag->{$t}} : $tag->{$t});
247 }
248 };
249 defined $phase && $self->phase($phase);
250}
251
252
253=head2 direct_new
254
255 Title : direct_new
256 Usage : my $feat = Bio::SeqFeature::Generic->direct_new;
257 Function: create a blessed hash - for performance improvement in
258 object creation
259 Returns : Bio::SeqFeature::Generic object
260 Args : none
261
262=cut
263
264sub direct_new {
265 my ( $class) = @_;
266 my ($self) = {};
267
268 bless $self,$class;
269
270 return $self;
271}
272
273
274=head2 location
275
276 Title : location
277 Usage : my $location = $feat->location();
278 Function: returns a location object suitable for identifying location
279 of feature on sequence or parent feature
280 Returns : Bio::LocationI object
281 Args : [optional] Bio::LocationI object to set the value to.
282
283=cut
284
285sub location {
286 my($self, $value ) = @_;
287
288 if (defined($value)) {
289 unless (ref($value) and $value->isa('Bio::LocationI')) {
290 $self->throw("object $value pretends to be a location but ".
291 "does not implement Bio::LocationI");
292 }
293 $self->{'_location'} = $value;
294 }
295 elsif (! $self->{'_location'}) {
296 # guarantees a real location object is returned every time
297 $self->{'_location'} = Bio::Location::Simple->new();
298 }
299 return $self->{'_location'};
300}
301
302
303=head2 start
304
305 Title : start
306 Usage : my $start = $feat->start;
307 $feat->start(20);
308 Function: Get/set on the start coordinate of the feature
309 Returns : integer
310 Args : none
311
312=cut
313
314sub start {
315 my ($self, $value) = @_;
316 # Return soon if setting value
317 if (defined $value) {
318 return $self->location->start($value);
319 }
320
321 return $self->location->start() if not defined $self->{'_gsf_seq'};
322 # Check circular sequences cut by origin
323 my $start;
324 if ( $self->{'_gsf_seq'}->is_circular
325 and $self->location->isa('Bio::Location::SplitLocationI')
326 ) {
327 my $primary_seq_length = $self->{'_gsf_seq'}->length;
328 my @sublocs = $self->location->sub_Location;
329
330 my $cut_by_origin = 0;
331 my ($a_end, $a_strand) = (0, 0);
332 my ($b_start, $b_strand) = (0, 0);
333 for (my $i = 1; $i < scalar @sublocs; $i++) {
334 $a_end = $sublocs[$i-1]->end;
335 $a_strand = $sublocs[$i-1]->strand;
336 $b_start = $sublocs[$i]->start;
337 $b_strand = $sublocs[$i]->strand;
338 # cut by origin condition
339 if ( $a_end == $primary_seq_length
340 and $b_start == 1
341 and $a_strand == $b_strand
342 ) {
343 $cut_by_origin = 1;
344 last;
345 }
346 }
347 $start = ($cut_by_origin == 1) ? ($sublocs[0]->start) : ($self->location->start);
348 }
349 else {
350 $start = $self->location->start;
351 }
352 return $start;
353}
354
355
356=head2 end
357
358 Title : end
359 Usage : my $end = $feat->end;
360 $feat->end($end);
361 Function: get/set on the end coordinate of the feature
362 Returns : integer
363 Args : none
364
365=cut
366
367sub end {
368 my ($self, $value) = @_;
369 # Return soon if setting value
370 if (defined $value) {
371 return $self->location->end($value);
372 }
373
374 return $self->location->end() if not defined $self->{'_gsf_seq'};
375 # Check circular sequences cut by origin
376 my $end;
377 if ( $self->{'_gsf_seq'}->is_circular
378 and $self->location->isa('Bio::Location::SplitLocationI')
379 ) {
380 my $primary_seq_length = $self->{'_gsf_seq'}->length;
381 my @sublocs = $self->location->sub_Location;
382
383 my $cut_by_origin = 0;
384 my ($a_end, $a_strand) = (0, 0);
385 my ($b_start, $b_strand) = (0, 0);
386 for (my $i = 1; $i < scalar @sublocs; $i++) {
387 $a_end = $sublocs[$i-1]->end;
388 $a_strand = $sublocs[$i-1]->strand;
389 $b_start = $sublocs[$i]->start;
390 $b_strand = $sublocs[$i]->strand;
391 # cut by origin condition
392 if ( $a_end == $primary_seq_length
393 and $b_start == 1
394 and $a_strand == $b_strand
395 ) {
396 $cut_by_origin = 1;
397 last;
398 }
399 }
400 $end = ($cut_by_origin == 1) ? ($sublocs[-1]->end) : ($self->location->end);
401 }
402 else {
403 $end = $self->location->end;
404 }
405 return $end;
406}
407
408
409=head2 length
410
411 Title : length
412 Usage : my $len = $feat->length;
413 Function: Get the feature length computed as:
414 $feat->end - $feat->start + 1
415 Returns : integer
416 Args : none
417
418=cut
419
420sub length {
421 my $self = shift;
422 my $length = $self->end() - $self->start() + 1;
423
424 # In circular sequences cut by origin $start > $end,
425 # e.g., join(5075..5386,1..51)), $start = 5075, $end = 51,
426 # then adjust using the primary_seq length (5386)
427 if ($length < 0 and defined $self->{'_gsf_seq'}) {
428 $length += $self->{'_gsf_seq'}->length;
429 }
430 return $length;
431}
432
433
434=head2 strand
435
436 Title : strand
437 Usage : my $strand = $feat->strand();
438 $feat->strand($strand);
439 Function: get/set on strand information, being 1,-1 or 0
440 Returns : -1,1 or 0
441 Args : none
442
443=cut
444
445sub strand {
446 my $self = shift;
447 return $self->location->strand(@_);
448}
449
450
451=head2 score
452
453 Title : score
454 Usage : my $score = $feat->score();
455 $feat->score($score);
456 Function: get/set on score information
457 Returns : float
458 Args : none if get, the new value if set
459
460=cut
461
462sub score {
463 my $self = shift;
464
465 if (@_) {
466 my $value = shift;
467
468 if ( defined $value && $value && $value !~ /^[A-Za-z]+$/ &&
469 $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ and $value != 0) {
470 $self->throw(-class=>'Bio::Root::BadParameter',
471 -text=>"'$value' is not a valid score",
472 -value=>$value);
473 }
474 if ($self->has_tag('score')) {
475 $self->warn("Removing score value(s)");
476 $self->remove_tag('score');
477 }
478 $self->add_tag_value('score',$value);
479 }
480 my ($score) = $self->has_tag('score') ? $self->get_tag_values('score') : undef;
481 return $score;
482}
483
484
485=head2 frame
486
487 Title : frame
488 Usage : my $frame = $feat->frame();
489 $feat->frame($frame);
490 Function: get/set on frame information
491 Returns : 0,1,2, '.'
492 Args : none if get, the new value if set
493
494=cut
495
496sub frame {
497 my $self = shift;
498
499 if ( @_ ) {
500 my $value = shift;
501 if ( defined $value &&
502 $value !~ /^[0-2.]$/ ) {
503 $self->throw("'$value' is not a valid frame");
504 }
505 if( defined $value && $value eq '.' ) { $value = '.' }
506 return $self->{'_gsf_frame'} = $value;
507 }
508 return $self->{'_gsf_frame'};
509}
510
511
512=head2 primary_tag
513
514 Title : primary_tag
515 Usage : my $tag = $feat->primary_tag();
516 $feat->primary_tag('exon');
517 Function: get/set on the primary tag for a feature,
518 eg 'exon'
519 Returns : a string
520 Args : none
521
522=cut
523
524sub primary_tag {
525 my $self = shift;
526 return $self->{'_primary_tag'} = shift if @_;
527 return $self->{'_primary_tag'} || '';
528}
529
530
531=head2 source_tag
532
533 Title : source_tag
534 Usage : my $tag = $feat->source_tag();
535 $feat->source_tag('genscan');
536 Function: Returns the source tag for a feature,
537 eg, 'genscan'
538 Returns : a string
539 Args : none
540
541=cut
542
543sub source_tag {
544 my $self = shift;
545 return $self->{'_source_tag'} = shift if @_;
546 return $self->{'_source_tag'} || '';
547}
548
549
550=head2 has_tag
551
552 Title : has_tag
553 Usage : my $value = $feat->has_tag('some_tag');
554 Function: Tests wether a feature contaings a tag
555 Returns : TRUE if the SeqFeature has the tag,
556 and FALSE otherwise.
557 Args : The name of a tag
558
559=cut
560
561sub has_tag {
562 my ($self, $tag) = @_;
563 return exists $_[0]->{'_gsf_tag_hash'}->{$tag};
564}
565
566
567=head2 add_tag_value
568
569 Title : add_tag_value
570 Usage : $feat->add_tag_value('note',"this is a note");
571 Returns : TRUE on success
572 Args : tag (string) and one or more values (any scalar(s))
573
574=cut
575
576sub add_tag_value {
577 my $self = shift;
578 my $tag = shift;
579 $self->{'_gsf_tag_hash'}->{$tag} ||= [];
580 push(@{$self->{'_gsf_tag_hash'}->{$tag}},@_);
581}
582
583
584=head2 get_tag_values
585
586 Title : get_tag_values
587 Usage : my @values = $feat->get_tag_values('note');
588 Function: Returns a list of all the values stored
589 under a particular tag.
590 Returns : A list of scalars
591 Args : The name of the tag
592
593=cut
594
595sub get_tag_values {
596 my ($self, $tag) = @_;
597
598 if( ! defined $tag ) { return (); }
599 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
600 $self->throw("asking for tag value that does not exist $tag");
601 }
602 return @{$self->{'_gsf_tag_hash'}->{$tag}};
603}
604
605
606=head2 get_all_tags
607
608 Title : get_all_tags
609 Usage : my @tags = $feat->get_all_tags();
610 Function: Get a list of all the tags in a feature
611 Returns : An array of tag names
612 Args : none
613
614# added a sort so that tags will be returned in a predictable order
615# I still think we should be able to specify a sort function
616# to the object at some point
617# -js
618
619=cut
620
621sub get_all_tags {
622 my ($self, @args) = @_;
623 return sort keys %{ $self->{'_gsf_tag_hash'}};
624}
625
626
627=head2 remove_tag
628
629 Title : remove_tag
630 Usage : $feat->remove_tag('some_tag');
631 Function: removes a tag from this feature
632 Returns : the array of values for this tag before removing it
633 Args : tag (string)
634
635=cut
636
637sub remove_tag {
638 my ($self, $tag) = @_;
639
640 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
641 $self->throw("trying to remove a tag that does not exist: $tag");
642 }
643 my @vals = @{$self->{'_gsf_tag_hash'}->{$tag}};
644 delete $self->{'_gsf_tag_hash'}->{$tag};
645 return @vals;
646}
647
648
649=head2 attach_seq
650
651 Title : attach_seq
652 Usage : $feat->attach_seq($seq);
653 Function: Attaches a Bio::Seq object to this feature. This
654 Bio::Seq object is for the *entire* sequence: ie
655 from 1 to 10000
656 Example :
657 Returns : TRUE on success
658 Args : a Bio::PrimarySeqI compliant object
659
660=cut
661
662sub attach_seq {
663 my ($self, $seq) = @_;
664
665 if ( ! ($seq && ref($seq) && $seq->isa("Bio::PrimarySeqI")) ) {
666 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures but got '".ref($seq)."'");
667 }
668
669 $self->{'_gsf_seq'} = $seq;
670
671 # attach to sub features if they want it
672 foreach ( $self->sub_SeqFeature() ) {
673 $_->attach_seq($seq);
674 }
675 return 1;
676}
677
678
679=head2 seq
680
681 Title : seq
682 Usage : my $tseq = $feat->seq();
683 Function: returns the truncated sequence (if there) for this
684 Example :
685 Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
686 bounded by start & end, or undef if there is no sequence attached
687 Args : none
688
689=cut
690
691sub seq {
692 my ($self, $arg) = @_;
693
694 if ( defined $arg ) {
695 $self->throw("Calling SeqFeature::Generic->seq with an argument. You probably want attach_seq");
696 }
697
698 if ( ! exists $self->{'_gsf_seq'} ) {
699 return;
700 }
701
702 # assumming our seq object is sensible, it should not have to yank
703 # the entire sequence out here.
704
705 my $seq = $self->{'_gsf_seq'}->trunc($self->start(), $self->end());
706
707
708 if ( defined $self->strand &&
709 $self->strand == -1 ) {
710
711 # ok. this does not work well (?)
712 #print STDERR "Before revcom", $seq->str, "\n";
713 $seq = $seq->revcom;
714 #print STDERR "After revcom", $seq->str, "\n";
715 }
716
717 return $seq;
718}
719
720
721=head2 entire_seq
722
723 Title : entire_seq
724 Usage : my $whole_seq = $feat->entire_seq();
725 Function: gives the entire sequence that this seqfeature is attached to
726 Example :
727 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
728 sequence attached
729 Args :
730
731=cut
732
733sub entire_seq {
734 return shift->{'_gsf_seq'};
735}
736
737
738=head2 seq_id
739
740 Title : seq_id
741 Usage : $feat->seq_id($newval)
742 Function: There are many cases when you make a feature that you
743 do know the sequence name, but do not know its actual
744 sequence. This is an attribute such that you can store
745 the ID (e.g., display_id) of the sequence.
746
747 This attribute should *not* be used in GFF dumping, as
748 that should come from the collection in which the seq
749 feature was found.
750 Returns : value of seq_id
751 Args : newvalue (optional)
752
753=cut
754
755sub seq_id {
756 my $obj = shift;
757 return $obj->{'_gsf_seq_id'} = shift if @_;
758 return $obj->{'_gsf_seq_id'};
759}
760
761
762=head2 display_name
763
764 Title : display_name
765 Usage : my $featname = $feat->display_name;
766 Function: Implements the display_name() method, which is a human-readable
767 name for the feature.
768 Returns : value of display_name (a string)
769 Args : Optionally, on set the new value or undef
770
771=cut
772
773sub display_name {
774 my $self = shift;
775 return $self->{'display_name'} = shift if @_;
776 return $self->{'display_name'} || '';
777}
778
779
780=head1 Methods for implementing Bio::AnnotatableI
781
782=head2 annotation
783
784 Title : annotation
785 Usage : $feat->annotation($annot_obj);
786 Function: Get/set the annotation collection object for annotating this
787 feature.
788
789 Example :
790 Returns : A Bio::AnnotationCollectionI object
791 Args : newvalue (optional)
792
793=cut
794
795sub annotation {
796 my ($obj,$value) = @_;
797
798 # we are smart if someone references the object and there hasn't been
799 # one set yet
800 if(defined $value || ! defined $obj->{'annotation'} ) {
801 $value = Bio::Annotation::Collection->new() unless ( defined $value );
802 $obj->{'annotation'} = $value;
803 }
804 return $obj->{'annotation'};
805}
806
807
808=head1 Methods to implement Bio::FeatureHolderI
809
810This includes methods for retrieving, adding, and removing
811features. Since this is already a feature, features held by this
812feature holder are essentially sub-features.
813
814=head2 get_SeqFeatures
815
816 Title : get_SeqFeatures
817 Usage : my @feats = $feat->get_SeqFeatures();
818 Function: Returns an array of sub Sequence Features
819 Returns : An array
820 Args : none
821
822=cut
823
824sub get_SeqFeatures {
825 return @{ shift->{'_gsf_sub_array'} || []};
826}
827
828
829=head2 add_SeqFeature
830
831 Title : add_SeqFeature
832 Usage : $feat->add_SeqFeature($subfeat);
833 $feat->add_SeqFeature($subfeat,'EXPAND');
834 Function: Adds a SeqFeature into the subSeqFeature array.
835 With no 'EXPAND' qualifer, subfeat will be tested
836 as to whether it lies inside the parent, and throw
837 an exception if not.
838
839 If EXPAND is used, the parent's start/end/strand will
840 be adjusted so that it grows to accommodate the new
841 subFeature
842
843 !IMPORTANT! The coordinates of the subfeature should not be relative
844 to the parent feature it is attached to, but relative to the sequence
845 the parent feature is located on.
846
847 Returns : nothing
848 Args : An object which has the SeqFeatureI interface
849
850=cut
851
852sub add_SeqFeature {
853 my ($self,$feat,$expand) = @_;
854 unless( defined $feat ) {
855 $self->warn("Called add_SeqFeature with no feature, ignoring");
856 return;
857 }
858 if ( !$feat->isa('Bio::SeqFeatureI') ) {
859 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
860 }
861
862 if($expand && ($expand eq 'EXPAND')) {
863 $self->_expand_region($feat);
864 } else {
865 if ( !$self->contains($feat) ) {
866 $self->throw("$feat is not contained within parent feature, and expansion is not valid");
867 }
868 }
869
870 $self->{'_gsf_sub_array'} = [] unless exists($self->{'_gsf_sub_array'});
871 push(@{$self->{'_gsf_sub_array'}},$feat);
872
873}
874
875
876=head2 remove_SeqFeatures
877
878 Title : remove_SeqFeatures
879 Usage : $feat->remove_SeqFeatures;
880 Function: Removes all SeqFeatures
881
882 If you want to remove only a subset of features then remove that
883 subset from the returned array, and add back the rest.
884 Example :
885 Returns : The array of Bio::SeqFeatureI implementing features that was
886 deleted.
887 Args : none
888
889=cut
890
891sub remove_SeqFeatures {
892 my ($self) = @_;
893 my @subfeats = @{$self->{'_gsf_sub_array'} || []};
894 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
895 return @subfeats;
896}
897
898
899=head1 GFF-related methods
900
901=head2 gff_format
902
903 Title : gff_format
904 Usage : # get:
905 my $gffio = $feat->gff_format();
906 # set (change the default version of GFF2):
907 $feat->gff_format(Bio::Tools::GFF->new(-gff_version => 1));
908 Function: Get/set the GFF format interpreter. This object is supposed to
909 format and parse GFF. See Bio::Tools::GFF for the interface.
910
911 If this method is called as class method, the default for all
912 newly created instances will be changed. Otherwise only this
913 instance will be affected.
914 Example :
915 Returns : a Bio::Tools::GFF compliant object
916 Args : On set, an instance of Bio::Tools::GFF or a derived object.
917
918=cut
919
920sub gff_format {
921 my ($self, $gffio) = @_;
922 if(defined($gffio)) {
923 if(ref($self)) {
924 $self->{'_gffio'} = $gffio;
925 } else {
926 $Bio::SeqFeatureI::static_gff_formatter = $gffio;
927 }
928 }
929 return (ref($self) && exists($self->{'_gffio'}) ?
930 $self->{'_gffio'} : $self->_static_gff_formatter);
931}
932
933
934=head2 gff_string
935
936 Title : gff_string
937 Usage : my $str = $feat->gff_string;
938 my $str = $feat->gff_string($gff_formatter);
939 Function: Provides the feature information in GFF format.
940
941 We override this here from Bio::SeqFeatureI in order to use the
942 formatter returned by gff_format().
943
944 Returns : A string
945 Args : Optionally, an object implementing gff_string().
946
947=cut
948
949sub gff_string {
950 my ($self,$formatter) = @_;
951 $formatter = $self->gff_format() unless $formatter;
952 return $formatter->gff_string($self);
953}
954
955
956=head2 slurp_gff_file
957
958 Title : slurp_file
959 Usage : my @features = Bio::SeqFeature::Generic::slurp_gff_file(\*FILE);
960 Function: Sneaky function to load an entire file as in memory objects.
961 Beware of big files.
962
963 This method is deprecated. Use Bio::Tools::GFF instead, which can
964 also handle large files.
965
966 Example :
967 Returns :
968 Args :
969
970=cut
971
972sub slurp_gff_file {
973 my ($f) = @_;
974 my @out;
975 if ( !defined $f ) {
976 Bio::Root::Root->throw("Must have a filehandle");
977 }
978
979 Bio::Root::Root->deprecated( -message => "deprecated method slurp_gff_file() called in Bio::SeqFeature::Generic. Use Bio::Tools::GFF instead.",
980 -warn_version => '1.005',
981 -throw_version => '1.007',
982 );
983
984 while(<$f>) {
985 my $sf = Bio::SeqFeature::Generic->new('-gff_string' => $_);
986 push(@out, $sf);
987 }
988
989 return @out;
990}
991
992
993=head2 _from_gff_string
994
995 Title : _from_gff_string
996 Usage :
997 Function: Set feature properties from GFF string.
998
999 This method uses the object returned by gff_format() for the
1000 actual interpretation of the string. Set a different GFF format
1001 interpreter first if you need a specific version, like GFF1. (The
1002 default is GFF2.)
1003 Example :
1004 Returns :
1005 Args : a GFF-formatted string
1006
1007=cut
1008
1009sub _from_gff_string {
1010 my ($self, $string) = @_;
1011 $self->gff_format()->from_gff_string($self, $string);
1012}
1013
1014
1015=head2 _expand_region
1016
1017 Title : _expand_region
1018 Usage : $feat->_expand_region($feature);
1019 Function: Expand the total region covered by this feature to
1020 accommodate for the given feature.
1021
1022 May be called whenever any kind of subfeature is added to this
1023 feature. add_SeqFeature() already does this.
1024 Returns :
1025 Args : A Bio::SeqFeatureI implementing object.
1026
1027=cut
1028
1029sub _expand_region {
1030 my ($self, $feat) = @_;
1031 if(! $feat->isa('Bio::SeqFeatureI')) {
1032 $self->warn("$feat does not implement Bio::SeqFeatureI");
1033 }
1034 # if this doesn't have start set - forget it!
1035 # changed to reflect sanity checks for LocationI
1036 if(!$self->location->valid_Location) {
1037 $self->start($feat->start);
1038 $self->end($feat->end);
1039 $self->strand($feat->strand) unless $self->strand;
1040 } else {
1041 my ($start,$end,$strand) = $self->union($feat);
1042 $self->start($start);
1043 $self->end($end);
1044 $self->strand($strand);
1045 }
1046}
1047
1048
1049=head2 _parse
1050
1051 Title : _parse
1052 Usage :
1053 Function: Parsing hints
1054 Example :
1055 Returns :
1056 Args :
1057
1058=cut
1059
1060sub _parse {
1061 my ($self) = @_;
1062 return $self->{'_parse_h'};
1063}
1064
1065
1066=head2 _tag_value
1067
1068 Title : _tag_value
1069 Usage :
1070 Function: For internal use only. Convenience method for those tags that
1071 may only have a single value.
1072 Returns : The first value under the given tag as a scalar (string)
1073 Args : The tag as a string. Optionally, the value on set.
1074
1075=cut
1076
1077sub _tag_value {
1078 my $self = shift;
1079 my $tag = shift;
1080
1081 if(@_ || (! $self->has_tag($tag))) {
1082 $self->remove_tag($tag) if($self->has_tag($tag));
1083 $self->add_tag_value($tag, @_);
1084 }
1085 return ($self->get_tag_values($tag))[0];
1086}
1087
1088
1089#######################################################################
1090# aliases for methods that changed their names in an attempt to make #
1091# bioperl names more consistent #
1092#######################################################################
1093
1094sub seqname {
1095 my $self = shift;
1096 $self->warn("SeqFeatureI::seqname() is deprecated. Please use seq_id() instead.");
1097 return $self->seq_id(@_);
1098}
1099
1100sub display_id {
1101 my $self = shift;
1102 $self->warn("SeqFeatureI::display_id() is deprecated. Please use display_name() instead.");
1103 return $self->display_name(@_);
1104}
1105
1106# this is towards consistent naming
1107sub each_tag_value { return shift->get_tag_values(@_); }
1108sub all_tags { return shift->get_all_tags(@_); }
1109
1110# we revamped the feature containing property to implementing
1111# Bio::FeatureHolderI
111213µs*sub_SeqFeature = \&get_SeqFeatures;
11131200ns*add_sub_SeqFeature = \&add_SeqFeature;
11141200ns*flush_sub_SeqFeatures = \&remove_SeqFeatures;
1115# this one is because of inconsistent naming ...
11161200ns*flush_sub_SeqFeature = \&remove_SeqFeatures;
1117
1118sub cleanup_generic {
1119 my $self = shift;
1120 foreach my $f ( @{$self->{'_gsf_sub_array'} || []} ) {
1121 $f = undef;
1122 }
1123 $self->{'_gsf_seq'} = undef;
1124 foreach my $t ( keys %{$self->{'_gsf_tag_hash'} } ) {
1125 $self->{'_gsf_tag_hash'}->{$t} = undef;
1126 delete($self->{'_gsf_tag_hash'}->{$t}); # bug 1720 fix
1127 }
1128}
1129
113018µs1;