Filename | /Users/ap13/perl5/lib/perl5/Bio/SeqFeatureI.pm |
Statements | Executed 16 statements in 1.93ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 825µs | 4.91ms | BEGIN@99 | Bio::SeqFeatureI::
1 | 1 | 1 | 14µs | 45µs | BEGIN@97 | Bio::SeqFeatureI::
1 | 1 | 1 | 9µs | 47µs | BEGIN@107 | Bio::SeqFeatureI::
1 | 1 | 1 | 8µs | 66µs | BEGIN@109 | Bio::SeqFeatureI::
1 | 1 | 1 | 8µs | 20µs | BEGIN@98 | Bio::SeqFeatureI::
1 | 1 | 1 | 6µs | 6µs | BEGIN@105 | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | _static_gff_formatter | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | attach_seq | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | display_name | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | entire_seq | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | generate_unique_persistent_id | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | get_SeqFeatures | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | get_all_tags | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | get_tag_values | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | get_tagset_values | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | gff_string | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | has_tag | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | location | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | phase | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | primary_id | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | primary_tag | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | seq | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | seq_id | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | source_tag | Bio::SeqFeatureI::
0 | 0 | 0 | 0s | 0s | spliced_seq | Bio::SeqFeatureI::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # | ||||
2 | # BioPerl module for Bio::SeqFeatureI | ||||
3 | # | ||||
4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
5 | # | ||||
6 | # Cared for by Ewan Birney <birney@ebi.ac.uk> | ||||
7 | # | ||||
8 | # Copyright Ewan Birney | ||||
9 | # | ||||
10 | # You may distribute this module under the same terms as perl itself | ||||
11 | |||||
12 | # POD documentation - main docs before the code | ||||
13 | |||||
14 | =head1 NAME | ||||
15 | |||||
16 | Bio::SeqFeatureI - Abstract interface of a Sequence Feature | ||||
17 | |||||
18 | =head1 SYNOPSIS | ||||
19 | |||||
20 | # get a seqfeature somehow, eg, from a Sequence with Features attached | ||||
21 | |||||
22 | foreach $feat ( $seq->get_SeqFeatures() ) { | ||||
23 | print "Feature from ", $feat->start, "to ", | ||||
24 | $feat->end, " Primary tag ", $feat->primary_tag, | ||||
25 | ", produced by ", $feat->source_tag(), "\n"; | ||||
26 | |||||
27 | if ( $feat->strand == 0 ) { | ||||
28 | print "Feature applicable to either strand\n"; | ||||
29 | } | ||||
30 | else { | ||||
31 | print "Feature on strand ", $feat->strand,"\n"; # -1,1 | ||||
32 | } | ||||
33 | |||||
34 | print "feature location is ",$feat->start, "..", | ||||
35 | $feat->end, " on strand ", $feat->strand, "\n"; | ||||
36 | print "easy utility to print locations in GenBank/EMBL way ", | ||||
37 | $feat->location->to_FTstring(), "\n"; | ||||
38 | |||||
39 | foreach $tag ( $feat->get_all_tags() ) { | ||||
40 | print "Feature has tag ", $tag, " with values, ", | ||||
41 | join(' ',$feat->get_tag_values($tag)), "\n"; | ||||
42 | } | ||||
43 | print "new feature\n" if $feat->has_tag('new'); | ||||
44 | # features can have sub features | ||||
45 | my @subfeat = $feat->get_SeqFeatures(); | ||||
46 | } | ||||
47 | |||||
48 | =head1 DESCRIPTION | ||||
49 | |||||
50 | This interface is the functions one can expect for any Sequence | ||||
51 | Feature, whatever its implementation or whether it is a more complex | ||||
52 | type (eg, a Gene). This object does not actually provide any | ||||
53 | implementation, it just provides the definitions of what methods one can | ||||
54 | call. See Bio::SeqFeature::Generic for a good standard implementation | ||||
55 | of this object | ||||
56 | |||||
57 | =head1 FEEDBACK | ||||
58 | |||||
59 | User feedback is an integral part of the evolution of this and other | ||||
60 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
61 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
62 | |||||
63 | bioperl-l@bioperl.org - General discussion | ||||
64 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
65 | |||||
66 | =head2 Support | ||||
67 | |||||
68 | Please direct usage questions or support issues to the mailing list: | ||||
69 | |||||
70 | I<bioperl-l@bioperl.org> | ||||
71 | |||||
72 | rather than to the module maintainer directly. Many experienced and | ||||
73 | reponsive experts will be able look at the problem and quickly | ||||
74 | address it. Please include a thorough description of the problem | ||||
75 | with code and data examples if at all possible. | ||||
76 | |||||
77 | =head2 Reporting Bugs | ||||
78 | |||||
79 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
80 | the bugs and their resolution. Bug reports can be submitted via the | ||||
81 | web: | ||||
82 | |||||
83 | https://github.com/bioperl/bioperl-live/issues | ||||
84 | |||||
85 | =head1 APPENDIX | ||||
86 | |||||
87 | The rest of the documentation details each of the object | ||||
88 | methods. Internal methods are usually preceded with a _ | ||||
89 | |||||
90 | =cut | ||||
91 | |||||
92 | |||||
93 | # Let the code begin... | ||||
94 | |||||
95 | |||||
96 | package Bio::SeqFeatureI; | ||||
97 | 2 | 27µs | 2 | 75µs | # spent 45µs (14+30) within Bio::SeqFeatureI::BEGIN@97 which was called:
# once (14µs+30µs) by base::import at line 97 # spent 45µs making 1 call to Bio::SeqFeatureI::BEGIN@97
# spent 30µs making 1 call to vars::import |
98 | 2 | 45µs | 2 | 32µs | # spent 20µs (8+12) within Bio::SeqFeatureI::BEGIN@98 which was called:
# once (8µs+12µs) by base::import at line 98 # spent 20µs making 1 call to Bio::SeqFeatureI::BEGIN@98
# spent 12µs making 1 call to strict::import |
99 | # spent 4.91ms (825µs+4.08) within Bio::SeqFeatureI::BEGIN@99 which was called:
# once (825µs+4.08ms) by base::import at line 103 | ||||
100 | 4 | 108µs | eval { require Bio::DB::InMemoryCache }; | ||
101 | if( $@ ) { $HasInMemory = 0 } | ||||
102 | else { $HasInMemory = 1 } | ||||
103 | 1 | 23µs | 1 | 4.91ms | } # spent 4.91ms making 1 call to Bio::SeqFeatureI::BEGIN@99 |
104 | |||||
105 | 2 | 22µs | 1 | 6µs | # spent 6µs within Bio::SeqFeatureI::BEGIN@105 which was called:
# once (6µs+0s) by base::import at line 105 # spent 6µs making 1 call to Bio::SeqFeatureI::BEGIN@105 |
106 | |||||
107 | 2 | 25µs | 2 | 86µs | # spent 47µs (9+38) within Bio::SeqFeatureI::BEGIN@107 which was called:
# once (9µs+38µs) by base::import at line 107 # spent 47µs making 1 call to Bio::SeqFeatureI::BEGIN@107
# spent 38µs making 1 call to Exporter::import |
108 | |||||
109 | 2 | 1.68ms | 2 | 66µs | # spent 66µs (8+58) within Bio::SeqFeatureI::BEGIN@109 which was called:
# once (8µs+58µs) by base::import at line 109 # spent 66µs making 1 call to Bio::SeqFeatureI::BEGIN@109
# spent 58µs making 1 call to base::import, recursion: max depth 1, sum of overlapping time 58µs |
110 | |||||
111 | =head1 Bio::SeqFeatureI specific methods | ||||
112 | |||||
113 | New method interfaces. | ||||
114 | |||||
115 | =cut | ||||
116 | |||||
117 | =head2 get_SeqFeatures | ||||
118 | |||||
119 | Title : get_SeqFeatures | ||||
120 | Usage : @feats = $feat->get_SeqFeatures(); | ||||
121 | Function: Returns an array of sub Sequence Features | ||||
122 | Returns : An array | ||||
123 | Args : none | ||||
124 | |||||
125 | =cut | ||||
126 | |||||
127 | sub get_SeqFeatures{ | ||||
128 | my ($self,@args) = @_; | ||||
129 | |||||
130 | $self->throw_not_implemented(); | ||||
131 | } | ||||
132 | |||||
133 | =head2 display_name | ||||
134 | |||||
135 | Title : display_name | ||||
136 | Usage : $name = $feat->display_name() | ||||
137 | Function: Returns the human-readable name of the feature for displays. | ||||
138 | Returns : a string | ||||
139 | Args : none | ||||
140 | |||||
141 | =cut | ||||
142 | |||||
143 | sub display_name { | ||||
144 | shift->throw_not_implemented(); | ||||
145 | } | ||||
146 | |||||
147 | =head2 primary_tag | ||||
148 | |||||
149 | Title : primary_tag | ||||
150 | Usage : $tag = $feat->primary_tag() | ||||
151 | Function: Returns the primary tag for a feature, | ||||
152 | eg 'exon' | ||||
153 | Returns : a string | ||||
154 | Args : none | ||||
155 | |||||
156 | |||||
157 | =cut | ||||
158 | |||||
159 | sub primary_tag{ | ||||
160 | my ($self,@args) = @_; | ||||
161 | |||||
162 | $self->throw_not_implemented(); | ||||
163 | |||||
164 | } | ||||
165 | |||||
166 | =head2 source_tag | ||||
167 | |||||
168 | Title : source_tag | ||||
169 | Usage : $tag = $feat->source_tag() | ||||
170 | Function: Returns the source tag for a feature, | ||||
171 | eg, 'genscan' | ||||
172 | Returns : a string | ||||
173 | Args : none | ||||
174 | |||||
175 | |||||
176 | =cut | ||||
177 | |||||
178 | sub source_tag{ | ||||
179 | my ($self,@args) = @_; | ||||
180 | |||||
181 | $self->throw_not_implemented(); | ||||
182 | } | ||||
183 | |||||
184 | =head2 has_tag | ||||
185 | |||||
186 | Title : has_tag | ||||
187 | Usage : $tag_exists = $self->has_tag('some_tag') | ||||
188 | Function: | ||||
189 | Returns : TRUE if the specified tag exists, and FALSE otherwise | ||||
190 | Args : | ||||
191 | |||||
192 | =cut | ||||
193 | |||||
194 | sub has_tag{ | ||||
195 | my ($self,@args) = @_; | ||||
196 | |||||
197 | $self->throw_not_implemented(); | ||||
198 | |||||
199 | } | ||||
200 | |||||
201 | =head2 get_tag_values | ||||
202 | |||||
203 | Title : get_tag_values | ||||
204 | Usage : @values = $self->get_tag_values('some_tag') | ||||
205 | Function: | ||||
206 | Returns : An array comprising the values of the specified tag. | ||||
207 | Args : a string | ||||
208 | |||||
209 | throws an exception if there is no such tag | ||||
210 | |||||
211 | =cut | ||||
212 | |||||
213 | sub get_tag_values { | ||||
214 | shift->throw_not_implemented(); | ||||
215 | } | ||||
216 | |||||
217 | =head2 get_tagset_values | ||||
218 | |||||
219 | Title : get_tagset_values | ||||
220 | Usage : @values = $self->get_tagset_values(qw(label transcript_id product)) | ||||
221 | Function: | ||||
222 | Returns : An array comprising the values of the specified tags, in order of tags | ||||
223 | Args : An array of strings | ||||
224 | |||||
225 | does NOT throw an exception if none of the tags are not present | ||||
226 | |||||
227 | this method is useful for getting a human-readable label for a | ||||
228 | SeqFeatureI; not all tags can be assumed to be present, so a list of | ||||
229 | possible tags in preferential order is provided | ||||
230 | |||||
231 | =cut | ||||
232 | |||||
233 | # interface + abstract method | ||||
234 | sub get_tagset_values { | ||||
235 | my ($self, @args) = @_; | ||||
236 | my @vals = (); | ||||
237 | foreach my $arg (@args) { | ||||
238 | if ($self->has_tag($arg)) { | ||||
239 | push(@vals, $self->get_tag_values($arg)); | ||||
240 | } | ||||
241 | } | ||||
242 | return @vals; | ||||
243 | } | ||||
244 | |||||
245 | =head2 get_all_tags | ||||
246 | |||||
247 | Title : get_all_tags | ||||
248 | Usage : @tags = $feat->get_all_tags() | ||||
249 | Function: gives all tags for this feature | ||||
250 | Returns : an array of strings | ||||
251 | Args : none | ||||
252 | |||||
253 | |||||
254 | =cut | ||||
255 | |||||
256 | sub get_all_tags{ | ||||
257 | shift->throw_not_implemented(); | ||||
258 | } | ||||
259 | |||||
260 | =head2 attach_seq | ||||
261 | |||||
262 | Title : attach_seq | ||||
263 | Usage : $sf->attach_seq($seq) | ||||
264 | Function: Attaches a Bio::Seq object to this feature. This | ||||
265 | Bio::Seq object is for the *entire* sequence: ie | ||||
266 | from 1 to 10000 | ||||
267 | |||||
268 | Note that it is not guaranteed that if you obtain a feature from | ||||
269 | an object in bioperl, it will have a sequence attached. Also, | ||||
270 | implementors of this interface can choose to provide an empty | ||||
271 | implementation of this method. I.e., there is also no guarantee | ||||
272 | that if you do attach a sequence, seq() or entire_seq() will not | ||||
273 | return undef. | ||||
274 | |||||
275 | The reason that this method is here on the interface is to enable | ||||
276 | you to call it on every SeqFeatureI compliant object, and | ||||
277 | that it will be implemented in a useful way and set to a useful | ||||
278 | value for the great majority of use cases. Implementors who choose | ||||
279 | to ignore the call are encouraged to specifically state this in | ||||
280 | their documentation. | ||||
281 | |||||
282 | Example : | ||||
283 | Returns : TRUE on success | ||||
284 | Args : a Bio::PrimarySeqI compliant object | ||||
285 | |||||
286 | |||||
287 | =cut | ||||
288 | |||||
289 | sub attach_seq { | ||||
290 | shift->throw_not_implemented(); | ||||
291 | } | ||||
292 | |||||
293 | =head2 seq | ||||
294 | |||||
295 | Title : seq | ||||
296 | Usage : $tseq = $sf->seq() | ||||
297 | Function: returns the truncated sequence (if there is a sequence attached) | ||||
298 | for this feature | ||||
299 | Example : | ||||
300 | Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence | ||||
301 | bounded by start & end, or undef if there is no sequence attached | ||||
302 | Args : none | ||||
303 | |||||
304 | |||||
305 | =cut | ||||
306 | |||||
307 | sub seq { | ||||
308 | shift->throw_not_implemented(); | ||||
309 | } | ||||
310 | |||||
311 | =head2 entire_seq | ||||
312 | |||||
313 | Title : entire_seq | ||||
314 | Usage : $whole_seq = $sf->entire_seq() | ||||
315 | Function: gives the entire sequence that this seqfeature is attached to | ||||
316 | Example : | ||||
317 | Returns : a Bio::PrimarySeqI compliant object, or undef if there is no | ||||
318 | sequence attached | ||||
319 | Args : none | ||||
320 | |||||
321 | |||||
322 | =cut | ||||
323 | |||||
324 | sub entire_seq { | ||||
325 | shift->throw_not_implemented(); | ||||
326 | } | ||||
327 | |||||
328 | |||||
329 | =head2 seq_id | ||||
330 | |||||
331 | Title : seq_id | ||||
332 | Usage : $obj->seq_id($newval) | ||||
333 | Function: There are many cases when you make a feature that you | ||||
334 | do know the sequence name, but do not know its actual | ||||
335 | sequence. This is an attribute such that you can store | ||||
336 | the ID (e.g., display_id) of the sequence. | ||||
337 | |||||
338 | This attribute should *not* be used in GFF dumping, as | ||||
339 | that should come from the collection in which the seq | ||||
340 | feature was found. | ||||
341 | Returns : value of seq_id | ||||
342 | Args : newvalue (optional) | ||||
343 | |||||
344 | |||||
345 | =cut | ||||
346 | |||||
347 | sub seq_id { | ||||
348 | shift->throw_not_implemented(); | ||||
349 | } | ||||
350 | |||||
351 | =head2 gff_string | ||||
352 | |||||
353 | Title : gff_string | ||||
354 | Usage : $str = $feat->gff_string; | ||||
355 | $str = $feat->gff_string($gff_formatter); | ||||
356 | Function: Provides the feature information in GFF format. | ||||
357 | |||||
358 | The implementation provided here returns GFF2 by default. If you | ||||
359 | want a different version, supply an object implementing a method | ||||
360 | gff_string() accepting a SeqFeatureI object as argument. E.g., to | ||||
361 | obtain GFF1 format, do the following: | ||||
362 | |||||
363 | my $gffio = Bio::Tools::GFF->new(-gff_version => 1); | ||||
364 | $gff1str = $feat->gff_string($gff1io); | ||||
365 | |||||
366 | Returns : A string | ||||
367 | Args : Optionally, an object implementing gff_string(). | ||||
368 | |||||
369 | |||||
370 | =cut | ||||
371 | |||||
372 | sub gff_string{ | ||||
373 | my ($self,$formatter) = @_; | ||||
374 | |||||
375 | $formatter = $self->_static_gff_formatter unless $formatter; | ||||
376 | return $formatter->gff_string($self); | ||||
377 | } | ||||
378 | |||||
379 | 1 | 700ns | my $static_gff_formatter = undef; | ||
380 | |||||
381 | =head2 _static_gff_formatter | ||||
382 | |||||
383 | Title : _static_gff_formatter | ||||
384 | Usage : | ||||
385 | Function: | ||||
386 | Example : | ||||
387 | Returns : | ||||
388 | Args : | ||||
389 | |||||
390 | |||||
391 | =cut | ||||
392 | |||||
393 | sub _static_gff_formatter{ | ||||
394 | my ($self,@args) = @_; | ||||
395 | require Bio::Tools::GFF; # on the fly inclusion -- is this better? | ||||
396 | if( !defined $static_gff_formatter ) { | ||||
397 | $static_gff_formatter = Bio::Tools::GFF->new('-gff_version' => 2); | ||||
398 | } | ||||
399 | return $static_gff_formatter; | ||||
400 | } | ||||
401 | |||||
402 | |||||
403 | =head1 Decorating methods | ||||
404 | |||||
405 | These methods have an implementation provided by Bio::SeqFeatureI, | ||||
406 | but can be validly overwritten by subclasses | ||||
407 | |||||
408 | =head2 spliced_seq | ||||
409 | |||||
410 | Title : spliced_seq | ||||
411 | |||||
412 | Usage : $seq = $feature->spliced_seq() | ||||
413 | $seq = $feature_with_remote_locations->spliced_seq($db_for_seqs) | ||||
414 | |||||
415 | Function: Provides a sequence of the feature which is the most | ||||
416 | semantically "relevant" feature for this sequence. A default | ||||
417 | implementation is provided which for simple cases returns just | ||||
418 | the sequence, but for split cases, loops over the split location | ||||
419 | to return the sequence. In the case of split locations with | ||||
420 | remote locations, eg | ||||
421 | |||||
422 | join(AB000123:5567-5589,80..1144) | ||||
423 | |||||
424 | in the case when a database object is passed in, it will attempt | ||||
425 | to retrieve the sequence from the database object, and "Do the right thing", | ||||
426 | however if no database object is provided, it will generate the correct | ||||
427 | number of N's (DNA) or X's (protein, though this is unlikely). | ||||
428 | |||||
429 | This function is deliberately "magical" attempting to second guess | ||||
430 | what a user wants as "the" sequence for this feature. | ||||
431 | |||||
432 | Implementing classes are free to override this method with their | ||||
433 | own magic if they have a better idea what the user wants. | ||||
434 | |||||
435 | Args : [optional] | ||||
436 | -db A L<Bio::DB::RandomAccessI> compliant object if | ||||
437 | one needs to retrieve remote seqs. | ||||
438 | -nosort boolean if the locations should not be sorted | ||||
439 | by start location. This may occur, for instance, | ||||
440 | in a circular sequence where a gene span starts | ||||
441 | before the end of the sequence and ends after the | ||||
442 | sequence start. Example : join(15685..16260,1..207) | ||||
443 | (default = if sequence is_circular(), 1, otherwise 0) | ||||
444 | -phase truncates the returned sequence based on the | ||||
445 | intron phase (0,1,2). | ||||
446 | |||||
447 | Returns : A L<Bio::PrimarySeqI> object | ||||
448 | |||||
449 | =cut | ||||
450 | |||||
451 | sub spliced_seq { | ||||
452 | my $self = shift; | ||||
453 | my @args = @_; | ||||
454 | my ($db, $nosort, $phase) = | ||||
455 | $self->_rearrange([qw(DB NOSORT PHASE)], @args); | ||||
456 | |||||
457 | # set no_sort based on the parent sequence status | ||||
458 | if ($self->entire_seq->is_circular) { | ||||
459 | $nosort = 1; | ||||
460 | } | ||||
461 | |||||
462 | # (added 7/7/06 to allow use old API (with warnings) | ||||
463 | my $old_api = (!(grep {$_ =~ /(?:nosort|db|phase)/} @args)) ? 1 : 0; | ||||
464 | if (@args && $old_api) { | ||||
465 | $self->warn( q(API has changed; please use '-db' or '-nosort' ) | ||||
466 | . qq(for args. See POD for more details.)); | ||||
467 | $db = shift @args if @args; | ||||
468 | $nosort = shift @args if @args; | ||||
469 | $phase = shift @args if @args; | ||||
470 | }; | ||||
471 | |||||
472 | if (defined($phase) && ($phase < 0 || $phase > 2)) { | ||||
473 | $self->warn("Phase must be 0,1, or 2. Setting phase to 0..."); | ||||
474 | $phase = 0; | ||||
475 | } | ||||
476 | |||||
477 | if ( $db && ref($db) && ! $db->isa('Bio::DB::RandomAccessI') ) { | ||||
478 | $self->warn( "Must pass in a valid Bio::DB::RandomAccessI object" | ||||
479 | . " for access to remote locations for spliced_seq"); | ||||
480 | $db = undef; | ||||
481 | } | ||||
482 | elsif ( defined $db && $HasInMemory && $db->isa('Bio::DB::InMemoryCache') ) { | ||||
483 | $db = Bio::DB::InMemoryCache->new(-seqdb => $db); | ||||
484 | } | ||||
485 | |||||
486 | if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { | ||||
487 | if ($phase) { | ||||
488 | $self->debug("Subseq start: ",$phase+1,"\tend: ",$self->end,"\n"); | ||||
489 | my $seqstr = substr($self->seq->seq, $phase); | ||||
490 | my $out = Bio::Seq->new( -id => $self->entire_seq->display_id | ||||
491 | . "_spliced_feat", | ||||
492 | -seq => $seqstr); | ||||
493 | return $out; | ||||
494 | } | ||||
495 | else { | ||||
496 | return $self->seq(); # nice and easy! | ||||
497 | } | ||||
498 | } | ||||
499 | |||||
500 | # redundant test, but the above ISA is probably not ideal. | ||||
501 | if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { | ||||
502 | $self->throw("not atomic, not split, yikes, in trouble!"); | ||||
503 | } | ||||
504 | |||||
505 | my $seqstr = ''; | ||||
506 | my $seqid = $self->entire_seq->display_id; | ||||
507 | # This is to deal with reverse strand features | ||||
508 | # so we are really sorting features 5' -> 3' on their strand | ||||
509 | # i.e. rev strand features will be sorted largest to smallest | ||||
510 | # as this how revcom CDSes seem to be annotated in genbank. | ||||
511 | # Might need to eventually allow this to be programable? | ||||
512 | # (can I mention how much fun this is NOT! --jason) | ||||
513 | |||||
514 | my ($mixed,$mixedloc, $fstrand) = (0); | ||||
515 | |||||
516 | if ( $self->isa('Bio::Das::SegmentI') and not $self->absolute ) { | ||||
517 | $self->warn( "Calling spliced_seq with a Bio::Das::SegmentI which " | ||||
518 | . "does have absolute set to 1 -- be warned you may not " | ||||
519 | . "be getting things on the correct strand"); | ||||
520 | } | ||||
521 | |||||
522 | my @locset = $self->location->each_Location; | ||||
523 | my @locs; | ||||
524 | if ( not $nosort ) { | ||||
525 | @locs = map { $_->[0] } | ||||
526 | # sort so that most negative is first basically to order | ||||
527 | # the features on the opposite strand 5'->3' on their strand | ||||
528 | # rather than they way most are input which is on the fwd strand | ||||
529 | |||||
530 | sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation | ||||
531 | map { | ||||
532 | $fstrand = $_->strand unless defined $fstrand; | ||||
533 | $mixed = 1 if defined $_->strand && $fstrand != $_->strand; | ||||
534 | |||||
535 | if( defined $_->seq_id ) { | ||||
536 | $mixedloc = 1 if( $_->seq_id ne $seqid ); | ||||
537 | } | ||||
538 | [ $_, $_->start * ($_->strand || 1) ]; | ||||
539 | } @locset; | ||||
540 | |||||
541 | if ( $mixed ) { | ||||
542 | $self->warn( "Mixed strand locations, spliced seq using the " | ||||
543 | . "input order rather than trying to sort"); | ||||
544 | @locs = @locset; | ||||
545 | } | ||||
546 | } | ||||
547 | else { | ||||
548 | # use the original order instead of trying to sort | ||||
549 | @locs = @locset; | ||||
550 | $fstrand = $locs[0]->strand; | ||||
551 | } | ||||
552 | |||||
553 | |||||
554 | my $last_id = undef; | ||||
555 | my $called_seq = undef; | ||||
556 | # This will be left as undefined if 1) db is remote or 2)seq_id is undefined. | ||||
557 | # In that case, old code is used to make exon sequence | ||||
558 | my $called_seq_seq = undef; | ||||
559 | my $called_seq_len = undef; | ||||
560 | |||||
561 | foreach my $loc ( @locs ) { | ||||
562 | if ( not $loc->isa("Bio::Location::Atomic") ) { | ||||
563 | $self->throw("Can only deal with one level deep locations"); | ||||
564 | } | ||||
565 | |||||
566 | if ( $fstrand != $loc->strand ) { | ||||
567 | $self->warn("feature strand is different from location strand!"); | ||||
568 | } | ||||
569 | |||||
570 | my $loc_seq_id; | ||||
571 | if ( defined $loc->seq_id ) { | ||||
572 | $loc_seq_id = $loc->seq_id; | ||||
573 | |||||
574 | # deal with remote sequences | ||||
575 | if ($loc_seq_id ne $seqid ) { | ||||
576 | # might be too big to download whole sequence | ||||
577 | $called_seq_seq = undef; | ||||
578 | |||||
579 | if ( defined $db ) { | ||||
580 | my $sid = $loc_seq_id; | ||||
581 | $sid =~ s/\.\d+$//g; | ||||
582 | eval { | ||||
583 | $called_seq = $db->get_Seq_by_acc($sid); | ||||
584 | }; | ||||
585 | if( $@ ) { | ||||
586 | $self->warn( "In attempting to join a remote location, sequence $sid " | ||||
587 | . "was not in database. Will provide padding N's. Full exception \n\n$@"); | ||||
588 | $called_seq = undef; | ||||
589 | } | ||||
590 | } | ||||
591 | else { | ||||
592 | $self->warn( "cannot get remote location for ".$loc_seq_id ." without a valid " | ||||
593 | . "Bio::DB::RandomAccessI database handle (like Bio::DB::GenBank)"); | ||||
594 | $called_seq = undef; | ||||
595 | } | ||||
596 | if ( !defined $called_seq ) { | ||||
597 | $seqstr .= 'N' x $loc->length; | ||||
598 | next; | ||||
599 | } | ||||
600 | } | ||||
601 | # have local sequence available | ||||
602 | else { | ||||
603 | # don't have to pull out source sequence again if it's local unless | ||||
604 | # it's the first exon or different from previous exon | ||||
605 | unless (defined(($last_id) && $last_id eq $loc_seq_id )){ | ||||
606 | $called_seq = $self->entire_seq; | ||||
607 | $called_seq_seq = $called_seq->seq(); # this is slow | ||||
608 | } | ||||
609 | } | ||||
610 | } | ||||
611 | #undefined $loc->seq->id | ||||
612 | else { | ||||
613 | $called_seq = $self->entire_seq; | ||||
614 | $called_seq_seq = undef; | ||||
615 | } | ||||
616 | |||||
617 | my ($start,$end) = ($loc->start,$loc->end); | ||||
618 | |||||
619 | # does the called sequence make sense? Bug 1780 | ||||
620 | my $called_seq_len; | ||||
621 | |||||
622 | # can avoid a seq() call on called_seq | ||||
623 | if (defined($called_seq_seq)) { | ||||
624 | $called_seq_len = length($called_seq_seq); | ||||
625 | } | ||||
626 | # can't avoid a seq() call on called_seq | ||||
627 | else { | ||||
628 | $called_seq_len = $called_seq->length # this is slow | ||||
629 | } | ||||
630 | |||||
631 | if ($called_seq_len < $loc->end) { | ||||
632 | my $accession = $called_seq->accession; | ||||
633 | my $orig_id = $self->seq_id; # originating sequence | ||||
634 | my ($locus) = $self->get_tagset_values("locus_tag"); | ||||
635 | $self->throw( "Location end ($end) exceeds length ($called_seq_len) of " | ||||
636 | . "called sequence $accession.\nCheck sequence version used in " | ||||
637 | . "$locus locus-tagged SeqFeature in $orig_id."); | ||||
638 | } | ||||
639 | |||||
640 | if ( $self->isa('Bio::Das::SegmentI') ) { | ||||
641 | # $called_seq is Bio::DB::GFF::RelSegment, as well as its subseq(); | ||||
642 | # Bio::DB::GFF::RelSegment::seq() returns a Bio::PrimarySeq, and using seq() | ||||
643 | # in turn returns a string. Confused? | ||||
644 | $seqstr .= $called_seq->subseq($start,$end)->seq()->seq(); # this is slow | ||||
645 | } | ||||
646 | else { | ||||
647 | my $exon_seq; | ||||
648 | if (defined ($called_seq_seq)){ | ||||
649 | $exon_seq = substr($called_seq_seq, $start-1, $end-$start+1); # this is quick | ||||
650 | } | ||||
651 | else { | ||||
652 | $exon_seq = $called_seq->subseq($loc->start,$loc->end); # this is slow | ||||
653 | } | ||||
654 | |||||
655 | # If guide_strand is defined, assemble the sequence first and revcom later if needed, | ||||
656 | # if its not defined, apply revcom immediately to proper locations | ||||
657 | if (defined $self->location->guide_strand) { | ||||
658 | $seqstr .= $exon_seq; | ||||
659 | } | ||||
660 | else { | ||||
661 | my $strand = defined ($loc->strand) ? ($loc->strand) : 0; | ||||
662 | |||||
663 | # revcomp $exon_seq | ||||
664 | if ($strand == -1) { | ||||
665 | $exon_seq = reverse($exon_seq); | ||||
666 | $exon_seq =~ tr/ABCDGHKMNRSTUVWXYabcdghkmnrstuvwxy/TVGHCDMKNYSAABWXRtvghcdmknysaabwxr/; | ||||
667 | $seqstr .= $exon_seq; | ||||
668 | } | ||||
669 | else { | ||||
670 | $seqstr .= $exon_seq; | ||||
671 | } | ||||
672 | } | ||||
673 | } | ||||
674 | |||||
675 | $last_id = $loc_seq_id if (defined($loc_seq_id)); | ||||
676 | } #next $loc | ||||
677 | |||||
678 | # Use revcom only after the whole sequence has been assembled | ||||
679 | my $guide_strand = defined ($self->location->guide_strand) ? ($self->location->guide_strand) : 0; | ||||
680 | if ($guide_strand == -1) { | ||||
681 | my $seqstr_obj = Bio::Seq->new(-seq => $seqstr); | ||||
682 | $seqstr = $seqstr_obj->revcom->seq; | ||||
683 | } | ||||
684 | |||||
685 | if (defined($phase)) { | ||||
686 | $seqstr = substr($seqstr, $phase); | ||||
687 | } | ||||
688 | |||||
689 | my $out = Bio::Seq->new( -id => $self->entire_seq->display_id | ||||
690 | . "_spliced_feat", | ||||
691 | -seq => $seqstr); | ||||
692 | |||||
693 | return $out; | ||||
694 | } | ||||
695 | |||||
696 | =head2 location | ||||
697 | |||||
698 | Title : location | ||||
699 | Usage : my $location = $seqfeature->location() | ||||
700 | Function: returns a location object suitable for identifying location | ||||
701 | of feature on sequence or parent feature | ||||
702 | Returns : Bio::LocationI object | ||||
703 | Args : none | ||||
704 | |||||
705 | |||||
706 | =cut | ||||
707 | |||||
708 | sub location { | ||||
709 | my ($self) = @_; | ||||
710 | |||||
711 | $self->throw_not_implemented(); | ||||
712 | } | ||||
713 | |||||
714 | |||||
715 | =head2 primary_id | ||||
716 | |||||
717 | Title : primary_id | ||||
718 | Usage : $obj->primary_id($newval) | ||||
719 | Function: | ||||
720 | Example : | ||||
721 | Returns : value of primary_id (a scalar) | ||||
722 | Args : on set, new value (a scalar or undef, optional) | ||||
723 | |||||
724 | Primary ID is a synonym for the tag 'ID' | ||||
725 | |||||
726 | =cut | ||||
727 | |||||
728 | sub primary_id{ | ||||
729 | my $self = shift; | ||||
730 | # note from cjm@fruitfly.org: | ||||
731 | # I have commented out the following 2 lines: | ||||
732 | |||||
733 | #return $self->{'primary_id'} = shift if @_; | ||||
734 | #return $self->{'primary_id'}; | ||||
735 | |||||
736 | #... and replaced it with the following; see | ||||
737 | # http://bioperl.org/pipermail/bioperl-l/2003-December/014150.html | ||||
738 | # for the discussion that lead to this change | ||||
739 | |||||
740 | if (@_) { | ||||
741 | if ($self->has_tag('ID')) { | ||||
742 | $self->remove_tag('ID'); | ||||
743 | } | ||||
744 | $self->add_tag_value('ID', shift); | ||||
745 | } | ||||
746 | my ($id) = $self->get_tagset_values('ID'); | ||||
747 | return $id; | ||||
748 | } | ||||
749 | |||||
750 | sub generate_unique_persistent_id { | ||||
751 | # DEPRECATED - us IDHandler | ||||
752 | my $self = shift; | ||||
753 | require Bio::SeqFeature::Tools::IDHandler; | ||||
754 | Bio::SeqFeature::Tools::IDHandler->new->generate_unique_persistent_id($self); | ||||
755 | } | ||||
756 | |||||
757 | |||||
758 | =head2 phase | ||||
759 | |||||
760 | Title : phase | ||||
761 | Usage : $obj->phase($newval) | ||||
762 | Function: get/set this feature's phase. | ||||
763 | Example : | ||||
764 | Returns : undef if no phase is set, | ||||
765 | otherwise 0, 1, or 2 (the only valid values for phase) | ||||
766 | Args : on set, the new value | ||||
767 | |||||
768 | Most features do not have or need a defined phase. | ||||
769 | |||||
770 | For features representing a CDS, the phase indicates where the feature | ||||
771 | begins with reference to the reading frame. The phase is one of the | ||||
772 | integers 0, 1, or 2, indicating the number of bases that should be | ||||
773 | removed from the beginning of this feature to reach the first base of | ||||
774 | the next codon. In other words, a phase of "0" indicates that the next | ||||
775 | codon begins at the first base of the region described by the current | ||||
776 | line, a phase of "1" indicates that the next codon begins at the | ||||
777 | second base of this region, and a phase of "2" indicates that the | ||||
778 | codon begins at the third base of this region. This is NOT to be | ||||
779 | confused with the frame, which is simply start modulo 3. | ||||
780 | |||||
781 | For forward strand features, phase is counted from the start | ||||
782 | field. For reverse strand features, phase is counted from the end | ||||
783 | field. | ||||
784 | |||||
785 | =cut | ||||
786 | |||||
787 | sub phase { | ||||
788 | my $self = shift; | ||||
789 | if( @_ ) { | ||||
790 | $self->remove_tag('phase') if $self->has_tag('phase'); | ||||
791 | my $newphase = shift; | ||||
792 | $self->throw("illegal phase value '$newphase', phase must be either undef, 0, 1, or 2") | ||||
793 | unless !defined $newphase || $newphase == 0 || $newphase == 1 || $newphase == 2; | ||||
794 | $self->add_tag_value('phase', $newphase ); | ||||
795 | return $newphase; | ||||
796 | } | ||||
797 | |||||
798 | return $self->has_tag('phase') ? ($self->get_tag_values('phase'))[0] : undef; | ||||
799 | } | ||||
800 | |||||
801 | |||||
802 | =head1 Bio::RangeI methods | ||||
803 | |||||
804 | These methods are inherited from RangeI and can be used | ||||
805 | directly from a SeqFeatureI interface. Remember that a | ||||
806 | SeqFeature is-a RangeI, and so wherever you see RangeI you | ||||
807 | can use a feature ($r in the below documentation). | ||||
808 | |||||
809 | =cut | ||||
810 | |||||
811 | =head2 start() | ||||
812 | |||||
813 | See L<Bio::RangeI> | ||||
814 | |||||
815 | =head2 end() | ||||
816 | |||||
817 | See L<Bio::RangeI> | ||||
818 | |||||
819 | =head2 strand() | ||||
820 | |||||
821 | See L<Bio::RangeI> | ||||
822 | |||||
823 | =head2 overlaps() | ||||
824 | |||||
825 | See L<Bio::RangeI> | ||||
826 | |||||
827 | =head2 contains() | ||||
828 | |||||
829 | See L<Bio::RangeI> | ||||
830 | |||||
831 | =head2 equals() | ||||
832 | |||||
833 | See L<Bio::RangeI> | ||||
834 | |||||
835 | =head2 intersection() | ||||
836 | |||||
837 | See L<Bio::RangeI> | ||||
838 | |||||
839 | =head2 union() | ||||
840 | |||||
841 | See L<Bio::RangeI> | ||||
842 | |||||
843 | =cut | ||||
844 | |||||
845 | 1 | 4µs | 1; |