Filename | /Users/ap13/perl5/lib/perl5/Bio/Factory/FTLocationFactory.pm |
Statements | Executed 14 statements in 1.14ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 20µs | 60µs | BEGIN@92 | Bio::Factory::FTLocationFactory::
1 | 1 | 1 | 19µs | 37µs | BEGIN@93 | Bio::Factory::FTLocationFactory::
1 | 1 | 1 | 9µs | 481µs | BEGIN@102 | Bio::Factory::FTLocationFactory::
1 | 1 | 1 | 9µs | 15µs | BEGIN@104 | Bio::Factory::FTLocationFactory::
1 | 1 | 1 | 7µs | 7µs | BEGIN@97 | Bio::Factory::FTLocationFactory::
1 | 1 | 1 | 6µs | 6µs | BEGIN@98 | Bio::Factory::FTLocationFactory::
1 | 1 | 1 | 6µs | 6µs | CORE:qr (opcode) | Bio::Factory::FTLocationFactory::
1 | 1 | 1 | 5µs | 5µs | BEGIN@99 | Bio::Factory::FTLocationFactory::
0 | 0 | 0 | 0s | 0s | _parse_location | Bio::Factory::FTLocationFactory::
0 | 0 | 0 | 0s | 0s | from_string | Bio::Factory::FTLocationFactory::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # | ||||
2 | # BioPerl module for Bio::Factory::FTLocationFactory | ||||
3 | # | ||||
4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
5 | # | ||||
6 | # Cared for by Hilmar Lapp <hlapp at gmx.net> | ||||
7 | # | ||||
8 | # Copyright Hilmar Lapp | ||||
9 | # | ||||
10 | # You may distribute this module under the same terms as perl itself | ||||
11 | # | ||||
12 | # (c) Hilmar Lapp, hlapp at gnf.org, 2002. | ||||
13 | # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. | ||||
14 | # | ||||
15 | # You may distribute this module under the same terms as perl itself. | ||||
16 | # Refer to the Perl Artistic License (see the license accompanying this | ||||
17 | # software package, or see http://www.perl.com/language/misc/Artistic.html) | ||||
18 | # for the terms under which you may use, modify, and redistribute this module. | ||||
19 | # | ||||
20 | # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED | ||||
21 | # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | ||||
22 | # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||||
23 | # | ||||
24 | |||||
25 | # POD documentation - main docs before the code | ||||
26 | |||||
27 | =head1 NAME | ||||
28 | |||||
29 | Bio::Factory::FTLocationFactory - A FeatureTable Location Parser | ||||
30 | |||||
31 | =head1 SYNOPSIS | ||||
32 | |||||
33 | # parse a string into a location object | ||||
34 | $loc = Bio::Factory::FTLocationFactory->from_string("join(100..200, | ||||
35 | 400..500"); | ||||
36 | |||||
37 | =head1 DESCRIPTION | ||||
38 | |||||
39 | Implementation of string-encoded location parsing for the Genbank feature | ||||
40 | table encoding of locations. | ||||
41 | |||||
42 | =head1 FEEDBACK | ||||
43 | |||||
44 | =head2 Mailing Lists | ||||
45 | |||||
46 | User feedback is an integral part of the evolution of this and other | ||||
47 | Bioperl modules. Send your comments and suggestions preferably to | ||||
48 | the Bioperl mailing list. Your participation is much appreciated. | ||||
49 | |||||
50 | bioperl-l@bioperl.org - General discussion | ||||
51 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
52 | |||||
53 | =head2 Support | ||||
54 | |||||
55 | Please direct usage questions or support issues to the mailing list: | ||||
56 | |||||
57 | I<bioperl-l@bioperl.org> | ||||
58 | |||||
59 | rather than to the module maintainer directly. Many experienced and | ||||
60 | reponsive experts will be able look at the problem and quickly | ||||
61 | address it. Please include a thorough description of the problem | ||||
62 | with code and data examples if at all possible. | ||||
63 | |||||
64 | =head2 Reporting Bugs | ||||
65 | |||||
66 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
67 | of the bugs and their resolution. Bug reports can be submitted via the | ||||
68 | web: | ||||
69 | |||||
70 | https://github.com/bioperl/bioperl-live/issues | ||||
71 | |||||
72 | =head1 AUTHOR - Hilmar Lapp | ||||
73 | |||||
74 | Email hlapp at gmx.net | ||||
75 | |||||
76 | =head1 CONTRIBUTORS | ||||
77 | |||||
78 | Jason Stajich, jason-at-bioperl-dot-org | ||||
79 | Chris Fields, cjfields-at-uiuc-dot-edu | ||||
80 | |||||
81 | =head1 APPENDIX | ||||
82 | |||||
83 | The rest of the documentation details each of the object methods. | ||||
84 | Internal methods are usually preceded with a _ | ||||
85 | |||||
86 | =cut | ||||
87 | |||||
88 | |||||
89 | # Let the code begin... | ||||
90 | |||||
91 | package Bio::Factory::FTLocationFactory; | ||||
92 | 2 | 38µs | 2 | 101µs | # spent 60µs (20+41) within Bio::Factory::FTLocationFactory::BEGIN@92 which was called:
# once (20µs+41µs) by Bio::SeqIO::BEGIN@330 at line 92 # spent 60µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@92
# spent 41µs making 1 call to vars::import |
93 | 2 | 40µs | 2 | 54µs | # spent 37µs (19+18) within Bio::Factory::FTLocationFactory::BEGIN@93 which was called:
# once (19µs+18µs) by Bio::SeqIO::BEGIN@330 at line 93 # spent 37µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@93
# spent 18µs making 1 call to strict::import |
94 | |||||
95 | # Object preamble - inherits from Bio::Root::Root | ||||
96 | |||||
97 | 2 | 28µs | 1 | 7µs | # spent 7µs within Bio::Factory::FTLocationFactory::BEGIN@97 which was called:
# once (7µs+0s) by Bio::SeqIO::BEGIN@330 at line 97 # spent 7µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@97 |
98 | 2 | 22µs | 1 | 6µs | # spent 6µs within Bio::Factory::FTLocationFactory::BEGIN@98 which was called:
# once (6µs+0s) by Bio::SeqIO::BEGIN@330 at line 98 # spent 6µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@98 |
99 | 2 | 23µs | 1 | 5µs | # spent 5µs within Bio::Factory::FTLocationFactory::BEGIN@99 which was called:
# once (5µs+0s) by Bio::SeqIO::BEGIN@330 at line 99 # spent 5µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@99 |
100 | |||||
101 | |||||
102 | 2 | 84µs | 2 | 953µs | # spent 481µs (9+472) within Bio::Factory::FTLocationFactory::BEGIN@102 which was called:
# once (9µs+472µs) by Bio::SeqIO::BEGIN@330 at line 102 # spent 481µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@102
# spent 472µs making 1 call to base::import |
103 | |||||
104 | # spent 15µs (9+6) within Bio::Factory::FTLocationFactory::BEGIN@104 which was called:
# once (9µs+6µs) by Bio::SeqIO::BEGIN@330 at line 115 | ||||
105 | # the below is an optimized regex obj. from J. Freidl's Mastering Reg Exp. | ||||
106 | 1 | 15µs | 1 | 6µs | $LOCREG = qr{ # spent 6µs making 1 call to Bio::Factory::FTLocationFactory::CORE:qr |
107 | (?> | ||||
108 | [^()]+ | ||||
109 | | | ||||
110 | \( | ||||
111 | (??{$LOCREG}) | ||||
112 | \) | ||||
113 | )* | ||||
114 | }x; | ||||
115 | 1 | 886µs | 1 | 15µs | } # spent 15µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@104 |
116 | |||||
117 | =head2 new | ||||
118 | |||||
119 | Title : new | ||||
120 | Usage : my $obj = Bio::Factory::FTLocationFactory->new(); | ||||
121 | Function: Builds a new Bio::Factory::FTLocationFactory object | ||||
122 | Returns : an instance of Bio::Factory::FTLocationFactory | ||||
123 | Args : | ||||
124 | |||||
125 | =cut | ||||
126 | |||||
127 | =head2 from_string | ||||
128 | |||||
129 | Title : from_string | ||||
130 | Usage : $loc = $locfactory->from_string("100..200"); | ||||
131 | Function: Parses the given string and returns a Bio::LocationI implementing | ||||
132 | object representing the location encoded by the string. | ||||
133 | |||||
134 | This implementation parses the Genbank feature table | ||||
135 | encoding of locations. | ||||
136 | Example : | ||||
137 | Returns : A Bio::LocationI implementing object. | ||||
138 | Args : A string. | ||||
139 | |||||
140 | =cut | ||||
141 | |||||
142 | sub from_string { | ||||
143 | my ($self,$locstr,$op) = @_; | ||||
144 | my $loc; | ||||
145 | |||||
146 | #$self->debug("$locstr\n"); | ||||
147 | |||||
148 | # $op for operator (error handling) | ||||
149 | |||||
150 | # run on first pass only | ||||
151 | # Note : These location types are now deprecated in GenBank (Oct. 2006) | ||||
152 | if (!defined($op)) { | ||||
153 | # convert all (X.Y) to [X.Y] | ||||
154 | $locstr =~ s{\((\d+\.\d+)\)}{\[$1\]}g; | ||||
155 | # convert ABC123:(X..Y) to ABC123:[X..Y] | ||||
156 | # we should never see the above | ||||
157 | $locstr =~ s{:\((\d+\.{2}\d+)\)}{:\[$1\]}g; | ||||
158 | } | ||||
159 | |||||
160 | if ($locstr =~ m{(.*?)\(($LOCREG)\)(.*)}o) { # any matching parentheses? | ||||
161 | my ($beg, $mid, $end) = ($1, $2, $3); | ||||
162 | my (@sublocs) = (split(q(,),$beg), $mid, split(q(,),$end)); | ||||
163 | |||||
164 | my @loc_objs; | ||||
165 | my $loc_obj; | ||||
166 | my @gl_subloc_strands; | ||||
167 | |||||
168 | SUBLOCS: | ||||
169 | while (@sublocs) { | ||||
170 | my $subloc = shift @sublocs; | ||||
171 | next if !$subloc; | ||||
172 | my $oparg = ($subloc eq 'join' || $subloc eq 'bond' || | ||||
173 | $subloc eq 'order' || $subloc eq 'complement') ? $subloc : undef; | ||||
174 | # has operator, requires further work (recurse) | ||||
175 | if ($oparg) { | ||||
176 | my $sub = shift @sublocs; | ||||
177 | # simple split operators (no recursive calls needed) | ||||
178 | if (($oparg eq 'join' || $oparg eq 'order' || $oparg eq 'bond' ) | ||||
179 | && $sub !~ m{(?:join|order|bond)}) { | ||||
180 | my @splitlocs = split(q(,), $sub); | ||||
181 | $loc_obj = Bio::Location::Split->new(-verbose => 1, | ||||
182 | -splittype => $oparg); | ||||
183 | # Store strand values for later consistency check | ||||
184 | my @subloc_strands; | ||||
185 | my @s_objs; | ||||
186 | foreach my $splitloc (@splitlocs) { | ||||
187 | next unless $splitloc; | ||||
188 | my $sobj; | ||||
189 | if ($splitloc =~ m{\(($LOCREG)\)}) { | ||||
190 | my $comploc = $1; | ||||
191 | $sobj = $self->_parse_location($comploc); | ||||
192 | $sobj->strand(-1); | ||||
193 | push @subloc_strands, -1; | ||||
194 | push @gl_subloc_strands, -1; | ||||
195 | } else { | ||||
196 | $sobj = $self->_parse_location($splitloc); | ||||
197 | push @subloc_strands, 1; | ||||
198 | push @gl_subloc_strands, 1; | ||||
199 | } | ||||
200 | push @s_objs, $sobj; | ||||
201 | } | ||||
202 | |||||
203 | # Sublocations strand values consistency check to set | ||||
204 | # Guide Strand and sublocations adding order | ||||
205 | if (scalar @s_objs > 0) { | ||||
206 | my $identical = 0; | ||||
207 | my $gl_identical = 0; | ||||
208 | |||||
209 | my $first_value = $subloc_strands[0]; | ||||
210 | foreach my $strand (@subloc_strands) { | ||||
211 | $identical++ if ($strand == $first_value); | ||||
212 | } | ||||
213 | |||||
214 | my $first_gl_value = $gl_subloc_strands[0]; | ||||
215 | foreach my $gl_strand (@gl_subloc_strands) { | ||||
216 | $gl_identical++ if ($gl_strand == $first_gl_value); | ||||
217 | } | ||||
218 | |||||
219 | if ($identical == scalar @subloc_strands) { | ||||
220 | # Set guide_strand if all sublocations have the same strand | ||||
221 | $loc_obj->guide_strand($first_value); | ||||
222 | |||||
223 | # Reverse sublocation order for negative strand locations in cases like this: | ||||
224 | # join(1..11,join(complement(40..50),complement(60..70))) | ||||
225 | # But not this: | ||||
226 | # join(complement(10..20),complement(30..40)) | ||||
227 | if ( $gl_identical != scalar @gl_subloc_strands | ||||
228 | and $first_value == -1 | ||||
229 | ) { | ||||
230 | @s_objs = reverse @s_objs; | ||||
231 | } | ||||
232 | } | ||||
233 | else { | ||||
234 | # Mixed strand values | ||||
235 | $loc_obj->guide_strand(undef); | ||||
236 | } | ||||
237 | |||||
238 | # Add sublocations | ||||
239 | foreach my $s_obj (@s_objs) { | ||||
240 | $loc_obj->add_sub_Location($s_obj); | ||||
241 | } | ||||
242 | } | ||||
243 | } else { | ||||
244 | $loc_obj = $self->from_string($sub, $oparg); | ||||
245 | # reinsure the operator is set correctly for this level | ||||
246 | # unless it is complement | ||||
247 | $loc_obj->splittype($oparg) unless $oparg eq 'complement'; | ||||
248 | } | ||||
249 | } | ||||
250 | # no operator, simple or fuzzy | ||||
251 | else { | ||||
252 | $loc_obj = $self->from_string($subloc,1); | ||||
253 | } | ||||
254 | if ($op && $op eq 'complement') { | ||||
255 | $loc_obj->strand(-1); | ||||
256 | push @gl_subloc_strands, -1; | ||||
257 | } | ||||
258 | else { | ||||
259 | push @gl_subloc_strands, 1; | ||||
260 | } | ||||
261 | |||||
262 | push @loc_objs, $loc_obj; | ||||
263 | } | ||||
264 | my $ct = @loc_objs; | ||||
265 | if ($op && !($op eq 'join' || $op eq 'order' || $op eq 'bond') | ||||
266 | && $ct > 1 ) { | ||||
267 | $self->throw("Bad operator $op: had multiple locations ". | ||||
268 | scalar(@loc_objs).", should be SplitLocationI"); | ||||
269 | } | ||||
270 | if ($ct > 1) { | ||||
271 | $loc = Bio::Location::Split->new(); | ||||
272 | $loc->add_sub_Location(shift @loc_objs) while (@loc_objs); | ||||
273 | return $loc; | ||||
274 | } else { | ||||
275 | $loc = shift @loc_objs; | ||||
276 | return $loc; | ||||
277 | } | ||||
278 | } else { # simple location(s) | ||||
279 | $loc = $self->_parse_location($locstr); | ||||
280 | $loc->strand(-1) if ($op && $op eq 'complement'); | ||||
281 | } | ||||
282 | return $loc; | ||||
283 | } | ||||
284 | |||||
285 | =head2 _parse_location | ||||
286 | |||||
287 | Title : _parse_location | ||||
288 | Usage : $loc = $locfactory->_parse_location( $loc_string) | ||||
289 | |||||
290 | Function: Parses the given location string and returns a location object | ||||
291 | with start() and end() and strand() set appropriately. | ||||
292 | Note that this method is private. | ||||
293 | Returns : A Bio::LocationI implementing object or undef on failure | ||||
294 | Args : location string | ||||
295 | |||||
296 | =cut | ||||
297 | |||||
298 | sub _parse_location { | ||||
299 | my ($self, $locstr) = @_; | ||||
300 | my ($loc, $seqid); | ||||
301 | #$self->debug( "Location parse, processing $locstr\n"); | ||||
302 | # 'remote' location? | ||||
303 | if($locstr =~ m{^(\S+):(.*)$}o) { | ||||
304 | # yes; memorize remote ID and strip from location string | ||||
305 | $seqid = $1; | ||||
306 | $locstr = $2; | ||||
307 | } | ||||
308 | |||||
309 | # split into start and end | ||||
310 | my ($start, $end) = split(/\.\./, $locstr); | ||||
311 | # remove enclosing parentheses if any; note that because of parentheses | ||||
312 | # possibly surrounding the entire location the parentheses around start | ||||
313 | # and/or may be asymmetrical | ||||
314 | # Note: these are from X.Y fuzzy locations, which are deprecated! | ||||
315 | $start =~ s/(?:^\[+|\]+$)//g if $start; | ||||
316 | $end =~ s/(?:^\[+|\]+$)//g if $end; | ||||
317 | |||||
318 | # Is this a simple (exact) or a fuzzy location? Simples have exact start | ||||
319 | # and end, or is between two adjacent bases. Everything else is fuzzy. | ||||
320 | my $loctype = ".."; # exact with start and end as default | ||||
321 | |||||
322 | $loctype = '?' if ( ($locstr =~ /\?/) && ($locstr !~ /\?\d+/) ); | ||||
323 | |||||
324 | my $locclass = "Bio::Location::Simple"; | ||||
325 | if(! defined($end)) { | ||||
326 | if($locstr =~ /(\d+)([\.\^])(\d+)/) { | ||||
327 | $start = $1; | ||||
328 | $end = $3; | ||||
329 | $loctype = $2; | ||||
330 | $locclass = "Bio::Location::Fuzzy" | ||||
331 | unless (abs($end-$start) <= 1) && ($loctype eq "^"); | ||||
332 | } else { | ||||
333 | $end = $start; | ||||
334 | } | ||||
335 | } | ||||
336 | # start_num and end_num are for the numeric only versions of | ||||
337 | # start and end so they can be compared | ||||
338 | # in a few lines | ||||
339 | my ($start_num, $end_num) = ($start,$end); | ||||
340 | if ( ($start =~ /[\>\<\?\.\^]/) || ($end =~ /[\>\<\?\.\^]/) ) { | ||||
341 | $locclass = 'Bio::Location::Fuzzy'; | ||||
342 | if($start =~ /(\d+)/) { | ||||
343 | ($start_num) = $1; | ||||
344 | } else { | ||||
345 | $start_num = 0 | ||||
346 | } | ||||
347 | if ($end =~ /(\d+)/) { | ||||
348 | ($end_num) = $1; | ||||
349 | } else { $end_num = 0 } | ||||
350 | } | ||||
351 | my $strand = 1; | ||||
352 | |||||
353 | if( $start_num > $end_num && $loctype ne '?') { | ||||
354 | ($start,$end,$strand) = ($end,$start,-1); | ||||
355 | } | ||||
356 | # instantiate location and initialize | ||||
357 | $loc = $locclass->new(-verbose => $self->verbose, | ||||
358 | -start => $start, | ||||
359 | -end => $end, | ||||
360 | -strand => $strand, | ||||
361 | -location_type => $loctype); | ||||
362 | # set remote ID if remote location | ||||
363 | if($seqid) { | ||||
364 | $loc->is_remote(1); | ||||
365 | $loc->seq_id($seqid); | ||||
366 | } | ||||
367 | |||||
368 | # done (hopefully) | ||||
369 | return $loc; | ||||
370 | } | ||||
371 | |||||
372 | 1 | 2µs | 1; | ||
# spent 6µs within Bio::Factory::FTLocationFactory::CORE:qr which was called:
# once (6µs+0s) by Bio::Factory::FTLocationFactory::BEGIN@104 at line 106 |