Filename | /Users/ap13/perl5/lib/perl5/Bio/SeqIO.pm |
Statements | Executed 17 statements in 1.94ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 2.61ms | 2.72ms | BEGIN@332 | Bio::SeqIO::
1 | 1 | 1 | 1.20ms | 1.81ms | BEGIN@330 | Bio::SeqIO::
1 | 1 | 1 | 1.06ms | 1.59ms | BEGIN@331 | Bio::SeqIO::
1 | 1 | 1 | 15µs | 31µs | BEGIN@327 | Bio::SeqIO::
1 | 1 | 1 | 11µs | 491µs | BEGIN@335 | Bio::SeqIO::
1 | 1 | 1 | 11µs | 50µs | BEGIN@333 | Bio::SeqIO::
1 | 1 | 1 | 8µs | 14µs | BEGIN@328 | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | DESTROY | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | |
0 | 0 | 0 | 0s | 0s | READLINE | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | TIEHANDLE | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | _concatenate_lines | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | _filehandle | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | _guess_format | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | _initialize | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | _load_format_module | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | alphabet | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | fh | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | location_factory | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | new | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | newFh | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | next_seq | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | object_factory | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | sequence_builder | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | sequence_factory | Bio::SeqIO::
0 | 0 | 0 | 0s | 0s | write_seq | Bio::SeqIO::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # BioPerl module for Bio::SeqIO | ||||
2 | # | ||||
3 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
4 | # | ||||
5 | # Cared for by Ewan Birney <birney@ebi.ac.uk> | ||||
6 | # and Lincoln Stein <lstein@cshl.org> | ||||
7 | # | ||||
8 | # Copyright Ewan Birney | ||||
9 | # | ||||
10 | # You may distribute this module under the same terms as perl itself | ||||
11 | # | ||||
12 | # _history | ||||
13 | # October 18, 1999 Largely rewritten by Lincoln Stein | ||||
14 | |||||
15 | # POD documentation - main docs before the code | ||||
16 | |||||
17 | =head1 NAME | ||||
18 | |||||
19 | Bio::SeqIO - Handler for SeqIO Formats | ||||
20 | |||||
21 | =head1 SYNOPSIS | ||||
22 | |||||
23 | use Bio::SeqIO; | ||||
24 | |||||
25 | $in = Bio::SeqIO->new(-file => "inputfilename" , | ||||
26 | -format => 'Fasta'); | ||||
27 | $out = Bio::SeqIO->new(-file => ">outputfilename" , | ||||
28 | -format => 'EMBL'); | ||||
29 | |||||
30 | while ( my $seq = $in->next_seq() ) { | ||||
31 | $out->write_seq($seq); | ||||
32 | } | ||||
33 | |||||
34 | # Now, to actually get at the sequence object, use the standard Bio::Seq | ||||
35 | # methods (look at Bio::Seq if you don't know what they are) | ||||
36 | |||||
37 | use Bio::SeqIO; | ||||
38 | |||||
39 | $in = Bio::SeqIO->new(-file => "inputfilename" , | ||||
40 | -format => 'genbank'); | ||||
41 | |||||
42 | while ( my $seq = $in->next_seq() ) { | ||||
43 | print "Sequence ",$seq->id, " first 10 bases ", | ||||
44 | $seq->subseq(1,10), "\n"; | ||||
45 | } | ||||
46 | |||||
47 | |||||
48 | # The SeqIO system does have a filehandle binding. Most people find this | ||||
49 | # a little confusing, but it does mean you can write the world's | ||||
50 | # smallest reformatter | ||||
51 | |||||
52 | use Bio::SeqIO; | ||||
53 | |||||
54 | $in = Bio::SeqIO->newFh(-file => "inputfilename" , | ||||
55 | -format => 'Fasta'); | ||||
56 | $out = Bio::SeqIO->newFh(-format => 'EMBL'); | ||||
57 | |||||
58 | # World's shortest Fasta<->EMBL format converter: | ||||
59 | print $out $_ while <$in>; | ||||
60 | |||||
61 | |||||
62 | =head1 DESCRIPTION | ||||
63 | |||||
64 | Bio::SeqIO is a handler module for the formats in the SeqIO set (eg, | ||||
65 | Bio::SeqIO::fasta). It is the officially sanctioned way of getting at | ||||
66 | the format objects, which most people should use. | ||||
67 | |||||
68 | The Bio::SeqIO system can be thought of like biological file handles. | ||||
69 | They are attached to filehandles with smart formatting rules (eg, | ||||
70 | genbank format, or EMBL format, or binary trace file format) and | ||||
71 | can either read or write sequence objects (Bio::Seq objects, or | ||||
72 | more correctly, Bio::SeqI implementing objects, of which Bio::Seq is | ||||
73 | one such object). If you want to know what to do with a Bio::Seq | ||||
74 | object, read L<Bio::Seq>. | ||||
75 | |||||
76 | The idea is that you request a stream object for a particular format. | ||||
77 | All the stream objects have a notion of an internal file that is read | ||||
78 | from or written to. A particular SeqIO object instance is configured | ||||
79 | for either input or output. A specific example of a stream object is | ||||
80 | the Bio::SeqIO::fasta object. | ||||
81 | |||||
82 | Each stream object has functions | ||||
83 | |||||
84 | $stream->next_seq(); | ||||
85 | |||||
86 | and | ||||
87 | |||||
88 | $stream->write_seq($seq); | ||||
89 | |||||
90 | As an added bonus, you can recover a filehandle that is tied to the | ||||
91 | SeqIO object, allowing you to use the standard E<lt>E<gt> and print | ||||
92 | operations to read and write sequence objects: | ||||
93 | |||||
94 | use Bio::SeqIO; | ||||
95 | |||||
96 | $stream = Bio::SeqIO->newFh(-format => 'Fasta', | ||||
97 | -fh => \*ARGV); | ||||
98 | # read from standard input or the input filenames | ||||
99 | |||||
100 | while ( $seq = <$stream> ) { | ||||
101 | # do something with $seq | ||||
102 | } | ||||
103 | |||||
104 | and | ||||
105 | |||||
106 | print $stream $seq; # when stream is in output mode | ||||
107 | |||||
108 | This makes the simplest ever reformatter | ||||
109 | |||||
110 | #!/usr/bin/perl | ||||
111 | use strict; | ||||
112 | my $format1 = shift; | ||||
113 | my $format2 = shift || die | ||||
114 | "Usage: reformat format1 format2 < input > output"; | ||||
115 | |||||
116 | use Bio::SeqIO; | ||||
117 | |||||
118 | my $in = Bio::SeqIO->newFh(-format => $format1, -fh => \*ARGV ); | ||||
119 | my $out = Bio::SeqIO->newFh(-format => $format2 ); | ||||
120 | # Note: you might want to quote -format to keep older | ||||
121 | # perl's from complaining. | ||||
122 | |||||
123 | print $out $_ while <$in>; | ||||
124 | |||||
125 | |||||
126 | =head1 CONSTRUCTORS | ||||
127 | |||||
128 | =head2 Bio::SeqIO-E<gt>new() | ||||
129 | |||||
130 | $seqIO = Bio::SeqIO->new(-file => 'seqs.fasta', -format => $format); | ||||
131 | $seqIO = Bio::SeqIO->new(-fh => \*FILEHANDLE, -format => $format); | ||||
132 | $seqIO = Bio::SeqIO->new(-string => $string , -format => $format); | ||||
133 | $seqIO = Bio::SeqIO->new(-format => $format); | ||||
134 | |||||
135 | The new() class method constructs a new Bio::SeqIO object. The returned object | ||||
136 | can be used to retrieve or print Seq objects. new() accepts the following | ||||
137 | parameters: | ||||
138 | |||||
139 | =over 5 | ||||
140 | |||||
141 | =item -file | ||||
142 | |||||
143 | A file path to be opened for reading or writing. The usual Perl | ||||
144 | conventions apply: | ||||
145 | |||||
146 | 'file' # open file for reading | ||||
147 | '>file' # open file for writing | ||||
148 | '>>file' # open file for appending | ||||
149 | '+<file' # open file read/write | ||||
150 | 'command |' # open a pipe from the command | ||||
151 | '| command' # open a pipe to the command | ||||
152 | |||||
153 | =item -fh | ||||
154 | |||||
155 | You may use new() with an opened filehandle, provided as a glob reference. For | ||||
156 | example, to read from STDIN: | ||||
157 | |||||
158 | my $seqIO = Bio::SeqIO->new(-fh => \*STDIN); | ||||
159 | |||||
160 | A string filehandle is handy if you want to modify the output in the | ||||
161 | memory, before printing it out. The following program reads in EMBL | ||||
162 | formatted entries from a file and prints them out in fasta format with | ||||
163 | some HTML tags: | ||||
164 | |||||
165 | use Bio::SeqIO; | ||||
166 | use IO::String; | ||||
167 | my $in = Bio::SeqIO->new(-file => "emblfile", | ||||
168 | -format => 'EMBL'); | ||||
169 | while ( my $seq = $in->next_seq() ) { | ||||
170 | # the output handle is reset for every file | ||||
171 | my $stringio = IO::String->new($string); | ||||
172 | my $out = Bio::SeqIO->new(-fh => $stringio, | ||||
173 | -format => 'fasta'); | ||||
174 | # output goes into $string | ||||
175 | $out->write_seq($seq); | ||||
176 | # modify $string | ||||
177 | $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g; | ||||
178 | # print into STDOUT | ||||
179 | print $string; | ||||
180 | } | ||||
181 | |||||
182 | =item -string | ||||
183 | |||||
184 | A string to read the sequences from. For example: | ||||
185 | |||||
186 | my $string = ">seq1\nACGCTAGCTAGC\n"; | ||||
187 | my $seqIO = Bio::SeqIO->new(-string => $string); | ||||
188 | |||||
189 | =item -format | ||||
190 | |||||
191 | Specify the format of the file. Supported formats include fasta, | ||||
192 | genbank, embl, swiss (SwissProt), Entrez Gene and tracefile formats | ||||
193 | such as abi (ABI) and scf. There are many more, for a complete listing | ||||
194 | see the SeqIO HOWTO (L<http://bioperl.open-bio.org/wiki/HOWTO:SeqIO>). | ||||
195 | |||||
196 | If no format is specified and a filename is given then the module will | ||||
197 | attempt to deduce the format from the filename suffix. If there is no | ||||
198 | suffix that Bioperl understands then it will attempt to guess the | ||||
199 | format based on file content. If this is unsuccessful then SeqIO will | ||||
200 | throw a fatal error. | ||||
201 | |||||
202 | The format name is case-insensitive: 'FASTA', 'Fasta' and 'fasta' are | ||||
203 | all valid. | ||||
204 | |||||
205 | Currently, the tracefile formats (except for SCF) require installation | ||||
206 | of the external Staden "io_lib" package, as well as the | ||||
207 | Bio::SeqIO::staden::read package available from the bioperl-ext | ||||
208 | repository. | ||||
209 | |||||
210 | =item -alphabet | ||||
211 | |||||
212 | Sets the alphabet ('dna', 'rna', or 'protein'). When the alphabet is | ||||
213 | set then Bioperl will not attempt to guess what the alphabet is. This | ||||
214 | may be important because Bioperl does not always guess correctly. | ||||
215 | |||||
216 | =item -flush | ||||
217 | |||||
218 | By default, all files (or filehandles) opened for writing sequences | ||||
219 | will be flushed after each write_seq() (making the file immediately | ||||
220 | usable). If you do not need this facility and would like to marginally | ||||
221 | improve the efficiency of writing multiple sequences to the same file | ||||
222 | (or filehandle), pass the -flush option '0' or any other value that | ||||
223 | evaluates as defined but false: | ||||
224 | |||||
225 | my $gb = Bio::SeqIO->new(-file => "<gball.gbk", | ||||
226 | -format => "gb"); | ||||
227 | my $fa = Bio::SeqIO->new(-file => ">gball.fa", | ||||
228 | -format => "fasta", | ||||
229 | -flush => 0); # go as fast as we can! | ||||
230 | while($seq = $gb->next_seq) { $fa->write_seq($seq) } | ||||
231 | |||||
232 | =item -seqfactory | ||||
233 | |||||
234 | Provide a Bio::Factory::SequenceFactoryI object. See the sequence_factory() method. | ||||
235 | |||||
236 | =item -locfactory | ||||
237 | |||||
238 | Provide a Bio::Factory::LocationFactoryI object. See the location_factory() method. | ||||
239 | |||||
240 | =item -objbuilder | ||||
241 | |||||
242 | Provide a Bio::Factory::ObjectBuilderI object. See the sequence_builder() method. | ||||
243 | |||||
244 | =back | ||||
245 | |||||
246 | =head2 Bio::SeqIO-E<gt>newFh() | ||||
247 | |||||
248 | $fh = Bio::SeqIO->newFh(-fh => \*FILEHANDLE, -format=>$format); | ||||
249 | $fh = Bio::SeqIO->newFh(-format => $format); | ||||
250 | # etc. | ||||
251 | |||||
252 | This constructor behaves like new(), but returns a tied filehandle | ||||
253 | rather than a Bio::SeqIO object. You can read sequences from this | ||||
254 | object using the familiar E<lt>E<gt> operator, and write to it using | ||||
255 | print(). The usual array and $_ semantics work. For example, you can | ||||
256 | read all sequence objects into an array like this: | ||||
257 | |||||
258 | @sequences = <$fh>; | ||||
259 | |||||
260 | Other operations, such as read(), sysread(), write(), close(), and | ||||
261 | printf() are not supported. | ||||
262 | |||||
263 | =head1 OBJECT METHODS | ||||
264 | |||||
265 | See below for more detailed summaries. The main methods are: | ||||
266 | |||||
267 | =head2 $sequence = $seqIO-E<gt>next_seq() | ||||
268 | |||||
269 | Fetch the next sequence from the stream, or nothing if no more. | ||||
270 | |||||
271 | =head2 $seqIO-E<gt>write_seq($sequence [,$another_sequence,...]) | ||||
272 | |||||
273 | Write the specified sequence(s) to the stream. | ||||
274 | |||||
275 | =head2 TIEHANDLE(), READLINE(), PRINT() | ||||
276 | |||||
277 | These provide the tie interface. See L<perltie> for more details. | ||||
278 | |||||
279 | =head1 FEEDBACK | ||||
280 | |||||
281 | =head2 Mailing Lists | ||||
282 | |||||
283 | User feedback is an integral part of the evolution of this and other | ||||
284 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
285 | of the Bioperl mailing lists. | ||||
286 | |||||
287 | Your participation is much appreciated. | ||||
288 | |||||
289 | bioperl-l@bioperl.org - General discussion | ||||
290 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
291 | |||||
292 | =head2 Support | ||||
293 | |||||
294 | Please direct usage questions or support issues to the mailing list: | ||||
295 | |||||
296 | bioperl-l@bioperl.org | ||||
297 | |||||
298 | rather than to the module maintainer directly. Many experienced and | ||||
299 | responsive experts will be able to look at the problem and quickly | ||||
300 | address it. Please include a thorough description of the problem | ||||
301 | with code and data examples if at all possible. | ||||
302 | |||||
303 | =head2 Reporting Bugs | ||||
304 | |||||
305 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
306 | of the bugs and their resolution. Bug reports can be submitted via the | ||||
307 | web: | ||||
308 | |||||
309 | https://github.com/bioperl/bioperl-live/issues | ||||
310 | |||||
311 | =head1 AUTHOR - Ewan Birney, Lincoln Stein | ||||
312 | |||||
313 | Email birney@ebi.ac.uk | ||||
314 | lstein@cshl.org | ||||
315 | |||||
316 | =head1 APPENDIX | ||||
317 | |||||
318 | The rest of the documentation details each of the object | ||||
319 | methods. Internal methods are usually preceded with a _ | ||||
320 | |||||
321 | =cut | ||||
322 | |||||
323 | #' Let the code begin... | ||||
324 | |||||
325 | package Bio::SeqIO; | ||||
326 | |||||
327 | 2 | 25µs | 2 | 48µs | # spent 31µs (15+16) within Bio::SeqIO::BEGIN@327 which was called:
# once (15µs+16µs) by Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 at line 327 # spent 31µs making 1 call to Bio::SeqIO::BEGIN@327
# spent 16µs making 1 call to strict::import |
328 | 2 | 22µs | 2 | 20µs | # spent 14µs (8+6) within Bio::SeqIO::BEGIN@328 which was called:
# once (8µs+6µs) by Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 at line 328 # spent 14µs making 1 call to Bio::SeqIO::BEGIN@328
# spent 6µs making 1 call to warnings::import |
329 | |||||
330 | 2 | 153µs | 1 | 1.81ms | # spent 1.81ms (1.20+612µs) within Bio::SeqIO::BEGIN@330 which was called:
# once (1.20ms+612µs) by Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 at line 330 # spent 1.81ms making 1 call to Bio::SeqIO::BEGIN@330 |
331 | 2 | 149µs | 1 | 1.59ms | # spent 1.59ms (1.06+534µs) within Bio::SeqIO::BEGIN@331 which was called:
# once (1.06ms+534µs) by Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 at line 331 # spent 1.59ms making 1 call to Bio::SeqIO::BEGIN@331 |
332 | 2 | 180µs | 1 | 2.72ms | # spent 2.72ms (2.61+109µs) within Bio::SeqIO::BEGIN@332 which was called:
# once (2.61ms+109µs) by Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 at line 332 # spent 2.72ms making 1 call to Bio::SeqIO::BEGIN@332 |
333 | 2 | 29µs | 2 | 90µs | # spent 50µs (11+40) within Bio::SeqIO::BEGIN@333 which was called:
# once (11µs+40µs) by Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 at line 333 # spent 50µs making 1 call to Bio::SeqIO::BEGIN@333
# spent 40µs making 1 call to Exporter::import |
334 | |||||
335 | 2 | 1.38ms | 2 | 971µs | # spent 491µs (11+480) within Bio::SeqIO::BEGIN@335 which was called:
# once (11µs+480µs) by Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 at line 335 # spent 491µs making 1 call to Bio::SeqIO::BEGIN@335
# spent 480µs making 1 call to parent::import |
336 | |||||
# File-scoped cache of lower-cased alphabet names that alphabet() has already
# validated, so each distinct name is only checked once.
337 | 1 | 200ns | my %valid_alphabet_cache; | ||
338 | |||||
339 | |||||
340 | =head2 new | ||||
341 | |||||
342 | Title : new | ||||
343 | Usage : $stream = Bio::SeqIO->new(-file => 'sequences.fasta', | ||||
344 | -format => 'fasta'); | ||||
345 | Function: Returns a new sequence stream | ||||
346 | Returns : A Bio::SeqIO stream initialised with the appropriate format | ||||
347 | Args : Named parameters indicating where to read the sequences from or to | ||||
348 | write them to: | ||||
349 | -file => filename, OR | ||||
350 | -fh => filehandle to attach to, OR | ||||
351 | -string => string | ||||
352 | |||||
353 | Additional arguments, all with reasonable defaults: | ||||
354 | -format => format of the sequences, usually auto-detected | ||||
355 | -alphabet => 'dna', 'rna', or 'protein' | ||||
356 | -flush => 0 or 1 (default: flush filehandles after each write) | ||||
357 | -seqfactory => sequence factory | ||||
358 | -locfactory => location factory | ||||
359 | -objbuilder => object builder | ||||
360 | |||||
361 | See L<Bio::SeqIO::Handler> | ||||
362 | |||||
363 | =cut | ||||
364 | |||||
# NOTE(review): $entry is not referenced by any subroutine visible in this
# file; it looks like a leftover -- confirm before removing.
365 | 1 | 400ns | my $entry = 0; | ||
366 | |||||
# Constructor and format dispatcher.
#
# When invoked on a class whose name matches Bio::SeqIO::<something> (a format
# driver), the object is built by the parent constructor and then set up with
# _initialize().
#
# When invoked on any other class (normally Bio::SeqIO itself), the format is
# taken from -format or guessed from the file name / file content / filehandle
# content / string content, the matching Bio::SeqIO::<format> module is loaded
# and construction is delegated to it.
#
# Args    : named parameters (-file, -fh, -string, -format, ...); parameter
#           names are matched case-insensitively
# Returns : the new stream object, or nothing if the driver module could not
#           be loaded
# Throws  : if -file, -fh or -string is present but undefined while no other
#           source is defined, or if no format can be determined
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if ($class =~ /Bio::SeqIO::(\S+)/) {
        my ($driver) = $class->SUPER::new(@args);
        $driver->_initialize(@args);
        return $driver;
    }

    # Add a lower-cased alias for every parameter name. keys() and values()
    # walk the unmodified hash in the same order, so names and values line up.
    my %params       = @args;
    my @lowered_keys = map { lc } keys %params;
    @params{@lowered_keys} = values %params;

    # With no defined source at all, complain about a source key that was
    # passed explicitly with an undefined value. Having no source key at all
    # is accepted.
    my $defined_sources = grep { defined $params{$_} } '-file', '-fh', '-string';
    if ($defined_sources == 0) {
        my @undefined_checks = (
            [ '-file',   "file argument provided, but with an undefined value" ],
            [ '-fh',     "fh argument provided, but with an undefined value" ],
            [ '-string', "string argument provided, but with an undefined value" ],
        );
        for my $check (@undefined_checks) {
            my ($key, $message) = @{$check};
            $class->throw($message) if exists $params{$key};
        }
    }

    # Determine or guess sequence format and variant
    my $format = $params{'-format'};
    unless ($format) {
        if (my $file = $params{'-file'}) {
            # Guess from filename extension, and then from file content
            $format = $class->_guess_format($file)
                   || Bio::Tools::GuessSeqFormat->new(-file => $file)->guess;
        }
        elsif (my $fh = $params{'-fh'}) {
            # Guess from filehandle content
            $format = Bio::Tools::GuessSeqFormat->new(-fh => $fh)->guess;
        }
        elsif (my $string = $params{'-string'}) {
            # Guess from string content
            $format = Bio::Tools::GuessSeqFormat->new(-text => $string)->guess;
        }
    }

    $class->throw("Could not guess format from file, filehandle or string")
        unless $format;

    # Format names are case-insensitive.
    $format = lc $format;

    # "format-variant" is split into the driver name and a -variant parameter.
    if (index($format, '-') >= 0) {
        my ($base, $variant) = split('-', $format, 2);
        $format = $base;
        $params{'-variant'} = $variant;
    }

    my $loaded = $class->_load_format_module($format);
    return unless $loaded;

    my $driver_class = "Bio::SeqIO::$format";
    return $driver_class->new(%params);
}
424 | |||||
425 | |||||
426 | =head2 newFh | ||||
427 | |||||
428 | Title : newFh | ||||
429 | Usage : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format') | ||||
430 | Function: Does a new() followed by an fh() | ||||
431 | Example : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format') | ||||
432 | $sequence = <$fh>; # read a sequence object | ||||
433 | print $fh $sequence; # write a sequence object | ||||
434 | Returns : filehandle tied to the Bio::SeqIO::Fh class | ||||
435 | Args : | ||||
436 | |||||
437 | See L<Bio::SeqIO::Fh> | ||||
438 | |||||
439 | =cut | ||||
440 | |||||
# Builds a stream with new() and returns the tied filehandle produced by its
# fh() method. Returns nothing when new() does not yield a true value.
sub newFh {
    my ($class, @args) = @_;

    my $stream = $class->new(@args);
    return unless $stream;

    return $stream->fh;
}
446 | |||||
447 | |||||
448 | =head2 fh | ||||
449 | |||||
450 | Title : fh | ||||
451 | Usage : $obj->fh | ||||
452 | Function: Get or set the IO filehandle | ||||
453 | Example : $fh = $obj->fh; # make a tied filehandle | ||||
454 | $sequence = <$fh>; # read a sequence object | ||||
455 | print $fh $sequence; # write a sequence object | ||||
456 | Returns : filehandle tied to Bio::SeqIO class | ||||
457 | Args : none | ||||
458 | |||||
459 | =cut | ||||
460 | |||||
# Creates a fresh anonymous glob and ties it to this object's class, handing
# the object itself to TIEHANDLE. Returns the glob reference.
sub fh {
    my ($stream) = @_;

    my $tie_class = ref($stream) || $stream;
    my $glob      = Symbol::gensym();

    # Dereferencing the glob reference yields the glob itself, so this ties
    # the glob via TIEHANDLE.
    tie $$glob, $tie_class, $stream;

    return $glob;
}
468 | |||||
469 | |||||
470 | # _initialize is chained for all SeqIO classes | ||||
471 | |||||
# Sets up the factories, builder and alphabet of a freshly constructed stream,
# then hands the arguments to _initialize_io(). Chained for all SeqIO classes.
#
# Recognised named arguments: SEQFACTORY, LOCFACTORY, OBJBUILDER, ALPHABET.
# Returns whatever _initialize_io() returns.
sub _initialize {
    my ($self, @args) = @_;

    # flush is initialized by the Root::IO init

    my @wanted = qw(SEQFACTORY LOCFACTORY OBJBUILDER ALPHABET);
    my ($seq_factory, $loc_factory, $builder, $alphabet) =
        $self->_rearrange(\@wanted, @args);

    # Fall back to the default location factory and sequence builder.
    $loc_factory ||= Bio::Factory::FTLocationFactory->new(-verbose => $self->verbose);
    $builder     ||= Bio::Seq::SeqBuilder->new(-verbose => $self->verbose);

    $self->sequence_builder($builder);
    $self->location_factory($loc_factory);

    # note that this should come last because it propagates the sequence
    # factory to the sequence builder
    $self->sequence_factory($seq_factory) if $seq_factory;

    #bug 2160
    $self->alphabet($alphabet) if $alphabet;

    # initialize the IO part
    return $self->_initialize_io(@args);
}
501 | |||||
502 | |||||
503 | =head2 next_seq | ||||
504 | |||||
505 | Title : next_seq | ||||
506 | Usage : $seq = $stream->next_seq | ||||
507 | Function: Reads the next sequence object from the stream and returns it. | ||||
508 | |||||
509 | Certain driver modules may encounter entries in the stream | ||||
510 | that are either misformatted or that use syntax not yet | ||||
511 | understood by the driver. If such an incident is | ||||
512 | recoverable, e.g., by dismissing a feature of a feature | ||||
513 | table or some other non-mandatory part of an entry, the | ||||
514 | driver will issue a warning. In the case of a | ||||
515 | non-recoverable situation an exception will be thrown. Do | ||||
516 | not assume that you can resume parsing the same stream | ||||
517 | after catching the exception. Note that you can always turn | ||||
518 | recoverable errors into exceptions by calling | ||||
519 | $stream->verbose(2). | ||||
520 | |||||
521 | Returns : a Bio::Seq sequence object, or nothing if no more sequences | ||||
522 | are available | ||||
523 | |||||
524 | Args : none | ||||
525 | |||||
526 | See L<Bio::Root::RootI>, L<Bio::Factory::SeqStreamI>, L<Bio::Seq> | ||||
527 | |||||
528 | =cut | ||||
529 | |||||
# Placeholder reader: always throws, because reading is not possible on a
# generic Bio::SeqIO object.
sub next_seq {
    my $self = shift;
    return $self->throw("Sorry, you cannot read from a generic Bio::SeqIO object.");
}
534 | |||||
535 | |||||
536 | =head2 write_seq | ||||
537 | |||||
538 | Title : write_seq | ||||
539 | Usage : $stream->write_seq($seq) | ||||
540 | Function: writes the $seq object into the stream | ||||
541 | Returns : 1 for success and 0 for error | ||||
542 | Args : Bio::Seq object | ||||
543 | |||||
544 | =cut | ||||
545 | |||||
# Placeholder writer: always throws, because writing is not possible on a
# generic Bio::SeqIO object.
sub write_seq {
    my $self = shift;
    return $self->throw("Sorry, you cannot write to a generic Bio::SeqIO object.");
}
550 | |||||
551 | |||||
552 | =head2 format | ||||
553 | |||||
554 | Title : format | ||||
555 | Usage : $format = $stream->format() | ||||
556 | Function: Get the sequence format | ||||
557 | Returns : sequence format, e.g. fasta, fastq | ||||
558 | Args : none | ||||
559 | |||||
560 | =cut | ||||
561 | |||||
562 | # format() method inherited from Bio::Root::IO | ||||
563 | |||||
564 | |||||
565 | =head2 alphabet | ||||
566 | |||||
567 | Title : alphabet | ||||
568 | Usage : $self->alphabet($newval) | ||||
569 | Function: Set/get the molecule type for the Seq objects to be created. | ||||
570 | Example : $seqio->alphabet('protein') | ||||
571 | Returns : value of alphabet: 'dna', 'rna', or 'protein' | ||||
572 | Args : newvalue (optional) | ||||
573 | Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein' | ||||
574 | |||||
575 | =cut | ||||
576 | |||||
# Get/set the alphabet stored on this stream.
#
# With a defined argument, the value is lower-cased and, unless that name is
# already in %valid_alphabet_cache, validated by constructing a throw-away
# Bio::PrimarySeq with it; a failure there is reported through throw().
# Returns the stored alphabet.
sub alphabet {
    my ($self, $value) = @_;

    # Plain getter.
    return $self->{'alphabet'} unless defined $value;

    $value = lc $value;

    if (!$valid_alphabet_cache{$value}) {
        # instead of hard-coding the allowed values once more, we check by
        # creating a dummy sequence object
        eval {
            require Bio::PrimarySeq;
            my $probe = Bio::PrimarySeq->new(
                -verbose  => $self->verbose,
                -alphabet => $value,
            );
        };
        $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.")
            if $@;

        $valid_alphabet_cache{$value} = 1;
    }

    $self->{'alphabet'} = $value;
    return $self->{'alphabet'};
}
599 | |||||
600 | |||||
601 | =head2 _load_format_module | ||||
602 | |||||
603 | Title : _load_format_module | ||||
604 | Usage : *INTERNAL SeqIO stuff* | ||||
605 | Function: Loads up (like use) a module at run time on demand | ||||
606 | Example : | ||||
607 | Returns : | ||||
608 | Args : | ||||
609 | |||||
610 | =cut | ||||
611 | |||||
# Loads the driver module Bio::SeqIO::<format> through _load_module().
#
# If loading raises an exception, a diagnostic is printed to STDERR and the
# exception is swallowed. Returns the value from _load_module(), or undef
# when loading raised an exception.
sub _load_format_module {
    my ($self, $format) = @_;

    my $module = "Bio::SeqIO::" . $format;
    my $loaded = eval { $self->_load_module($module) };

    if ($@) {
        print STDERR <<"END";
$self: $format cannot be found
Exception $@
For more information about the SeqIO system please see the SeqIO docs.
This includes ways of checking for formats at compile time, not run time
END
    }

    return $loaded;
}
631 | |||||
632 | |||||
633 | =head2 _concatenate_lines | ||||
634 | |||||
635 | Title : _concatenate_lines | ||||
636 | Usage : $s = _concatenate_lines($line, $continuation_line) | ||||
637 | Function: Private. Concatenates two strings assuming that the second stems | ||||
638 | from a continuation line of the first. Adds a space between both | ||||
639 | unless the first ends with a dash. | ||||
640 | |||||
641 | Takes care of either arg being empty. | ||||
642 | Example : | ||||
643 | Returns : A string. | ||||
644 | Args : | ||||
645 | |||||
646 | =cut | ||||
647 | |||||
# Joins a line and its continuation line.
#
# Args    : $s1 - the first line (may be undef or empty)
#           $s2 - the continuation line (may be undef or empty)
# Returns : the concatenation, with a single space inserted between the two
#           parts unless either part is empty or the first ends with a dash.
#
# Emptiness is tested with defined/length rather than truthiness, so a part
# that is exactly "0" counts as content and is not dropped.
sub _concatenate_lines {
    my ($self, $s1, $s2) = @_;

    $s1 = '' unless defined $s1;
    $s2 = '' unless defined $s2;

    # Nothing to separate when one side is empty.
    return $s1 . $s2 if !length($s1) || !length($s2);

    # A trailing dash means the text was hyphenated across the line break.
    my $separator = ($s1 =~ /-$/) ? '' : ' ';

    return $s1 . $separator . $s2;
}
653 | |||||
654 | |||||
655 | =head2 _filehandle | ||||
656 | |||||
657 | Title : _filehandle | ||||
658 | Usage : $obj->_filehandle($newval) | ||||
659 | Function: This method is deprecated. Call _fh() instead. | ||||
660 | Example : | ||||
661 | Returns : value of _filehandle | ||||
662 | Args : newvalue (optional) | ||||
663 | |||||
664 | =cut | ||||
665 | |||||
# Deprecated alias: forwards all arguments to _fh() and returns its result.
sub _filehandle {
    my $self = shift;
    return $self->_fh(@_);
}
670 | |||||
671 | |||||
672 | =head2 _guess_format | ||||
673 | |||||
674 | Title : _guess_format | ||||
675 | Usage : $obj->_guess_format($filename) | ||||
676 | Function: guess format based on file suffix | ||||
677 | Example : | ||||
678 | Returns : guessed format of filename (lower case) | ||||
679 | Args : | ||||
680 | Notes : formats that _guess_format() will guess include fasta, | ||||
681 | genbank, scf, pir, embl, raw, gcg, ace, bsml, swissprot, | ||||
682 | fastq and phd/phred | ||||
683 | |||||
684 | =cut | ||||
685 | |||||
# Guesses a sequence format from a file name suffix (case-insensitive).
#
# Args    : $filename - the file name to inspect
# Returns : the lower-case format name, or nothing (undef / empty list) when
#           the file name is false or has no recognised suffix
#
# The file name is held in a lexical variable instead of being assigned to the
# global $_, so the caller's $_ is never modified (assigning to $_ would also
# die when the caller has $_ aliased to a read-only value).
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    return 'abi'        if $filename =~ /\.ab[i1]$/i;
    return 'ace'        if $filename =~ /\.ace$/i;
    return 'alf'        if $filename =~ /\.alf$/i;
    return 'bsml'       if $filename =~ /\.(bsm|bsml)$/i;
    return 'ctf'        if $filename =~ /\.ctf$/i;
    return 'embl'       if $filename =~ /\.(embl|ebl|emb|dat)$/i;
    return 'entrezgene' if $filename =~ /\.asn$/i;
    return 'exp'        if $filename =~ /\.exp$/i;
    return 'fasta'      if $filename =~ /\.(fasta|fast|fas|seq|fa|fsa|nt|aa|fna|faa)$/i;
    return 'fastq'      if $filename =~ /\.fastq$/i;
    return 'gcg'        if $filename =~ /\.gcg$/i;
    return 'genbank'    if $filename =~ /\.(gb|gbank|genbank|gbk|gbs)$/i;
    return 'phd'        if $filename =~ /\.(phd|phred)$/i;
    return 'pir'        if $filename =~ /\.pir$/i;
    return 'pln'        if $filename =~ /\.pln$/i;
    return 'qual'       if $filename =~ /\.qual$/i;
    return 'raw'        if $filename =~ /\.txt$/i;
    return 'scf'        if $filename =~ /\.scf$/i;
    # from Strider 1.4 Release Notes: The file name extensions used by
    # Strider 1.4 are ".xdna", ".xdgn", ".xrna" and ".xprt" for DNA,
    # DNA Degenerate, RNA and Protein Sequence Files, respectively
    return 'strider'    if $filename =~ /\.(xdna|xdgn|xrna|xprt)$/i;
    return 'swiss'      if $filename =~ /\.(swiss|sp)$/i;
    return 'ztr'        if $filename =~ /\.ztr$/i;

    # No recognised suffix.
    return;
}
715 | |||||
716 | |||||
# Destructor: calls close() on the object being destroyed.
sub DESTROY {
    my ($stream) = @_;
    return $stream->close();
}
721 | |||||
722 | |||||
# Tie constructor: wraps the given value under the 'seqio' key of a new hash
# blessed into $class.
sub TIEHANDLE {
    my $class   = shift;
    my $wrapped = shift;

    my %state = ('seqio' => $wrapped);
    return bless \%state, $class;
}
727 | |||||
728 | |||||
# Tie read hook for <$fh>.
#
# Outside list context it returns the next result of next_seq() on the wrapped
# stream, or undef when that result is false. In list context it keeps calling
# next_seq() until it returns a false value and returns everything collected.
sub READLINE {
    my ($self) = @_;
    my $stream = $self->{'seqio'};

    if (!wantarray) {
        my $next = $stream->next_seq();
        return $next ? $next : undef;
    }

    my @sequences;
    while (my $next = $stream->next_seq()) {
        push @sequences, $next;
    }
    return @sequences;
}
736 | |||||
737 | |||||
# Tie write hook for print $fh ...: passes everything printed to write_seq()
# on the wrapped stream and returns its result.
sub PRINT {
    my ($self, @items) = @_;
    return $self->{'seqio'}->write_seq(@items);
}
742 | |||||
743 | |||||
744 | =head2 sequence_factory | ||||
745 | |||||
746 | Title : sequence_factory | ||||
747 | Usage : $seqio->sequence_factory($seqfactory) | ||||
748 | Function: Get/Set the Bio::Factory::SequenceFactoryI | ||||
749 | Returns : Bio::Factory::SequenceFactoryI | ||||
750 | Args : [optional] Bio::Factory::SequenceFactoryI | ||||
751 | |||||
752 | =cut | ||||
753 | |||||
# Get/set the sequence factory stored on this stream.
#
# On set, the argument must be a reference that isa
# Bio::Factory::SequenceFactoryI, otherwise throw() is called. The factory is
# also passed on to the current sequence builder when that builder has a
# sequence_factory() method and does not yet have a factory of its own.
# Returns the stored factory.
sub sequence_factory {
    my ($self, $obj) = @_;

    # Plain getter.
    return $self->{'_seqio_seqfactory'} unless defined $obj;

    unless (ref($obj) && $obj->isa('Bio::Factory::SequenceFactoryI')) {
        $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)."::sequence_factory()");
    }
    $self->{'_seqio_seqfactory'} = $obj;

    # Propagate to the builder only if it has no factory yet.
    my $builder = $self->sequence_builder();
    if ($builder && $builder->can('sequence_factory')) {
        $builder->sequence_factory($obj) unless $builder->sequence_factory();
    }

    return $self->{'_seqio_seqfactory'};
}
769 | |||||
770 | |||||
771 | =head2 object_factory | ||||
772 | |||||
773 | Title : object_factory | ||||
774 | Usage : $obj->object_factory($newval) | ||||
775 | Function: This is an alias to sequence_factory with a more generic name. | ||||
776 | Example : | ||||
777 | Returns : value of object_factory (a scalar) | ||||
778 | Args : on set, new value (a scalar or undef, optional) | ||||
779 | |||||
780 | =cut | ||||
781 | |||||
# Alias with a more generic name: forwards all arguments to
# sequence_factory() and returns its result.
sub object_factory {
    my ($self, @args) = @_;
    return $self->sequence_factory(@args);
}
785 | |||||
786 | |||||
787 | =head2 sequence_builder | ||||
788 | |||||
789 | Title : sequence_builder | ||||
790 | Usage : $seqio->sequence_builder($builder) | ||||
791 | Function: Get/Set the Bio::Factory::ObjectBuilderI used to build sequence | ||||
792 | objects. This applies to rich sequence formats only, e.g. genbank | ||||
793 | but not fasta. | ||||
794 | |||||
795 | If you do not set the sequence object builder yourself, it | ||||
796 | will in fact be an instance of L<Bio::Seq::SeqBuilder>, and | ||||
797 | you may use all methods documented there to configure it. | ||||
798 | |||||
799 | Returns : a Bio::Factory::ObjectBuilderI compliant object | ||||
800 | Args : [optional] a Bio::Factory::ObjectBuilderI compliant object | ||||
801 | |||||
802 | =cut | ||||
803 | |||||
# Get/set the object builder stored on this stream.
#
# On set, the argument must be a reference that isa
# Bio::Factory::ObjectBuilderI, otherwise throw() is called.
# Returns the stored builder.
sub sequence_builder {
    my ($self, $obj) = @_;

    # Plain getter.
    return $self->{'_object_builder'} unless defined $obj;

    unless (ref($obj) && $obj->isa('Bio::Factory::ObjectBuilderI')) {
        $self->throw("Must provide a valid Bio::Factory::ObjectBuilderI object to ".ref($self)."::sequence_builder()");
    }

    $self->{'_object_builder'} = $obj;
    return $self->{'_object_builder'};
}
814 | |||||
815 | |||||
816 | =head2 location_factory | ||||
817 | |||||
818 | Title : location_factory | ||||
819 | Usage : $seqio->location_factory($locfactory) | ||||
820 | Function: Get/Set the Bio::Factory::LocationFactoryI object to be used for | ||||
821 | location string parsing | ||||
822 | Returns : a Bio::Factory::LocationFactoryI implementing object | ||||
823 | Args : [optional] on set, a Bio::Factory::LocationFactoryI implementing | ||||
824 | object. | ||||
825 | |||||
826 | =cut | ||||
827 | |||||
# Get/set the location factory stored on this stream.
#
# On set, the argument must be a reference that isa
# Bio::Factory::LocationFactoryI, otherwise throw() is called.
# Returns the stored factory.
sub location_factory {
    my ($self, $obj) = @_;

    # Plain getter.
    return $self->{'_seqio_locfactory'} unless defined $obj;

    unless (ref($obj) && $obj->isa('Bio::Factory::LocationFactoryI')) {
        $self->throw("Must provide a valid Bio::Factory::LocationFactoryI" .
                     " object to ".ref($self)."->location_factory()");
    }

    $self->{'_seqio_locfactory'} = $obj;
    return $self->{'_seqio_locfactory'};
}
839 | |||||
840 | 1 | 4µs | 1; | ||
841 |