Filename | /Users/ap13/pathogens/Roary/lib/Bio/Roary/Output/GroupsMultifastaNucleotide.pm |
Statements | Executed 24 statements in 1.42ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 1.74ms | 8.45ms | BEGIN@18 | Bio::Roary::Output::GroupsMultifastaNucleotide::
1 | 1 | 1 | 48µs | 4.57ms | BEGIN@17 | Bio::Roary::Output::GroupsMultifastaNucleotide::
1 | 1 | 1 | 16µs | 71µs | BEGIN@19 | Bio::Roary::Output::GroupsMultifastaNucleotide::
1 | 1 | 1 | 15µs | 196µs | BEGIN@148 | Bio::Roary::Output::GroupsMultifastaNucleotide::
1 | 1 | 1 | 14µs | 51µs | BEGIN@20 | Bio::Roary::Output::GroupsMultifastaNucleotide::
1 | 1 | 1 | 8µs | 8µs | BEGIN@21 | Bio::Roary::Output::GroupsMultifastaNucleotide::
1 | 1 | 1 | 6µs | 6µs | BEGIN@22 | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _bed_output_filename | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _build__input_seqio | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _build__output_filename | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _build_fasta_file | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _create_bed_file_from_gff | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _create_nucleotide_fasta_file_from_gff | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _extract_nucleotide_regions | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _extracted_nucleotide_fasta_file_from_bed_filename | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _group_file_name | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _group_seq_io_obj | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | _nucleotide_fasta_file_from_gff_filename | Bio::Roary::Output::GroupsMultifastaNucleotide::
0 | 0 | 0 | 0s | 0s | populate_files | Bio::Roary::Output::GroupsMultifastaNucleotide::
Line | Statements |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package Bio::Roary::Output::GroupsMultifastaNucleotide; | ||||
2 | |||||
3 | # ABSTRACT: Take in a GFF file and a groups file and output one multifasta file per group with nucleotide sequences. | ||||
4 | |||||
5 | =head1 SYNOPSIS | ||||
6 | |||||
7 | Take in a GFF file and a groups file and output one multifasta file per group with nucleotide sequences. | ||||
8 | use Bio::Roary::Output::GroupsMultifastaNucleotide; | ||||
9 | |||||
10 | my $obj = Bio::Roary::Output::GroupsMultifastaNucleotide->new( | ||||
11 | group_names => ['aaa','bbb'], | ||||
12 | ); | ||||
13 | $obj->populate_files(); | ||||
14 | |||||
15 | =cut | ||||
16 | |||||
17 | 2 | 44µs | 2 | 9.08ms | # spent 4.57ms (48µs+4.52) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17 which was called:
# once (48µs+4.52ms) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 17 # spent 4.57ms making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17
# spent 4.52ms making 1 call to Moose::import |
18 | 2 | 277µs | 1 | 8.45ms | # spent 8.45ms (1.74+6.71) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 which was called:
# once (1.74ms+6.71ms) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 18 # spent 8.45ms making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 |
19 | 2 | 41µs | 2 | 126µs | # spent 71µs (16+55) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19 which was called:
# once (16µs+55µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 19 # spent 71µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19
# spent 55µs making 1 call to Exporter::import |
20 | 2 | 29µs | 2 | 88µs | # spent 51µs (14+37) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20 which was called:
# once (14µs+37µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 20 # spent 51µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20
# spent 37µs making 1 call to Exporter::import |
21 | 2 | 22µs | 1 | 8µs | # spent 8µs within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@21 which was called:
# once (8µs+0s) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 21 # spent 8µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@21 |
22 | 2 | 878µs | 1 | 6µs | # spent 6µs within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@22 which was called:
# once (6µs+0s) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 22 # spent 6µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@22 |
23 | |||||
24 | 1 | 3µs | 1 | 2.45ms | has 'gff_file' => ( is => 'ro', isa => 'Str', required => 1 ); # spent 2.45ms making 1 call to Moose::has |
25 | # Not implemented | ||||
26 | 1 | 1µs | 1 | 1.70ms | has 'group_names' => ( is => 'ro', isa => 'ArrayRef', required => 0 ); # spent 1.70ms making 1 call to Moose::has |
27 | 1 | 2µs | 1 | 1.53ms | has 'output_directory' => ( is => 'ro', isa => 'Str', required => 1 ); # spent 1.53ms making 1 call to Moose::has |
28 | 1 | 2µs | 1 | 1.45ms | has 'annotate_groups' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 ); # spent 1.45ms making 1 call to Moose::has |
29 | 1 | 2µs | 1 | 1.54ms | has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 ); # spent 1.54ms making 1 call to Moose::has |
30 | |||||
31 | 1 | 2µs | 1 | 1.96ms | has 'fasta_file' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_fasta_file' ); # spent 1.96ms making 1 call to Moose::has |
32 | 1 | 2µs | 1 | 2.46ms | has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' ); # spent 2.46ms making 1 call to Moose::has |
33 | |||||
34 | 1 | 2µs | 1 | 2.04ms | has '_output_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__output_filename' ); # spent 2.04ms making 1 call to Moose::has |
35 | |||||
36 | |||||
# Builder for the lazy '_output_filename' attribute.
# Takes the file-name component of gff_file (as split off by fileparse),
# appends '.tmp_nuc_sequences.fa' and places the result under
# output_directory, joined with '/'.
sub _build__output_filename {
    my ($self) = @_;

    # fileparse returns (filename, directories, suffix); only the first
    # element is used here.
    my ($gff_basename) = fileparse( $self->gff_file );
    my $temp_name = $gff_basename . '.tmp_nuc_sequences.fa';

    return $self->output_directory . '/' . $temp_name;
}
43 | |||||
# Builder for the lazy '_input_seqio' attribute: a Bio::SeqIO object
# constructed on fasta_file with the 'Fasta' format.
sub _build__input_seqio {
    my ($self) = @_;

    my @seqio_args = (
        -file   => $self->fasta_file,
        -format => 'Fasta',
    );
    return Bio::SeqIO->new(@seqio_args);
}
48 | |||||
# Read every sequence from the input FASTA and write it to the FASTA file
# of the group its display ID maps to.
#
# Sequences whose ID has no (true) entry in annotate_groups->_ids_to_groups
# are skipped. Groups containing exactly one gene are skipped too, unless
# output_multifasta_files is set. When the input is exhausted the file at
# fasta_file is deleted. Always returns 1.
sub populate_files {
    my ($self) = @_;

    my $seq_reader = $self->_input_seqio;

  SEQUENCE:
    while ( my $input_seq = $seq_reader->next_seq() ) {
        my $group = $self->annotate_groups->_ids_to_groups->{ $input_seq->display_id };
        next SEQUENCE unless $group;

        my $gene_ids   = $self->annotate_groups->_groups_to_id_names->{$group};
        my $gene_count = scalar @{$gene_ids};

        # There's no need to align a single sequence
        next SEQUENCE if ( $self->output_multifasta_files == 0 && $gene_count == 1 );

        my $group_writer = $self->_group_seq_io_obj( $group, $gene_count );
        $group_writer->write_seq($input_seq);
    }

    unlink( $self->fasta_file );
    return 1;
}
69 | |||||
# Return the path of the FASTA file for a group.
#
# $group_name      - key into annotate_groups->_groups_to_consensus_gene_names
# $num_group_genes - accepted but not used by this method
#
# The consensus gene name has every non-word character replaced by '_',
# gets a '.fa' extension and is placed under output_directory.
sub _group_file_name {
    my ( $self, $group_name, $num_group_genes ) = @_;

    my $consensus_names = $self->annotate_groups->_groups_to_consensus_gene_names;

    # Work on a copy so the stored consensus name is left untouched.
    my $safe_name = $consensus_names->{$group_name};
    $safe_name =~ s!\W!_!gi;

    return $self->output_directory . '/' . $safe_name . '.fa';
}
79 | |||||
# Return a Bio::SeqIO FASTA object for a group's file, using a '>>' prefix
# on the file name so that sequences are appended.
# Both arguments are passed straight through to _group_file_name.
sub _group_seq_io_obj {
    my ( $self, $group_name, $num_group_genes ) = @_;

    my $append_target = ">>" . $self->_group_file_name( $group_name, $num_group_genes );
    return Bio::SeqIO->new( -file => $append_target, -format => 'Fasta' );
}
86 | |||||
87 | |||||
# Name of the FASTA file holding the extracted regions:
# _output_filename followed by '.intermediate.extracted.fa'.
sub _extracted_nucleotide_fasta_file_from_bed_filename {
    my ($self) = @_;

    my $suffix = 'intermediate.extracted.fa';
    return $self->_output_filename . '.' . $suffix;
}
92 | |||||
# Write a BED-style file (path from _bed_output_filename) derived from
# gff_file by running a shell pipeline:
#   - sed keeps the lines from '##gff-version 3' through '##FASTA';
#   - grep drops every line starting with '#';
#   - awk prints, tab separated: column 1, column 4 minus 1, column 5,
#     column 9, the literal 1, and column 7;
#   - the final seds remove 'ID=' and cut the text from a ';' up to the
#     next tab.
#
# Both file paths are shell-quoted so that spaces or shell metacharacters
# in a path cannot split or alter the command. A non-zero status from
# system() is reported with warn().
#
# Returns the value returned by system().
sub _create_bed_file_from_gff {
    my ($self) = @_;

    # Wrap a path in single quotes for /bin/sh, escaping embedded quotes.
    my $shell_quote = sub {
        my ($path) = @_;
        $path =~ s/'/'\\''/g;
        return "'" . $path . "'";
    };

    my $cmd =
        q{sed -n '/##gff-version 3/,/##FASTA/p' }
      . $shell_quote->( $self->gff_file )
      . q{ | grep -v '^#' | awk '{print $1"\t"($4-1)"\t"($5)"\t"$9"\t1\t"$7}' | sed 's/ID=//' | sed 's/;[^\t]*\t/\t/g' > }
      . $shell_quote->( $self->_bed_output_filename );

    my $status = system($cmd);
    if ( $status != 0 ) {
        warn 'Creating the BED file from ' . $self->gff_file . " failed (system status $status)\n";
    }
    return $status;
}
102 | |||||
# Write the sequence part of gff_file to the file named by
# _nucleotide_fasta_file_from_gff_filename: sed prints the lines from
# '##FASTA' onwards and grep removes the '##FASTA' marker line itself.
#
# Both file paths are shell-quoted so that spaces or shell metacharacters
# in a path cannot split or alter the command.
#
# Returns the value returned by system().
sub _create_nucleotide_fasta_file_from_gff {
    my ($self) = @_;

    # Wrap a path in single quotes for /bin/sh, escaping embedded quotes.
    my $shell_quote = sub {
        my ($path) = @_;
        $path =~ s/'/'\\''/g;
        return "'" . $path . "'";
    };

    my $cmd =
        q{sed -n '/##FASTA/,//p' }
      . $shell_quote->( $self->gff_file )
      . q{ | grep -v '##FASTA' > }
      . $shell_quote->( $self->_nucleotide_fasta_file_from_gff_filename );

    my $status = system($cmd);

    # grep exits with 1 when it selects no lines; that is not reported here.
    # Anything else non-zero (failure to launch, a signal, exit code > 1)
    # is reported.
    my $failed_to_run = ( $status == -1 );
    my $killed        = !$failed_to_run && ( $status & 127 );
    my $exit_code     = $failed_to_run ? 0 : ( $status >> 8 );
    if ( $failed_to_run || $killed || $exit_code > 1 ) {
        warn 'Extracting the FASTA section of ' . $self->gff_file . " failed (system status $status)\n";
    }
    return $status;
}
112 | |||||
# Name of the intermediate FASTA file taken from the GFF:
# _output_filename followed by '.intermediate.fa'.
sub _nucleotide_fasta_file_from_gff_filename {
    my ($self) = @_;

    my $suffix = 'intermediate.fa';
    return $self->_output_filename . '.' . $suffix;
}
117 | |||||
# Name of the intermediate BED file:
# _output_filename followed by '.intermediate.bed'.
sub _bed_output_filename {
    my ($self) = @_;

    my $suffix = 'intermediate.bed';
    return $self->_output_filename . '.' . $suffix;
}
122 | |||||
# Produce a FASTA file with the nucleotide sequence of each region listed
# in the BED file and return its path.
#
# Steps:
#   1. write the intermediate FASTA and BED files from the GFF;
#   2. run 'bedtools getfasta -s ... -name' on them, discarding its output;
#   3. delete the intermediate FASTA, the BED file and the '.fai' file next
#      to the intermediate FASTA (presumably the index bedtools creates -
#      TODO confirm).
#
# All paths are shell-quoted so that spaces or shell metacharacters cannot
# split or alter the command, and a non-zero status from system() is
# reported with warn(). The intermediates are removed either way.
#
# Returns the value of _extracted_nucleotide_fasta_file_from_bed_filename.
sub _extract_nucleotide_regions {
    my ($self) = @_;

    $self->_create_nucleotide_fasta_file_from_gff;
    $self->_create_bed_file_from_gff;

    my $source_fasta    = $self->_nucleotide_fasta_file_from_gff_filename;
    my $bed_file        = $self->_bed_output_filename;
    my $extracted_fasta = $self->_extracted_nucleotide_fasta_file_from_bed_filename;

    # Wrap a path in single quotes for /bin/sh, escaping embedded quotes.
    my $shell_quote = sub {
        my ($path) = @_;
        $path =~ s/'/'\\''/g;
        return "'" . $path . "'";
    };

    my $cmd =
        'bedtools getfasta -s -fi '
      . $shell_quote->($source_fasta)
      . ' -bed '
      . $shell_quote->($bed_file) . ' -fo '
      . $shell_quote->($extracted_fasta)
      . ' -name > /dev/null 2>&1';

    my $status = system($cmd);
    if ( $status != 0 ) {
        warn "bedtools getfasta failed for $source_fasta (system status $status)\n";
    }

    unlink($source_fasta);
    unlink($bed_file);
    unlink( $source_fasta . '.fai' );
    return $extracted_fasta;
}
142 | |||||
# Builder for the lazy 'fasta_file' attribute: runs the nucleotide
# extraction and uses the path it returns.
sub _build_fasta_file {
    my ($self) = @_;

    my $extracted_fasta = $self->_extract_nucleotide_regions;
    return $extracted_fasta;
}
147 | |||||
148 | 2 | 66µs | 2 | 377µs | # spent 196µs (15+181) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148 which was called:
# once (15µs+181µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 148 # spent 196µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148
# spent 181µs making 1 call to Moose::unimport |
149 | 1 | 8µs | 2 | 5.84ms | __PACKAGE__->meta->make_immutable; # spent 5.82ms making 1 call to Class::MOP::Class::make_immutable
# spent 20µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::meta |
150 | |||||
151 | 1 | 42µs | 1; | ||
152 |