Filename | /Users/ap13/pathogens/Roary/lib/Bio/Roary/SplitGroups.pm |
Statements | Executed 1402863 statements in 7.25s |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
39725 | 3 | 1 | 4.11s | 4.11s | CORE:readline (opcode) | Bio::Roary::SplitGroups::
31 | 1 | 1 | 835ms | 985ms | _update_genes_to_groups | Bio::Roary::SplitGroups::
18277 | 2 | 1 | 746ms | 3.73s | _find_paralogs | Bio::Roary::SplitGroups::
8 | 1 | 1 | 499ms | 499ms | CORE:system (opcode) | Bio::Roary::SplitGroups::
312 | 4 | 1 | 337ms | 337ms | CORE:open (opcode) | Bio::Roary::SplitGroups::
48485 | 1 | 1 | 298ms | 417ms | _same_group | Bio::Roary::SplitGroups::
1 | 1 | 1 | 224ms | 239ms | _set_genes_to_groups | Bio::Roary::SplitGroups::
1 | 1 | 1 | 210ms | 10.5s | split_groups | Bio::Roary::SplitGroups::
309 | 1 | 1 | 148ms | 4.65s | _parse_gene_neighbourhood | Bio::Roary::SplitGroups::
494 | 1 | 1 | 125ms | 542ms | _shared_cgn_score | Bio::Roary::SplitGroups::
18246 | 2 | 1 | 66.5ms | 3.79s | _contains_paralogs | Bio::Roary::SplitGroups::
31 | 2 | 1 | 34.8ms | 6.22s | _true_orthologs (recurses: max depth 1, inclusive time 717ms) | Bio::Roary::SplitGroups::
3066 | 1 | 1 | 10.2ms | 10.2ms | CORE:subst (opcode) | Bio::Roary::SplitGroups::
18215 | 1 | 1 | 5.74ms | 5.74ms | CORE:print (opcode) | Bio::Roary::SplitGroups::
317 | 2 | 1 | 4.79ms | 4.79ms | CORE:qr (opcode) | Bio::Roary::SplitGroups::
247 | 1 | 1 | 3.56ms | 546ms | _closest_cgn | Bio::Roary::SplitGroups::
1 | 1 | 1 | 794µs | 501ms | _pre_filter_fasta_files | Bio::Roary::SplitGroups::
2 | 2 | 1 | 261µs | 261µs | CORE:close (opcode) | Bio::Roary::SplitGroups::
1 | 1 | 1 | 100µs | 2.75s | _build__analyse_groups_obj | Bio::Roary::SplitGroups::
1 | 1 | 1 | 39µs | 4.15ms | BEGIN@11 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 26µs | 2.75s | _build__genes_to_files | Bio::Roary::SplitGroups::
1 | 1 | 1 | 17µs | 568µs | __ANON__[lib/Bio/Roary/SplitGroups.pm:38] | Bio::Roary::SplitGroups::
1 | 1 | 1 | 12µs | 19µs | _make_tmp_dir | Bio::Roary::SplitGroups::
1 | 1 | 1 | 12µs | 54µs | BEGIN@13 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 12µs | 63µs | BEGIN@15 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 11µs | 45µs | BEGIN@14 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 11µs | 11µs | BEGIN@12 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 10µs | 115µs | BEGIN@318 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 10µs | 53µs | BEGIN@16 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 9µs | 40µs | BEGIN@17 | Bio::Roary::SplitGroups::
1 | 1 | 1 | 4µs | 4µs | CORE:ftis (opcode) | Bio::Roary::SplitGroups::
0 | 0 | 0 | 0s | 0s | _build__group_filelist | Bio::Roary::SplitGroups::
0 | 0 | 0 | 0s | 0s | _build__outfile_handle | Bio::Roary::SplitGroups::
0 | 0 | 0 | 0s | 0s | _get_files_for_iteration | Bio::Roary::SplitGroups::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | package Bio::Roary::SplitGroups; | ||||
2 | |||||
3 | # ABSTRACT: | ||||
4 | |||||
5 | =head1 SYNOPSIS | ||||
6 | |||||
7 | use Bio::Roary::SplitGroups; | ||||
8 | |||||
9 | =cut | ||||
10 | |||||
11 | 2 | 59µs | 2 | 8.26ms | # spent 4.15ms (39µs+4.11) within Bio::Roary::SplitGroups::BEGIN@11 which was called:
# once (39µs+4.11ms) by Bio::Roary::PostAnalysis::BEGIN@22 at line 11 # spent 4.15ms making 1 call to Bio::Roary::SplitGroups::BEGIN@11
# spent 4.11ms making 1 call to Moose::import |
12 | 2 | 32µs | 1 | 11µs | # spent 11µs within Bio::Roary::SplitGroups::BEGIN@12 which was called:
# once (11µs+0s) by Bio::Roary::PostAnalysis::BEGIN@22 at line 12 # spent 11µs making 1 call to Bio::Roary::SplitGroups::BEGIN@12 |
13 | 2 | 33µs | 2 | 96µs | # spent 54µs (12+42) within Bio::Roary::SplitGroups::BEGIN@13 which was called:
# once (12µs+42µs) by Bio::Roary::PostAnalysis::BEGIN@22 at line 13 # spent 54µs making 1 call to Bio::Roary::SplitGroups::BEGIN@13
# spent 42µs making 1 call to Exporter::import |
14 | 2 | 27µs | 2 | 79µs | # spent 45µs (11+34) within Bio::Roary::SplitGroups::BEGIN@14 which was called:
# once (11µs+34µs) by Bio::Roary::PostAnalysis::BEGIN@22 at line 14 # spent 45µs making 1 call to Bio::Roary::SplitGroups::BEGIN@14
# spent 34µs making 1 call to Exporter::import |
15 | 2 | 25µs | 2 | 114µs | # spent 63µs (12+51) within Bio::Roary::SplitGroups::BEGIN@15 which was called:
# once (12µs+51µs) by Bio::Roary::PostAnalysis::BEGIN@22 at line 15 # spent 63µs making 1 call to Bio::Roary::SplitGroups::BEGIN@15
# spent 51µs making 1 call to Exporter::import |
16 | 2 | 23µs | 2 | 96µs | # spent 53µs (10+43) within Bio::Roary::SplitGroups::BEGIN@16 which was called:
# once (10µs+43µs) by Bio::Roary::PostAnalysis::BEGIN@22 at line 16 # spent 53µs making 1 call to Bio::Roary::SplitGroups::BEGIN@16
# spent 43µs making 1 call to Exporter::import |
17 | 2 | 1.60ms | 2 | 70µs | # spent 40µs (9+30) within Bio::Roary::SplitGroups::BEGIN@17 which was called:
# once (9µs+30µs) by Bio::Roary::PostAnalysis::BEGIN@22 at line 17 # spent 40µs making 1 call to Bio::Roary::SplitGroups::BEGIN@17
# spent 30µs making 1 call to Exporter::import |
18 | |||||
19 | |||||
20 | 1 | 2µs | 1 | 2.05ms | has 'groupfile' => ( is => 'ro', isa => 'Str', required => 1 ); # spent 2.05ms making 1 call to Moose::has |
21 | 1 | 2µs | 1 | 1.52ms | has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); # spent 1.52ms making 1 call to Moose::has |
22 | 1 | 2µs | 1 | 1.45ms | has 'outfile' => ( is => 'ro', isa => 'Str', required => 1 ); # spent 1.45ms making 1 call to Moose::has |
23 | 1 | 2µs | 1 | 1.45ms | has 'iterations' => ( is => 'ro', isa => 'Int', default => 10 ); # spent 1.45ms making 1 call to Moose::has |
24 | 1 | 1µs | 1 | 1.51ms | has 'dont_delete' => ( is => 'ro', isa => 'Bool', default => 0 ); # spent 1.51ms making 1 call to Moose::has |
25 | 1 | 2µs | 1 | 1.45ms | has 'max_recursion' => ( is => 'ro', isa => 'Int', default => 2 ); # spent 1.45ms making 1 call to Moose::has |
26 | |||||
27 | 1 | 2µs | 1 | 3.29ms | has '_outfile_handle' => ( is => 'ro', lazy_build => 1 ); # spent 3.29ms making 1 call to Moose::has |
28 | 1 | 3µs | 1 | 2.47ms | has '_neighbourhood_size' => ( is => 'ro', isa => 'Int', default => 5 ); # spent 2.47ms making 1 call to Moose::has |
29 | |||||
30 | 1 | 2µs | 1 | 3.91ms | has '_group_filelist' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 ); # spent 3.91ms making 1 call to Moose::has |
31 | 1 | 2µs | 1 | 1.65ms | has '_tmp_dir' => ( is => 'ro', isa => 'Str', default => 'split_groups' ); # spent 1.65ms making 1 call to Moose::has |
32 | |||||
33 | 1 | 2µs | 1 | 3.10ms | has '_analyse_groups_obj' => ( is => 'ro', lazy_build => 1 ); # spent 3.10ms making 1 call to Moose::has |
34 | 1 | 2µs | 1 | 3.00ms | has '_genes_to_files' => ( is => 'ro', lazy_build => 1 ); # spent 3.00ms making 1 call to Moose::has |
35 | 1 | 2µs | 1 | 2.65ms | has '_genes_to_groups' => ( is => 'rw', isa => 'HashRef' ); # spent 2.65ms making 1 call to Moose::has |
36 | |||||
37 | has '_gene_files_temp_dir_obj' => | ||||
38 | 2 | 47µs | 3 | 3.13ms | # spent 568µs (17+551) within Bio::Roary::SplitGroups::__ANON__[lib/Bio/Roary/SplitGroups.pm:38] which was called:
# once (17µs+551µs) by Bio::Roary::SplitGroups::new at line 63 of (eval 25)[Eval/Closure.pm:125] # spent 2.57ms making 1 call to Moose::has
# spent 526µs making 1 call to File::Temp::newdir
# spent 26µs making 1 call to Cwd::getcwd |
39 | |||||
40 | |||||
41 | 1 | 2µs | 1 | 1.96ms | has '_do_sorting' => ( is => 'rw', isa => 'Bool', default => 0 ); # set to 1 for testing only # spent 1.96ms making 1 call to Moose::has |
42 | |||||
43 | sub _build__outfile_handle { | ||||
44 | my ( $self ) = @_; | ||||
45 | |||||
46 | open( my $fh, '>', $self->outfile ); | ||||
47 | return $fh; | ||||
48 | } | ||||
49 | |||||
50 | # spent 2.75s (100µs+2.75) within Bio::Roary::SplitGroups::_build__analyse_groups_obj which was called:
# once (100µs+2.75s) by Bio::Roary::SplitGroups::_analyse_groups_obj at line 11 of (eval 25)[Eval/Closure.pm:125] | ||||
51 | 3 | 80µs | my ( $self ) = @_; | ||
52 | 1 | 501ms | $self->_pre_filter_fasta_files(); # spent 501ms making 1 call to Bio::Roary::SplitGroups::_pre_filter_fasta_files | ||
53 | |||||
54 | 3 | 2.25s | return Bio::Roary::AnalyseGroups->new( # spent 2.25s making 1 call to Bio::Roary::AnalyseGroups::new
# spent 21µs making 1 call to Bio::Roary::SplitGroups::fasta_files
# spent 14µs making 1 call to Bio::Roary::SplitGroups::groupfile | ||
55 | fasta_files => $self->fasta_files, | ||||
56 | groups_filename => $self->groupfile | ||||
57 | ); | ||||
58 | } | ||||
59 | |||||
60 | # spent 2.75s (26µs+2.75) within Bio::Roary::SplitGroups::_build__genes_to_files which was called:
# once (26µs+2.75s) by Bio::Roary::SplitGroups::_genes_to_files at line 11 of (eval 25)[Eval/Closure.pm:125] | ||||
61 | 2 | 18µs | my ( $self ) = @_; | ||
62 | 2 | 2.75s | return $self->_analyse_groups_obj->_genes_to_file; # spent 2.75s making 1 call to Bio::Roary::SplitGroups::_analyse_groups_obj
# spent 6µs making 1 call to Bio::Roary::AnalyseGroups::_genes_to_file | ||
63 | } | ||||
64 | |||||
65 | sub _build__group_filelist { | ||||
66 | my ( $self ) = @_; | ||||
67 | my $tmp = $self->_tmp_dir; | ||||
68 | |||||
69 | my @filelist = ( $self->groupfile ); | ||||
70 | for my $i ( 1..($self->iterations - 1) ){ | ||||
71 | push( @filelist, "$tmp/group_$i" ); | ||||
72 | } | ||||
73 | push( @filelist, $self->outfile ); | ||||
74 | |||||
75 | return \@filelist; | ||||
76 | } | ||||
77 | |||||
78 | # spent 19µs (12+7) within Bio::Roary::SplitGroups::_make_tmp_dir which was called:
# once (12µs+7µs) by Bio::Roary::SplitGroups::split_groups at line 100 | ||||
79 | 3 | 16µs | my ( $self ) = @_; | ||
80 | 1 | 3µs | my $dir = $self->_tmp_dir; # spent 3µs making 1 call to Bio::Roary::SplitGroups::_tmp_dir | ||
81 | 1 | 4µs | unless ( -e $dir ) { # spent 4µs making 1 call to Bio::Roary::SplitGroups::CORE:ftis | ||
82 | make_path($dir) or die "Cannot make dir: $dir\n" ; | ||||
83 | } | ||||
84 | } | ||||
85 | |||||
86 | sub _pre_filter_fasta_files | ||||
87 | # spent 501ms (794µs+501) within Bio::Roary::SplitGroups::_pre_filter_fasta_files which was called:
# once (794µs+501ms) by Bio::Roary::SplitGroups::_build__analyse_groups_obj at line 52 | ||||
88 | 3 | 49µs | my ( $self ) = @_; | ||
89 | 1 | 4µs | for my $fasta_file( @{$self->fasta_files}) # spent 4µs making 1 call to Bio::Roary::SplitGroups::fasta_files | ||
90 | { | ||||
91 | 16 | 726µs | 16 | 1.36ms | my ( $filename, $directories, $suffix ) = fileparse( $fasta_file, qr/\.[^.]*/ ); # spent 1.17ms making 8 calls to File::Basename::fileparse, avg 146µs/call
# spent 194µs making 8 calls to Bio::Roary::SplitGroups::CORE:qr, avg 24µs/call |
92 | 1 | 499ms | 24 | 499ms | system('grep \> '.$fasta_file.' > '.$self->_gene_files_temp_dir_obj."/".$filename.$suffix ); # spent 499ms making 8 calls to Bio::Roary::SplitGroups::CORE:system, avg 62.4ms/call
# spent 178µs making 8 calls to File::Temp::Dir::STRINGIFY, avg 22µs/call
# spent 100µs making 8 calls to Bio::Roary::SplitGroups::_gene_files_temp_dir_obj, avg 12µs/call |
93 | } | ||||
94 | return 1; | ||||
95 | } | ||||
96 | |||||
97 | # spent 10.5s (210ms+10.3) within Bio::Roary::SplitGroups::split_groups which was called:
# once (210ms+10.3s) by Bio::Roary::PostAnalysis::run at line 71 of lib/Bio/Roary/PostAnalysis.pm | ||||
98 | 10 | 70.4ms | my ( $self ) = @_; | ||
99 | |||||
100 | 1 | 19µs | $self->_make_tmp_dir; # spent 19µs making 1 call to Bio::Roary::SplitGroups::_make_tmp_dir | ||
101 | 2 | 239ms | $self->_set_genes_to_groups( $self->groupfile ); # spent 239ms making 1 call to Bio::Roary::SplitGroups::_set_genes_to_groups
# spent 3µs making 1 call to Bio::Roary::SplitGroups::groupfile | ||
102 | |||||
103 | # read in groupfile | ||||
104 | my @newgroups; | ||||
105 | 2 | 46µs | open( my $group_handle, '<', $self->groupfile ); # spent 42µs making 1 call to Bio::Roary::SplitGroups::CORE:open
# spent 4µs making 1 call to Bio::Roary::SplitGroups::groupfile | ||
106 | 36348 | 78.2ms | 18175 | 15.4ms | while( my $line = <$group_handle> ){ # spent 15.4ms making 18175 calls to Bio::Roary::SplitGroups::CORE:readline, avg 846ns/call |
107 | my @group = split( /\s+/, $line ); | ||||
108 | |||||
109 | 18199 | 8.50ms | 18174 | 3.78s | if( $self->_contains_paralogs( \@group ) ){ # spent 3.78s making 18174 calls to Bio::Roary::SplitGroups::_contains_paralogs, avg 208µs/call |
110 | 50 | 6.22s | my @true_orthologs = @{ $self->_true_orthologs( \@group,$self->max_recursion ) }; # spent 6.22s making 25 calls to Bio::Roary::SplitGroups::_true_orthologs, avg 249ms/call
# spent 300µs making 25 calls to Bio::Roary::SplitGroups::max_recursion, avg 12µs/call | ||
111 | push( @newgroups, @true_orthologs); | ||||
112 | } | ||||
113 | else { | ||||
114 | push( @newgroups, \@group ); | ||||
115 | } | ||||
116 | } | ||||
117 | 1 | 19µs | close( $group_handle ); # spent 19µs making 1 call to Bio::Roary::SplitGroups::CORE:close | ||
118 | |||||
119 | # write split groups to file | ||||
120 | 2 | 373µs | open( my $outfile_handle, '>', $self->outfile ); # spent 351µs making 1 call to Bio::Roary::SplitGroups::CORE:open
# spent 22µs making 1 call to Bio::Roary::SplitGroups::outfile | ||
121 | for my $g ( @newgroups ) { | ||||
122 | 36430 | 45.4ms | my $group_str = join( "\t", @{ $g } ) . "\n"; | ||
123 | 18215 | 5.74ms | print $outfile_handle $group_str; # spent 5.74ms making 18215 calls to Bio::Roary::SplitGroups::CORE:print, avg 315ns/call | ||
124 | } | ||||
125 | 1 | 242µs | close( $outfile_handle ); # spent 242µs making 1 call to Bio::Roary::SplitGroups::CORE:close | ||
126 | } | ||||
127 | |||||
128 | # spent 239ms (224+14.8) within Bio::Roary::SplitGroups::_set_genes_to_groups which was called:
# once (224ms+14.8ms) by Bio::Roary::SplitGroups::split_groups at line 101 | ||||
129 | 6 | 46.2ms | my ( $self, $groupfile ) = @_; | ||
130 | |||||
131 | my %genes2groups; | ||||
132 | my $c = 0; | ||||
133 | 1 | 16µs | open( GFH, '<', $groupfile ); # spent 16µs making 1 call to Bio::Roary::SplitGroups::CORE:open | ||
134 | 72696 | 69.2ms | 18175 | 14.7ms | while( my $line = <GFH> ){ # spent 14.7ms making 18175 calls to Bio::Roary::SplitGroups::CORE:readline, avg 810ns/call |
135 | chomp $line; | ||||
136 | my @genes = split( /\s+/, $line ); | ||||
137 | for my $g ( @genes ){ | ||||
138 | 144292 | 78.0ms | $genes2groups{$g} = $c; | ||
139 | } | ||||
140 | $c++; | ||||
141 | } | ||||
142 | 1 | 12µs | $self->_genes_to_groups( \%genes2groups ); # spent 12µs making 1 call to Bio::Roary::SplitGroups::_genes_to_groups | ||
143 | } | ||||
144 | |||||
145 | # spent 985ms (835+150) within Bio::Roary::SplitGroups::_update_genes_to_groups which was called 31 times, avg 31.8ms/call:
# 31 times (835ms+150ms) by Bio::Roary::SplitGroups::_true_orthologs at line 235, avg 31.8ms/call | ||||
146 | 155 | 833ms | my ( $self, $groups ) = @_; | ||
147 | |||||
148 | 31 | 72µs | my %genes2groups = %{ $self->_genes_to_groups }; # spent 72µs making 31 calls to Bio::Roary::SplitGroups::_genes_to_groups, avg 2µs/call | ||
149 | my $c = 1; | ||||
150 | for my $g ( @{ $groups } ){ | ||||
151 | 144 | 154µs | for my $h ( @{ $g } ){ | ||
152 | 309 | 752µs | $genes2groups{$h} .= ".$c"; | ||
153 | } | ||||
154 | $c++; | ||||
155 | } | ||||
156 | |||||
157 | 31 | 150ms | $self->_genes_to_groups( \%genes2groups ); # spent 150ms making 31 calls to Bio::Roary::SplitGroups::_genes_to_groups, avg 4.85ms/call | ||
158 | } | ||||
159 | |||||
160 | sub _get_files_for_iteration { | ||||
161 | my ( $self, $n ) = @_; | ||||
162 | my @filelist = @{ $self->_group_filelist }; | ||||
163 | return ( $filelist[$n], $filelist[$n+1] ); | ||||
164 | } | ||||
165 | |||||
166 | # spent 3.79s (66.5ms+3.72) within Bio::Roary::SplitGroups::_contains_paralogs which was called 18246 times, avg 208µs/call:
# 18174 times (65.9ms+3.72s) by Bio::Roary::SplitGroups::split_groups at line 109, avg 208µs/call
# 72 times (589µs+2.95ms) by Bio::Roary::SplitGroups::_true_orthologs at line 239, avg 49µs/call | ||||
167 | 54707 | 51.1ms | my ( $self, $group ) = @_; | ||
168 | |||||
169 | 18246 | 3.72s | return 1 if defined $self->_find_paralogs( $group ); # spent 3.72s making 18246 calls to Bio::Roary::SplitGroups::_find_paralogs, avg 204µs/call | ||
170 | return 0; | ||||
171 | } | ||||
172 | |||||
173 | # spent 3.73s (746ms+2.98) within Bio::Roary::SplitGroups::_find_paralogs which was called 18277 times, avg 204µs/call:
# 18246 times (742ms+2.98s) by Bio::Roary::SplitGroups::_contains_paralogs at line 169, avg 204µs/call
# 31 times (4.19ms+1.21ms) by Bio::Roary::SplitGroups::_true_orthologs at line 212, avg 174µs/call | ||||
174 | 146154 | 103ms | my ( $self, $group ) = @_; | ||
175 | |||||
176 | my %occ; | ||||
177 | for my $gene ( @{ $group } ){ | ||||
178 | 289820 | 378ms | 144910 | 2.98s | my $gene_file = $self->_genes_to_files->{ $gene }; # spent 2.98s making 144910 calls to Bio::Roary::SplitGroups::_genes_to_files, avg 21µs/call |
179 | push( @{ $occ{$gene_file} }, $gene ); | ||||
180 | } | ||||
181 | |||||
182 | # pick the smallest number of paralogs | ||||
183 | my $smallest_number = 1000000; | ||||
184 | my $smallest_group; | ||||
185 | for my $v ( values %occ ){ | ||||
186 | 289348 | 53.5ms | my $v_len = scalar( @{$v} ); | ||
187 | 140 | 30µs | if ( $v_len < $smallest_number && $v_len > 1 ){ | ||
188 | $smallest_number = $v_len; | ||||
189 | $smallest_group = $v; | ||||
190 | } | ||||
191 | } | ||||
192 | return $smallest_group if ( defined $smallest_group ); | ||||
193 | |||||
194 | return undef; | ||||
195 | } | ||||
196 | |||||
197 | # spent 6.22s (34.8ms+6.19) within Bio::Roary::SplitGroups::_true_orthologs which was called 31 times, avg 201ms/call:
# 25 times (31.7ms+6.19s) by Bio::Roary::SplitGroups::split_groups at line 110, avg 249ms/call
# 6 times (3.10ms+-3.10ms) by Bio::Roary::SplitGroups::_true_orthologs at line 240, avg 0s/call | ||||
198 | 403 | 2.82ms | my ( $self, $gs, $max_recursion ) = @_; | ||
199 | |||||
200 | # first, create CGN hash for group | ||||
201 | my %cgns; | ||||
202 | for my $g ( @{$gs} ){ | ||||
203 | 309 | 26.3ms | 309 | 4.65s | $cgns{$g} = $self->_parse_gene_neighbourhood( $g ); # spent 4.65s making 309 calls to Bio::Roary::SplitGroups::_parse_gene_neighbourhood, avg 15.0ms/call |
204 | } | ||||
205 | |||||
206 | my @groups = ( $gs ); | ||||
207 | my @split_groups; | ||||
208 | my $continue = 1; | ||||
209 | my $c = 0; | ||||
210 | for my $group ( @groups ){ | ||||
211 | # finding paralogs in the group | ||||
212 | 248 | 623µs | 31 | 5.40ms | my @paralogs = @{ $self->_find_paralogs( $group ) }; # spent 5.40ms making 31 calls to Bio::Roary::SplitGroups::_find_paralogs, avg 174µs/call |
213 | my @paralog_cgns; | ||||
214 | for my $p ( @paralogs ){ | ||||
215 | 62 | 90µs | push( @paralog_cgns, $cgns{$p} ); | ||
216 | } | ||||
217 | |||||
218 | for my $p ( @paralogs ){ | ||||
219 | 62 | 80µs | push( @split_groups, [ $p ] ); | ||
220 | } | ||||
221 | push( @split_groups, [] ); # extra "leftovers" array to gather genes that don't share CGN with anything | ||||
222 | |||||
223 | # cluster other members of the group to their closest match | ||||
224 | for my $g ( @{ $group } ){ | ||||
225 | 803 | 1.79ms | next if ( grep {$_ eq $g} @paralogs ); | ||
226 | 247 | 546ms | my $closest = $self->_closest_cgn( $cgns{$g}, \@paralog_cgns ); # spent 546ms making 247 calls to Bio::Roary::SplitGroups::_closest_cgn, avg 2.21ms/call | ||
227 | push( @{ $split_groups[$closest] }, $g ); | ||||
228 | } | ||||
229 | |||||
230 | # check for "leftovers", remove if absent | ||||
231 | my $last = pop @split_groups; | ||||
232 | push( @split_groups, $last ) if ( @$last > 0 ); | ||||
233 | } | ||||
234 | |||||
235 | 31 | 985ms | $self->_update_genes_to_groups( \@split_groups ); # spent 985ms making 31 calls to Bio::Roary::SplitGroups::_update_genes_to_groups, avg 31.8ms/call | ||
236 | |||||
237 | my @new_groups; | ||||
238 | for my $g ( @split_groups ){ | ||||
239 | 150 | 510µs | 72 | 3.54ms | if( $self->_contains_paralogs( $g ) && $max_recursion > 0){ # spent 3.54ms making 72 calls to Bio::Roary::SplitGroups::_contains_paralogs, avg 49µs/call |
240 | 6 | 0s | my @true_orthologs = @{ $self->_true_orthologs( $g,$max_recursion - 1) }; # spent 717ms making 6 calls to Bio::Roary::SplitGroups::_true_orthologs, avg 119ms/call, recursion: max depth 1, sum of overlapping time 717ms | ||
241 | push( @new_groups, @true_orthologs); | ||||
242 | } | ||||
243 | else { | ||||
244 | push( @new_groups, $g ); | ||||
245 | } | ||||
246 | } | ||||
247 | |||||
248 | # sort | ||||
249 | 31 | 373µs | if ( $self->_do_sorting ){ # spent 373µs making 31 calls to Bio::Roary::SplitGroups::_do_sorting, avg 12µs/call | ||
250 | my @sorted_new_groups; | ||||
251 | for my $gr ( @new_groups ){ | ||||
252 | my @s_gr = sort @{ $gr }; | ||||
253 | push( @sorted_new_groups, \@s_gr ); | ||||
254 | } | ||||
255 | return \@sorted_new_groups; | ||||
256 | } | ||||
257 | |||||
258 | return \@new_groups; | ||||
259 | } | ||||
260 | |||||
261 | # spent 546ms (3.56+542) within Bio::Roary::SplitGroups::_closest_cgn which was called 247 times, avg 2.21ms/call:
# 247 times (3.56ms+542ms) by Bio::Roary::SplitGroups::_true_orthologs at line 226, avg 2.21ms/call | ||||
262 | 1482 | 1.40ms | my ( $self, $cgn, $p_cgns ) = @_; | ||
263 | |||||
264 | my @paralog_cgns = @{ $p_cgns }; | ||||
265 | my $best_score = 0; | ||||
266 | my $bs_index = -1; # return -1 to add to "leftovers" array if no better score is found | ||||
267 | for my $i ( 0..$#paralog_cgns ){ | ||||
268 | 1482 | 1.29ms | my $p_cgn = $paralog_cgns[$i]; | ||
269 | 494 | 542ms | my $score = $self->_shared_cgn_score( $cgn, $p_cgn ); # spent 542ms making 494 calls to Bio::Roary::SplitGroups::_shared_cgn_score, avg 1.10ms/call | ||
270 | 486 | 89µs | if ( $score > $best_score ){ | ||
271 | $best_score = $score; | ||||
272 | $bs_index = $i; | ||||
273 | } | ||||
274 | } | ||||
275 | return $bs_index; | ||||
276 | } | ||||
277 | |||||
278 | # spent 542ms (125+417) within Bio::Roary::SplitGroups::_shared_cgn_score which was called 494 times, avg 1.10ms/call:
# 494 times (125ms+417ms) by Bio::Roary::SplitGroups::_closest_cgn at line 269, avg 1.10ms/call | ||||
279 | 2470 | 1.83ms | my ( $self, $cgn1, $cgn2 ) = @_; | ||
280 | |||||
281 | my $total_shared = 0; | ||||
282 | for my $i ( @{ $cgn1 } ){ | ||||
283 | 4910 | 2.84ms | for my $j ( @{ $cgn2 } ){ | ||
284 | 48485 | 68.8ms | 48485 | 417ms | $total_shared += $self->_same_group( $i, $j ); # spent 417ms making 48485 calls to Bio::Roary::SplitGroups::_same_group, avg 9µs/call |
285 | } | ||||
286 | } | ||||
287 | my $score = $total_shared/scalar @{ $cgn1 }; | ||||
288 | return $score; | ||||
289 | } | ||||
290 | |||||
291 | # spent 417ms (298+119) within Bio::Roary::SplitGroups::_same_group which was called 48485 times, avg 9µs/call:
# 48485 times (298ms+119ms) by Bio::Roary::SplitGroups::_shared_cgn_score at line 284, avg 9µs/call | ||||
292 | 240404 | 228ms | my ( $self, $gene1, $gene2 ) = @_; | ||
293 | 48485 | 60.4ms | my $g1 = $self->_genes_to_groups->{$gene1}; # spent 60.4ms making 48485 calls to Bio::Roary::SplitGroups::_genes_to_groups, avg 1µs/call | ||
294 | 48485 | 59.0ms | my $g2 = $self->_genes_to_groups->{$gene2}; # spent 59.0ms making 48485 calls to Bio::Roary::SplitGroups::_genes_to_groups, avg 1µs/call | ||
295 | return 1 if ( defined $g1 && defined $g2 && $g1 eq $g2 ); | ||||
296 | return 0; | ||||
297 | } | ||||
298 | |||||
299 | # spent 4.65s (148ms+4.50) within Bio::Roary::SplitGroups::_parse_gene_neighbourhood which was called 309 times, avg 15.0ms/call:
# 309 times (148ms+4.50s) by Bio::Roary::SplitGroups::_true_orthologs at line 203, avg 15.0ms/call | ||||
300 | 3090 | 4.56s | my ( $self, $gene_id ) = @_; | ||
301 | |||||
302 | 309 | 4.44ms | my $nh_size = $self->_neighbourhood_size; # spent 4.44ms making 309 calls to Bio::Roary::SplitGroups::_neighbourhood_size, avg 14µs/call | ||
303 | 309 | 4.68ms | my $gene_file = $self->_genes_to_files->{ $gene_id }; # spent 4.68ms making 309 calls to Bio::Roary::SplitGroups::_genes_to_files, avg 15µs/call | ||
304 | 618 | 46.3ms | my ( $filename, $directories, $suffix ) = fileparse( $gene_file, qr/\.[^.]*/ ); # spent 41.7ms making 309 calls to File::Basename::fileparse, avg 135µs/call
# spent 4.60ms making 309 calls to Bio::Roary::SplitGroups::CORE:qr, avg 15µs/call | ||
305 | 1 | 589µs | 618 | 11.9ms | my $filtered_gene_file = $self->_gene_files_temp_dir_obj."/".$filename.$suffix ; # spent 7.78ms making 309 calls to File::Temp::Dir::STRINGIFY, avg 25µs/call
# spent 4.15ms making 309 calls to Bio::Roary::SplitGroups::_gene_files_temp_dir_obj, avg 13µs/call |
306 | my $grep_cmd = "grep '>' $filtered_gene_file | grep -B $nh_size -A $nh_size $gene_id | grep -v $gene_id"; | ||||
307 | |||||
308 | 309 | 336ms | open( GREP, '-|', $grep_cmd ); # spent 336ms making 309 calls to Bio::Roary::SplitGroups::CORE:open, avg 1.09ms/call | ||
309 | my @neighbourhood; | ||||
310 | 9198 | 37.7ms | 3375 | 4.08s | while( my $line = <GREP> ){ # spent 4.08s making 3375 calls to Bio::Roary::SplitGroups::CORE:readline, avg 1.21ms/call |
311 | chomp $line; | ||||
312 | 3066 | 10.2ms | $line =~ s/^>//; # spent 10.2ms making 3066 calls to Bio::Roary::SplitGroups::CORE:subst, avg 3µs/call | ||
313 | push( @neighbourhood, $line ); | ||||
314 | } | ||||
315 | return \@neighbourhood; | ||||
316 | } | ||||
317 | |||||
318 | 2 | 61µs | 2 | 220µs | # spent 115µs (10+105) within Bio::Roary::SplitGroups::BEGIN@318 which was called:
# once (10µs+105µs) by Bio::Roary::PostAnalysis::BEGIN@22 at line 318 # spent 115µs making 1 call to Bio::Roary::SplitGroups::BEGIN@318
# spent 105µs making 1 call to Moose::unimport |
319 | 1 | 7µs | 2 | 9.31ms | __PACKAGE__->meta->make_immutable; # spent 9.29ms making 1 call to Class::MOP::Class::make_immutable
# spent 16µs making 1 call to Bio::Roary::SplitGroups::meta |
320 | 1 | 72µs | 1; | ||
sub Bio::Roary::SplitGroups::CORE:close; # opcode | |||||
# spent 4µs within Bio::Roary::SplitGroups::CORE:ftis which was called:
# once (4µs+0s) by Bio::Roary::SplitGroups::_make_tmp_dir at line 81 | |||||
# spent 337ms within Bio::Roary::SplitGroups::CORE:open which was called 312 times, avg 1.08ms/call:
# 309 times (336ms+0s) by Bio::Roary::SplitGroups::_parse_gene_neighbourhood at line 308, avg 1.09ms/call
# once (351µs+0s) by Bio::Roary::SplitGroups::split_groups at line 120
# once (42µs+0s) by Bio::Roary::SplitGroups::split_groups at line 105
# once (16µs+0s) by Bio::Roary::SplitGroups::_set_genes_to_groups at line 133 | |||||
# spent 5.74ms within Bio::Roary::SplitGroups::CORE:print which was called 18215 times, avg 315ns/call:
# 18215 times (5.74ms+0s) by Bio::Roary::SplitGroups::split_groups at line 123, avg 315ns/call | |||||
# spent 4.79ms within Bio::Roary::SplitGroups::CORE:qr which was called 317 times, avg 15µs/call:
# 309 times (4.60ms+0s) by Bio::Roary::SplitGroups::_parse_gene_neighbourhood at line 304, avg 15µs/call
# 8 times (194µs+0s) by Bio::Roary::SplitGroups::_pre_filter_fasta_files at line 91, avg 24µs/call | |||||
# spent 4.11s within Bio::Roary::SplitGroups::CORE:readline which was called 39725 times, avg 104µs/call:
# 18175 times (15.4ms+0s) by Bio::Roary::SplitGroups::split_groups at line 106, avg 846ns/call
# 18175 times (14.7ms+0s) by Bio::Roary::SplitGroups::_set_genes_to_groups at line 134, avg 810ns/call
# 3375 times (4.08s+0s) by Bio::Roary::SplitGroups::_parse_gene_neighbourhood at line 310, avg 1.21ms/call | |||||
# spent 10.2ms within Bio::Roary::SplitGroups::CORE:subst which was called 3066 times, avg 3µs/call:
# 3066 times (10.2ms+0s) by Bio::Roary::SplitGroups::_parse_gene_neighbourhood at line 312, avg 3µs/call | |||||
# spent 499ms within Bio::Roary::SplitGroups::CORE:system which was called 8 times, avg 62.4ms/call:
# 8 times (499ms+0s) by Bio::Roary::SplitGroups::_pre_filter_fasta_files at line 92, avg 62.4ms/call |