← Index
NYTProf Performance Profile   « block view • line view • sub view »
For bin/pan_genome_post_analysis
  Run on Fri Mar 27 11:43:32 2015
Reported on Fri Mar 27 11:46:15 2015

Filename: /Users/ap13/pathogens/Roary/lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm
Statements: Executed 741331 statements in 1.51s
Subroutines
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine
8 | 1 | 1 | 877ms | 1.40s | Bio::Roary::ContigsToGeneIDsFromGFF::_build_contig_to_ids
8 | 1 | 1 | 314ms | 470ms | Bio::Roary::ContigsToGeneIDsFromGFF::_build_overlapping_hypothetical_protein_ids
197 | 1 | 1 | 1.03ms | 1.03ms | Bio::Roary::ContigsToGeneIDsFromGFF::_percent_overlap
8 | 1 | 1 | 66µs | 117µs | Bio::Roary::ContigsToGeneIDsFromGFF::_build__awk_filter
8 | 1 | 1 | 55µs | 55µs | Bio::Roary::ContigsToGeneIDsFromGFF::__ANON__[lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm:24]
1 | 1 | 1 | 33µs | 4.18ms | Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@17
1 | 1 | 1 | 10µs | 10µs | Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@18
1 | 1 | 1 | 10µs | 100µs | Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@142
Call graph for these subroutines as a Graphviz dot language file.
Line | Statements | Time on line | Calls | Time in subs | Code
1package Bio::Roary::ContigsToGeneIDsFromGFF;
2
3# ABSTRACT: Parse a GFF and efficiently extract ordered gene ids on each contig
4
5=head1 SYNOPSIS
6
7Parse a GFF and efficiently extract ordered gene ids on each contig
8 use Bio::Roary::ContigsToGeneIDsFromGFF;
9
10 my $obj = Bio::Roary::ContigsToGeneIDsFromGFF->new(
11 gff_file => 'abc.gff'
12 );
13 $obj->contig_to_ids;
14
15=cut
16
17248µs28.33ms
# spent 4.18ms (33µs+4.15) within Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@17 which was called: # once (33µs+4.15ms) by Bio::Roary::OrderGenes::BEGIN@21 at line 17
use Moose;
# spent 4.18ms making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@17 # spent 4.15ms making 1 call to Moose::import
182596µs110µs
# spent 10µs within Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@18 which was called: # once (10µs+0s) by Bio::Roary::OrderGenes::BEGIN@21 at line 18
use Bio::Tools::GFF;
# spent 10µs making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@18
1913µs19.12mswith 'Bio::Roary::ParseGFFAnnotationRole';
# spent 9.12ms making 1 call to Moose::with
20
2113µs12.13mshas 'contig_to_ids' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build_contig_to_ids');
# spent 2.13ms making 1 call to Moose::has
22
2312µs11.86mshas 'overlapping_hypothetical_protein_ids' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_overlapping_hypothetical_protein_ids');
# spent 1.86ms making 1 call to Moose::has
24947µs11.79ms
# spent 55µs within Bio::Roary::ContigsToGeneIDsFromGFF::__ANON__[lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm:24] which was called 8 times, avg 7µs/call: # 8 times (55µs+0s) by Bio::Roary::ContigsToGeneIDsFromGFF::new at line 52 of (eval 25)[Eval/Closure.pm:125], avg 7µs/call
has '_genes_annotation' => ( is => 'rw', isa => 'ArrayRef', default => sub{[]});
# spent 1.79ms making 1 call to Moose::has
25
2612µs11.62mshas '_min_nucleotide_overlap_percentage' => ( is => 'ro', isa => 'Int', default => 10);
# spent 1.62ms making 1 call to Moose::has
27
28# Manually parse the GFF file because the BioPerl module is too slow
29sub _build_contig_to_ids
30
# spent 1.40s (877ms+522ms) within Bio::Roary::ContigsToGeneIDsFromGFF::_build_contig_to_ids which was called 8 times, avg 175ms/call: # 8 times (877ms+522ms) by Bio::Roary::ContigsToGeneIDsFromGFF::contig_to_ids at line 15 of (eval 25)[Eval/Closure.pm:125], avg 175ms/call
{
3188µs my ($self) = @_;
3283µs my %contigs_to_ids;
3384µs my @genes_annotation;
34
35816.6ms1616.1ms open( my $fh, '-|', $self->_gff_fh_input_string ) or die "Couldnt open GFF file";
# spent 15.5ms making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:open, avg 1.94ms/call # spent 589µs making 8 calls to Bio::Roary::ParseGFFAnnotationRole::_gff_fh_input_string, avg 74µs/call
36863.3ms863.1ms while(<$fh>)
# spent 63.1ms making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:readline, avg 7.88ms/call
37 {
38400088.13ms chomp;
394000812.5ms my $line = $_;
40400082.31ms my $id_name;
4140008194ms4000894.7ms if($line =~/ID=["']?([^;"']+)["']?;?/i)
# spent 94.7ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 2µs/call
42 {
43 $id_name= $1;
44 }
45 else
46 {
47 next;
48 }
49
5040008122ms my @annotation_elements = split(/\t/,$line);
51 # Map gene IDs to the contig
524000843.0ms push(@{$contigs_to_ids{$annotation_elements[0]}}, $id_name);
53
5440008525ms80016314ms if($line =~/product=["']?([^;,"']+)[,"']?;?/i)
# spent 192ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:readline, avg 5µs/call # spent 123ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 3µs/call
55 {
56400084.18ms my %gene_data;
574000863.5ms $gene_data{product} = $1;
584000825.3ms $gene_data{id_name} = $id_name;
5940008144ms5951632.9ms if($line =~ /UniProtKB/ || $line =~ /RefSeq/ || $line =~ /protein motif/)
# spent 32.9ms making 59516 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 553ns/call
60 {
61 $gene_data{database_annotation_exists} = 1;
62 }
63 else
64 {
6546842.60ms $gene_data{database_annotation_exists} = 0;
66 }
67
684000828.4ms $gene_data{contig} = $annotation_elements[0];
694000817.7ms $gene_data{start} = $annotation_elements[1];
704000817.6ms $gene_data{end} = $annotation_elements[2];
714000839.7ms push(@genes_annotation,\%gene_data);
72 }
73
74 }
758332µs8286µs close($fh);
# spent 286µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:close, avg 36µs/call
76
778127µs8170µs $self->_genes_annotation(\@genes_annotation);
# spent 170µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 21µs/call
788189µs return \%contigs_to_ids;
79}
80
81sub _build_overlapping_hypothetical_protein_ids
82
# spent 470ms (314+156) within Bio::Roary::ContigsToGeneIDsFromGFF::_build_overlapping_hypothetical_protein_ids which was called 8 times, avg 58.7ms/call: # 8 times (314ms+156ms) by Bio::Roary::ContigsToGeneIDsFromGFF::overlapping_hypothetical_protein_ids at line 12 of (eval 25)[Eval/Closure.pm:125], avg 58.7ms/call
{
8387µs my ($self) = @_;
84813µs818µs $self->contig_to_ids;
# spent 18µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::contig_to_ids, avg 2µs/call
85
8682µs my %overlapping_protein_ids;
87
88 #Checking to see if the current feature is hypothetical and if the next one has annotation
89845.1ms4000851.7ms for(my $i = 0; $i< (@{$self->_genes_annotation} -1) ; $i++ )
# spent 51.7ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 1µs/call
90 {
914000041.6ms4000049.5ms my $current_feature = $self->_genes_annotation->[$i];
# spent 49.5ms making 40000 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 1µs/call
924000048.4ms4000049.5ms my $next_feature = $self->_genes_annotation->[$i+1];
# spent 49.5ms making 40000 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 1µs/call
93
944000024.6ms next if($current_feature->{database_annotation_exists} == 1);
95467712.2ms46773.74ms next unless($current_feature->{product} =~ /hypothetical/i);
# spent 3.74ms making 4677 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 800ns/call
9625961.26ms next unless($next_feature->{database_annotation_exists} == 1);
97
981468637µs my $start_coord = $current_feature->{start} ;
991468336µs my $end_coord = $current_feature->{end} ;
1001468393µs my $comparison_start_coord =$next_feature->{start} ;
1011468288µs my $comparison_end_coord =$next_feature->{end} ;
10214681.15ms if($comparison_start_coord < $end_coord && $comparison_end_coord > $start_coord )
103 {
104197401µs1971.03ms my $percent_overlap = $self->_percent_overlap($start_coord, $end_coord , $comparison_start_coord,$comparison_end_coord);
# spent 1.03ms making 197 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_percent_overlap, avg 5µs/call
1051971.85ms197517µs if($percent_overlap >= $self->_min_nucleotide_overlap_percentage)
# spent 517µs making 197 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_min_nucleotide_overlap_percentage, avg 3µs/call
106 {
107 $overlapping_protein_ids{$current_feature->{id_name}}++;
108 }
109 }
110 }
111
1128110µs return \%overlapping_protein_ids;
113}
114
115sub _percent_overlap
116
# spent 1.03ms within Bio::Roary::ContigsToGeneIDsFromGFF::_percent_overlap which was called 197 times, avg 5µs/call: # 197 times (1.03ms+0s) by Bio::Roary::ContigsToGeneIDsFromGFF::_build_overlapping_hypothetical_protein_ids at line 104, avg 5µs/call
{
117197192µs my ($self, $start_coord, $end_coord , $comparison_start_coord,$comparison_end_coord) = @_;
11819754µs my $size_of_hypothetical_gene = $end_coord - $start_coord;
119
12019736µs my $lower_bound = $start_coord;
12119759µs if($comparison_start_coord > $start_coord)
122 {
123 $lower_bound = $comparison_start_coord;
124 }
12519739µs my $upper_bound = $end_coord;
12619731µs if($comparison_end_coord < $end_coord )
127 {
128 $upper_bound = $comparison_end_coord;
129 }
130197616µs return (($upper_bound-$lower_bound)*100) / $size_of_hypothetical_gene;
131}
132
133
134
# spent 117µs (66+51) within Bio::Roary::ContigsToGeneIDsFromGFF::_build__awk_filter which was called 8 times, avg 15µs/call: # 8 times (66µs+51µs) by Bio::Roary::ContigsToGeneIDsFromGFF::_awk_filter at line 12 of (eval 25)[Eval/Closure.pm:125], avg 15µs/call
sub _build__awk_filter {
13584µs my ($self) = @_;
136 return
137863µs850µs 'awk \'BEGIN {FS="\t"};{ if ($3 ~/'
# spent 50µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_tags_to_filter, avg 6µs/call
138 . $self->_tags_to_filter
139 . '/) print $1"\t"$4"\t"$5"\t"$9;}\' ';
140}
141
142247µs2190µs
# spent 100µs (10+90) within Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@142 which was called: # once (10µs+90µs) by Bio::Roary::OrderGenes::BEGIN@21 at line 142
no Moose;
# spent 100µs making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@142 # spent 90µs making 1 call to Moose::unimport
14316µs26.68ms__PACKAGE__->meta->make_immutable;
# spent 6.67ms making 1 call to Class::MOP::Class::make_immutable # spent 15µs making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::meta
144
145136µs1;