← Index
NYTProf Performance Profile   « block view • line view • sub view »
For bin/pan_genome_post_analysis
  Run on Fri Mar 27 11:43:32 2015
Reported on Fri Mar 27 11:45:50 2015

Filename: /Users/ap13/pathogens/Roary/lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm
Statements: Executed 741331 statements in 1.51s
Subroutines
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine
8 | 1 | 1 | 877ms | 1.40s | Bio::Roary::ContigsToGeneIDsFromGFF::_build_contig_to_ids
8 | 1 | 1 | 314ms | 470ms | Bio::Roary::ContigsToGeneIDsFromGFF::_build_overlapping_hypothetical_protein_ids
197 | 1 | 1 | 1.03ms | 1.03ms | Bio::Roary::ContigsToGeneIDsFromGFF::_percent_overlap
8 | 1 | 1 | 66µs | 117µs | Bio::Roary::ContigsToGeneIDsFromGFF::_build__awk_filter
8 | 1 | 1 | 55µs | 55µs | Bio::Roary::ContigsToGeneIDsFromGFF::__ANON__[lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm:24]
1 | 1 | 1 | 33µs | 4.18ms | Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@17
1 | 1 | 1 | 10µs | 10µs | Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@18
1 | 1 | 1 | 10µs | 100µs | Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@142
Call graph for these subroutines as a Graphviz dot language file.
Line | Statements | Time on line | Calls | Time in subs | Code
1package Bio::Roary::ContigsToGeneIDsFromGFF;
2
3# ABSTRACT: Parse a GFF and efficiently extract ordered gene ids on each contig
4
5=head1 SYNOPSIS
6
7Parse a GFF and efficiently extract ordered gene ids on each contig
8 use Bio::Roary::ContigsToGeneIDsFromGFF;
9
10 my $obj = Bio::Roary::ContigsToGeneIDsFromGFF->new(
11 gff_file => 'abc.gff'
12 );
13 $obj->contig_to_ids;
14
15=cut
16
17248µs28.33ms
# spent 4.18ms (33µs+4.15) within Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@17 which was called: # once (33µs+4.15ms) by Bio::Roary::OrderGenes::BEGIN@21 at line 17
use Moose;
# spent 4.18ms making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@17 # spent 4.15ms making 1 call to Moose::import
182596µs110µs
# spent 10µs within Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@18 which was called: # once (10µs+0s) by Bio::Roary::OrderGenes::BEGIN@21 at line 18
use Bio::Tools::GFF;
# spent 10µs making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@18
1913µs19.12mswith 'Bio::Roary::ParseGFFAnnotationRole';
# spent 9.12ms making 1 call to Moose::with
20
2113µs12.13mshas 'contig_to_ids' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build_contig_to_ids');
# spent 2.13ms making 1 call to Moose::has
22
2312µs11.86mshas 'overlapping_hypothetical_protein_ids' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_overlapping_hypothetical_protein_ids');
# spent 1.86ms making 1 call to Moose::has
24947µs11.79ms
# spent 55µs within Bio::Roary::ContigsToGeneIDsFromGFF::__ANON__[lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm:24] which was called 8 times, avg 7µs/call: # 8 times (55µs+0s) by Bio::Roary::ContigsToGeneIDsFromGFF::new at line 52 of (eval 25)[Eval/Closure.pm:125], avg 7µs/call
has '_genes_annotation' => ( is => 'rw', isa => 'ArrayRef', default => sub{[]});
# spent 1.79ms making 1 call to Moose::has
25
2612µs11.62mshas '_min_nucleotide_overlap_percentage' => ( is => 'ro', isa => 'Int', default => 10);
# spent 1.62ms making 1 call to Moose::has
27
28# Manually parse the GFF file because the BioPerl module is too slow
29sub _build_contig_to_ids
30
# spent 1.40s (877ms+522ms) within Bio::Roary::ContigsToGeneIDsFromGFF::_build_contig_to_ids which was called 8 times, avg 175ms/call: # 8 times (877ms+522ms) by Bio::Roary::ContigsToGeneIDsFromGFF::contig_to_ids at line 15 of (eval 25)[Eval/Closure.pm:125], avg 175ms/call
{
316480.5ms my ($self) = @_;
32 my %contigs_to_ids;
33 my @genes_annotation;
34
351616.1ms open( my $fh, '-|', $self->_gff_fh_input_string ) or die "Couldnt open GFF file";
# spent 15.5ms making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:open, avg 1.94ms/call # spent 589µs making 8 calls to Bio::Roary::ParseGFFAnnotationRole::_gff_fh_input_string, avg 74µs/call
36863.1ms while(<$fh>)
# spent 63.1ms making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:readline, avg 7.88ms/call
37 {
38280056906ms chomp;
39 my $line = $_;
40 my $id_name;
414000894.7ms if($line =~/ID=["']?([^;"']+)["']?;?/i)
# spent 94.7ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 2µs/call
42 {
43 $id_name= $1;
44 }
45 else
46 {
47 next;
48 }
49
50 my @annotation_elements = split(/\t/,$line);
51 # Map gene IDs to the contig
52 push(@{$contigs_to_ids{$annotation_elements[0]}}, $id_name);
53
54320064340ms80016314ms if($line =~/product=["']?([^;,"']+)[,"']?;?/i)
# spent 192ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:readline, avg 5µs/call # spent 123ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 3µs/call
55 {
56 my %gene_data;
57 $gene_data{product} = $1;
58 $gene_data{id_name} = $id_name;
5946842.60ms5951632.9ms if($line =~ /UniProtKB/ || $line =~ /RefSeq/ || $line =~ /protein motif/)
# spent 32.9ms making 59516 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 553ns/call
60 {
61 $gene_data{database_annotation_exists} = 1;
62 }
63 else
64 {
65 $gene_data{database_annotation_exists} = 0;
66 }
67
68 $gene_data{contig} = $annotation_elements[0];
69 $gene_data{start} = $annotation_elements[1];
70 $gene_data{end} = $annotation_elements[2];
71 push(@genes_annotation,\%gene_data);
72 }
73
74 }
758286µs close($fh);
# spent 286µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:close, avg 36µs/call
76
778170µs $self->_genes_annotation(\@genes_annotation);
# spent 170µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 21µs/call
78 return \%contigs_to_ids;
79}
80
81sub _build_overlapping_hypothetical_protein_ids
82
# spent 470ms (314+156) within Bio::Roary::ContigsToGeneIDsFromGFF::_build_overlapping_hypothetical_protein_ids which was called 8 times, avg 58.7ms/call: # 8 times (314ms+156ms) by Bio::Roary::ContigsToGeneIDsFromGFF::overlapping_hypothetical_protein_ids at line 12 of (eval 25)[Eval/Closure.pm:125], avg 58.7ms/call
{
834045.2ms my ($self) = @_;
84818µs $self->contig_to_ids;
# spent 18µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::contig_to_ids, avg 2µs/call
85
86 my %overlapping_protein_ids;
87
88 #Checking to see if the current feature is hypothetical and if the next one has annotation
89134613131ms4000851.7ms for(my $i = 0; $i< (@{$self->_genes_annotation} -1) ; $i++ )
# spent 51.7ms making 40008 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 1µs/call
90 {
914000049.5ms my $current_feature = $self->_genes_annotation->[$i];
# spent 49.5ms making 40000 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 1µs/call
924000049.5ms my $next_feature = $self->_genes_annotation->[$i+1];
# spent 49.5ms making 40000 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_genes_annotation, avg 1µs/call
93
94 next if($current_feature->{database_annotation_exists} == 1);
9546773.74ms next unless($current_feature->{product} =~ /hypothetical/i);
# spent 3.74ms making 4677 calls to Bio::Roary::ContigsToGeneIDsFromGFF::CORE:match, avg 800ns/call
96 next unless($next_feature->{database_annotation_exists} == 1);
97
98 my $start_coord = $current_feature->{start} ;
99 my $end_coord = $current_feature->{end} ;
100 my $comparison_start_coord =$next_feature->{start} ;
101 my $comparison_end_coord =$next_feature->{end} ;
1023942.25ms if($comparison_start_coord < $end_coord && $comparison_end_coord > $start_coord )
103 {
1041971.03ms my $percent_overlap = $self->_percent_overlap($start_coord, $end_coord , $comparison_start_coord,$comparison_end_coord);
# spent 1.03ms making 197 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_percent_overlap, avg 5µs/call
105197517µs if($percent_overlap >= $self->_min_nucleotide_overlap_percentage)
# spent 517µs making 197 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_min_nucleotide_overlap_percentage, avg 3µs/call
106 {
107 $overlapping_protein_ids{$current_feature->{id_name}}++;
108 }
109 }
110 }
111
112 return \%overlapping_protein_ids;
113}
114
115sub _percent_overlap
116
# spent 1.03ms within Bio::Roary::ContigsToGeneIDsFromGFF::_percent_overlap which was called 197 times, avg 5µs/call: # 197 times (1.03ms+0s) by Bio::Roary::ContigsToGeneIDsFromGFF::_build_overlapping_hypothetical_protein_ids at line 104, avg 5µs/call
{
11713791.03ms my ($self, $start_coord, $end_coord , $comparison_start_coord,$comparison_end_coord) = @_;
118 my $size_of_hypothetical_gene = $end_coord - $start_coord;
119
120 my $lower_bound = $start_coord;
121 if($comparison_start_coord > $start_coord)
122 {
123 $lower_bound = $comparison_start_coord;
124 }
125 my $upper_bound = $end_coord;
126 if($comparison_end_coord < $end_coord )
127 {
128 $upper_bound = $comparison_end_coord;
129 }
130 return (($upper_bound-$lower_bound)*100) / $size_of_hypothetical_gene;
131}
132
133
134
# spent 117µs (66+51) within Bio::Roary::ContigsToGeneIDsFromGFF::_build__awk_filter which was called 8 times, avg 15µs/call: # 8 times (66µs+51µs) by Bio::Roary::ContigsToGeneIDsFromGFF::_awk_filter at line 12 of (eval 25)[Eval/Closure.pm:125], avg 15µs/call
sub _build__awk_filter {
1351667µs my ($self) = @_;
136 return
137850µs 'awk \'BEGIN {FS="\t"};{ if ($3 ~/'
# spent 50µs making 8 calls to Bio::Roary::ContigsToGeneIDsFromGFF::_tags_to_filter, avg 6µs/call
138 . $self->_tags_to_filter
139 . '/) print $1"\t"$4"\t"$5"\t"$9;}\' ';
140}
141
142247µs2190µs
# spent 100µs (10+90) within Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@142 which was called: # once (10µs+90µs) by Bio::Roary::OrderGenes::BEGIN@21 at line 142
no Moose;
# spent 100µs making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::BEGIN@142 # spent 90µs making 1 call to Moose::unimport
14316µs26.68ms__PACKAGE__->meta->make_immutable;
# spent 6.67ms making 1 call to Class::MOP::Class::make_immutable # spent 15µs making 1 call to Bio::Roary::ContigsToGeneIDsFromGFF::meta
144
145136µs1;