← Index
NYTProf Performance Profile   « block view • line view • sub view »
For bin/pan_genome_post_analysis
  Run on Fri Mar 27 11:43:32 2015
Reported on Fri Mar 27 11:46:13 2015

Filename/Users/ap13/pathogens/Roary/lib/Bio/Roary/Output/GroupsMultifastaNucleotide.pm
StatementsExecuted 24 statements in 1.42ms
Subroutines
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine
1 | 1 | 1 | 1.74ms | 8.45ms | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18
1 | 1 | 1 | 48µs | 4.57ms | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17
1 | 1 | 1 | 16µs | 71µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19
1 | 1 | 1 | 15µs | 196µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148
1 | 1 | 1 | 14µs | 51µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20
1 | 1 | 1 | 8µs | 8µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@21
1 | 1 | 1 | 6µs | 6µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@22
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_bed_output_filename
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_build__input_seqio
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_build__output_filename
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_build_fasta_file
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_create_bed_file_from_gff
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_create_nucleotide_fasta_file_from_gff
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_extract_nucleotide_regions
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_extracted_nucleotide_fasta_file_from_bed_filename
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_group_file_name
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_group_seq_io_obj
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_nucleotide_fasta_file_from_gff_filename
0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::populate_files
Call graph for these subroutines as a Graphviz dot language file.
Line | Statements | Time on line | Calls | Time in subs | Code
1package Bio::Roary::Output::GroupsMultifastaNucleotide;
2
3# ABSTRACT: Take in a GFF file and a groups file and output one multifasta file per group with nucleotide sequences.
4
5=head1 SYNOPSIS
6
7Take in a GFF file and a groups file and output one multifasta file per group with nucleotide sequences.
8 use Bio::Roary::Output::GroupsMultifastaNucleotide;
9
10 my $obj = Bio::Roary::Output::GroupsMultifastaNucleotide->new(
11 group_names => ['aaa','bbb'],
12 );
13 $obj->populate_files();
14
15=cut
16
17244µs29.08ms
# spent 4.57ms (48µs+4.52) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17 which was called: # once (48µs+4.52ms) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 17
use Moose;
# spent 4.57ms making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17 # spent 4.52ms making 1 call to Moose::import
182277µs18.45ms
# spent 8.45ms (1.74+6.71) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 which was called: # once (1.74ms+6.71ms) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 18
use Bio::SeqIO;
19241µs2126µs
# spent 71µs (16+55) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19 which was called: # once (16µs+55µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 19
use File::Path qw(make_path);
# spent 71µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19 # spent 55µs making 1 call to Exporter::import
20229µs288µs
# spent 51µs (14+37) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20 which was called: # once (14µs+37µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 20
use File::Basename;
# spent 51µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20 # spent 37µs making 1 call to Exporter::import
21222µs18µs
# spent 8µs within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@21 which was called: # once (8µs+0s) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 21
use Bio::Roary::Exceptions;
222878µs16µs
# spent 6µs within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@22 which was called: # once (6µs+0s) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 22
use Bio::Roary::AnalyseGroups;
23
2413µs12.45mshas 'gff_file' => ( is => 'ro', isa => 'Str', required => 1 );
# spent 2.45ms making 1 call to Moose::has
25# Not implemented
2611µs11.70mshas 'group_names' => ( is => 'ro', isa => 'ArrayRef', required => 0 );
# spent 1.70ms making 1 call to Moose::has
2712µs11.53mshas 'output_directory' => ( is => 'ro', isa => 'Str', required => 1 );
# spent 1.53ms making 1 call to Moose::has
2812µs11.45mshas 'annotate_groups' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
# spent 1.45ms making 1 call to Moose::has
2912µs11.54mshas 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );
# spent 1.54ms making 1 call to Moose::has
30
3112µs11.96mshas 'fasta_file' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_fasta_file' );
# spent 1.96ms making 1 call to Moose::has
3212µs12.46mshas '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
# spent 2.46ms making 1 call to Moose::has
33
3412µs12.04mshas '_output_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__output_filename' );
# spent 2.04ms making 1 call to Moose::has
35
36
37sub _build__output_filename
38{
39 my ($self) = @_;
40 my ( $filename, $directories, $suffix ) = fileparse($self->gff_file);
41 return join('/',($self->output_directory, $filename.'.tmp_nuc_sequences.fa' ));
42}
43
44sub _build__input_seqio {
45 my ($self) = @_;
46 return Bio::SeqIO->new( -file => $self->fasta_file, -format => 'Fasta' );
47}
48
49sub populate_files {
50 my ($self) = @_;
51 while ( my $input_seq = $self->_input_seqio->next_seq() )
52 {
53 if ( $self->annotate_groups->_ids_to_groups->{$input_seq->display_id} )
54 {
55 my $current_group = $self->annotate_groups->_ids_to_groups->{$input_seq->display_id};
56
57 my $number_of_genes = @{$self->annotate_groups->_groups_to_id_names->{$current_group}};
58 # Theres no need to align a single sequence
59 next if($self->output_multifasta_files == 0 && $number_of_genes == 1);
60
61 my $output_seq = $self->_group_seq_io_obj($current_group,$number_of_genes);
62 $output_seq->write_seq($input_seq);
63 }
64 }
65
66 unlink($self->fasta_file);
67 1;
68}
69
70sub _group_file_name
71{
72 my ($self,$group_name,$num_group_genes) = @_;
73 my $annotated_group_name = $self->annotate_groups->_groups_to_consensus_gene_names->{$group_name};
74 $annotated_group_name =~ s!\W!_!gi;
75 my $filename = $annotated_group_name.'.fa';
76 my $group_file_name = join('/',($self->output_directory, $filename ));
77 return $group_file_name;
78}
79
80sub _group_seq_io_obj
81{
82 my ($self,$group_name,$num_group_genes) = @_;
83 my $filename = $self->_group_file_name($group_name,$num_group_genes);
84 return Bio::SeqIO->new( -file => ">>".$filename, -format => 'Fasta' );
85}
86
87
88sub _extracted_nucleotide_fasta_file_from_bed_filename {
89 my ($self) = @_;
90 return join( '.', ( $self->_output_filename, 'intermediate.extracted.fa' ) );
91}
92
93sub _create_bed_file_from_gff {
94 my ($self) = @_;
95 my $cmd =
96 'sed -n \'/##gff-version 3/,/##FASTA/p\' '
97 . $self->gff_file
98 . ' | grep -v \'^#\' | awk \'{print $1"\t"($4-1)"\t"($5)"\t"$9"\t1\t"$7}\' | sed \'s/ID=//\' | sed \'s/;[^\t]*\t/\t/g\' > '
99 . $self->_bed_output_filename;
100 system($cmd);
101}
102
103sub _create_nucleotide_fasta_file_from_gff {
104 my ($self) = @_;
105 my $cmd =
106 'sed -n \'/##FASTA/,//p\' '
107 . $self->gff_file
108 . ' | grep -v \'##FASTA\' > '
109 . $self->_nucleotide_fasta_file_from_gff_filename;
110 system($cmd);
111}
112
113sub _nucleotide_fasta_file_from_gff_filename {
114 my ($self) = @_;
115 return join( '.', ( $self->_output_filename, 'intermediate.fa' ) );
116}
117
118sub _bed_output_filename {
119 my ($self) = @_;
120 return join( '.', ( $self->_output_filename, 'intermediate.bed' ) );
121}
122
123sub _extract_nucleotide_regions {
124 my ($self) = @_;
125
126 $self->_create_nucleotide_fasta_file_from_gff;
127 $self->_create_bed_file_from_gff;
128
129 my $cmd =
130 'bedtools getfasta -s -fi '
131 . $self->_nucleotide_fasta_file_from_gff_filename
132 . ' -bed '
133 . $self->_bed_output_filename . ' -fo '
134 . $self->_extracted_nucleotide_fasta_file_from_bed_filename
135 . ' -name > /dev/null 2>&1';
136 system($cmd);
137 unlink( $self->_nucleotide_fasta_file_from_gff_filename );
138 unlink( $self->_bed_output_filename );
139 unlink( $self->_nucleotide_fasta_file_from_gff_filename . '.fai' );
140 return $self->_extracted_nucleotide_fasta_file_from_bed_filename;
141}
142
143sub _build_fasta_file {
144 my ($self) = @_;
145 return $self->_extract_nucleotide_regions;
146}
147
148266µs2377µs
# spent 196µs (15+181) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148 which was called: # once (15µs+181µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 148
no Moose;
# spent 196µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148 # spent 181µs making 1 call to Moose::unimport
14918µs25.84ms__PACKAGE__->meta->make_immutable;
# spent 5.82ms making 1 call to Class::MOP::Class::make_immutable # spent 20µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::meta
150
151142µs1;
152