Filename | /Users/ap13/perl5/lib/perl5/Text/CSV_PP.pm |
Statements | Executed 3140358 statements in 4.52s |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
18216 | 1 | 1 | 3.60s | 4.56s | _combine | Text::CSV_PP::
673202 | 4 | 1 | 516ms | 516ms | CORE:regcomp (opcode) | Text::CSV_PP::
18216 | 2 | 1 | 264ms | 7.02s | |
673200 | 2 | 1 | 224ms | 224ms | CORE:subst (opcode) | Text::CSV_PP::
673206 | 4 | 1 | 218ms | 218ms | CORE:match (opcode) | Text::CSV_PP::
18216 | 1 | 1 | 51.2ms | 51.2ms | _string | Text::CSV_PP::
1 | 1 | 1 | 568µs | 568µs | BEGIN@970 | Text::CSV_PP::
1 | 1 | 1 | 128µs | 176µs | new | Text::CSV_PP::
1 | 1 | 1 | 27µs | 30µs | BEGIN@328 | Text::CSV_PP::
1 | 1 | 1 | 23µs | 23µs | BEGIN@111 | Text::CSV_PP::
1 | 1 | 1 | 23µs | 82µs | BEGIN@1088 | Text::CSV::ErrorDiag::
1 | 1 | 1 | 14µs | 24µs | _check_sanity | Text::CSV_PP::
1 | 1 | 1 | 14µs | 39µs | BEGIN@1087 | Text::CSV::ErrorDiag::
1 | 1 | 1 | 13µs | 13µs | types | Text::CSV_PP::
1 | 1 | 1 | 13µs | 25µs | BEGIN@10 | Text::CSV_PP::
1 | 1 | 1 | 10µs | 33µs | BEGIN@11 | Text::CSV_PP::
2 | 2 | 1 | 9µs | 9µs | CORE:qr (opcode) | Text::CSV_PP::
1 | 1 | 1 | 7µs | 21µs | BEGIN@114 | Text::CSV_PP::
1 | 1 | 1 | 7µs | 16µs | BEGIN@119 | Text::CSV_PP::
1 | 1 | 1 | 3µs | 3µs | BEGIN@12 | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | numeric | Text::CSV::ErrorDiag::
0 | 0 | 0 | 0s | 0s | stringify | Text::CSV::ErrorDiag::
0 | 0 | 0 | 0s | 0s | IV | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | NV | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | PV | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | SetDiag | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | __ANON__[:115] | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | __ANON__[:116] | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | __ANON__[:120] | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | __ANON__[:121] | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | __ANON__[:138] | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _check_type | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _fields | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _is_valid_utf8 | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _make_regexp_split_column | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _make_regexp_split_column_allow_sp | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _make_regexp_split_column_allow_unqout_esc | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _parse | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _return_getline_result | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | _set_error_diag | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | allow_whitespace | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | auto_diag | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | bind_columns | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | column_names | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | diag_verbose | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | eof | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | eol | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | error_diag | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | error_input | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | escape_char | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | getline | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | getline_all | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | getline_hr | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | getline_hr_all | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | is_binary | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | is_missing | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | is_quoted | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | meta_info | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | print_hr | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | quote_char | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | record_number | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | sep_char | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | status | Text::CSV_PP::
0 | 0 | 0 | 0s | 0s | version | Text::CSV_PP::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package Text::CSV_PP; | ||||
2 | |||||
3 | ################################################################################ | ||||
4 | # | ||||
5 | # Text::CSV_PP - Text::CSV_XS compatible pure-Perl module | ||||
6 | # | ||||
7 | ################################################################################ | ||||
8 | 1 | 43µs | require 5.005; | ||
9 | |||||
10 | 2 | 25µs | 2 | 38µs | # spent 25µs (13+12) within Text::CSV_PP::BEGIN@10 which was called:
# once (13µs+12µs) by Text::CSV::_load_pp at line 10 # spent 25µs making 1 call to Text::CSV_PP::BEGIN@10
# spent 12µs making 1 call to strict::import |
11 | 2 | 22µs | 2 | 56µs | # spent 33µs (10+23) within Text::CSV_PP::BEGIN@11 which was called:
# once (10µs+23µs) by Text::CSV::_load_pp at line 11 # spent 33µs making 1 call to Text::CSV_PP::BEGIN@11
# spent 23µs making 1 call to vars::import |
12 | 2 | 225µs | 1 | 3µs | # spent 3µs within Text::CSV_PP::BEGIN@12 which was called:
# once (3µs+0s) by Text::CSV::_load_pp at line 12 # spent 3µs making 1 call to Text::CSV_PP::BEGIN@12 |
13 | |||||
14 | 1 | 900ns | $VERSION = '1.31'; | ||
15 | |||||
16 | sub PV { 0 } | ||||
17 | sub IV { 1 } | ||||
18 | sub NV { 2 } | ||||
19 | |||||
20 | sub IS_QUOTED () { 0x0001; } | ||||
21 | sub IS_BINARY () { 0x0002; } | ||||
22 | sub IS_MISSING () { 0x0010; } | ||||
23 | |||||
24 | |||||
25 | 1 | 27µs | my $ERRORS = { | ||
26 | # PP and XS | ||||
27 | 1000 => "INI - constructor failed", | ||||
28 | 1001 => "sep_char is equal to quote_char or escape_char", | ||||
29 | 1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB", | ||||
30 | 1003 => "INI - \r or \n in main attr not allowed", | ||||
31 | |||||
32 | 2010 => "ECR - QUO char inside quotes followed by CR not part of EOL", | ||||
33 | 2011 => "ECR - Characters after end of quoted field", | ||||
34 | |||||
35 | 2021 => "EIQ - NL char inside quotes, binary off", | ||||
36 | 2022 => "EIQ - CR char inside quotes, binary off", | ||||
37 | 2025 => "EIQ - Loose unescaped escape", | ||||
38 | 2026 => "EIQ - Binary character inside quoted field, binary off", | ||||
39 | 2027 => "EIQ - Quoted field not terminated", | ||||
40 | |||||
41 | 2030 => "EIF - NL char inside unquoted verbatim, binary off", | ||||
42 | 2031 => "EIF - CR char is first char of field, not part of EOL", | ||||
43 | 2032 => "EIF - CR char inside unquoted, not part of EOL", | ||||
44 | 2034 => "EIF - Loose unescaped quote", | ||||
45 | 2037 => "EIF - Binary character in unquoted field, binary off", | ||||
46 | |||||
47 | 2110 => "ECB - Binary character in Combine, binary off", | ||||
48 | |||||
49 | 2200 => "EIO - print to IO failed. See errno", | ||||
50 | |||||
51 | # PP Only Error | ||||
52 | 4002 => "EIQ - Unescaped ESC in quoted field", | ||||
53 | 4003 => "EIF - ESC CR", | ||||
54 | 4004 => "EUF - ", | ||||
55 | |||||
56 | # Hash-Ref errors | ||||
57 | 3001 => "EHR - Unsupported syntax for column_names ()", | ||||
58 | 3002 => "EHR - getline_hr () called before column_names ()", | ||||
59 | 3003 => "EHR - bind_columns () and column_names () fields count mismatch", | ||||
60 | 3004 => "EHR - bind_columns () only accepts refs to scalars", | ||||
61 | 3006 => "EHR - bind_columns () did not pass enough refs for parsed fields", | ||||
62 | 3007 => "EHR - bind_columns needs refs to writable scalars", | ||||
63 | 3008 => "EHR - unexpected error in bound fields", | ||||
64 | 3009 => "EHR - print_hr () called before column_names ()", | ||||
65 | 3010 => "EHR - print_hr () called with invalid arguments", | ||||
66 | |||||
67 | 0 => "", | ||||
68 | }; | ||||
69 | |||||
70 | |||||
71 | 1 | 300ns | my $last_new_error = ''; | ||
72 | 1 | 100ns | my $last_new_err_num; | ||
73 | |||||
74 | 1 | 13µs | my %def_attr = ( | ||
75 | quote_char => '"', | ||||
76 | escape_char => '"', | ||||
77 | sep_char => ',', | ||||
78 | eol => defined $\ ? $\ : '', | ||||
79 | always_quote => 0, | ||||
80 | binary => 0, | ||||
81 | keep_meta_info => 0, | ||||
82 | allow_loose_quotes => 0, | ||||
83 | allow_loose_escapes => 0, | ||||
84 | allow_unquoted_escape => 0, | ||||
85 | allow_whitespace => 0, | ||||
86 | chomp_verbatim => 0, | ||||
87 | types => undef, | ||||
88 | verbatim => 0, | ||||
89 | blank_is_undef => 0, | ||||
90 | empty_is_undef => 0, | ||||
91 | auto_diag => 0, | ||||
92 | quote_space => 1, | ||||
93 | quote_null => 1, | ||||
94 | quote_binary => 1, | ||||
95 | diag_verbose => 0, | ||||
96 | |||||
97 | _EOF => 0, | ||||
98 | _RECNO => 0, | ||||
99 | _STATUS => undef, | ||||
100 | _FIELDS => undef, | ||||
101 | _FFLAGS => undef, | ||||
102 | _STRING => undef, | ||||
103 | _ERROR_INPUT => undef, | ||||
104 | _ERROR_DIAG => undef, | ||||
105 | |||||
106 | _COLUMN_NAMES => undef, | ||||
107 | _BOUND_COLUMNS => undef, | ||||
108 | ); | ||||
109 | |||||
110 | |||||
111 | # spent 23µs within Text::CSV_PP::BEGIN@111 which was called:
# once (23µs+0s) by Text::CSV::_load_pp at line 141 | ||||
112 | 3 | 21µs | if ( $] < 5.006 ) { | ||
113 | $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy | ||||
114 | 2 | 68µs | 2 | 34µs | # spent 21µs (7+13) within Text::CSV_PP::BEGIN@114 which was called:
# once (7µs+13µs) by Text::CSV::_load_pp at line 114 # spent 21µs making 1 call to Text::CSV_PP::BEGIN@114
# spent 13µs making 1 call to strict::unimport |
115 | *{"utf8::is_utf8"} = sub { 0; }; | ||||
116 | *{"utf8::decode"} = sub { }; | ||||
117 | } | ||||
118 | elsif ( $] < 5.008 ) { | ||||
119 | 2 | 133µs | 2 | 25µs | # spent 16µs (7+9) within Text::CSV_PP::BEGIN@119 which was called:
# once (7µs+9µs) by Text::CSV::_load_pp at line 119 # spent 16µs making 1 call to Text::CSV_PP::BEGIN@119
# spent 9µs making 1 call to strict::unimport |
120 | *{"utf8::is_utf8"} = sub { 0; }; | ||||
121 | *{"utf8::decode"} = sub { }; | ||||
122 | } | ||||
123 | elsif ( !defined &utf8::is_utf8 ) { | ||||
124 | require Encode; | ||||
125 | *utf8::is_utf8 = *Encode::is_utf8; | ||||
126 | } | ||||
127 | |||||
128 | eval q| require Scalar::Util |; # spent 3µs executing statements in string eval | ||||
129 | if ( $@ ) { | ||||
130 | eval q| require B |; | ||||
131 | if ( $@ ) { | ||||
132 | Carp::croak $@; | ||||
133 | } | ||||
134 | else { | ||||
135 | *Scalar::Util::readonly = sub (\$) { | ||||
136 | my $b = B::svref_2object( $_[0] ); | ||||
137 | $b->FLAGS & 0x00800000; # SVf_READONLY? | ||||
138 | } | ||||
139 | } | ||||
140 | } | ||||
141 | 1 | 1.33ms | 1 | 23µs | } # spent 23µs making 1 call to Text::CSV_PP::BEGIN@111 |
142 | |||||
143 | ################################################################################ | ||||
144 | # version | ||||
145 | ################################################################################ | ||||
146 | sub version { | ||||
147 | return $VERSION; | ||||
148 | } | ||||
149 | ################################################################################ | ||||
150 | # new | ||||
151 | ################################################################################ | ||||
152 | |||||
153 | # spent 24µs (14+10) within Text::CSV_PP::_check_sanity which was called:
# once (14µs+10µs) by Text::CSV_PP::new at line 195 | ||||
154 | 4 | 6µs | my ( $self ) = @_; | ||
155 | |||||
156 | for ( qw( sep_char quote_char escape_char ) ) { | ||||
157 | 3 | 18µs | 3 | 10µs | ( exists $self->{$_} && defined $self->{$_} && $self->{$_} =~ m/[\r\n]/ ) and return 1003; # spent 10µs making 3 calls to Text::CSV_PP::CORE:match, avg 3µs/call |
158 | } | ||||
159 | |||||
160 | if ( $self->{allow_whitespace} and | ||||
161 | ( defined $self->{quote_char} && $self->{quote_char} =~ m/^[ \t]$/ ) | ||||
162 | || | ||||
163 | ( defined $self->{escape_char} && $self->{escape_char} =~ m/^[ \t]$/ ) | ||||
164 | ) { | ||||
165 | #$last_new_error = "INI - allow_whitespace with escape_char or quote_char SP or TAB"; | ||||
166 | #$last_new_err_num = 1002; | ||||
167 | return 1002; | ||||
168 | } | ||||
169 | |||||
170 | return 0; | ||||
171 | } | ||||
172 | |||||
173 | |||||
174 | # spent 176µs (128+47) within Text::CSV_PP::new which was called:
# once (128µs+47µs) by Text::CSV::new at line 84 of Text/CSV.pm | ||||
175 | 15 | 105µs | my $proto = shift; | ||
176 | my $attr = @_ > 0 ? shift : {}; | ||||
177 | |||||
178 | $last_new_error = 'usage: my $csv = Text::CSV_PP->new ([{ option => value, ... }]);'; | ||||
179 | $last_new_err_num = 1000; | ||||
180 | |||||
181 | return unless ( defined $attr and ref($attr) eq 'HASH' ); | ||||
182 | |||||
183 | my $class = ref($proto) || $proto or return; | ||||
184 | my $self = { %def_attr }; | ||||
185 | |||||
186 | for my $prop (keys %$attr) { # if invalid attr, return undef | ||||
187 | 6 | 23µs | 3 | 10µs | unless ($prop =~ /^[a-z]/ && exists $def_attr{$prop}) { # spent 10µs making 3 calls to Text::CSV_PP::CORE:match, avg 3µs/call |
188 | $last_new_error = "INI - Unknown attribute '$prop'"; | ||||
189 | error_diag() if $attr->{ auto_diag }; | ||||
190 | return; | ||||
191 | } | ||||
192 | $self->{$prop} = $attr->{$prop}; | ||||
193 | } | ||||
194 | |||||
195 | 1 | 24µs | my $ec = _check_sanity( $self ); # spent 24µs making 1 call to Text::CSV_PP::_check_sanity | ||
196 | |||||
197 | if ( $ec ) { | ||||
198 | $last_new_error = $ERRORS->{ $ec }; | ||||
199 | $last_new_err_num = $ec; | ||||
200 | return; | ||||
201 | #$class->SetDiag ($ec); | ||||
202 | } | ||||
203 | |||||
204 | $last_new_error = ''; | ||||
205 | |||||
206 | defined $\ and $self->{eol} = $\; | ||||
207 | |||||
208 | bless $self, $class; | ||||
209 | |||||
210 | 1 | 13µs | $self->types( $self->{types} ) if( exists( $self->{types} ) ); # spent 13µs making 1 call to Text::CSV_PP::types | ||
211 | |||||
212 | return $self; | ||||
213 | } | ||||
214 | ################################################################################ | ||||
215 | # status | ||||
216 | ################################################################################ | ||||
217 | sub status { | ||||
218 | $_[0]->{_STATUS}; | ||||
219 | } | ||||
220 | ################################################################################ | ||||
221 | # error_input | ||||
222 | ################################################################################ | ||||
223 | sub error_input { | ||||
224 | $_[0]->{_ERROR_INPUT}; | ||||
225 | } | ||||
226 | ################################################################################ | ||||
227 | # error_diag | ||||
228 | ################################################################################ | ||||
229 | sub error_diag { | ||||
230 | my $self = shift; | ||||
231 | my @diag = (0, $last_new_error, 0); | ||||
232 | |||||
233 | unless ($self and ref $self) { # Class method or direct call | ||||
234 | $last_new_error and $diag[0] = defined $last_new_err_num ? $last_new_err_num : 1000; | ||||
235 | } | ||||
236 | elsif ( $self->isa (__PACKAGE__) and defined $self->{_ERROR_DIAG} ) { | ||||
237 | @diag = ( 0 + $self->{_ERROR_DIAG}, $ERRORS->{ $self->{_ERROR_DIAG} } ); | ||||
238 | exists $self->{_ERROR_POS} and $diag[2] = 1 + $self->{_ERROR_POS}; | ||||
239 | } | ||||
240 | |||||
241 | my $context = wantarray; | ||||
242 | |||||
243 | my $diagobj = bless \@diag, 'Text::CSV::ErrorDiag'; | ||||
244 | |||||
245 | unless (defined $context) { # Void context | ||||
246 | if ( $diag[0] ) { | ||||
247 | my $msg = "# CSV_PP ERROR: " . $diag[0] . " - $diag[1]\n"; | ||||
248 | ref $self ? ( $self->{auto_diag} > 1 ? die $msg : warn $msg ) | ||||
249 | : warn $msg; | ||||
250 | } | ||||
251 | return; | ||||
252 | } | ||||
253 | |||||
254 | return $context ? @diag : $diagobj; | ||||
255 | } | ||||
256 | |||||
257 | sub record_number { | ||||
258 | return shift->{_RECNO}; | ||||
259 | } | ||||
260 | |||||
261 | ################################################################################ | ||||
262 | # string | ||||
263 | ################################################################################ | ||||
264 | 1 | 2µs | *string = \&_string; | ||
265 | # spent 51.2ms within Text::CSV_PP::_string which was called 18216 times, avg 3µs/call:
# 18216 times (51.2ms+0s) by Text::CSV_PP::print at line 662, avg 3µs/call | ||||
266 | 18216 | 60.0ms | defined $_[0]->{_STRING} ? ${ $_[0]->{_STRING} } : undef; | ||
267 | } | ||||
268 | ################################################################################ | ||||
269 | # fields | ||||
270 | ################################################################################ | ||||
271 | 1 | 500ns | *fields = \&_fields; | ||
272 | sub _fields { | ||||
273 | ref($_[0]->{_FIELDS}) ? @{$_[0]->{_FIELDS}} : undef; | ||||
274 | } | ||||
275 | ################################################################################ | ||||
276 | # combine | ||||
277 | ################################################################################ | ||||
278 | 1 | 500ns | *combine = \&_combine; | ||
279 | # spent 4.56s (3.60+958ms) within Text::CSV_PP::_combine which was called 18216 times, avg 250µs/call:
# 18216 times (3.60s+958ms) by Text::CSV_PP::print at line 658, avg 250µs/call | ||||
280 | 291456 | 398ms | my ($self, @part) = @_; | ||
281 | |||||
282 | # at least one argument was given for "combining"... | ||||
283 | return $self->{_STATUS} = 0 unless(@part); | ||||
284 | |||||
285 | $self->{_FIELDS} = \@part; | ||||
286 | $self->{_ERROR_INPUT} = undef; | ||||
287 | $self->{_STRING} = ''; | ||||
288 | $self->{_STATUS} = 0; | ||||
289 | |||||
290 | my ($always_quote, $binary, $quot, $sep, $esc, $empty_is_undef, $quote_space, $quote_null, $quote_binary ) | ||||
291 | = @{$self}{qw/always_quote binary quote_char sep_char escape_char empty_is_undef quote_space quote_null quote_binary/}; | ||||
292 | |||||
293 | if(!defined $quot){ $quot = ''; } | ||||
294 | |||||
295 | return $self->_set_error_diag(1001) if ($sep eq $esc or $sep eq $quot); | ||||
296 | |||||
297 | 2 | 32µs | my $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/; # spent 25µs making 1 call to Text::CSV_PP::CORE:regcomp
# spent 7µs making 1 call to Text::CSV_PP::CORE:qr | ||
298 | 2 | 13µs | my $re_sp = $self->{_re_comb_sp}->{$sep}->{$quote_space} ||= ( $quote_space ? qr/[\s\Q$sep\E]/ : qr/[\Q$sep\E]/ ); # spent 11µs making 1 call to Text::CSV_PP::CORE:regcomp
# spent 2µs making 1 call to Text::CSV_PP::CORE:qr | ||
299 | |||||
300 | my $must_be_quoted; | ||||
301 | for my $column (@part) { | ||||
302 | |||||
303 | 2384712 | 2.83s | unless (defined $column) { | ||
304 | $column = ''; | ||||
305 | next; | ||||
306 | } | ||||
307 | elsif ( !$binary ) { | ||||
308 | $binary = 1 if utf8::is_utf8 $column; | ||||
309 | } | ||||
310 | |||||
311 | if (!$binary and $column =~ /[^\x09\x20-\x7E]/) { | ||||
312 | # an argument contained an invalid character... | ||||
313 | $self->{_ERROR_INPUT} = $column; | ||||
314 | $self->_set_error_diag(2110); | ||||
315 | return $self->{_STATUS}; | ||||
316 | } | ||||
317 | |||||
318 | $must_be_quoted = 0; | ||||
319 | |||||
320 | 673200 | 434ms | if($quot ne '' and $column =~ s/$re_esc/$esc$1/g){ # spent 292ms making 336600 calls to Text::CSV_PP::CORE:regcomp, avg 869ns/call
# spent 141ms making 336600 calls to Text::CSV_PP::CORE:subst, avg 420ns/call | ||
321 | $must_be_quoted++; | ||||
322 | } | ||||
323 | 673200 | 365ms | if($column =~ /$re_sp/){ # spent 224ms making 336600 calls to Text::CSV_PP::CORE:regcomp, avg 664ns/call
# spent 141ms making 336600 calls to Text::CSV_PP::CORE:match, avg 419ns/call | ||
324 | $must_be_quoted++; | ||||
325 | } | ||||
326 | |||||
327 | 336600 | 991ms | if( $binary and $quote_null ){ | ||
328 | 2 | 3.66ms | 2 | 34µs | # spent 30µs (27+4) within Text::CSV_PP::BEGIN@328 which was called:
# once (27µs+4µs) by Text::CSV::_load_pp at line 328 # spent 30µs making 1 call to Text::CSV_PP::BEGIN@328
# spent 4µs making 1 call to bytes::import |
329 | 673200 | 159ms | $must_be_quoted++ if ( $column =~ s/\0/${esc}0/g || ($quote_binary && $column =~ /[\x00-\x1f\x7f-\xa0]/) ); # spent 82.3ms making 336600 calls to Text::CSV_PP::CORE:subst, avg 245ns/call
# spent 76.9ms making 336600 calls to Text::CSV_PP::CORE:match, avg 229ns/call | ||
330 | } | ||||
331 | |||||
332 | if($always_quote or $must_be_quoted){ | ||||
333 | $column = $quot . $column . $quot; | ||||
334 | } | ||||
335 | |||||
336 | } | ||||
337 | |||||
338 | $self->{_STRING} = \do { join($sep, @part) . ( defined $self->{eol} ? $self->{eol} : '' ) }; | ||||
339 | $self->{_STATUS} = 1; | ||||
340 | |||||
341 | return $self->{_STATUS}; | ||||
342 | } | ||||
343 | ################################################################################ | ||||
344 | # parse | ||||
345 | ################################################################################ | ||||
346 | 1 | 2µs | my %allow_eol = ("\r" => 1, "\r\n" => 1, "\n" => 1, "" => 1); | ||
347 | |||||
348 | 1 | 400ns | *parse = \&_parse; | ||
349 | |||||
350 | sub _parse { | ||||
351 | my ($self, $line) = @_; | ||||
352 | |||||
353 | @{$self}{qw/_STRING _FIELDS _STATUS _ERROR_INPUT/} = ( \do{ defined $line ? "$line" : undef }, undef, 0, $line ); | ||||
354 | |||||
355 | return 0 if(!defined $line); | ||||
356 | |||||
357 | my ($binary, $quot, $sep, $esc, $types, $keep_meta_info, $allow_whitespace, $eol, $blank_is_undef, $empty_is_undef, $unquot_esc) | ||||
358 | = @{$self}{ | ||||
359 | qw/binary quote_char sep_char escape_char types keep_meta_info allow_whitespace eol blank_is_undef empty_is_undef allow_unquoted_escape/ | ||||
360 | }; | ||||
361 | |||||
362 | $sep = ',' unless (defined $sep); | ||||
363 | $esc = "\0" unless (defined $esc); | ||||
364 | $quot = "\0" unless (defined $quot); | ||||
365 | |||||
366 | my $quot_is_null = $quot eq "\0"; # in this case, any fields are not interpreted as quoted data. | ||||
367 | |||||
368 | return $self->_set_error_diag(1001) if (($sep eq $esc or $sep eq $quot) and $sep ne "\0"); | ||||
369 | |||||
370 | my $meta_flag = $keep_meta_info ? [] : undef; | ||||
371 | my $re_split = $self->{_re_split}->{$quot}->{$esc}->{$sep} ||= _make_regexp_split_column($esc, $quot, $sep); | ||||
372 | my $re_quoted = $self->{_re_quoted}->{$quot} ||= qr/^\Q$quot\E(.*)\Q$quot\E$/s; | ||||
373 | my $re_in_quot_esp1 = $self->{_re_in_quot_esp1}->{$esc} ||= qr/\Q$esc\E(.)/; | ||||
374 | my $re_in_quot_esp2 = $self->{_re_in_quot_esp2}->{$quot}->{$esc} ||= qr/[\Q$quot$esc$sep\E0]/; | ||||
375 | my $re_quot_char = $self->{_re_quot_char}->{$quot} ||= qr/\Q$quot\E/; | ||||
376 | my $re_esc = $self->{_re_esc}->{$quot}->{$esc} ||= qr/\Q$esc\E(\Q$quot\E|\Q$esc\E|\Q$sep\E|0)/; | ||||
377 | my $re_invalid_quot = $self->{_re_invalid_quot}->{$quot}->{$esc} ||= qr/^$re_quot_char|[^\Q$re_esc\E]$re_quot_char/; | ||||
378 | |||||
379 | if ($allow_whitespace) { | ||||
380 | $re_split = $self->{_re_split_allow_sp}->{$quot}->{$esc}->{$sep} | ||||
381 | ||= _make_regexp_split_column_allow_sp($esc, $quot, $sep); | ||||
382 | } | ||||
383 | if ($unquot_esc) { | ||||
384 | $re_split = $self->{_re_split_allow_unqout_esc}->{$quot}->{$esc}->{$sep} | ||||
385 | ||= _make_regexp_split_column_allow_unqout_esc($esc, $quot, $sep); | ||||
386 | } | ||||
387 | |||||
388 | my $palatable = 1; | ||||
389 | my @part = (); | ||||
390 | |||||
391 | my $i = 0; | ||||
392 | my $flag; | ||||
393 | |||||
394 | if (defined $eol and $eol eq "\r") { | ||||
395 | $line =~ s/[\r ]*\r[ ]*$//; | ||||
396 | } | ||||
397 | |||||
398 | if ($self->{verbatim}) { | ||||
399 | $line .= $sep; | ||||
400 | } | ||||
401 | else { | ||||
402 | if (defined $eol and !$allow_eol{$eol}) { | ||||
403 | $line .= $sep; | ||||
404 | } | ||||
405 | else { | ||||
406 | $line =~ s/(?:\x0D\x0A|\x0A)?$|(?:\x0D\x0A|\x0A)[ ]*$/$sep/; | ||||
407 | } | ||||
408 | } | ||||
409 | |||||
410 | my $pos = 0; | ||||
411 | |||||
412 | my $utf8 = 1 if utf8::is_utf8( $line ); # if UTF8 marked, flag on. | ||||
413 | |||||
414 | for my $col ( $line =~ /$re_split/g ) { | ||||
415 | |||||
416 | if ($keep_meta_info) { | ||||
417 | $flag = 0x0000; | ||||
418 | $flag |= IS_BINARY if ($col =~ /[^\x09\x20-\x7E]/); | ||||
419 | } | ||||
420 | |||||
421 | $pos += length $col; | ||||
422 | |||||
423 | if ( ( !$binary and !$utf8 ) and $col =~ /[^\x09\x20-\x7E]/) { # Binary character, binary off | ||||
424 | if ( not $quot_is_null and $col =~ $re_quoted ) { | ||||
425 | $self->_set_error_diag( | ||||
426 | $col =~ /\n([^\n]*)/ ? (2021, $pos - 1 - length $1) | ||||
427 | : $col =~ /\r([^\r]*)/ ? (2022, $pos - 1 - length $1) | ||||
428 | : (2026, $pos -2) # Binary character inside quoted field, binary off | ||||
429 | ); | ||||
430 | } | ||||
431 | else { | ||||
432 | $self->_set_error_diag( | ||||
433 | $col =~ /\Q$quot\E(.*)\Q$quot\E\r$/ ? (2010, $pos - 2) | ||||
434 | : $col =~ /\n/ ? (2030, $pos - length $col) | ||||
435 | : $col =~ /^\r/ ? (2031, $pos - length $col) | ||||
436 | : $col =~ /\r([^\r]*)/ ? (2032, $pos - 1 - length $1) | ||||
437 | : (2037, $pos - length $col) # Binary character in unquoted field, binary off | ||||
438 | ); | ||||
439 | } | ||||
440 | $palatable = 0; | ||||
441 | last; | ||||
442 | } | ||||
443 | |||||
444 | if ( ($utf8 and !$binary) and $col =~ /\n|\0/ ) { # \n still needs binary (Text::CSV_XS 0.51 compat) | ||||
445 | $self->_set_error_diag(2021, $pos); | ||||
446 | $palatable = 0; | ||||
447 | last; | ||||
448 | } | ||||
449 | |||||
450 | if ( not $quot_is_null and $col =~ $re_quoted ) { | ||||
451 | $flag |= IS_QUOTED if ($keep_meta_info); | ||||
452 | $col = $1; | ||||
453 | |||||
454 | my $flag_in_quot_esp; | ||||
455 | while ( $col =~ /$re_in_quot_esp1/g ) { | ||||
456 | my $str = $1; | ||||
457 | $flag_in_quot_esp = 1; | ||||
458 | |||||
459 | if ($str !~ $re_in_quot_esp2) { | ||||
460 | |||||
461 | unless ($self->{allow_loose_escapes}) { | ||||
462 | $self->_set_error_diag( 2025, $pos - 2 ); # Needless ESC in quoted field | ||||
463 | $palatable = 0; | ||||
464 | last; | ||||
465 | } | ||||
466 | |||||
467 | unless ($self->{allow_loose_quotes}) { | ||||
468 | $col =~ s/\Q$esc\E(.)/$1/g; | ||||
469 | } | ||||
470 | } | ||||
471 | |||||
472 | } | ||||
473 | |||||
474 | last unless ( $palatable ); | ||||
475 | |||||
476 | unless ( $flag_in_quot_esp ) { | ||||
477 | if ($col =~ /(?<!\Q$esc\E)\Q$esc\E/) { | ||||
478 | $self->_set_error_diag( 4002, $pos - 1 ); # No escaped ESC in quoted field | ||||
479 | $palatable = 0; | ||||
480 | last; | ||||
481 | } | ||||
482 | } | ||||
483 | |||||
484 | $col =~ s{$re_esc}{$1 eq '0' ? "\0" : $1}eg; | ||||
485 | |||||
486 | if ( $empty_is_undef and length($col) == 0 ) { | ||||
487 | $col = undef; | ||||
488 | } | ||||
489 | |||||
490 | if ($types and $types->[$i]) { # IV or NV | ||||
491 | _check_type(\$col, $types->[$i]); | ||||
492 | } | ||||
493 | |||||
494 | } | ||||
495 | |||||
496 | # quoted but invalid | ||||
497 | |||||
498 | elsif ( not $quot_is_null and $col =~ $re_invalid_quot ) { | ||||
499 | |||||
500 | unless ($self->{allow_loose_quotes} and $col =~ /$re_quot_char/) { | ||||
501 | $self->_set_error_diag( | ||||
502 | $col =~ /^\Q$quot\E(.*)\Q$quot\E.$/s ? (2011, $pos - 2) | ||||
503 | : $col =~ /^$re_quot_char/ ? (2027, $pos - 1) | ||||
504 | : (2034, $pos - length $col) # Loose unescaped quote | ||||
505 | ); | ||||
506 | $palatable = 0; | ||||
507 | last; | ||||
508 | } | ||||
509 | |||||
510 | } | ||||
511 | |||||
512 | elsif ($types and $types->[$i]) { # IV or NV | ||||
513 | _check_type(\$col, $types->[$i]); | ||||
514 | } | ||||
515 | |||||
516 | # unquoted | ||||
517 | |||||
518 | else { | ||||
519 | |||||
520 | if (!$self->{verbatim} and $col =~ /\r\n|\n/) { | ||||
521 | $col =~ s/(?:\r\n|\n).*$//sm; | ||||
522 | } | ||||
523 | |||||
524 | if ($col =~ /\Q$esc\E\r$/) { # for t/15_flags : test 165 'ESC CR' at line 203 | ||||
525 | $self->_set_error_diag( 4003, $pos ); | ||||
526 | $palatable = 0; | ||||
527 | last; | ||||
528 | } | ||||
529 | |||||
530 | if ($col =~ /.\Q$esc\E$/) { # for t/65_allow : test 53-54 parse('foo\') at line 62, 65 | ||||
531 | $self->_set_error_diag( 4004, $pos ); | ||||
532 | $palatable = 0; | ||||
533 | last; | ||||
534 | } | ||||
535 | |||||
536 | if ( $col eq '' and $blank_is_undef ) { | ||||
537 | $col = undef; | ||||
538 | } | ||||
539 | |||||
540 | if ( $empty_is_undef and length($col) == 0 ) { | ||||
541 | $col = undef; | ||||
542 | } | ||||
543 | |||||
544 | if ( $unquot_esc ) { | ||||
545 | $col =~ s/\Q$esc\E(.)/$1/g; | ||||
546 | } | ||||
547 | |||||
548 | } | ||||
549 | |||||
550 | utf8::encode($col) if $utf8; | ||||
551 | if ( defined $col && _is_valid_utf8($col) ) { | ||||
552 | utf8::decode($col); | ||||
553 | } | ||||
554 | |||||
555 | push @part,$col; | ||||
556 | push @{$meta_flag}, $flag if ($keep_meta_info); | ||||
557 | $self->{ _RECNO }++; | ||||
558 | |||||
559 | $i++; | ||||
560 | } | ||||
561 | |||||
562 | if ($palatable and ! @part) { | ||||
563 | $palatable = 0; | ||||
564 | } | ||||
565 | |||||
566 | if ($palatable) { | ||||
567 | $self->{_ERROR_INPUT} = undef; | ||||
568 | $self->{_FIELDS} = \@part; | ||||
569 | } | ||||
570 | |||||
571 | $self->{_FFLAGS} = $keep_meta_info ? $meta_flag : []; | ||||
572 | |||||
573 | return $self->{_STATUS} = $palatable; | ||||
574 | } | ||||
575 | |||||
576 | |||||
577 | sub _make_regexp_split_column { | ||||
578 | my ($esc, $quot, $sep) = @_; | ||||
579 | |||||
580 | if ( $quot eq '' ) { | ||||
581 | return qr/([^\Q$sep\E]*)\Q$sep\E/s; | ||||
582 | } | ||||
583 | |||||
584 | return qr/( | ||||
585 | \Q$quot\E | ||||
586 | [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc\E0][^\Q$quot$esc\E]*)* | ||||
587 | \Q$quot\E | ||||
588 | | # or | ||||
589 | \Q$quot\E | ||||
590 | (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* | ||||
591 | \Q$quot\E | ||||
592 | | # or | ||||
593 | [^\Q$sep\E]* | ||||
594 | ) | ||||
595 | \Q$sep\E | ||||
596 | /xs; | ||||
597 | } | ||||
598 | |||||
599 | |||||
600 | sub _make_regexp_split_column_allow_unqout_esc { | ||||
601 | my ($esc, $quot, $sep) = @_; | ||||
602 | |||||
603 | return qr/( | ||||
604 | \Q$quot\E | ||||
605 | [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc\E0][^\Q$quot$esc\E]*)* | ||||
606 | \Q$quot\E | ||||
607 | | # or | ||||
608 | \Q$quot\E | ||||
609 | (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* | ||||
610 | \Q$quot\E | ||||
611 | | # or | ||||
612 | (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* | ||||
613 | | # or | ||||
614 | [^\Q$sep\E]* | ||||
615 | ) | ||||
616 | \Q$sep\E | ||||
617 | /xs; | ||||
618 | } | ||||
619 | |||||
620 | |||||
621 | sub _make_regexp_split_column_allow_sp { | ||||
622 | my ($esc, $quot, $sep) = @_; | ||||
623 | |||||
624 | # if separator is space or tab, don't count that separator | ||||
625 | # as whitespace --- patched by Mike O'Sullivan | ||||
626 | my $ws = $sep eq ' ' ? '[\x09]' | ||||
627 | : $sep eq "\t" ? '[\x20]' | ||||
628 | : '[\x20\x09]' | ||||
629 | ; | ||||
630 | |||||
631 | if ( $quot eq '' ) { | ||||
632 | return qr/$ws*([^\Q$sep\E]?)$ws*\Q$sep\E$ws*/s; | ||||
633 | } | ||||
634 | |||||
635 | qr/$ws* | ||||
636 | ( | ||||
637 | \Q$quot\E | ||||
638 | [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc$sep\E0][^\Q$quot$esc\E]*)* | ||||
639 | \Q$quot\E | ||||
640 | | # or | ||||
641 | [^\Q$sep\E]*? | ||||
642 | ) | ||||
643 | $ws*\Q$sep\E$ws* | ||||
644 | /xs; | ||||
645 | } | ||||
646 | ################################################################################ | ||||
647 | |||||
648 | ################################################################################ | ||||
649 | # spent 7.02s (264ms+6.75) within Text::CSV_PP::print which was called 18216 times, avg 385µs/call:
# 18215 times (264ms+6.75s) by Bio::Roary::GroupStatistics::create_spreadsheet at line 197 of lib/Bio/Roary/GroupStatistics.pm, avg 385µs/call
# once (65µs+391µs) by Bio::Roary::GroupStatistics::create_spreadsheet at line 186 of lib/Bio/Roary/GroupStatistics.pm | ||||
650 | 109296 | 226ms | my ($self, $io, $cols) = @_; | ||
651 | |||||
652 | require IO::Handle; | ||||
653 | |||||
654 | if(ref($cols) ne 'ARRAY'){ | ||||
655 | Carp::croak("Expected fields to be an array ref"); | ||||
656 | } | ||||
657 | |||||
658 | 18216 | 4.56s | $self->_combine(@$cols) or return ''; # spent 4.56s making 18216 calls to Text::CSV_PP::_combine, avg 250µs/call | ||
659 | |||||
660 | local $\ = ''; | ||||
661 | |||||
662 | 36432 | 2.19s | $io->print( $self->_string ) or $self->_set_error_diag(2200); # spent 2.14s making 18216 calls to IO::Handle::print, avg 118µs/call
# spent 51.2ms making 18216 calls to Text::CSV_PP::_string, avg 3µs/call | ||
663 | } | ||||
664 | |||||
665 | sub print_hr { | ||||
666 | my ($self, $io, $hr) = @_; | ||||
667 | $self->{_COLUMN_NAMES} or $self->_set_error_diag(3009); | ||||
668 | ref $hr eq "HASH" or $self->_set_error_diag(3010); | ||||
669 | $self->print ($io, [ map { $hr->{$_} } $self->column_names ]); | ||||
670 | } | ||||
671 | ################################################################################ | ||||
672 | # getline | ||||
673 | ################################################################################ | ||||
674 | sub getline { | ||||
675 | my ($self, $io) = @_; | ||||
676 | |||||
677 | require IO::Handle; | ||||
678 | |||||
679 | $self->{_EOF} = eof($io) ? 1 : ''; | ||||
680 | |||||
681 | my $quot = $self->{quote_char}; | ||||
682 | my $sep = $self->{sep_char}; | ||||
683 | my $re = defined $quot ? qr/(?:\Q$quot\E)/ : undef; | ||||
684 | |||||
685 | my $eol = $self->{eol}; | ||||
686 | |||||
687 | local $/ = $eol if ( defined $eol and $eol ne '' ); | ||||
688 | |||||
689 | my $line = $io->getline(); | ||||
690 | |||||
691 | # AUTO DETECTION EOL CR | ||||
692 | if ( defined $line and defined $eol and $eol eq '' and $line =~ /[^\r]\r[^\r\n]/ and eof ) { | ||||
693 | $self->{_AUTO_DETECT_CR} = 1; | ||||
694 | $self->{eol} = "\r"; | ||||
695 | seek( $io, 0, 0 ); # restart | ||||
696 | return $self->getline( $io ); | ||||
697 | } | ||||
698 | |||||
699 | if ( $re and defined $line ) { | ||||
700 | LOOP: { | ||||
701 | my $is_continued = scalar(my @list = $line =~ /$re/g) % 2; # if line is valid, quot is even | ||||
702 | |||||
703 | if ( $self->{allow_loose_quotes } ) { | ||||
704 | $is_continued = 0; | ||||
705 | } | ||||
706 | elsif ( $line =~ /${re}0/ ) { # null suspicion case | ||||
707 | $is_continued = $line =~ qr/ | ||||
708 | ^ | ||||
709 | ( | ||||
710 | (?: | ||||
711 | $re # $quote | ||||
712 | (?: | ||||
713 | $re$re # escaped $quote | ||||
714 | | ${re}0 # or escaped zero | ||||
715 | | [^$quot] # or exceptions of $quote | ||||
716 | )* | ||||
717 | $re # $quote | ||||
718 | [^0$quot] # non zero or $quote | ||||
719 | ) | ||||
720 | | | ||||
721 | (?:[^$quot]*) # exceptions of $quote | ||||
722 | )+ | ||||
723 | $ | ||||
724 | /x ? 0 : 1; | ||||
725 | } | ||||
726 | |||||
727 | if ( $is_continued and !eof($io) ) { | ||||
728 | $line .= $io->getline(); | ||||
729 | goto LOOP; | ||||
730 | } | ||||
731 | } | ||||
732 | } | ||||
733 | |||||
734 | $line =~ s/\Q$eol\E$// if ( defined $line and defined $eol and $eol ne '' ); | ||||
735 | |||||
736 | $self->_parse($line); | ||||
737 | |||||
738 | return $self->_return_getline_result(); | ||||
739 | } | ||||
740 | |||||
741 | |||||
742 | sub _return_getline_result { | ||||
743 | |||||
744 | if ( eof ) { | ||||
745 | $_[0]->{_AUTO_DETECT_CR} = 0; | ||||
746 | } | ||||
747 | |||||
748 | return unless $_[0]->{_STATUS}; | ||||
749 | |||||
750 | return [ $_[0]->_fields() ] unless $_[0]->{_BOUND_COLUMNS}; | ||||
751 | |||||
752 | my @vals = $_[0]->_fields(); | ||||
753 | my ( $max, $count ) = ( scalar @vals, 0 ); | ||||
754 | |||||
755 | if ( @{ $_[0]->{_BOUND_COLUMNS} } < $max ) { | ||||
756 | $_[0]->_set_error_diag(3006); | ||||
757 | return; | ||||
758 | } | ||||
759 | |||||
760 | for ( my $i = 0; $i < $max; $i++ ) { | ||||
761 | my $bind = $_[0]->{_BOUND_COLUMNS}->[ $i ]; | ||||
762 | if ( Scalar::Util::readonly( $$bind ) ) { | ||||
763 | $_[0]->_set_error_diag(3008); | ||||
764 | return; | ||||
765 | } | ||||
766 | $$bind = $vals[ $i ]; | ||||
767 | } | ||||
768 | |||||
769 | return []; | ||||
770 | } | ||||
771 | ################################################################################ | ||||
772 | # getline_all | ||||
773 | ################################################################################ | ||||
774 | sub getline_all { | ||||
775 | my ( $self, $io, $offset, $len ) = @_; | ||||
776 | my @list; | ||||
777 | my $tail; | ||||
778 | my $n = 0; | ||||
779 | |||||
780 | $offset ||= 0; | ||||
781 | |||||
782 | if ( $offset < 0 ) { | ||||
783 | $tail = -$offset; | ||||
784 | $offset = 0; | ||||
785 | } | ||||
786 | |||||
787 | while ( my $row = $self->getline($io) ) { | ||||
788 | next if $offset && $offset-- > 0; # skip | ||||
789 | last if defined $len && !$tail && $n >= $len; # exceedes limit size | ||||
790 | push @list, $row; | ||||
791 | ++$n; | ||||
792 | if ( $tail && $n > $tail ) { | ||||
793 | shift @list; | ||||
794 | } | ||||
795 | } | ||||
796 | |||||
797 | if ( $tail && defined $len && $n > $len ) { | ||||
798 | @list = splice( @list, 0, $len); | ||||
799 | } | ||||
800 | |||||
801 | return \@list; | ||||
802 | } | ||||
803 | ################################################################################ | ||||
804 | # getline_hr | ||||
805 | ################################################################################ | ||||
806 | sub getline_hr { | ||||
807 | my ( $self, $io) = @_; | ||||
808 | my %hr; | ||||
809 | |||||
810 | unless ( $self->{_COLUMN_NAMES} ) { | ||||
811 | $self->SetDiag( 3002 ); | ||||
812 | } | ||||
813 | |||||
814 | my $fr = $self->getline( $io ) or return undef; | ||||
815 | |||||
816 | if ( ref $self->{_FFLAGS} ) { | ||||
817 | $self->{_FFLAGS}[$_] = IS_MISSING for ($#{$fr} + 1) .. $#{$self->{_COLUMN_NAMES}}; | ||||
818 | } | ||||
819 | |||||
820 | @hr{ @{ $self->{_COLUMN_NAMES} } } = @$fr; | ||||
821 | |||||
822 | \%hr; | ||||
823 | } | ||||
824 | ################################################################################ | ||||
825 | # getline_hr_all | ||||
826 | ################################################################################ | ||||
827 | sub getline_hr_all { | ||||
828 | my ( $self, $io, @args ) = @_; | ||||
829 | my %hr; | ||||
830 | |||||
831 | unless ( $self->{_COLUMN_NAMES} ) { | ||||
832 | $self->SetDiag( 3002 ); | ||||
833 | } | ||||
834 | |||||
835 | my @cn = @{$self->{_COLUMN_NAMES}}; | ||||
836 | |||||
837 | return [ map { my %h; @h{ @cn } = @$_; \%h } @{ $self->getline_all( $io, @args ) } ]; | ||||
838 | } | ||||
839 | ################################################################################ | ||||
840 | # column_names | ||||
841 | ################################################################################ | ||||
842 | sub column_names { | ||||
843 | my ( $self, @columns ) = @_; | ||||
844 | |||||
845 | @columns or return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : undef; | ||||
846 | @columns == 1 && ! defined $columns[0] and return $self->{_COLUMN_NAMES} = undef; | ||||
847 | |||||
848 | if ( @columns == 1 && ref $columns[0] eq "ARRAY" ) { | ||||
849 | @columns = @{ $columns[0] }; | ||||
850 | } | ||||
851 | elsif ( join "", map { defined $_ ? ref $_ : "" } @columns ) { | ||||
852 | $self->SetDiag( 3001 ); | ||||
853 | } | ||||
854 | |||||
855 | if ( $self->{_BOUND_COLUMNS} && @columns != @{$self->{_BOUND_COLUMNS}} ) { | ||||
856 | $self->SetDiag( 3003 ); | ||||
857 | } | ||||
858 | |||||
859 | $self->{_COLUMN_NAMES} = [ map { defined $_ ? $_ : "\cAUNDEF\cA" } @columns ]; | ||||
860 | @{ $self->{_COLUMN_NAMES} }; | ||||
861 | } | ||||
862 | ################################################################################ | ||||
863 | # bind_columns | ||||
864 | ################################################################################ | ||||
865 | sub bind_columns { | ||||
866 | my ( $self, @refs ) = @_; | ||||
867 | |||||
868 | @refs or return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef; | ||||
869 | @refs == 1 && ! defined $refs[0] and return $self->{_BOUND_COLUMNS} = undef; | ||||
870 | |||||
871 | if ( $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} ) { | ||||
872 | $self->SetDiag( 3003 ); | ||||
873 | } | ||||
874 | |||||
875 | if ( grep { ref $_ ne "SCALAR" } @refs ) { # why don't use grep? | ||||
876 | $self->SetDiag( 3004 ); | ||||
877 | } | ||||
878 | |||||
879 | $self->{_is_bound} = scalar @refs; #pack("C", scalar @refs); | ||||
880 | $self->{_BOUND_COLUMNS} = [ @refs ]; | ||||
881 | @refs; | ||||
882 | } | ||||
883 | ################################################################################ | ||||
884 | # eof | ||||
885 | ################################################################################ | ||||
886 | sub eof { | ||||
887 | $_[0]->{_EOF}; | ||||
888 | } | ||||
889 | ################################################################################ | ||||
890 | # type | ||||
891 | ################################################################################ | ||||
892 | # spent 13µs within Text::CSV_PP::types which was called:
# once (13µs+0s) by Text::CSV_PP::new at line 210 | ||||
893 | 2 | 9µs | my $self = shift; | ||
894 | |||||
895 | 1 | 700ns | if (@_) { | ||
896 | 3 | 2µs | if (my $types = shift) { | ||
897 | $self->{'_types'} = join("", map{ chr($_) } @$types); | ||||
898 | $self->{'types'} = $types; | ||||
899 | } | ||||
900 | else { | ||||
901 | delete $self->{'types'}; | ||||
902 | delete $self->{'_types'}; | ||||
903 | undef; | ||||
904 | } | ||||
905 | } | ||||
906 | else { | ||||
907 | $self->{'types'}; | ||||
908 | } | ||||
909 | } | ||||
910 | ################################################################################ | ||||
911 | sub meta_info { | ||||
912 | $_[0]->{_FFLAGS} ? @{ $_[0]->{_FFLAGS} } : undef; | ||||
913 | } | ||||
914 | |||||
915 | sub is_quoted { | ||||
916 | return unless (defined $_[0]->{_FFLAGS}); | ||||
917 | return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } ); | ||||
918 | |||||
919 | $_[0]->{_FFLAGS}->[$_[1]] & IS_QUOTED ? 1 : 0; | ||||
920 | } | ||||
921 | |||||
922 | sub is_binary { | ||||
923 | return unless (defined $_[0]->{_FFLAGS}); | ||||
924 | return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } ); | ||||
925 | $_[0]->{_FFLAGS}->[$_[1]] & IS_BINARY ? 1 : 0; | ||||
926 | } | ||||
927 | |||||
928 | sub is_missing { | ||||
929 | my ($self, $idx, $val) = @_; | ||||
930 | ref $self->{_FFLAGS} && | ||||
931 | $idx >= 0 && $idx < @{$self->{_FFLAGS}} or return; | ||||
932 | $self->{_FFLAGS}[$idx] & IS_MISSING ? 1 : 0; | ||||
933 | } | ||||
934 | ################################################################################ | ||||
935 | # _check_type | ||||
936 | # take an arg as scalar referrence. | ||||
937 | # if not numeric, make the value 0. otherwise INTEGERized. | ||||
938 | ################################################################################ | ||||
939 | sub _check_type { | ||||
940 | my ($col_ref, $type) = @_; | ||||
941 | unless ($$col_ref =~ /^[+-]?(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) { | ||||
942 | Carp::carp sprintf("Argument \"%s\" isn't numeric in subroutine entry",$$col_ref); | ||||
943 | $$col_ref = 0; | ||||
944 | } | ||||
945 | elsif ($type == NV) { | ||||
946 | $$col_ref = sprintf("%G",$$col_ref); | ||||
947 | } | ||||
948 | else { | ||||
949 | $$col_ref = sprintf("%d",$$col_ref); | ||||
950 | } | ||||
951 | } | ||||
952 | ################################################################################ | ||||
953 | # _set_error_diag | ||||
954 | ################################################################################ | ||||
955 | sub _set_error_diag { | ||||
956 | my ( $self, $error, $pos ) = @_; | ||||
957 | |||||
958 | $self->{_ERROR_DIAG} = $error; | ||||
959 | |||||
960 | if (defined $pos) { | ||||
961 | $_[0]->{_ERROR_POS} = $pos; | ||||
962 | } | ||||
963 | |||||
964 | $self->error_diag() if ( $error and $self->{auto_diag} ); | ||||
965 | |||||
966 | return; | ||||
967 | } | ||||
968 | ################################################################################ | ||||
969 | |||||
970 | # spent 568µs within Text::CSV_PP::BEGIN@970 which was called:
# once (568µs+0s) by Text::CSV::_load_pp at line 981 | ||||
971 | 1 | 10µs | for my $method ( qw/always_quote binary keep_meta_info allow_loose_quotes allow_loose_escapes | ||
972 | verbatim blank_is_undef empty_is_undef quote_space quote_null | ||||
973 | quote_binary allow_unquoted_escape/ ) { | ||||
974 | 12 | 554µs | eval qq| | ||
975 | sub $method { | ||||
976 | \$_[0]->{$method} = defined \$_[1] ? \$_[1] : 0 if (\@_ > 1); | ||||
977 | \$_[0]->{$method}; | ||||
978 | } | ||||
979 | |; | ||||
980 | } | ||||
981 | 1 | 640µs | 1 | 568µs | } # spent 568µs making 1 call to Text::CSV_PP::BEGIN@970 |
982 | |||||
- - | |||||
985 | sub sep_char { | ||||
986 | my $self = shift; | ||||
987 | if ( @_ ) { | ||||
988 | $self->{sep_char} = $_[0]; | ||||
989 | my $ec = _check_sanity( $self ); | ||||
990 | $ec and Carp::croak( $self->SetDiag( $ec ) ); | ||||
991 | } | ||||
992 | $self->{sep_char}; | ||||
993 | } | ||||
994 | |||||
995 | |||||
996 | sub quote_char { | ||||
997 | my $self = shift; | ||||
998 | if ( @_ ) { | ||||
999 | $self->{quote_char} = $_[0]; | ||||
1000 | my $ec = _check_sanity( $self ); | ||||
1001 | $ec and Carp::croak( $self->SetDiag( $ec ) ); | ||||
1002 | } | ||||
1003 | $self->{quote_char}; | ||||
1004 | } | ||||
1005 | |||||
1006 | |||||
1007 | sub escape_char { | ||||
1008 | my $self = shift; | ||||
1009 | if ( @_ ) { | ||||
1010 | $self->{escape_char} = $_[0]; | ||||
1011 | my $ec = _check_sanity( $self ); | ||||
1012 | $ec and Carp::croak( $self->SetDiag( $ec ) ); | ||||
1013 | } | ||||
1014 | $self->{escape_char}; | ||||
1015 | } | ||||
1016 | |||||
1017 | |||||
1018 | sub allow_whitespace { | ||||
1019 | my $self = shift; | ||||
1020 | if ( @_ ) { | ||||
1021 | my $aw = shift; | ||||
1022 | $aw and | ||||
1023 | (defined $self->{quote_char} && $self->{quote_char} =~ m/^[ \t]$/) || | ||||
1024 | (defined $self->{escape_char} && $self->{escape_char} =~ m/^[ \t]$/) | ||||
1025 | and Carp::croak ($self->SetDiag (1002)); | ||||
1026 | $self->{allow_whitespace} = $aw; | ||||
1027 | } | ||||
1028 | $self->{allow_whitespace}; | ||||
1029 | } | ||||
1030 | |||||
1031 | |||||
1032 | sub eol { | ||||
1033 | $_[0]->{eol} = defined $_[1] ? $_[1] : '' if ( @_ > 1 ); | ||||
1034 | $_[0]->{eol}; | ||||
1035 | } | ||||
1036 | |||||
1037 | |||||
1038 | sub SetDiag { | ||||
1039 | if ( defined $_[1] and $_[1] == 0 ) { | ||||
1040 | $_[0]->{_ERROR_DIAG} = undef; | ||||
1041 | $last_new_error = ''; | ||||
1042 | return; | ||||
1043 | } | ||||
1044 | |||||
1045 | $_[0]->_set_error_diag( $_[1] ); | ||||
1046 | Carp::croak( $_[0]->error_diag . '' ); | ||||
1047 | } | ||||
1048 | |||||
1049 | sub auto_diag { | ||||
1050 | my $self = shift; | ||||
1051 | if (@_) { | ||||
1052 | my $v = shift; | ||||
1053 | !defined $v || $v eq "" and $v = 0; | ||||
1054 | $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false | ||||
1055 | $self->{auto_diag} = $v; | ||||
1056 | } | ||||
1057 | $self->{auto_diag}; | ||||
1058 | } | ||||
1059 | |||||
1060 | sub diag_verbose { | ||||
1061 | my $self = shift; | ||||
1062 | if (@_) { | ||||
1063 | my $v = shift; | ||||
1064 | !defined $v || $v eq "" and $v = 0; | ||||
1065 | $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false | ||||
1066 | $self->{diag_verbose} = $v; | ||||
1067 | } | ||||
1068 | $self->{diag_verbose}; | ||||
1069 | } | ||||
1070 | |||||
1071 | sub _is_valid_utf8 { | ||||
1072 | return ( $_[0] =~ /^(?: | ||||
1073 | [\x00-\x7F] | ||||
1074 | |[\xC2-\xDF][\x80-\xBF] | ||||
1075 | |[\xE0][\xA0-\xBF][\x80-\xBF] | ||||
1076 | |[\xE1-\xEC][\x80-\xBF][\x80-\xBF] | ||||
1077 | |[\xED][\x80-\x9F][\x80-\xBF] | ||||
1078 | |[\xEE-\xEF][\x80-\xBF][\x80-\xBF] | ||||
1079 | |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | ||||
1080 | |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | ||||
1081 | |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | ||||
1082 | )+$/x ) ? 1 : 0; | ||||
1083 | } | ||||
1084 | ################################################################################ | ||||
1085 | package Text::CSV::ErrorDiag; | ||||
1086 | |||||
1087 | 2 | 72µs | 2 | 64µs | # spent 39µs (14+25) within Text::CSV::ErrorDiag::BEGIN@1087 which was called:
# once (14µs+25µs) by Text::CSV::_load_pp at line 1087 # spent 39µs making 1 call to Text::CSV::ErrorDiag::BEGIN@1087
# spent 25µs making 1 call to strict::import |
1088 | # spent 82µs (23+59) within Text::CSV::ErrorDiag::BEGIN@1088 which was called:
# once (23µs+59µs) by Text::CSV::_load_pp at line 1094 | ||||
1089 | 2 | 18µs | '""' => \&stringify, | ||
1090 | '+' => \&numeric, | ||||
1091 | '-' => \&numeric, | ||||
1092 | '*' => \&numeric, | ||||
1093 | '/' => \&numeric, | ||||
1094 | 1 | 184µs | 2 | 141µs | ); # spent 82µs making 1 call to Text::CSV::ErrorDiag::BEGIN@1088
# spent 59µs making 1 call to overload::import |
1095 | |||||
1096 | |||||
1097 | sub numeric { | ||||
1098 | my ($left, $right) = @_; | ||||
1099 | return ref $left ? $left->[0] : $right->[0]; | ||||
1100 | } | ||||
1101 | |||||
1102 | |||||
1103 | sub stringify { | ||||
1104 | $_[0]->[1]; | ||||
1105 | } | ||||
1106 | ################################################################################ | ||||
1107 | 1 | 44µs | 1; | ||
1108 | __END__ | ||||
# spent 218ms within Text::CSV_PP::CORE:match which was called 673206 times, avg 324ns/call:
# 336600 times (141ms+0s) by Text::CSV_PP::_combine at line 323, avg 419ns/call
# 336600 times (76.9ms+0s) by Text::CSV_PP::_combine at line 329, avg 229ns/call
# 3 times (10µs+0s) by Text::CSV_PP::_check_sanity at line 157, avg 3µs/call
# 3 times (10µs+0s) by Text::CSV_PP::new at line 187, avg 3µs/call | |||||
sub Text::CSV_PP::CORE:qr; # opcode | |||||
# spent 516ms within Text::CSV_PP::CORE:regcomp which was called 673202 times, avg 767ns/call:
# 336600 times (292ms+0s) by Text::CSV_PP::_combine at line 320, avg 869ns/call
# 336600 times (224ms+0s) by Text::CSV_PP::_combine at line 323, avg 664ns/call
# once (25µs+0s) by Text::CSV_PP::_combine at line 297
# once (11µs+0s) by Text::CSV_PP::_combine at line 298 | |||||
sub Text::CSV_PP::CORE:subst; # opcode |