← Index
Performance Profile   « block view • line view • sub view »
For t/test-parsing
  Run on Sun Nov 14 09:49:57 2010
Reported on Sun Nov 14 09:50:10 2010

File /usr/local/lib/perl/5.10.0/Encode/Alias.pm
Statements Executed 380
Total Time 0.0028801 seconds
Subroutines — ordered by exclusive time
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
44441623µs623µsEncode::Alias::::define_aliasEncode::Alias::define_alias
111175µs806µsEncode::Alias::::init_aliasesEncode::Alias::init_aliases
1118µs8µsEncode::Alias::::undef_aliasesEncode::Alias::undef_aliases
0000s0sEncode::Alias::::BEGINEncode::Alias::BEGIN
0000s0sEncode::Alias::::find_aliasEncode::Alias::find_alias
LineStmts.Exclusive
Time
Avg.Code
1package Encode::Alias;
2326µs9µsuse strict;
# spent 9µs making 1 call to strict::import
3333µs11µsuse warnings;
# spent 22µs making 1 call to warnings::import
4383µs28µsno warnings 'redefine';
# spent 20µs making 1 call to warnings::unimport
5319µs6µsour $VERSION = do { my @r = ( q$Revision: 2.12 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6sub DEBUG () { 0 }
7
831.87ms624µsuse base qw(Exporter);
# spent 66µs making 1 call to base::import
9
10# Public, encouraged API is exported by default
11
1211µs1µsour @EXPORT =
13 qw (
14 define_alias
15 find_alias
16);
17
181400ns400nsour @Alias; # ordered matching list
191300ns300nsour %Alias; # cached known aliases
20
21sub find_alias {
22 require Encode;
23 my $class = shift;
24 my $find = shift;
25 unless ( exists $Alias{$find} ) {
26 $Alias{$find} = undef; # Recursion guard
27 for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
28 my $alias = $Alias[$i];
29 my $val = $Alias[ $i + 1 ];
30 my $new;
31 if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
32 DEBUG and warn "eval $val";
33 $new = eval $val;
34 DEBUG and $@ and warn "$val, $@";
35 }
36 elsif ( ref($alias) eq 'CODE' ) {
37 DEBUG and warn "$alias", "->", "($find)";
38 $new = $alias->($find);
39 }
40 elsif ( lc($find) eq lc($alias) ) {
41 $new = $val;
42 }
43 if ( defined($new) ) {
44 next if $new eq $find; # avoid (direct) recursion on bugs
45 DEBUG and warn "$alias, $new";
46 my $enc =
47 ( ref($new) ) ? $new : Encode::find_encoding($new);
48 if ($enc) {
49 $Alias{$find} = $enc;
50 last;
51 }
52 }
53 }
54
55 # case insensitive search when canonical is not in all lowercase
56 # RT ticket #7835
57 unless ( $Alias{$find} ) {
58 my $lcfind = lc($find);
59 for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
60 {
61 $lcfind eq lc($name) or next;
62 $Alias{$find} = Encode::find_encoding($name);
63 DEBUG and warn "$find => $name";
64 }
65 }
66 }
67 if (DEBUG) {
68 my $name;
69 if ( my $e = $Alias{$find} ) {
70 $name = $e->name;
71 }
72 else {
73 $name = "";
74 }
75 warn "find_alias($class, $find)->name = $name";
76 }
77 return $Alias{$find};
78}
79
80
# spent 623µs within Encode::Alias::define_alias which was called 44 times, avg 14µs/call: # once (51µs+0s) by Encode::Alias::init_aliases at line 186 # once (29µs+0s) by Encode::Alias::init_aliases at line 145 # once (26µs+0s) by Encode::Alias::init_aliases at line 140 # once (17µs+0s) by Encode::Alias::init_aliases at line 239 # once (17µs+0s) by Encode::Alias::init_aliases at line 253 # once (17µs+0s) by Encode::Alias::init_aliases at line 243 # once (17µs+0s) by Encode::Alias::init_aliases at line 153 # once (17µs+0s) by Encode::Alias::init_aliases at line 222 # once (16µs+0s) by Encode::Alias::init_aliases at line 200 # once (16µs+0s) by Encode::Alias::init_aliases at line 135 # once (16µs+0s) by Encode::Alias::init_aliases at line 246 # once (15µs+0s) by Encode::Alias::init_aliases at line 138 # once (15µs+0s) by Encode::Alias::init_aliases at line 175 # once (14µs+0s) by Encode::Alias::init_aliases at line 179 # once (14µs+0s) by Encode::Alias::init_aliases at line 259 # once (14µs+0s) by Encode::Alias::init_aliases at line 248 # once (14µs+0s) by Encode::Alias::init_aliases at line 217 # once (13µs+0s) by Encode::Alias::init_aliases at line 154 # once (13µs+0s) by Encode::Alias::init_aliases at line 173 # once (13µs+0s) by Encode::Alias::init_aliases at line 233 # once (13µs+0s) by Encode::Alias::init_aliases at line 254 # once (13µs+0s) by Encode::Alias::init_aliases at line 262 # once (13µs+0s) by Encode::Alias::init_aliases at line 227 # once (13µs+0s) by Encode::Alias::init_aliases at line 255 # once (13µs+0s) by Encode::Alias::init_aliases at line 223 # once (13µs+0s) by Encode::Alias::init_aliases at line 235 # once (13µs+0s) by Encode::Alias::init_aliases at line 237 # once (13µs+0s) by Encode::Alias::init_aliases at line 251 # once (12µs+0s) by Encode::Alias::init_aliases at line 247 # once (12µs+0s) by Encode::Alias::init_aliases at line 209 # once (11µs+0s) by Encode::Alias::init_aliases at line 211 # once (11µs+0s) by Encode::Alias::init_aliases at line 168 # once (11µs+0s) by Encode::Alias::init_aliases at line 236 # once (10µs+0s) by Encode::Alias::init_aliases at line 157 # once (10µs+0s) by Encode::Alias::init_aliases at line 160 # once (10µs+0s) by Encode::Alias::init_aliases at line 139 # once (9µs+0s) by Encode::Alias::init_aliases at line 152 # once (9µs+0s) by Encode::Alias::init_aliases at line 242 # once (9µs+0s) by Encode::Alias::init_aliases at line 252 # once (9µs+0s) by Encode::Alias::init_aliases at line 195 # once (8µs+0s) by Encode::Alias::init_aliases at line 230 # once (8µs+0s) by Encode::Alias::init_aliases at line 234 # once (8µs+0s) by Encode::Alias::init_aliases at line 163 # once (8µs+0s) by Encode::Alias::init_aliases at line 238
sub define_alias {
814449µs1µs while (@_) {
82159222µs1µs my ( $alias, $name ) = splice( @_, 0, 2 );
83 unshift( @Alias, $alias => $name ); # newer one has precedence
8410681µs766ns if ( ref($alias) ) {
85
86 # clear %Alias cache to allow overrides
87 my @a = keys %Alias;
88 for my $k (@a) {
89 if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
90 DEBUG and warn "delete \$Alias\{$k\}";
91 delete $Alias{$k};
92 }
93 elsif ( ref($alias) eq 'CODE' ) {
94 DEBUG and warn "delete \$Alias\{$k\}";
95 delete $Alias{ $alias->($name) };
96 }
97 }
98 }
99 else {
100 DEBUG and warn "delete \$Alias\{$alias\}";
101 delete $Alias{$alias};
102 }
103 }
104}
105
106# Allow latin-1 style names as well
107# 0 1 2 3 4 5 6 7 8 9 10
10812µs2µsour @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
109
110# Allow winlatin1 style names as well
111113µs13µsour %Winlatin2cp = (
112 'latin1' => 1252,
113 'latin2' => 1250,
114 'cyrillic' => 1251,
115 'greek' => 1253,
116 'turkish' => 1254,
117 'hebrew' => 1255,
118 'arabic' => 1256,
119 'baltic' => 1257,
120 'vietnamese' => 1258,
121);
122
12319µs9µsinit_aliases();
# spent 806µs making 1 call to Encode::Alias::init_aliases
124
125
# spent 8µs within Encode::Alias::undef_aliases which was called # once (8µs+0s) by Encode::Alias::init_aliases at line 132
sub undef_aliases {
12622µs1µs @Alias = ();
127 %Alias = ();
128}
129
130
# spent 806µs (175+631) within Encode::Alias::init_aliases which was called # once (175µs+631µs) at line 123
sub init_aliases {
13126233µs9µs require Encode;
132 undef_aliases();
# spent 8µs making 1 call to Encode::Alias::undef_aliases
133
134 # Try all-lower-case version should all else fails
135 define_alias( qr/^(.*)$/ => '"\L$1"' );
# spent 16µs making 1 call to Encode::Alias::define_alias
136
137 # UTF/UCS stuff
138 define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' );
# spent 15µs making 1 call to Encode::Alias::define_alias
139 define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
# spent 10µs making 1 call to Encode::Alias::define_alias
140 define_alias(
# spent 26µs making 1 call to Encode::Alias::define_alias
141 qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
142 qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
143 qr/^iso-10646-1$/i => '"UCS-2BE"'
144 );
145 define_alias(
# spent 29µs making 1 call to Encode::Alias::define_alias
146 qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
147 qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
148 qr/^UTF-?(16|32)$/i => '"UTF-$1"',
149 );
150
151 # ASCII
152 define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' );
# spent 9µs making 1 call to Encode::Alias::define_alias
153 define_alias( 'C' => 'ascii' );
# spent 17µs making 1 call to Encode::Alias::define_alias
154 define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
155
156 # Allow variants of iso-8859-1 etc.
157 define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
# spent 10µs making 1 call to Encode::Alias::define_alias
158
159 # At least HP-UX has these.
160 define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
# spent 10µs making 1 call to Encode::Alias::define_alias
161
162 # More HP stuff.
163 define_alias(
# spent 8µs making 1 call to Encode::Alias::define_alias
164 qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
165 '"${1}8"' );
166
167 # The Official name of ASCII.
168 define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
# spent 11µs making 1 call to Encode::Alias::define_alias
169
170 # This is a font issue, not an encoding issue.
171 # (The currency symbol of the Latin 1 upper half
172 # has been redefined as the euro symbol.)
173 define_alias( qr/^(.+)\@euro$/i => '"$1"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
174
175 define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
# spent 15µs making 1 call to Encode::Alias::define_alias
176'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
177 );
178
179 define_alias(
# spent 14µs making 1 call to Encode::Alias::define_alias
180 qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
181 hebrew|arabic|baltic|vietnamese)$/ix =>
182 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
183 );
184
185 # Common names for non-latin preferred MIME names
186 define_alias(
# spent 51µs making 1 call to Encode::Alias::define_alias
187 'ascii' => 'US-ascii',
188 'cyrillic' => 'iso-8859-5',
189 'arabic' => 'iso-8859-6',
190 'greek' => 'iso-8859-7',
191 'hebrew' => 'iso-8859-8',
192 'thai' => 'iso-8859-11',
193 );
194 # RT #20781
195 define_alias(qr/\btis-?620\b/i => '"iso-8859-11"');
# spent 9µs making 1 call to Encode::Alias::define_alias
196
197 # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
198 # And Microsoft has their own naming (again, surprisingly).
199 # And windows-* is registered in IANA!
200 define_alias(
# spent 16µs making 1 call to Encode::Alias::define_alias
201 qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
202
203 # Sometimes seen with a leading zero.
204 # define_alias( qr/\bcp037\b/i => '"cp37"');
205
206 # Mac Mappings
207 # predefined in *.ucm; unneeded
208 # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
209 define_alias( qr/^mac_(.*)$/i => '"mac$1"' );
# spent 12µs making 1 call to Encode::Alias::define_alias
210 # http://rt.cpan.org/Ticket/Display.html?id=36326
211 define_alias( qr/^macintosh$/i => '"MacRoman"' );
# spent 11µs making 1 call to Encode::Alias::define_alias
212
213 # Ououououou. gone. They are differente!
214 # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
215
216 # Standardize on the dashed versions.
217 define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
# spent 14µs making 1 call to Encode::Alias::define_alias
218
21921211µs10µs unless ($Encode::ON_EBCDIC) {
220
221 # for Encode::CN
222 define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
# spent 17µs making 1 call to Encode::Alias::define_alias
223 define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
224
225 # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
226 # CP936 doesn't have vendor-addon for GBK, so they're identical.
227 define_alias( qr/^gbk$/i => '"cp936"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
228
229 # This fixes gb2312 vs. euc-cn confusion, practically
230 define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
# spent 8µs making 1 call to Encode::Alias::define_alias
231
232 # for Encode::JP
233 define_alias( qr/\bjis$/i => '"7bit-jis"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
234 define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
# spent 8µs making 1 call to Encode::Alias::define_alias
235 define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
236 define_alias( qr/\bujis$/i => '"euc-jp"' );
# spent 11µs making 1 call to Encode::Alias::define_alias
237 define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
238 define_alias( qr/\bsjis$/i => '"shiftjis"' );
# spent 8µs making 1 call to Encode::Alias::define_alias
239 define_alias( qr/\bwindows-31j$/i => '"cp932"' );
# spent 17µs making 1 call to Encode::Alias::define_alias
240
241 # for Encode::KR
242 define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
# spent 9µs making 1 call to Encode::Alias::define_alias
243 define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
# spent 17µs making 1 call to Encode::Alias::define_alias
244
245 # This fixes ksc5601 vs. euc-kr confusion, practically
246 define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
# spent 16µs making 1 call to Encode::Alias::define_alias
247 define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
# spent 12µs making 1 call to Encode::Alias::define_alias
248 define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
# spent 14µs making 1 call to Encode::Alias::define_alias
249
250 # for Encode::TW
251 define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
252 define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
# spent 9µs making 1 call to Encode::Alias::define_alias
253 define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
# spent 17µs making 1 call to Encode::Alias::define_alias
254 define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
255 define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
256 }
257
258 # utf8 is blessed :)
259 define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
# spent 14µs making 1 call to Encode::Alias::define_alias
260
261 # At last, Map white space and _ to '-'
262 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
# spent 13µs making 1 call to Encode::Alias::define_alias
263}
264
265125µs25µs1;
266__END__
267
268# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
269# TODO: HP-UX '15' encodings japanese15 korean15 roi15
270# TODO: Cyrillic encoding ISO-IR-111 (useful?)
271# TODO: Armenian encoding ARMSCII-8
272# TODO: Hebrew encoding ISO-8859-8-1
273# TODO: Thai encoding TCVN
274# TODO: Vietnamese encodings VPS
275# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
276# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
277# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
278# Kannada Khmer Korean Laotian Malayalam Mongolian
279# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
280
281=head1 NAME
282
283Encode::Alias - alias definitions to encodings
284
285=head1 SYNOPSIS
286
287 use Encode;
288 use Encode::Alias;
289 define_alias( newName => ENCODING);
290
291=head1 DESCRIPTION
292
293Allows newName to be used as an alias for ENCODING. ENCODING may be
294either the name of an encoding or an encoding object (as described
295in L<Encode>).
296
297Currently I<newName> can be specified in the following ways:
298
299=over 4
300
301=item As a simple string.
302
303=item As a qr// compiled regular expression, e.g.:
304
305 define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
306
307In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
308in order to allow C<$1> etc. to be substituted. The example is one
309way to alias names as used in X11 fonts to the MIME names for the
310iso-8859-* family. Note the double quotes inside the single quotes.
311
312(or, you don't have to do this yourself because this example is predefined)
313
314If you are using a regex here, you have to use the quotes as shown or
315it won't work. Also note that regex handling is tricky even for the
316experienced. Use this feature with caution.
317
318=item As a code reference, e.g.:
319
320 define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
321
322The same effect as the example above in a different way. The coderef
323takes the alias name as an argument and returns a canonical name on
324success or undef if not. Note the second argument is not required.
325Use this with even more caution than the regex version.
326
327=back
328
329=head3 Changes in code reference aliasing
330
331As of Encode 1.87, the older form
332
333 define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
334
335no longer works.
336
337Encode up to 1.86 internally used "local $_" to implement ths older
338form. But consider the code below;
339
340 use Encode;
341 $_ = "eeeee" ;
342 while (/(e)/g) {
343 my $utf = decode('aliased-encoding-name', $1);
344 print "position:",pos,"\n";
345 }
346
347Prior to Encode 1.86 this fails because of "local $_".
348
349=head2 Alias overloading
350
351You can override predefined aliases by simply applying define_alias().
352The new alias is always evaluated first, and when necessary,
353define_alias() flushes the internal cache to make the new definition
354available.
355
356 # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
357 # superset of SHIFT_JIS
358
359 define_alias( qr/shift.*jis$/i => '"cp932"' );
360 define_alias( qr/sjis$/i => '"cp932"' );
361
362If you want to zap all predefined aliases, you can use
363
364 Encode::Alias->undef_aliases;
365
366to do so. And
367
368 Encode::Alias->init_aliases;
369
370gets the factory settings back.
371
372=head1 SEE ALSO
373
374L<Encode>, L<Encode::Supported>
375
376=cut
377