File | /usr/share/perl/5.10/charnames.pm |
Statements Executed | 41 |
Total Time | 0.002019 seconds |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 40µs | 216µs | import | charnames::
0 | 0 | 0 | 0s | 0s | BEGIN | charnames::
0 | 0 | 0 | 0s | 0s | alias | charnames::
0 | 0 | 0 | 0s | 0s | alias_file | charnames::
0 | 0 | 0 | 0s | 0s | carp | charnames::
0 | 0 | 0 | 0s | 0s | charnames | charnames::
0 | 0 | 0 | 0s | 0s | croak | charnames::
0 | 0 | 0 | 0s | 0s | viacode | charnames::
0 | 0 | 0 | 0s | 0s | vianame | charnames::
Line | Stmts. | Exclusive Time | Avg. | Code |
---|---|---|---|---|
1 | package charnames; | |||
2 | 3 | 29µs | 10µs | use strict; # spent 8µs making 1 call to strict::import |
3 | 3 | 22µs | 7µs | use warnings; # spent 18µs making 1 call to warnings::import |
4 | 3 | 48µs | 16µs | use File::Spec; # spent 4µs making 1 call to import |
5 | 1 | 900ns | 900ns | our $VERSION = '1.06'; |
6 | ||||
7 | 3 | 850µs | 283µs | use bytes (); # for $bytes::hint_bits |
8 | ||||
9 | 1 | 10µs | 10µs | my %alias1 = ( |
10 | # Icky 3.2 names with parentheses. | |||
11 | 'LINE FEED' => 'LINE FEED (LF)', | |||
12 | 'FORM FEED' => 'FORM FEED (FF)', | |||
13 | 'CARRIAGE RETURN' => 'CARRIAGE RETURN (CR)', | |||
14 | 'NEXT LINE' => 'NEXT LINE (NEL)', | |||
15 | # Convenience. | |||
16 | 'LF' => 'LINE FEED (LF)', | |||
17 | 'FF' => 'FORM FEED (FF)', | |||
18 | 'CR' => 'CARRIAGE RETURN (CR)', | |||
19 | 'NEL' => 'NEXT LINE (NEL)', | |||
20 | # More convenience. For futher convencience, | |||
21 | # it is suggested some way using using the NamesList | |||
22 | # aliases is implemented. | |||
23 | 'ZWNJ' => 'ZERO WIDTH NON-JOINER', | |||
24 | 'ZWJ' => 'ZERO WIDTH JOINER', | |||
25 | 'BOM' => 'BYTE ORDER MARK', | |||
26 | ); | |||
27 | ||||
28 | 1 | 8µs | 8µs | my %alias2 = ( |
29 | # Pre-3.2 compatibility (only for the first 256 characters). | |||
30 | 'HORIZONTAL TABULATION' => 'CHARACTER TABULATION', | |||
31 | 'VERTICAL TABULATION' => 'LINE TABULATION', | |||
32 | 'FILE SEPARATOR' => 'INFORMATION SEPARATOR FOUR', | |||
33 | 'GROUP SEPARATOR' => 'INFORMATION SEPARATOR THREE', | |||
34 | 'RECORD SEPARATOR' => 'INFORMATION SEPARATOR TWO', | |||
35 | 'UNIT SEPARATOR' => 'INFORMATION SEPARATOR ONE', | |||
36 | 'PARTIAL LINE DOWN' => 'PARTIAL LINE FORWARD', | |||
37 | 'PARTIAL LINE UP' => 'PARTIAL LINE BACKWARD', | |||
38 | ); | |||
39 | ||||
40 | 1 | 400ns | 400ns | my %alias3 = ( |
41 | # User defined aliasses. Even more convenient :) | |||
42 | ); | |||
43 | 1 | 200ns | 200ns | my $txt; |
44 | ||||
45 | sub croak | |||
46 | { | |||
47 | require Carp; goto &Carp::croak; | |||
48 | } # croak | |||
49 | ||||
50 | sub carp | |||
51 | { | |||
52 | require Carp; goto &Carp::carp; | |||
53 | } # carp | |||
54 | ||||
55 | sub alias (@) | |||
56 | { | |||
57 | @_ or return %alias3; | |||
58 | my $alias = ref $_[0] ? $_[0] : { @_ }; | |||
59 | @alias3{keys %$alias} = values %$alias; | |||
60 | } # alias | |||
61 | ||||
62 | sub alias_file ($) | |||
63 | { | |||
64 | my ($arg, $file) = @_; | |||
65 | if (-f $arg && File::Spec->file_name_is_absolute ($arg)) { | |||
66 | $file = $arg; | |||
67 | } | |||
68 | elsif ($arg =~ m/^\w+$/) { | |||
69 | $file = "unicore/${arg}_alias.pl"; | |||
70 | } | |||
71 | else { | |||
72 | croak "Charnames alias files can only have identifier characters"; | |||
73 | } | |||
74 | if (my @alias = do $file) { | |||
75 | @alias == 1 && !defined $alias[0] and | |||
76 | croak "$file cannot be used as alias file for charnames"; | |||
77 | @alias % 2 and | |||
78 | croak "$file did not return a (valid) list of alias pairs"; | |||
79 | alias (@alias); | |||
80 | return (1); | |||
81 | } | |||
82 | 0; | |||
83 | } # alias_file | |||
84 | ||||
85 | # This is not optimized in any way yet | |||
86 | sub charnames | |||
87 | { | |||
88 | my $name = shift; | |||
89 | ||||
90 | if (exists $alias1{$name}) { | |||
91 | $name = $alias1{$name}; | |||
92 | } | |||
93 | elsif (exists $alias2{$name}) { | |||
94 | require warnings; | |||
95 | warnings::warnif('deprecated', qq{Unicode character name "$name" is deprecated, use "$alias2{$name}" instead}); | |||
96 | $name = $alias2{$name}; | |||
97 | } | |||
98 | elsif (exists $alias3{$name}) { | |||
99 | $name = $alias3{$name}; | |||
100 | } | |||
101 | ||||
102 | my $ord; | |||
103 | my @off; | |||
104 | my $fname; | |||
105 | ||||
106 | if ($name eq "BYTE ORDER MARK") { | |||
107 | $fname = $name; | |||
108 | $ord = 0xFEFF; | |||
109 | } else { | |||
110 | ## Suck in the code/name list as a big string. | |||
111 | ## Lines look like: | |||
112 | ## "0052\t\tLATIN CAPITAL LETTER R\n" | |||
113 | $txt = do "unicore/Name.pl" unless $txt; | |||
114 | ||||
115 | ## @off will hold the index into the code/name string of the start and | |||
116 | ## end of the name as we find it. | |||
117 | ||||
118 | ## If :full, look for the name exactly | |||
119 | if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) { | |||
120 | @off = ($-[0], $+[0]); | |||
121 | } | |||
122 | ||||
123 | ## If we didn't get above, and :short allowed, look for the short name. | |||
124 | ## The short name is like "greek:Sigma" | |||
125 | unless (@off) { | |||
126 | if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) { | |||
127 | my ($script, $cname) = ($1, $2); | |||
128 | my $case = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL"; | |||
129 | if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U\Q$cname\E$/m) { | |||
130 | @off = ($-[0], $+[0]); | |||
131 | } | |||
132 | } | |||
133 | } | |||
134 | ||||
135 | ## If we still don't have it, check for the name among the loaded | |||
136 | ## scripts. | |||
137 | if (not @off) { | |||
138 | my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL"; | |||
139 | for my $script (@{$^H{charnames_scripts}}) { | |||
140 | if ($txt =~ m/\t\t$script (?:$case )?LETTER \U\Q$name\E$/m) { | |||
141 | @off = ($-[0], $+[0]); | |||
142 | last; | |||
143 | } | |||
144 | } | |||
145 | } | |||
146 | ||||
147 | ## If we don't have it by now, give up. | |||
148 | unless (@off) { | |||
149 | carp "Unknown charname '$name'"; | |||
150 | return "\x{FFFD}"; | |||
151 | } | |||
152 | ||||
153 | ## | |||
154 | ## Now know where in the string the name starts. | |||
155 | ## The code, in hex, is before that. | |||
156 | ## | |||
157 | ## The code can be 4-6 characters long, so we've got to sort of | |||
158 | ## go look for it, just after the newline that comes before $off[0]. | |||
159 | ## | |||
160 | ## This would be much easier if unicore/Name.pl had info in | |||
161 | ## a name/code order, instead of code/name order. | |||
162 | ## | |||
163 | ## The +1 after the rindex() is to skip past the newline we're finding, | |||
164 | ## or, if the rindex() fails, to put us to an offset of zero. | |||
165 | ## | |||
166 | my $hexstart = rindex($txt, "\n", $off[0]) + 1; | |||
167 | ||||
168 | ## we know where it starts, so turn into number - | |||
169 | ## the ordinal for the char. | |||
170 | $ord = CORE::hex substr($txt, $hexstart, $off[0] - $hexstart); | |||
171 | } | |||
172 | ||||
173 | if ($^H & $bytes::hint_bits) { # "use bytes" in effect? | |||
174 | 3 | 95µs | 32µs | use bytes; # spent 8µs making 1 call to bytes::import |
175 | return chr $ord if $ord <= 255; | |||
176 | my $hex = sprintf "%04x", $ord; | |||
177 | if (not defined $fname) { | |||
178 | $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2; | |||
179 | } | |||
180 | croak "Character 0x$hex with name '$fname' is above 0xFF"; | |||
181 | } | |||
182 | ||||
183 | 3 | 894µs | 298µs | no warnings 'utf8'; # allow even illegal characters # spent 33µs making 1 call to warnings::unimport |
184 | return pack "U", $ord; | |||
185 | } # charnames | |||
186 | ||||
187 | sub import | |||
188 | # spent 216µs (40+176) within charnames::import which was called
# once (40µs+176µs) at line 12 of /usr/share/perl5/MARC/Charset.pm | |||
189 | 11 | 34µs | 3µs | shift; ## ignore class name |
190 | ||||
191 | if (not @_) { | |||
192 | carp("`use charnames' needs explicit imports list"); | |||
193 | } | |||
194 | $^H{charnames} = \&charnames ; | |||
195 | ||||
196 | ## | |||
197 | ## fill %h keys with our @_ args. | |||
198 | ## | |||
199 | my ($promote, %h, @args) = (0); | |||
200 | 3 | 3µs | 1µs | while (my $arg = shift) { |
201 | 1 | 1µs | 1µs | if ($arg eq ":alias") { |
202 | @_ or | |||
203 | croak ":alias needs an argument in charnames"; | |||
204 | my $alias = shift; | |||
205 | if (ref $alias) { | |||
206 | ref $alias eq "HASH" or | |||
207 | croak "Only HASH reference supported as argument to :alias"; | |||
208 | alias ($alias); | |||
209 | next; | |||
210 | } | |||
211 | if ($alias =~ m{:(\w+)$}) { | |||
212 | $1 eq "full" || $1 eq "short" and | |||
213 | croak ":alias cannot use existing pragma :$1 (reversed order?)"; | |||
214 | alias_file ($1) and $promote = 1; | |||
215 | next; | |||
216 | } | |||
217 | alias_file ($alias); | |||
218 | next; | |||
219 | } | |||
220 | if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) { | |||
221 | warn "unsupported special '$arg' in charnames"; | |||
222 | next; | |||
223 | } | |||
224 | push @args, $arg; | |||
225 | } | |||
226 | @args == 0 && $promote and @args = (":full"); | |||
227 | @h{@args} = (1) x @args; | |||
228 | ||||
229 | $^H{charnames_full} = delete $h{':full'}; | |||
230 | $^H{charnames_short} = delete $h{':short'}; | |||
231 | $^H{charnames_scripts} = [map uc, keys %h]; | |||
232 | ||||
233 | ## | |||
234 | ## If utf8? warnings are enabled, and some scripts were given, | |||
235 | ## see if at least we can find one letter of each script. | |||
236 | ## | |||
237 | if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) { # spent 176µs making 1 call to warnings::enabled | |||
238 | $txt = do "unicore/Name.pl" unless $txt; | |||
239 | ||||
240 | for my $script (@{$^H{charnames_scripts}}) { | |||
241 | if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) { | |||
242 | warnings::warn('utf8', "No such script: '$script'"); | |||
243 | } | |||
244 | } | |||
245 | } | |||
246 | } # import | |||
247 | ||||
248 | 1 | 200ns | 200ns | my %viacode; |
249 | ||||
250 | sub viacode | |||
251 | { | |||
252 | if (@_ != 1) { | |||
253 | carp "charnames::viacode() expects one argument"; | |||
254 | return; | |||
255 | } | |||
256 | ||||
257 | my $arg = shift; | |||
258 | ||||
259 | # this comes actually from Unicode::UCD, where it is the named | |||
260 | # function _getcode (), but it avoids the overhead of loading it | |||
261 | my $hex; | |||
262 | if ($arg =~ /^[1-9]\d*$/) { | |||
263 | $hex = sprintf "%04X", $arg; | |||
264 | } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) { | |||
265 | $hex = $1; | |||
266 | } else { | |||
267 | carp("unexpected arg \"$arg\" to charnames::viacode()"); | |||
268 | return; | |||
269 | } | |||
270 | ||||
271 | # checking the length first is slightly faster | |||
272 | if (length($hex) > 5 && hex($hex) > 0x10FFFF) { | |||
273 | carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)"; | |||
274 | return; | |||
275 | } | |||
276 | ||||
277 | return $viacode{$hex} if exists $viacode{$hex}; | |||
278 | ||||
279 | $txt = do "unicore/Name.pl" unless $txt; | |||
280 | ||||
281 | return unless $txt =~ m/^$hex\t\t(.+)/m; | |||
282 | ||||
283 | $viacode{$hex} = $1; | |||
284 | } # viacode | |||
285 | ||||
286 | 1 | 100ns | 100ns | my %vianame; |
287 | ||||
288 | sub vianame | |||
289 | { | |||
290 | if (@_ != 1) { | |||
291 | carp "charnames::vianame() expects one name argument"; | |||
292 | return () | |||
293 | } | |||
294 | ||||
295 | my $arg = shift; | |||
296 | ||||
297 | return chr CORE::hex $1 if $arg =~ /^U\+([0-9a-fA-F]+)$/; | |||
298 | ||||
299 | return $vianame{$arg} if exists $vianame{$arg}; | |||
300 | ||||
301 | $txt = do "unicore/Name.pl" unless $txt; | |||
302 | ||||
303 | my $pos = index $txt, "\t\t$arg\n"; | |||
304 | if ($[ <= $pos) { | |||
305 | my $posLF = rindex $txt, "\n", $pos; | |||
306 | (my $code = substr $txt, $posLF + 1, 6) =~ tr/\t//d; | |||
307 | return $vianame{$arg} = CORE::hex $code; | |||
308 | ||||
309 | # If $pos is at the 1st line, $posLF must be $[ - 1 (not found); | |||
310 | # then $posLF + 1 equals to $[ (at the beginning of $txt). | |||
311 | # Otherwise $posLF is the position of "\n"; | |||
312 | # then $posLF + 1 must be the position of the next to "\n" | |||
313 | # (the beginning of the line). | |||
314 | # substr($txt, $posLF + 1, 6) may be "0000\t\t", "00A1\t\t", | |||
315 | # "10300\t", "100000", etc. So we can get the code via removing TAB. | |||
316 | } else { | |||
317 | return; | |||
318 | } | |||
319 | } # vianame | |||
320 | ||||
321 | ||||
322 | 1 | 22µs | 22µs | 1; |
323 | __END__ | |||
324 | ||||
325 | =head1 NAME | |||
326 | ||||
327 | charnames - define character names for C<\N{named}> string literal escapes | |||
328 | ||||
329 | =head1 SYNOPSIS | |||
330 | ||||
331 | use charnames ':full'; | |||
332 | print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n"; | |||
333 | ||||
334 | use charnames ':short'; | |||
335 | print "\N{greek:Sigma} is an upper-case sigma.\n"; | |||
336 | ||||
337 | use charnames qw(cyrillic greek); | |||
338 | print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n"; | |||
339 | ||||
340 | use charnames ":full", ":alias" => { | |||
341 | e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE", | |||
342 | }; | |||
343 | print "\N{e_ACUTE} is a small letter e with an acute.\n"; | |||
344 | ||||
345 | use charnames (); | |||
346 | print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE" | |||
347 | printf "%04X", charnames::vianame("GOTHIC LETTER AHSA"); # prints "10330" | |||
348 | ||||
349 | =head1 DESCRIPTION | |||
350 | ||||
351 | Pragma C<use charnames> supports arguments C<:full>, C<:short>, script | |||
352 | names and customized aliases. If C<:full> is present, for expansion of | |||
353 | C<\N{CHARNAME}>, the string C<CHARNAME> is first looked up in the list of | |||
354 | standard Unicode character names. If C<:short> is present, and | |||
355 | C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up | |||
356 | as a letter in script C<SCRIPT>. If pragma C<use charnames> is used | |||
357 | with script name arguments, then for C<\N{CHARNAME}> the name | |||
358 | C<CHARNAME> is looked up as a letter in the given scripts (in the | |||
359 | specified order). Customized aliases are explained in L</CUSTOM ALIASES>. | |||
360 | ||||
361 | For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME> | |||
362 | this pragma looks for the names | |||
363 | ||||
364 | SCRIPTNAME CAPITAL LETTER CHARNAME | |||
365 | SCRIPTNAME SMALL LETTER CHARNAME | |||
366 | SCRIPTNAME LETTER CHARNAME | |||
367 | ||||
368 | in the table of standard Unicode names. If C<CHARNAME> is lowercase, | |||
369 | then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant | |||
370 | is ignored. | |||
371 | ||||
372 | Note that C<\N{...}> is compile-time, it's a special form of string | |||
373 | constant used inside double-quoted strings: in other words, you cannot | |||
374 | use variables inside the C<\N{...}>. If you want similar run-time | |||
375 | functionality, use charnames::vianame(). | |||
376 | ||||
377 | For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F) | |||
378 | as of Unicode 3.1, there are no official Unicode names but you can use | |||
379 | instead the ISO 6429 names (LINE FEED, ESCAPE, and so forth). In | |||
380 | Unicode 3.2 (as of Perl 5.8) some naming changes take place ISO 6429 | |||
381 | has been updated, see L</ALIASES>. Also note that the U+UU80, U+0081, | |||
382 | U+0084, and U+0099 do not have names even in ISO 6429. | |||
383 | ||||
384 | Since the Unicode standard uses "U+HHHH", so can you: "\N{U+263a}" | |||
385 | is the Unicode smiley face, or "\N{WHITE SMILING FACE}". | |||
386 | ||||
387 | =head1 ALIASES | |||
388 | ||||
389 | A few aliases have been defined for convenience: instead of having | |||
390 | to use the official names | |||
391 | ||||
392 | LINE FEED (LF) | |||
393 | FORM FEED (FF) | |||
394 | CARRIAGE RETURN (CR) | |||
395 | NEXT LINE (NEL) | |||
396 | ||||
397 | (yes, with parentheses) one can use | |||
398 | ||||
399 | LINE FEED | |||
400 | FORM FEED | |||
401 | CARRIAGE RETURN | |||
402 | NEXT LINE | |||
403 | LF | |||
404 | FF | |||
405 | CR | |||
406 | NEL | |||
407 | ||||
408 | One can also use | |||
409 | ||||
410 | BYTE ORDER MARK | |||
411 | BOM | |||
412 | ||||
413 | and | |||
414 | ||||
415 | ZWNJ | |||
416 | ZWJ | |||
417 | ||||
418 | for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER. | |||
419 | ||||
420 | For backward compatibility one can use the old names for | |||
421 | certain C0 and C1 controls | |||
422 | ||||
423 | old new | |||
424 | ||||
425 | HORIZONTAL TABULATION CHARACTER TABULATION | |||
426 | VERTICAL TABULATION LINE TABULATION | |||
427 | FILE SEPARATOR INFORMATION SEPARATOR FOUR | |||
428 | GROUP SEPARATOR INFORMATION SEPARATOR THREE | |||
429 | RECORD SEPARATOR INFORMATION SEPARATOR TWO | |||
430 | UNIT SEPARATOR INFORMATION SEPARATOR ONE | |||
431 | PARTIAL LINE DOWN PARTIAL LINE FORWARD | |||
432 | PARTIAL LINE UP PARTIAL LINE BACKWARD | |||
433 | ||||
434 | but the old names in addition to giving the character | |||
435 | will also give a warning about being deprecated. | |||
436 | ||||
437 | =head1 CUSTOM ALIASES | |||
438 | ||||
439 | This version of charnames supports three mechanisms of adding local | |||
440 | or customized aliases to standard Unicode naming conventions (:full) | |||
441 | ||||
442 | =head2 Anonymous hashes | |||
443 | ||||
444 | use charnames ":full", ":alias" => { | |||
445 | e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE", | |||
446 | }; | |||
447 | my $str = "\N{e_ACUTE}"; | |||
448 | ||||
449 | =head2 Alias file | |||
450 | ||||
451 | use charnames ":full", ":alias" => "pro"; | |||
452 | ||||
453 | will try to read "unicore/pro_alias.pl" from the @INC path. This | |||
454 | file should return a list in plain perl: | |||
455 | ||||
456 | ( | |||
457 | A_GRAVE => "LATIN CAPITAL LETTER A WITH GRAVE", | |||
458 | A_CIRCUM => "LATIN CAPITAL LETTER A WITH CIRCUMFLEX", | |||
459 | A_DIAERES => "LATIN CAPITAL LETTER A WITH DIAERESIS", | |||
460 | A_TILDE => "LATIN CAPITAL LETTER A WITH TILDE", | |||
461 | A_BREVE => "LATIN CAPITAL LETTER A WITH BREVE", | |||
462 | A_RING => "LATIN CAPITAL LETTER A WITH RING ABOVE", | |||
463 | A_MACRON => "LATIN CAPITAL LETTER A WITH MACRON", | |||
464 | ); | |||
465 | ||||
466 | =head2 Alias shortcut | |||
467 | ||||
468 | use charnames ":alias" => ":pro"; | |||
469 | ||||
470 | works exactly the same as the alias pairs, only this time, | |||
471 | ":full" is inserted automatically as first argument (if no | |||
472 | other argument is given). | |||
473 | ||||
474 | =head1 charnames::viacode(code) | |||
475 | ||||
476 | Returns the full name of the character indicated by the numeric code. | |||
477 | The example | |||
478 | ||||
479 | print charnames::viacode(0x2722); | |||
480 | ||||
481 | prints "FOUR TEARDROP-SPOKED ASTERISK". | |||
482 | ||||
483 | Returns undef if no name is known for the code. | |||
484 | ||||
485 | This works only for the standard names, and does not yet apply | |||
486 | to custom translators. | |||
487 | ||||
488 | Notice that the name returned for of U+FEFF is "ZERO WIDTH NO-BREAK | |||
489 | SPACE", not "BYTE ORDER MARK". | |||
490 | ||||
491 | =head1 charnames::vianame(name) | |||
492 | ||||
493 | Returns the code point indicated by the name. | |||
494 | The example | |||
495 | ||||
496 | printf "%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK"); | |||
497 | ||||
498 | prints "2722". | |||
499 | ||||
500 | Returns undef if the name is unknown. | |||
501 | ||||
502 | This works only for the standard names, and does not yet apply | |||
503 | to custom translators. | |||
504 | ||||
505 | =head1 CUSTOM TRANSLATORS | |||
506 | ||||
507 | The mechanism of translation of C<\N{...}> escapes is general and not | |||
508 | hardwired into F<charnames.pm>. A module can install custom | |||
509 | translations (inside the scope which C<use>s the module) with the | |||
510 | following magic incantation: | |||
511 | ||||
512 | sub import { | |||
513 | shift; | |||
514 | $^H{charnames} = \&translator; | |||
515 | } | |||
516 | ||||
517 | Here translator() is a subroutine which takes C<CHARNAME> as an | |||
518 | argument, and returns text to insert into the string instead of the | |||
519 | C<\N{CHARNAME}> escape. Since the text to insert should be different | |||
520 | in C<bytes> mode and out of it, the function should check the current | |||
521 | state of C<bytes>-flag as in: | |||
522 | ||||
523 | use bytes (); # for $bytes::hint_bits | |||
524 | sub translator { | |||
525 | if ($^H & $bytes::hint_bits) { | |||
526 | return bytes_translator(@_); | |||
527 | } | |||
528 | else { | |||
529 | return utf8_translator(@_); | |||
530 | } | |||
531 | } | |||
532 | ||||
533 | =head1 ILLEGAL CHARACTERS | |||
534 | ||||
535 | If you ask by name for a character that does not exist, a warning is | |||
536 | given and the Unicode I<replacement character> "\x{FFFD}" is returned. | |||
537 | ||||
538 | If you ask by code for a character that does not exist, no warning is | |||
539 | given and C<undef> is returned. (Though if you ask for a code point | |||
540 | past U+10FFFF you do get a warning.) | |||
541 | ||||
542 | =head1 BUGS | |||
543 | ||||
544 | Since evaluation of the translation function happens in a middle of | |||
545 | compilation (of a string literal), the translation function should not | |||
546 | do any C<eval>s or C<require>s. This restriction should be lifted in | |||
547 | a future version of Perl. | |||
548 | ||||
549 | =cut |