Filename | /home/hinrik/perl5/perlbrew/perls/perl-5.13.5/lib/5.13.5/charnames.pm |
Statements | Executed 97 statements in 2.71ms |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
4 | 3 | 1 | 65µs | 126µs | charnames | charnames::
1 | 1 | 1 | 54µs | 259µs | import | charnames::
4 | 1 | 1 | 49µs | 49µs | lookup_name | charnames::
1 | 1 | 1 | 32µs | 37µs | BEGIN@2 | charnames::
1 | 1 | 1 | 16µs | 25µs | BEGIN@3 | charnames::
1 | 1 | 1 | 14µs | 41µs | BEGIN@692 | charnames::
1 | 1 | 1 | 13µs | 13µs | BEGIN@4 | charnames::
4 | 1 | 1 | 12µs | 12µs | CORE:pack (opcode) | charnames::
2 | 2 | 1 | 8µs | 8µs | CORE:qr (opcode) | charnames::
1 | 1 | 1 | 6µs | 6µs | BEGIN@7 | charnames::
0 | 0 | 0 | 0s | 0s | alias | charnames::
0 | 0 | 0 | 0s | 0s | alias_file | charnames::
0 | 0 | 0 | 0s | 0s | carp | charnames::
0 | 0 | 0 | 0s | 0s | croak | charnames::
0 | 0 | 0 | 0s | 0s | not_legal_use_bytes_msg | charnames::
0 | 0 | 0 | 0s | 0s | viacode | charnames::
0 | 0 | 0 | 0s | 0s | vianame | charnames::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | package charnames; | ||||
2 | 2 | 35µs | 2 | 43µs | # spent 37µs (32+5) within charnames::BEGIN@2 which was called:
# once (32µs+5µs) by File::CountLines::BEGIN@12 at line 2 # spent 37µs making 1 call to charnames::BEGIN@2
# spent 5µs making 1 call to strict::import |
3 | 2 | 26µs | 2 | 34µs | # spent 25µs (16+9) within charnames::BEGIN@3 which was called:
# once (16µs+9µs) by File::CountLines::BEGIN@12 at line 3 # spent 25µs making 1 call to charnames::BEGIN@3
# spent 9µs making 1 call to warnings::import |
4 | 2 | 43µs | 1 | 13µs | # spent 13µs within charnames::BEGIN@4 which was called:
# once (13µs+0s) by File::CountLines::BEGIN@12 at line 4 # spent 13µs making 1 call to charnames::BEGIN@4 |
5 | 1 | 1µs | our $VERSION = '1.15'; | ||
6 | |||||
7 | 2 | 1.51ms | 1 | 6µs | # spent 6µs within charnames::BEGIN@7 which was called:
# once (6µs+0s) by File::CountLines::BEGIN@12 at line 7 # spent 6µs making 1 call to charnames::BEGIN@7 |
8 | |||||
9 | 1 | 153µs | my %system_aliases = ( | ||
10 | # Icky 3.2 names with parentheses. | ||||
11 | 'LINE FEED' => 0x0A, # LINE FEED (LF) | ||||
12 | 'FORM FEED' => 0x0C, # FORM FEED (FF) | ||||
13 | 'CARRIAGE RETURN' => 0x0D, # CARRIAGE RETURN (CR) | ||||
14 | 'NEXT LINE' => 0x85, # NEXT LINE (NEL) | ||||
15 | |||||
16 | # Some variant names from Wikipedia | ||||
17 | 'SINGLE-SHIFT 2' => 0x8E, | ||||
18 | 'SINGLE-SHIFT 3' => 0x8F, | ||||
19 | 'PRIVATE USE 1' => 0x91, | ||||
20 | 'PRIVATE USE 2' => 0x92, | ||||
21 | 'START OF PROTECTED AREA' => 0x96, | ||||
22 | 'END OF PROTECTED AREA' => 0x97, | ||||
23 | |||||
24 | # Convenience. Standard abbreviations for the controls | ||||
25 | 'NUL' => 0x00, # NULL | ||||
26 | 'SOH' => 0x01, # START OF HEADING | ||||
27 | 'STX' => 0x02, # START OF TEXT | ||||
28 | 'ETX' => 0x03, # END OF TEXT | ||||
29 | 'EOT' => 0x04, # END OF TRANSMISSION | ||||
30 | 'ENQ' => 0x05, # ENQUIRY | ||||
31 | 'ACK' => 0x06, # ACKNOWLEDGE | ||||
32 | 'BEL' => 0x07, # BELL | ||||
33 | 'BS' => 0x08, # BACKSPACE | ||||
34 | 'HT' => 0x09, # HORIZONTAL TABULATION | ||||
35 | 'LF' => 0x0A, # LINE FEED (LF) | ||||
36 | 'VT' => 0x0B, # VERTICAL TABULATION | ||||
37 | 'FF' => 0x0C, # FORM FEED (FF) | ||||
38 | 'CR' => 0x0D, # CARRIAGE RETURN (CR) | ||||
39 | 'SO' => 0x0E, # SHIFT OUT | ||||
40 | 'SI' => 0x0F, # SHIFT IN | ||||
41 | 'DLE' => 0x10, # DATA LINK ESCAPE | ||||
42 | 'DC1' => 0x11, # DEVICE CONTROL ONE | ||||
43 | 'DC2' => 0x12, # DEVICE CONTROL TWO | ||||
44 | 'DC3' => 0x13, # DEVICE CONTROL THREE | ||||
45 | 'DC4' => 0x14, # DEVICE CONTROL FOUR | ||||
46 | 'NAK' => 0x15, # NEGATIVE ACKNOWLEDGE | ||||
47 | 'SYN' => 0x16, # SYNCHRONOUS IDLE | ||||
48 | 'ETB' => 0x17, # END OF TRANSMISSION BLOCK | ||||
49 | 'CAN' => 0x18, # CANCEL | ||||
50 | 'EOM' => 0x19, # END OF MEDIUM | ||||
51 | 'SUB' => 0x1A, # SUBSTITUTE | ||||
52 | 'ESC' => 0x1B, # ESCAPE | ||||
53 | 'FS' => 0x1C, # FILE SEPARATOR | ||||
54 | 'GS' => 0x1D, # GROUP SEPARATOR | ||||
55 | 'RS' => 0x1E, # RECORD SEPARATOR | ||||
56 | 'US' => 0x1F, # UNIT SEPARATOR | ||||
57 | 'DEL' => 0x7F, # DELETE | ||||
58 | 'BPH' => 0x82, # BREAK PERMITTED HERE | ||||
59 | 'NBH' => 0x83, # NO BREAK HERE | ||||
60 | 'NEL' => 0x85, # NEXT LINE (NEL) | ||||
61 | 'SSA' => 0x86, # START OF SELECTED AREA | ||||
62 | 'ESA' => 0x87, # END OF SELECTED AREA | ||||
63 | 'HTS' => 0x88, # CHARACTER TABULATION SET | ||||
64 | 'HTJ' => 0x89, # CHARACTER TABULATION WITH JUSTIFICATION | ||||
65 | 'VTS' => 0x8A, # LINE TABULATION SET | ||||
66 | 'PLD' => 0x8B, # PARTIAL LINE FORWARD | ||||
67 | 'PLU' => 0x8C, # PARTIAL LINE BACKWARD | ||||
68 | 'RI ' => 0x8D, # REVERSE LINE FEED | ||||
69 | 'SS2' => 0x8E, # SINGLE SHIFT TWO | ||||
70 | 'SS3' => 0x8F, # SINGLE SHIFT THREE | ||||
71 | 'DCS' => 0x90, # DEVICE CONTROL STRING | ||||
72 | 'PU1' => 0x91, # PRIVATE USE ONE | ||||
73 | 'PU2' => 0x92, # PRIVATE USE TWO | ||||
74 | 'STS' => 0x93, # SET TRANSMIT STATE | ||||
75 | 'CCH' => 0x94, # CANCEL CHARACTER | ||||
76 | 'MW ' => 0x95, # MESSAGE WAITING | ||||
77 | 'SPA' => 0x96, # START OF GUARDED AREA | ||||
78 | 'EPA' => 0x97, # END OF GUARDED AREA | ||||
79 | 'SOS' => 0x98, # START OF STRING | ||||
80 | 'SCI' => 0x9A, # SINGLE CHARACTER INTRODUCER | ||||
81 | 'CSI' => 0x9B, # CONTROL SEQUENCE INTRODUCER | ||||
82 | 'ST ' => 0x9C, # STRING TERMINATOR | ||||
83 | 'OSC' => 0x9D, # OPERATING SYSTEM COMMAND | ||||
84 | 'PM ' => 0x9E, # PRIVACY MESSAGE | ||||
85 | 'APC' => 0x9F, # APPLICATION PROGRAM COMMAND | ||||
86 | |||||
87 | # There are no names for these in the Unicode standard; | ||||
88 | # perhaps should be deprecated, but then again there are | ||||
89 | # no alternative names, so am not deprecating. And if | ||||
90 | # did, the code would have to change to not recommend an | ||||
91 | # alternative for these. | ||||
92 | 'PADDING CHARACTER' => 0x80, | ||||
93 | 'PAD' => 0x80, | ||||
94 | 'HIGH OCTET PRESET' => 0x81, | ||||
95 | 'HOP' => 0x81, | ||||
96 | 'INDEX' => 0x84, | ||||
97 | 'IND' => 0x84, | ||||
98 | 'SINGLE GRAPHIC CHARACTER INTRODUCER' => 0x99, | ||||
99 | 'SGC' => 0x99, | ||||
100 | |||||
101 | # More convenience. For further convenience, | ||||
102 | # it is suggested some way of using the NamesList | ||||
103 | # aliases be implemented, but there are ambiguities in | ||||
104 | # NamesList.txt | ||||
105 | 'BOM' => 0xFEFF, # BYTE ORDER MARK | ||||
106 | 'BYTE ORDER MARK'=> 0xFEFF, | ||||
107 | 'CGJ' => 0x034F, # COMBINING GRAPHEME JOINER | ||||
108 | 'FVS1' => 0x180B, # MONGOLIAN FREE VARIATION SELECTOR ONE | ||||
109 | 'FVS2' => 0x180C, # MONGOLIAN FREE VARIATION SELECTOR TWO | ||||
110 | 'FVS3' => 0x180D, # MONGOLIAN FREE VARIATION SELECTOR THREE | ||||
111 | 'LRE' => 0x202A, # LEFT-TO-RIGHT EMBEDDING | ||||
112 | 'LRM' => 0x200E, # LEFT-TO-RIGHT MARK | ||||
113 | 'LRO' => 0x202D, # LEFT-TO-RIGHT OVERRIDE | ||||
114 | 'MMSP' => 0x205F, # MEDIUM MATHEMATICAL SPACE | ||||
115 | 'MVS' => 0x180E, # MONGOLIAN VOWEL SEPARATOR | ||||
116 | 'NBSP' => 0x00A0, # NO-BREAK SPACE | ||||
117 | 'NNBSP' => 0x202F, # NARROW NO-BREAK SPACE | ||||
118 | 'PDF' => 0x202C, # POP DIRECTIONAL FORMATTING | ||||
119 | 'RLE' => 0x202B, # RIGHT-TO-LEFT EMBEDDING | ||||
120 | 'RLM' => 0x200F, # RIGHT-TO-LEFT MARK | ||||
121 | 'RLO' => 0x202E, # RIGHT-TO-LEFT OVERRIDE | ||||
122 | 'SHY' => 0x00AD, # SOFT HYPHEN | ||||
123 | 'VS1' => 0xFE00, # VARIATION SELECTOR-1 | ||||
124 | 'VS2' => 0xFE01, # VARIATION SELECTOR-2 | ||||
125 | 'VS3' => 0xFE02, # VARIATION SELECTOR-3 | ||||
126 | 'VS4' => 0xFE03, # VARIATION SELECTOR-4 | ||||
127 | 'VS5' => 0xFE04, # VARIATION SELECTOR-5 | ||||
128 | 'VS6' => 0xFE05, # VARIATION SELECTOR-6 | ||||
129 | 'VS7' => 0xFE06, # VARIATION SELECTOR-7 | ||||
130 | 'VS8' => 0xFE07, # VARIATION SELECTOR-8 | ||||
131 | 'VS9' => 0xFE08, # VARIATION SELECTOR-9 | ||||
132 | 'VS10' => 0xFE09, # VARIATION SELECTOR-10 | ||||
133 | 'VS11' => 0xFE0A, # VARIATION SELECTOR-11 | ||||
134 | 'VS12' => 0xFE0B, # VARIATION SELECTOR-12 | ||||
135 | 'VS13' => 0xFE0C, # VARIATION SELECTOR-13 | ||||
136 | 'VS14' => 0xFE0D, # VARIATION SELECTOR-14 | ||||
137 | 'VS15' => 0xFE0E, # VARIATION SELECTOR-15 | ||||
138 | 'VS16' => 0xFE0F, # VARIATION SELECTOR-16 | ||||
139 | 'VS17' => 0xE0100, # VARIATION SELECTOR-17 | ||||
140 | 'VS18' => 0xE0101, # VARIATION SELECTOR-18 | ||||
141 | 'VS19' => 0xE0102, # VARIATION SELECTOR-19 | ||||
142 | 'VS20' => 0xE0103, # VARIATION SELECTOR-20 | ||||
143 | 'VS21' => 0xE0104, # VARIATION SELECTOR-21 | ||||
144 | 'VS22' => 0xE0105, # VARIATION SELECTOR-22 | ||||
145 | 'VS23' => 0xE0106, # VARIATION SELECTOR-23 | ||||
146 | 'VS24' => 0xE0107, # VARIATION SELECTOR-24 | ||||
147 | 'VS25' => 0xE0108, # VARIATION SELECTOR-25 | ||||
148 | 'VS26' => 0xE0109, # VARIATION SELECTOR-26 | ||||
149 | 'VS27' => 0xE010A, # VARIATION SELECTOR-27 | ||||
150 | 'VS28' => 0xE010B, # VARIATION SELECTOR-28 | ||||
151 | 'VS29' => 0xE010C, # VARIATION SELECTOR-29 | ||||
152 | 'VS30' => 0xE010D, # VARIATION SELECTOR-30 | ||||
153 | 'VS31' => 0xE010E, # VARIATION SELECTOR-31 | ||||
154 | 'VS32' => 0xE010F, # VARIATION SELECTOR-32 | ||||
155 | 'VS33' => 0xE0110, # VARIATION SELECTOR-33 | ||||
156 | 'VS34' => 0xE0111, # VARIATION SELECTOR-34 | ||||
157 | 'VS35' => 0xE0112, # VARIATION SELECTOR-35 | ||||
158 | 'VS36' => 0xE0113, # VARIATION SELECTOR-36 | ||||
159 | 'VS37' => 0xE0114, # VARIATION SELECTOR-37 | ||||
160 | 'VS38' => 0xE0115, # VARIATION SELECTOR-38 | ||||
161 | 'VS39' => 0xE0116, # VARIATION SELECTOR-39 | ||||
162 | 'VS40' => 0xE0117, # VARIATION SELECTOR-40 | ||||
163 | 'VS41' => 0xE0118, # VARIATION SELECTOR-41 | ||||
164 | 'VS42' => 0xE0119, # VARIATION SELECTOR-42 | ||||
165 | 'VS43' => 0xE011A, # VARIATION SELECTOR-43 | ||||
166 | 'VS44' => 0xE011B, # VARIATION SELECTOR-44 | ||||
167 | 'VS45' => 0xE011C, # VARIATION SELECTOR-45 | ||||
168 | 'VS46' => 0xE011D, # VARIATION SELECTOR-46 | ||||
169 | 'VS47' => 0xE011E, # VARIATION SELECTOR-47 | ||||
170 | 'VS48' => 0xE011F, # VARIATION SELECTOR-48 | ||||
171 | 'VS49' => 0xE0120, # VARIATION SELECTOR-49 | ||||
172 | 'VS50' => 0xE0121, # VARIATION SELECTOR-50 | ||||
173 | 'VS51' => 0xE0122, # VARIATION SELECTOR-51 | ||||
174 | 'VS52' => 0xE0123, # VARIATION SELECTOR-52 | ||||
175 | 'VS53' => 0xE0124, # VARIATION SELECTOR-53 | ||||
176 | 'VS54' => 0xE0125, # VARIATION SELECTOR-54 | ||||
177 | 'VS55' => 0xE0126, # VARIATION SELECTOR-55 | ||||
178 | 'VS56' => 0xE0127, # VARIATION SELECTOR-56 | ||||
179 | 'VS57' => 0xE0128, # VARIATION SELECTOR-57 | ||||
180 | 'VS58' => 0xE0129, # VARIATION SELECTOR-58 | ||||
181 | 'VS59' => 0xE012A, # VARIATION SELECTOR-59 | ||||
182 | 'VS60' => 0xE012B, # VARIATION SELECTOR-60 | ||||
183 | 'VS61' => 0xE012C, # VARIATION SELECTOR-61 | ||||
184 | 'VS62' => 0xE012D, # VARIATION SELECTOR-62 | ||||
185 | 'VS63' => 0xE012E, # VARIATION SELECTOR-63 | ||||
186 | 'VS64' => 0xE012F, # VARIATION SELECTOR-64 | ||||
187 | 'VS65' => 0xE0130, # VARIATION SELECTOR-65 | ||||
188 | 'VS66' => 0xE0131, # VARIATION SELECTOR-66 | ||||
189 | 'VS67' => 0xE0132, # VARIATION SELECTOR-67 | ||||
190 | 'VS68' => 0xE0133, # VARIATION SELECTOR-68 | ||||
191 | 'VS69' => 0xE0134, # VARIATION SELECTOR-69 | ||||
192 | 'VS70' => 0xE0135, # VARIATION SELECTOR-70 | ||||
193 | 'VS71' => 0xE0136, # VARIATION SELECTOR-71 | ||||
194 | 'VS72' => 0xE0137, # VARIATION SELECTOR-72 | ||||
195 | 'VS73' => 0xE0138, # VARIATION SELECTOR-73 | ||||
196 | 'VS74' => 0xE0139, # VARIATION SELECTOR-74 | ||||
197 | 'VS75' => 0xE013A, # VARIATION SELECTOR-75 | ||||
198 | 'VS76' => 0xE013B, # VARIATION SELECTOR-76 | ||||
199 | 'VS77' => 0xE013C, # VARIATION SELECTOR-77 | ||||
200 | 'VS78' => 0xE013D, # VARIATION SELECTOR-78 | ||||
201 | 'VS79' => 0xE013E, # VARIATION SELECTOR-79 | ||||
202 | 'VS80' => 0xE013F, # VARIATION SELECTOR-80 | ||||
203 | 'VS81' => 0xE0140, # VARIATION SELECTOR-81 | ||||
204 | 'VS82' => 0xE0141, # VARIATION SELECTOR-82 | ||||
205 | 'VS83' => 0xE0142, # VARIATION SELECTOR-83 | ||||
206 | 'VS84' => 0xE0143, # VARIATION SELECTOR-84 | ||||
207 | 'VS85' => 0xE0144, # VARIATION SELECTOR-85 | ||||
208 | 'VS86' => 0xE0145, # VARIATION SELECTOR-86 | ||||
209 | 'VS87' => 0xE0146, # VARIATION SELECTOR-87 | ||||
210 | 'VS88' => 0xE0147, # VARIATION SELECTOR-88 | ||||
211 | 'VS89' => 0xE0148, # VARIATION SELECTOR-89 | ||||
212 | 'VS90' => 0xE0149, # VARIATION SELECTOR-90 | ||||
213 | 'VS91' => 0xE014A, # VARIATION SELECTOR-91 | ||||
214 | 'VS92' => 0xE014B, # VARIATION SELECTOR-92 | ||||
215 | 'VS93' => 0xE014C, # VARIATION SELECTOR-93 | ||||
216 | 'VS94' => 0xE014D, # VARIATION SELECTOR-94 | ||||
217 | 'VS95' => 0xE014E, # VARIATION SELECTOR-95 | ||||
218 | 'VS96' => 0xE014F, # VARIATION SELECTOR-96 | ||||
219 | 'VS97' => 0xE0150, # VARIATION SELECTOR-97 | ||||
220 | 'VS98' => 0xE0151, # VARIATION SELECTOR-98 | ||||
221 | 'VS99' => 0xE0152, # VARIATION SELECTOR-99 | ||||
222 | 'VS100' => 0xE0153, # VARIATION SELECTOR-100 | ||||
223 | 'VS101' => 0xE0154, # VARIATION SELECTOR-101 | ||||
224 | 'VS102' => 0xE0155, # VARIATION SELECTOR-102 | ||||
225 | 'VS103' => 0xE0156, # VARIATION SELECTOR-103 | ||||
226 | 'VS104' => 0xE0157, # VARIATION SELECTOR-104 | ||||
227 | 'VS105' => 0xE0158, # VARIATION SELECTOR-105 | ||||
228 | 'VS106' => 0xE0159, # VARIATION SELECTOR-106 | ||||
229 | 'VS107' => 0xE015A, # VARIATION SELECTOR-107 | ||||
230 | 'VS108' => 0xE015B, # VARIATION SELECTOR-108 | ||||
231 | 'VS109' => 0xE015C, # VARIATION SELECTOR-109 | ||||
232 | 'VS110' => 0xE015D, # VARIATION SELECTOR-110 | ||||
233 | 'VS111' => 0xE015E, # VARIATION SELECTOR-111 | ||||
234 | 'VS112' => 0xE015F, # VARIATION SELECTOR-112 | ||||
235 | 'VS113' => 0xE0160, # VARIATION SELECTOR-113 | ||||
236 | 'VS114' => 0xE0161, # VARIATION SELECTOR-114 | ||||
237 | 'VS115' => 0xE0162, # VARIATION SELECTOR-115 | ||||
238 | 'VS116' => 0xE0163, # VARIATION SELECTOR-116 | ||||
239 | 'VS117' => 0xE0164, # VARIATION SELECTOR-117 | ||||
240 | 'VS118' => 0xE0165, # VARIATION SELECTOR-118 | ||||
241 | 'VS119' => 0xE0166, # VARIATION SELECTOR-119 | ||||
242 | 'VS120' => 0xE0167, # VARIATION SELECTOR-120 | ||||
243 | 'VS121' => 0xE0168, # VARIATION SELECTOR-121 | ||||
244 | 'VS122' => 0xE0169, # VARIATION SELECTOR-122 | ||||
245 | 'VS123' => 0xE016A, # VARIATION SELECTOR-123 | ||||
246 | 'VS124' => 0xE016B, # VARIATION SELECTOR-124 | ||||
247 | 'VS125' => 0xE016C, # VARIATION SELECTOR-125 | ||||
248 | 'VS126' => 0xE016D, # VARIATION SELECTOR-126 | ||||
249 | 'VS127' => 0xE016E, # VARIATION SELECTOR-127 | ||||
250 | 'VS128' => 0xE016F, # VARIATION SELECTOR-128 | ||||
251 | 'VS129' => 0xE0170, # VARIATION SELECTOR-129 | ||||
252 | 'VS130' => 0xE0171, # VARIATION SELECTOR-130 | ||||
253 | 'VS131' => 0xE0172, # VARIATION SELECTOR-131 | ||||
254 | 'VS132' => 0xE0173, # VARIATION SELECTOR-132 | ||||
255 | 'VS133' => 0xE0174, # VARIATION SELECTOR-133 | ||||
256 | 'VS134' => 0xE0175, # VARIATION SELECTOR-134 | ||||
257 | 'VS135' => 0xE0176, # VARIATION SELECTOR-135 | ||||
258 | 'VS136' => 0xE0177, # VARIATION SELECTOR-136 | ||||
259 | 'VS137' => 0xE0178, # VARIATION SELECTOR-137 | ||||
260 | 'VS138' => 0xE0179, # VARIATION SELECTOR-138 | ||||
261 | 'VS139' => 0xE017A, # VARIATION SELECTOR-139 | ||||
262 | 'VS140' => 0xE017B, # VARIATION SELECTOR-140 | ||||
263 | 'VS141' => 0xE017C, # VARIATION SELECTOR-141 | ||||
264 | 'VS142' => 0xE017D, # VARIATION SELECTOR-142 | ||||
265 | 'VS143' => 0xE017E, # VARIATION SELECTOR-143 | ||||
266 | 'VS144' => 0xE017F, # VARIATION SELECTOR-144 | ||||
267 | 'VS145' => 0xE0180, # VARIATION SELECTOR-145 | ||||
268 | 'VS146' => 0xE0181, # VARIATION SELECTOR-146 | ||||
269 | 'VS147' => 0xE0182, # VARIATION SELECTOR-147 | ||||
270 | 'VS148' => 0xE0183, # VARIATION SELECTOR-148 | ||||
271 | 'VS149' => 0xE0184, # VARIATION SELECTOR-149 | ||||
272 | 'VS150' => 0xE0185, # VARIATION SELECTOR-150 | ||||
273 | 'VS151' => 0xE0186, # VARIATION SELECTOR-151 | ||||
274 | 'VS152' => 0xE0187, # VARIATION SELECTOR-152 | ||||
275 | 'VS153' => 0xE0188, # VARIATION SELECTOR-153 | ||||
276 | 'VS154' => 0xE0189, # VARIATION SELECTOR-154 | ||||
277 | 'VS155' => 0xE018A, # VARIATION SELECTOR-155 | ||||
278 | 'VS156' => 0xE018B, # VARIATION SELECTOR-156 | ||||
279 | 'VS157' => 0xE018C, # VARIATION SELECTOR-157 | ||||
280 | 'VS158' => 0xE018D, # VARIATION SELECTOR-158 | ||||
281 | 'VS159' => 0xE018E, # VARIATION SELECTOR-159 | ||||
282 | 'VS160' => 0xE018F, # VARIATION SELECTOR-160 | ||||
283 | 'VS161' => 0xE0190, # VARIATION SELECTOR-161 | ||||
284 | 'VS162' => 0xE0191, # VARIATION SELECTOR-162 | ||||
285 | 'VS163' => 0xE0192, # VARIATION SELECTOR-163 | ||||
286 | 'VS164' => 0xE0193, # VARIATION SELECTOR-164 | ||||
287 | 'VS165' => 0xE0194, # VARIATION SELECTOR-165 | ||||
288 | 'VS166' => 0xE0195, # VARIATION SELECTOR-166 | ||||
289 | 'VS167' => 0xE0196, # VARIATION SELECTOR-167 | ||||
290 | 'VS168' => 0xE0197, # VARIATION SELECTOR-168 | ||||
291 | 'VS169' => 0xE0198, # VARIATION SELECTOR-169 | ||||
292 | 'VS170' => 0xE0199, # VARIATION SELECTOR-170 | ||||
293 | 'VS171' => 0xE019A, # VARIATION SELECTOR-171 | ||||
294 | 'VS172' => 0xE019B, # VARIATION SELECTOR-172 | ||||
295 | 'VS173' => 0xE019C, # VARIATION SELECTOR-173 | ||||
296 | 'VS174' => 0xE019D, # VARIATION SELECTOR-174 | ||||
297 | 'VS175' => 0xE019E, # VARIATION SELECTOR-175 | ||||
298 | 'VS176' => 0xE019F, # VARIATION SELECTOR-176 | ||||
299 | 'VS177' => 0xE01A0, # VARIATION SELECTOR-177 | ||||
300 | 'VS178' => 0xE01A1, # VARIATION SELECTOR-178 | ||||
301 | 'VS179' => 0xE01A2, # VARIATION SELECTOR-179 | ||||
302 | 'VS180' => 0xE01A3, # VARIATION SELECTOR-180 | ||||
303 | 'VS181' => 0xE01A4, # VARIATION SELECTOR-181 | ||||
304 | 'VS182' => 0xE01A5, # VARIATION SELECTOR-182 | ||||
305 | 'VS183' => 0xE01A6, # VARIATION SELECTOR-183 | ||||
306 | 'VS184' => 0xE01A7, # VARIATION SELECTOR-184 | ||||
307 | 'VS185' => 0xE01A8, # VARIATION SELECTOR-185 | ||||
308 | 'VS186' => 0xE01A9, # VARIATION SELECTOR-186 | ||||
309 | 'VS187' => 0xE01AA, # VARIATION SELECTOR-187 | ||||
310 | 'VS188' => 0xE01AB, # VARIATION SELECTOR-188 | ||||
311 | 'VS189' => 0xE01AC, # VARIATION SELECTOR-189 | ||||
312 | 'VS190' => 0xE01AD, # VARIATION SELECTOR-190 | ||||
313 | 'VS191' => 0xE01AE, # VARIATION SELECTOR-191 | ||||
314 | 'VS192' => 0xE01AF, # VARIATION SELECTOR-192 | ||||
315 | 'VS193' => 0xE01B0, # VARIATION SELECTOR-193 | ||||
316 | 'VS194' => 0xE01B1, # VARIATION SELECTOR-194 | ||||
317 | 'VS195' => 0xE01B2, # VARIATION SELECTOR-195 | ||||
318 | 'VS196' => 0xE01B3, # VARIATION SELECTOR-196 | ||||
319 | 'VS197' => 0xE01B4, # VARIATION SELECTOR-197 | ||||
320 | 'VS198' => 0xE01B5, # VARIATION SELECTOR-198 | ||||
321 | 'VS199' => 0xE01B6, # VARIATION SELECTOR-199 | ||||
322 | 'VS200' => 0xE01B7, # VARIATION SELECTOR-200 | ||||
323 | 'VS201' => 0xE01B8, # VARIATION SELECTOR-201 | ||||
324 | 'VS202' => 0xE01B9, # VARIATION SELECTOR-202 | ||||
325 | 'VS203' => 0xE01BA, # VARIATION SELECTOR-203 | ||||
326 | 'VS204' => 0xE01BB, # VARIATION SELECTOR-204 | ||||
327 | 'VS205' => 0xE01BC, # VARIATION SELECTOR-205 | ||||
328 | 'VS206' => 0xE01BD, # VARIATION SELECTOR-206 | ||||
329 | 'VS207' => 0xE01BE, # VARIATION SELECTOR-207 | ||||
330 | 'VS208' => 0xE01BF, # VARIATION SELECTOR-208 | ||||
331 | 'VS209' => 0xE01C0, # VARIATION SELECTOR-209 | ||||
332 | 'VS210' => 0xE01C1, # VARIATION SELECTOR-210 | ||||
333 | 'VS211' => 0xE01C2, # VARIATION SELECTOR-211 | ||||
334 | 'VS212' => 0xE01C3, # VARIATION SELECTOR-212 | ||||
335 | 'VS213' => 0xE01C4, # VARIATION SELECTOR-213 | ||||
336 | 'VS214' => 0xE01C5, # VARIATION SELECTOR-214 | ||||
337 | 'VS215' => 0xE01C6, # VARIATION SELECTOR-215 | ||||
338 | 'VS216' => 0xE01C7, # VARIATION SELECTOR-216 | ||||
339 | 'VS217' => 0xE01C8, # VARIATION SELECTOR-217 | ||||
340 | 'VS218' => 0xE01C9, # VARIATION SELECTOR-218 | ||||
341 | 'VS219' => 0xE01CA, # VARIATION SELECTOR-219 | ||||
342 | 'VS220' => 0xE01CB, # VARIATION SELECTOR-220 | ||||
343 | 'VS221' => 0xE01CC, # VARIATION SELECTOR-221 | ||||
344 | 'VS222' => 0xE01CD, # VARIATION SELECTOR-222 | ||||
345 | 'VS223' => 0xE01CE, # VARIATION SELECTOR-223 | ||||
346 | 'VS224' => 0xE01CF, # VARIATION SELECTOR-224 | ||||
347 | 'VS225' => 0xE01D0, # VARIATION SELECTOR-225 | ||||
348 | 'VS226' => 0xE01D1, # VARIATION SELECTOR-226 | ||||
349 | 'VS227' => 0xE01D2, # VARIATION SELECTOR-227 | ||||
350 | 'VS228' => 0xE01D3, # VARIATION SELECTOR-228 | ||||
351 | 'VS229' => 0xE01D4, # VARIATION SELECTOR-229 | ||||
352 | 'VS230' => 0xE01D5, # VARIATION SELECTOR-230 | ||||
353 | 'VS231' => 0xE01D6, # VARIATION SELECTOR-231 | ||||
354 | 'VS232' => 0xE01D7, # VARIATION SELECTOR-232 | ||||
355 | 'VS233' => 0xE01D8, # VARIATION SELECTOR-233 | ||||
356 | 'VS234' => 0xE01D9, # VARIATION SELECTOR-234 | ||||
357 | 'VS235' => 0xE01DA, # VARIATION SELECTOR-235 | ||||
358 | 'VS236' => 0xE01DB, # VARIATION SELECTOR-236 | ||||
359 | 'VS237' => 0xE01DC, # VARIATION SELECTOR-237 | ||||
360 | 'VS238' => 0xE01DD, # VARIATION SELECTOR-238 | ||||
361 | 'VS239' => 0xE01DE, # VARIATION SELECTOR-239 | ||||
362 | 'VS240' => 0xE01DF, # VARIATION SELECTOR-240 | ||||
363 | 'VS241' => 0xE01E0, # VARIATION SELECTOR-241 | ||||
364 | 'VS242' => 0xE01E1, # VARIATION SELECTOR-242 | ||||
365 | 'VS243' => 0xE01E2, # VARIATION SELECTOR-243 | ||||
366 | 'VS244' => 0xE01E3, # VARIATION SELECTOR-244 | ||||
367 | 'VS245' => 0xE01E4, # VARIATION SELECTOR-245 | ||||
368 | 'VS246' => 0xE01E5, # VARIATION SELECTOR-246 | ||||
369 | 'VS247' => 0xE01E6, # VARIATION SELECTOR-247 | ||||
370 | 'VS248' => 0xE01E7, # VARIATION SELECTOR-248 | ||||
371 | 'VS249' => 0xE01E8, # VARIATION SELECTOR-249 | ||||
372 | 'VS250' => 0xE01E9, # VARIATION SELECTOR-250 | ||||
373 | 'VS251' => 0xE01EA, # VARIATION SELECTOR-251 | ||||
374 | 'VS252' => 0xE01EB, # VARIATION SELECTOR-252 | ||||
375 | 'VS253' => 0xE01EC, # VARIATION SELECTOR-253 | ||||
376 | 'VS254' => 0xE01ED, # VARIATION SELECTOR-254 | ||||
377 | 'VS255' => 0xE01EE, # VARIATION SELECTOR-255 | ||||
378 | 'VS256' => 0xE01EF, # VARIATION SELECTOR-256 | ||||
379 | 'WJ' => 0x2060, # WORD JOINER | ||||
380 | 'ZWJ' => 0x200D, # ZERO WIDTH JOINER | ||||
381 | 'ZWNJ' => 0x200C, # ZERO WIDTH NON-JOINER | ||||
382 | 'ZWSP' => 0x200B, # ZERO WIDTH SPACE | ||||
383 | ); | ||||
384 | |||||
385 | 1 | 5µs | my %deprecated_aliases = ( | ||
386 | # Pre-3.2 compatibility (only for the first 256 characters). | ||||
387 | # Use of these gives deprecated message. | ||||
388 | 'HORIZONTAL TABULATION' => 0x09, # CHARACTER TABULATION | ||||
389 | 'VERTICAL TABULATION' => 0x0B, # LINE TABULATION | ||||
390 | 'FILE SEPARATOR' => 0x1C, # INFORMATION SEPARATOR FOUR | ||||
391 | 'GROUP SEPARATOR' => 0x1D, # INFORMATION SEPARATOR THREE | ||||
392 | 'RECORD SEPARATOR' => 0x1E, # INFORMATION SEPARATOR TWO | ||||
393 | 'UNIT SEPARATOR' => 0x1F, # INFORMATION SEPARATOR ONE | ||||
394 | 'HORIZONTAL TABULATION SET' => 0x88, # CHARACTER TABULATION SET | ||||
395 | 'HORIZONTAL TABULATION WITH JUSTIFICATION' => 0x89, # CHARACTER TABULATION WITH JUSTIFICATION | ||||
396 | 'PARTIAL LINE DOWN' => 0x8B, # PARTIAL LINE FORWARD | ||||
397 | 'PARTIAL LINE UP' => 0x8C, # PARTIAL LINE BACKWARD | ||||
398 | 'VERTICAL TABULATION SET' => 0x8A, # LINE TABULATION SET | ||||
399 | 'REVERSE INDEX' => 0x8D, # REVERSE LINE FEED | ||||
400 | ); | ||||
401 | |||||
402 | |||||
403 | 1 | 1µs | my $txt; # The table of official character names | ||
404 | |||||
405 | 1 | 600ns | my %full_names_cache; # Holds already-looked-up names, so don't have to | ||
406 | # re-look them up again. The previous versions of charnames had scoping | ||||
407 | # bugs. For example if we use script A in one scope and find and cache | ||||
408 | # what Z resolves to, we can't use that cache in a different scope that | ||||
409 | # uses script B instead of A, as Z might be an entirely different letter | ||||
410 | # there; or there might be different aliases in effect in different | ||||
411 | # scopes, or :short may be in effect or not in effect in different scopes, | ||||
412 | # or various combinations thereof. This was solved in this version | ||||
413 | # mostly by moving things to %^H. But some things couldn't be moved | ||||
414 | # there. One of them was the cache of runtime looked-up names, in part | ||||
415 | # because %^H is read-only at runtime. I (khw) don't know why the cache | ||||
416 | # was run-time only in the previous versions: perhaps oversight; perhaps | ||||
417 | # that compile time looking doesn't happen in a loop so didn't think it | ||||
418 | # was worthwhile; perhaps not wanting to make the cache too large. But | ||||
419 | # I decided to make it compile time as well; this could easily be | ||||
420 | # changed. | ||||
421 | # Anyway, this hash is not scoped, and is added to at runtime. It | ||||
422 | # doesn't have scoping problems because the data in it is restricted to | ||||
423 | # official names, which are always invariant, and we only set it and | ||||
424 | # look at it during :full lookups, so is unaffected by any other | ||||
425 | # scoped options. I put this in to maintain parity with the older | ||||
426 | # version. If desired, a %short_names cache could also be made, as well | ||||
427 | # as one for each script, say in %script_names_cache, with each key | ||||
428 | # being a hash for a script named in a 'use charnames' statement. I | ||||
429 | # decided not to do that for now, just because it's added complication, | ||||
430 | # and because I'm just trying to maintain parity, not extend it. | ||||
431 | |||||
432 | # Designed so that we test for decimal first, and then hex. Leading zeros | ||||
433 | # imply non-decimal, as do non-[0-9] | ||||
434 | 1 | 14µs | 1 | 6µs | my $decimal_qr = qr/^[1-9]\d*$/; # spent 6µs making 1 call to charnames::CORE:qr |
435 | |||||
436 | # Returns the hex number in $1. | ||||
437 | 1 | 6µs | 1 | 2µs | my $hex_qr = qr/^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/; # spent 2µs making 1 call to charnames::CORE:qr |
438 | |||||
439 | sub croak | ||||
440 | { | ||||
441 | require Carp; goto &Carp::croak; | ||||
442 | } # croak | ||||
443 | |||||
444 | sub carp | ||||
445 | { | ||||
446 | require Carp; goto &Carp::carp; | ||||
447 | } # carp | ||||
448 | |||||
449 | sub alias (@) # Set up a single alias | ||||
450 | { | ||||
451 | my $alias = ref $_[0] ? $_[0] : { @_ }; | ||||
452 | foreach my $name (keys %$alias) { | ||||
453 | my $value = $alias->{$name}; | ||||
454 | next unless defined $value; # Omit if screwed up. | ||||
455 | |||||
456 | # It is slightly slower to check, just after this statement, whether | ||||
457 | # the value is decimal, since we already know it is after having | ||||
458 | # converted from hex, but it makes the code easier to maintain, and this | ||||
459 | # is called infrequently, only at compile-time | ||||
460 | if ($value !~ $decimal_qr && $value =~ $hex_qr) { | ||||
461 | $value = CORE::hex $1; | ||||
462 | } | ||||
463 | if ($value =~ $decimal_qr) { | ||||
464 | $^H{charnames_ord_aliases}{$name} = $value; | ||||
465 | |||||
466 | # Use a canonical form. | ||||
467 | $^H{charnames_inverse_ords}{sprintf("%04X", $value)} = $name; | ||||
468 | } | ||||
469 | else { | ||||
470 | # XXX validate syntax when deprecation cycle complete. ie. start | ||||
471 | # with an alpha only, etc. | ||||
472 | $^H{charnames_name_aliases}{$name} = $value; | ||||
473 | } | ||||
474 | } | ||||
475 | } # alias | ||||
476 | |||||
477 | sub not_legal_use_bytes_msg { | ||||
478 | my ($name, $ord) = @_; | ||||
479 | return sprintf("Character 0x%04x with name '$name' is above 0xFF with 'use bytes' in effect", $ord); | ||||
480 | } | ||||
481 | |||||
482 | sub alias_file ($) # Reads a file containing alias definitions | ||||
483 | { | ||||
484 | my ($arg, $file) = @_; | ||||
485 | if (-f $arg && File::Spec->file_name_is_absolute ($arg)) { | ||||
486 | $file = $arg; | ||||
487 | } | ||||
488 | elsif ($arg =~ m/^\w+$/) { | ||||
489 | $file = "unicore/${arg}_alias.pl"; | ||||
490 | } | ||||
491 | else { | ||||
492 | croak "Charnames alias files can only have identifier characters"; | ||||
493 | } | ||||
494 | if (my @alias = do $file) { | ||||
495 | @alias == 1 && !defined $alias[0] and | ||||
496 | croak "$file cannot be used as alias file for charnames"; | ||||
497 | @alias % 2 and | ||||
498 | croak "$file did not return a (valid) list of alias pairs"; | ||||
499 | alias (@alias); | ||||
500 | return (1); | ||||
501 | } | ||||
502 | 0; | ||||
503 | } # alias_file | ||||
504 | |||||
505 | # For use when don't import anything. This structure must be kept in | ||||
506 | # sync with the one that import() fills up. | ||||
507 | 1 | 3µs | my %dummy_H = ( | ||
508 | charnames_stringified_names => "", | ||||
509 | charnames_stringified_ords => "", | ||||
510 | charnames_scripts => "", | ||||
511 | charnames_full => 1, | ||||
512 | charnames_short => 0, | ||||
513 | ); | ||||
514 | |||||
515 | |||||
516 | # spent 49µs within charnames::lookup_name which was called 4 times, avg 12µs/call:
# 4 times (49µs+0s) by charnames::charnames at line 688, avg 12µs/call | ||||
517 | |||||
518 | # Finds the ordinal of a character name, first in the aliases, then in | ||||
519 | # the large table. If not found, returns undef if runtime; if | ||||
520 | # compile, complains and returns the Unicode replacement character. | ||||
521 | |||||
522 | 36 | 62µs | my $runtime = (@_ > 1); # compile vs run time | ||
523 | |||||
524 | my ($name, $hints_ref) = @_; | ||||
525 | |||||
526 | my $ord; | ||||
527 | my $save_input; | ||||
528 | |||||
529 | if ($runtime) { | ||||
530 | |||||
531 | # If we didn't import anything (which happens with 'use charnames ()'), | ||||
532 | # substitute a dummy structure. | ||||
533 | $hints_ref = \%dummy_H if ! defined $hints_ref | ||||
534 | || ! defined $hints_ref->{charnames_full}; | ||||
535 | |||||
536 | # At runtime, but currently not at compile time, $^H gets | ||||
537 | # stringified, so un-stringify back to the original data structures. | ||||
538 | # These get thrown away by perl before the next invocation | ||||
539 | # Also fill in the hash with the non-stringified data. | ||||
540 | # N.B. New fields must be also added to %dummy_H | ||||
541 | |||||
542 | %{$^H{charnames_name_aliases}} = split ',', | ||||
543 | $hints_ref->{charnames_stringified_names}; | ||||
544 | %{$^H{charnames_ord_aliases}} = split ',', | ||||
545 | $hints_ref->{charnames_stringified_ords}; | ||||
546 | $^H{charnames_scripts} = $hints_ref->{charnames_scripts}; | ||||
547 | $^H{charnames_full} = $hints_ref->{charnames_full}; | ||||
548 | $^H{charnames_short} = $hints_ref->{charnames_short}; | ||||
549 | } | ||||
550 | |||||
551 | # User alias should be checked first or else can't override ours, and if we | ||||
552 | # add any, could conflict with theirs. | ||||
553 | if (exists $^H{charnames_ord_aliases}{$name}) { | ||||
554 | $ord = $^H{charnames_ord_aliases}{$name}; | ||||
555 | } | ||||
556 | elsif (exists $^H{charnames_name_aliases}{$name}) { | ||||
557 | $name = $^H{charnames_name_aliases}{$name}; | ||||
558 | $save_input = $name; # Cache the result for any error message | ||||
559 | } | ||||
560 | elsif (exists $system_aliases{$name}) { | ||||
561 | $ord = $system_aliases{$name}; | ||||
562 | } | ||||
563 | elsif (exists $deprecated_aliases{$name}) { | ||||
564 | require warnings; | ||||
565 | warnings::warnif('deprecated', "Unicode character name \"$name\" is deprecated, use \"" . viacode($deprecated_aliases{$name}) . "\" instead"); | ||||
566 | $ord = $deprecated_aliases{$name}; | ||||
567 | } | ||||
568 | |||||
569 | my @off; | ||||
570 | |||||
571 | if (! defined $ord) { | ||||
572 | |||||
573 | # See if has looked this up earlier. | ||||
574 | if ($^H{charnames_full} && exists $full_names_cache{$name}) { | ||||
575 | $ord = $full_names_cache{$name}; | ||||
576 | } | ||||
577 | else { | ||||
578 | |||||
579 | ## Suck in the code/name list as a big string. | ||||
580 | ## Lines look like: | ||||
581 | ## "0052\t\tLATIN CAPITAL LETTER R\n" | ||||
582 | $txt = do "unicore/Name.pl" unless $txt; | ||||
583 | |||||
584 | ## @off will hold the index into the code/name string of the start and | ||||
585 | ## end of the name as we find it. | ||||
586 | |||||
587 | ## If :full, look for the name exactly; runtime implies full | ||||
588 | my $found_full_in_table = 0; # Tells us if we can cache the result | ||||
589 | if ($^H{charnames_full}) { | ||||
590 | |||||
591 | # See if the name is one which is algorithmically determinable. | ||||
592 | # The subroutine is included in Name.pl. The table contained in | ||||
593 | # $txt doesn't contain these. Experiments show that checking | ||||
594 | # for these before checking for the regular names has no | ||||
595 | # noticeable impact on performance for the regular names, but | ||||
596 | # the other way around slows down finding these immensely. | ||||
597 | # Algorithmically determinables are not placed in the cache (that | ||||
598 | # $found_full_in_table indicates) because that uses up memory, | ||||
599 | # and finding these again is fast. | ||||
600 | if (! defined ($ord = name_to_code_point_special($name))) { | ||||
601 | |||||
602 | # Not algorithmically determinable; look up in the table. | ||||
603 | if ($txt =~ /\t\t\Q$name\E$/m) { | ||||
604 | @off = ($-[0] + 2, $+[0]); # The 2 is for the 2 tabs | ||||
605 | $found_full_in_table = 1; | ||||
606 | } | ||||
607 | } | ||||
608 | } | ||||
609 | |||||
610 | # If we didn't get it above, keep looking | ||||
611 | if (! $found_full_in_table && ! defined $ord) { | ||||
612 | |||||
613 | # If :short is allowed, see if input is like "greek:Sigma". | ||||
614 | my $scripts_trie; | ||||
615 | if (($^H{charnames_short}) | ||||
616 | && $name =~ /^ \s* (.+?) \s* : \s* (.+?) \s* $ /xs) | ||||
617 | { | ||||
618 | $scripts_trie = "\U\Q$1"; | ||||
619 | $name = $2; | ||||
620 | } | ||||
621 | else { | ||||
622 | $scripts_trie = $^H{charnames_scripts}; | ||||
623 | } | ||||
624 | |||||
625 | my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL"; | ||||
626 | if ($txt !~ | ||||
627 | /\t\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U\Q$name\E $/xm) | ||||
628 | { | ||||
629 | # Here we still don't have it, give up. | ||||
630 | return if $runtime; | ||||
631 | |||||
632 | # May have zapped input name, get it again. | ||||
633 | $name = (defined $save_input) ? $save_input : $_[0]; | ||||
634 | carp "Unknown charname '$name'"; | ||||
635 | return 0xFFFD; | ||||
636 | } | ||||
637 | |||||
638 | @off = ($-[0] + 2, $+[0]); | ||||
639 | } | ||||
640 | |||||
641 | if (! defined $ord) { | ||||
642 | ## | ||||
643 | ## Now know where in the string the name starts. | ||||
644 | ## The code, in hex, is before that. | ||||
645 | ## | ||||
646 | ## The code can be 4-6 characters long, so we've got to sort of | ||||
647 | ## go look for it, just after the newline that comes before $off[0]. | ||||
648 | ## | ||||
649 | ## This would be much easier if unicore/Name.pl had info in | ||||
650 | ## a name/code order, instead of code/name order. | ||||
651 | ## | ||||
652 | ## The +1 after the rindex() is to skip past the newline we're finding, | ||||
653 | ## or, if the rindex() fails, to put us to an offset of zero. | ||||
654 | ## | ||||
655 | my $hexstart = rindex($txt, "\n", $off[0]) + 1; | ||||
656 | |||||
657 | ## we know where it starts, so turn into number - | ||||
658 | ## the ordinal for the char. | ||||
659 | $ord = CORE::hex substr($txt, $hexstart, $off[0] - 2 - $hexstart); | ||||
660 | } | ||||
661 | |||||
662 | # Cache the input so as to not have to search the large table | ||||
663 | # again, but only if it came from the one search that we cache. | ||||
664 | $full_names_cache{$name} = $ord if $found_full_in_table; | ||||
665 | } | ||||
666 | } | ||||
667 | |||||
668 | return $ord if $runtime || $ord <= 255 || ! ($^H & $bytes::hint_bits); | ||||
669 | |||||
670 | # Here is compile time, "use bytes" is in effect, and the character | ||||
671 | # won't fit in a byte | ||||
672 | # Prefer any official name over the input one. | ||||
673 | if (@off) { | ||||
674 | $name = substr($txt, $off[0], $off[1] - $off[0]) if @off; | ||||
675 | } | ||||
676 | else { | ||||
677 | $name = (defined $save_input) ? $save_input : $_[0]; | ||||
678 | } | ||||
679 | croak not_legal_use_bytes_msg($name, $ord); | ||||
680 | } # lookup_name | ||||
681 | |||||
682 | # spent 126µs (65+61) within charnames::charnames which was called 4 times, avg 31µs/call:
# 2 times (33µs+27µs) by Hailo::Command::__ANON__[lib/Hailo/Command.pm:307] at line 17 of File/CountLines.pm, avg 30µs/call
# once (19µs+20µs) by Hailo::Command::__ANON__[lib/Hailo/Command.pm:307] at line 15 of File/CountLines.pm
# once (13µs+14µs) by Hailo::Command::__ANON__[lib/Hailo/Command.pm:307] at line 16 of File/CountLines.pm | ||||
683 | 20 | 69µs | my $name = shift; | ||
684 | |||||
685 | # For \N{...}. Looks up the character name and returns the corresponding | ||||
686 | # character if found, undef otherwise. If not in 'use bytes', forces into utf8 | ||||
687 | |||||
688 | 4 | 49µs | my $ord = lookup_name($name); # spent 49µs making 4 calls to charnames::lookup_name, avg 12µs/call | ||
689 | return if ! defined $ord; | ||||
690 | return chr $ord if $^H & $bytes::hint_bits; | ||||
691 | |||||
692 | 2 | 679µs | 2 | 68µs | # spent 41µs (14+27) within charnames::BEGIN@692 which was called:
# once (14µs+27µs) by File::CountLines::BEGIN@12 at line 692 # spent 41µs making 1 call to charnames::BEGIN@692
# spent 27µs making 1 call to warnings::unimport |
693 | 4 | 12µs | return pack "U", $ord; # spent 12µs making 4 calls to charnames::CORE:pack, avg 3µs/call | ||
694 | } | ||||
695 | |||||
696 | sub import | ||||
697 | # spent 259µs (54+204) within charnames::import which was called:
# once (54µs+204µs) by File::CountLines::BEGIN@12 at line 12 of File/CountLines.pm | ||||
698 | 21 | 55µs | shift; ## ignore class name | ||
699 | |||||
700 | if (not @_) { | ||||
701 | carp("`use charnames' needs explicit imports list"); | ||||
702 | } | ||||
703 | $^H{charnames} = \&charnames ; | ||||
704 | $^H{charnames_ord_aliases} = {}; | ||||
705 | $^H{charnames_name_aliases} = {}; | ||||
706 | $^H{charnames_inverse_ords} = {}; | ||||
707 | # New fields must be added to %dummy_H, and the code in lookup_name() | ||||
708 | # that copies fields from the runtime structure | ||||
709 | |||||
710 | ## | ||||
711 | ## fill %h keys with our @_ args. | ||||
712 | ## | ||||
713 | my ($promote, %h, @args) = (0); | ||||
714 | while (my $arg = shift) { | ||||
715 | if ($arg eq ":alias") { | ||||
716 | @_ or | ||||
717 | croak ":alias needs an argument in charnames"; | ||||
718 | my $alias = shift; | ||||
719 | if (ref $alias) { | ||||
720 | ref $alias eq "HASH" or | ||||
721 | croak "Only HASH reference supported as argument to :alias"; | ||||
722 | alias ($alias); | ||||
723 | next; | ||||
724 | } | ||||
725 | if ($alias =~ m{:(\w+)$}) { | ||||
726 | $1 eq "full" || $1 eq "short" and | ||||
727 | croak ":alias cannot use existing pragma :$1 (reversed order?)"; | ||||
728 | alias_file ($1) and $promote = 1; | ||||
729 | next; | ||||
730 | } | ||||
731 | alias_file ($alias); | ||||
732 | next; | ||||
733 | } | ||||
734 | if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) { | ||||
735 | warn "unsupported special '$arg' in charnames"; | ||||
736 | next; | ||||
737 | } | ||||
738 | push @args, $arg; | ||||
739 | } | ||||
740 | @args == 0 && $promote and @args = (":full"); | ||||
741 | @h{@args} = (1) x @args; | ||||
742 | |||||
743 | $^H{charnames_full} = delete $h{':full'} || 0; # Don't leave undefined, | ||||
744 | # as tested for in | ||||
745 | # lookup_name | ||||
746 | $^H{charnames_short} = delete $h{':short'} || 0; | ||||
747 | my @scripts = map uc, keys %h; | ||||
748 | |||||
749 | ## | ||||
750 | ## If utf8? warnings are enabled, and some scripts were given, | ||||
751 | ## see if at least we can find one letter from each script. | ||||
752 | ## | ||||
753 | 1 | 204µs | if (warnings::enabled('utf8') && @scripts) { # spent 204µs making 1 call to warnings::enabled | ||
754 | $txt = do "unicore/Name.pl" unless $txt; | ||||
755 | |||||
756 | for my $script (@scripts) { | ||||
757 | if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) { | ||||
758 | warnings::warn('utf8', "No such script: '$script'"); | ||||
759 | $script = quotemeta $script; # Escape it, for use in the re. | ||||
760 | } | ||||
761 | } | ||||
762 | } | ||||
763 | |||||
764 | # %^H gets stringified, so serialize it ourselves so we can extract the | ||||
765 | # real data back later. | ||||
766 | $^H{charnames_stringified_ords} = join ",", %{$^H{charnames_ord_aliases}}; | ||||
767 | $^H{charnames_stringified_names} = join ",", %{$^H{charnames_name_aliases}}; | ||||
768 | $^H{charnames_stringified_inverse_ords} = join ",", %{$^H{charnames_inverse_ords}}; | ||||
769 | $^H{charnames_scripts} = join "|", @scripts; # Stringify them as a trie | ||||
770 | } # import | ||||
771 | |||||
772 | # Cache of already looked-up values. This is set to only contain | ||||
773 | # official values, and user aliases can't override them, so scoping is | ||||
774 | # not an issue. | ||||
775 | 1 | 1µs | my %viacode; | ||
776 | |||||
777 | sub viacode { | ||||
778 | |||||
779 | # Returns the name of the code point argument | ||||
780 | |||||
781 | if (@_ != 1) { | ||||
782 | carp "charnames::viacode() expects one argument"; | ||||
783 | return; | ||||
784 | } | ||||
785 | |||||
786 | my $arg = shift; | ||||
787 | |||||
788 | # This is derived from Unicode::UCD, where it is nearly the same as the | ||||
789 | # function _getcode(), but here it makes sure that even a hex argument | ||||
790 | # has the proper number of leading zeros, which is critical in | ||||
791 | # matching against $txt below | ||||
792 | # Must check if decimal first; see comments at that definition | ||||
793 | my $hex; | ||||
794 | if ($arg =~ $decimal_qr) { | ||||
795 | $hex = sprintf "%04X", $arg; | ||||
796 | } elsif ($arg =~ $hex_qr) { | ||||
797 | # Below is the line that differs from the _getcode() source | ||||
798 | $hex = sprintf "%04X", hex $1; | ||||
799 | } else { | ||||
800 | carp("unexpected arg \"$arg\" to charnames::viacode()"); | ||||
801 | return; | ||||
802 | } | ||||
803 | |||||
804 | return $viacode{$hex} if exists $viacode{$hex}; | ||||
805 | |||||
806 | # If the code point is above the max in the table, there's no point | ||||
807 | # looking through it. Checking the length first is slightly faster | ||||
808 | if (length($hex) <= 5 || CORE::hex($hex) <= 0x10FFFF) { | ||||
809 | $txt = do "unicore/Name.pl" unless $txt; | ||||
810 | |||||
811 | # See if the name is algorithmically determinable. | ||||
812 | my $algorithmic = code_point_to_name_special(CORE::hex $hex); | ||||
813 | if (defined $algorithmic) { | ||||
814 | $viacode{$hex} = $algorithmic; | ||||
815 | return $algorithmic; | ||||
816 | } | ||||
817 | |||||
818 | # Return the official name, if exists. It's unclear to me (khw) at | ||||
819 | # this juncture if it is better to return a user-defined override, so | ||||
820 | # leaving it as is for now. | ||||
821 | if ($txt =~ m/^$hex\t\t/m) { | ||||
822 | |||||
823 | # The name starts with the next character and goes up to the | ||||
824 | # next new-line. Using capturing parentheses above instead of | ||||
825 | # @+ more than doubles the execution time in Perl 5.13 | ||||
826 | $viacode{$hex} = substr($txt, $+[0], index($txt, "\n", $+[0]) - $+[0]); | ||||
827 | return $viacode{$hex}; | ||||
828 | } | ||||
829 | } | ||||
830 | |||||
831 | # See if there is a user name for it, before giving up completely. | ||||
832 | # First get the scoped aliases, give up if have none. | ||||
833 | my $H_ref = (caller(0))[10]; | ||||
834 | return if ! defined $H_ref | ||||
835 | || ! exists $H_ref->{charnames_stringified_inverse_ords}; | ||||
836 | |||||
837 | my %code_point_aliases = split ',', | ||||
838 | $H_ref->{charnames_stringified_inverse_ords}; | ||||
839 | if (! exists $code_point_aliases{$hex}) { | ||||
840 | if (CORE::hex($hex) > 0x10FFFF) { | ||||
841 | carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)"; | ||||
842 | } | ||||
843 | return; | ||||
844 | } | ||||
845 | |||||
846 | return $code_point_aliases{$hex}; | ||||
847 | } # viacode | ||||
848 | |||||
849 | sub vianame | ||||
850 | { | ||||
851 | if (@_ != 1) { | ||||
852 | carp "charnames::vianame() expects one name argument"; | ||||
853 | return () | ||||
854 | } | ||||
855 | |||||
856 | # Looks up the character name and returns its ordinal if | ||||
857 | # found, undef otherwise. | ||||
858 | |||||
859 | my $arg = shift; | ||||
860 | |||||
861 | if ($arg =~ /^U\+([0-9a-fA-F]+)$/) { | ||||
862 | |||||
863 | # khw claims that this is bad. The function should return either | ||||
864 | # an ord or a chr for all inputs; not be bipolar. | ||||
865 | my $ord = CORE::hex $1; | ||||
866 | return chr $ord if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits); | ||||
867 | carp not_legal_use_bytes_msg($arg, $ord); | ||||
868 | return; | ||||
869 | } | ||||
870 | |||||
871 | return lookup_name($arg, (caller(0))[10]); | ||||
872 | } # vianame | ||||
873 | |||||
874 | |||||
875 | 1 | 45µs | 1; | ||
876 | __END__ | ||||
# spent 12µs within charnames::CORE:pack which was called 4 times, avg 3µs/call:
# 4 times (12µs+0s) by charnames::charnames at line 693, avg 3µs/call | |||||
sub charnames::CORE:qr; # opcode |