File | /usr/local/lib/perl/5.10.0/Encode/Encoding.pm |
Statements Executed | 12 |
Total Time | 0.0005711 seconds |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
0 | 0 | 0 | 0s | 0s | BEGIN | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | DEBUG | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | DESTROY | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | Define | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | decode | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | encode | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | fromUnicode | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | mime_name | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | name | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | needs_lines | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | perlio_ok | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | renew | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | renewed | Encode::Encoding::
0 | 0 | 0 | 0s | 0s | toUnicode | Encode::Encoding::
Line | Stmts. | Exclusive Time | Avg. | Code |
---|---|---|---|---|
1 | package Encode::Encoding; | |||
2 | ||||
3 | # Base class for classes which implement encodings | |||
4 | 3 | 25µs | 8µs | use strict; # spent 8µs making 1 call to strict::import |
5 | 3 | 519µs | 173µs | use warnings; # spent 32µs making 1 call to warnings::import |
6 | 3 | 18µs | 6µs | our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; |
7 | ||||
8 | 1 | 600ns | 600ns | require Encode; |
9 | ||||
10 | sub DEBUG { 0 } | |||
11 | ||||
12 | sub Define { | |||
13 | my $obj = shift; | |||
14 | my $canonical = shift; | |||
15 | $obj = bless { Name => $canonical }, $obj unless ref $obj; | |||
16 | ||||
17 | # warn "$canonical => $obj\n"; | |||
18 | Encode::define_encoding( $obj, $canonical, @_ ); | |||
19 | } | |||
20 | ||||
21 | sub name { return shift->{'Name'} } | |||
22 | ||||
23 | sub mime_name{ | |||
24 | require Encode::MIME::Name; | |||
25 | return Encode::MIME::Name::get_mime_name(shift->name); | |||
26 | } | |||
27 | ||||
28 | # sub renew { return $_[0] } | |||
29 | ||||
30 | sub renew { | |||
31 | my $self = shift; | |||
32 | my $clone = bless {%$self} => ref($self); | |||
33 | $clone->{renewed}++; # so the caller can see it | |||
34 | DEBUG and warn $clone->{renewed}; | |||
35 | return $clone; | |||
36 | } | |||
37 | ||||
38 | sub renewed { return $_[0]->{renewed} || 0 } | |||
39 | ||||
40 | 1 | 1µs | 1µs | *new_sequence = \&renew; |
41 | ||||
42 | sub needs_lines { 0 } | |||
43 | ||||
44 | sub perlio_ok { | |||
45 | eval { require PerlIO::encoding }; | |||
46 | return $@ ? 0 : 1; | |||
47 | } | |||
48 | ||||
49 | # (Temporary|legacy) methods | |||
50 | ||||
51 | sub toUnicode { shift->decode(@_) } | |||
52 | sub fromUnicode { shift->encode(@_) } | |||
53 | ||||
54 | # | |||
55 | # Needs to be overloaded or just croak | |||
56 | # | |||
57 | ||||
58 | sub encode { | |||
59 | require Carp; | |||
60 | my $obj = shift; | |||
61 | my $class = ref($obj) ? ref($obj) : $obj; | |||
62 | Carp::croak( $class . "->encode() not defined!" ); | |||
63 | } | |||
64 | ||||
65 | sub decode { | |||
66 | require Carp; | |||
67 | my $obj = shift; | |||
68 | my $class = ref($obj) ? ref($obj) : $obj; | |||
69 | Carp::croak( $class . "->encode() not defined!" ); | |||
70 | } | |||
71 | ||||
72 | sub DESTROY { } | |||
73 | ||||
74 | 1 | 7µs | 7µs | 1; |
75 | __END__ | |||
76 | ||||
77 | =head1 NAME | |||
78 | ||||
79 | Encode::Encoding - Encode Implementation Base Class | |||
80 | ||||
81 | =head1 SYNOPSIS | |||
82 | ||||
83 | package Encode::MyEncoding; | |||
84 | use base qw(Encode::Encoding); | |||
85 | ||||
86 | __PACKAGE__->Define(qw(myCanonical myAlias)); | |||
87 | ||||
88 | =head1 DESCRIPTION | |||
89 | ||||
90 | As mentioned in L<Encode>, encodings are (in the current | |||
91 | implementation at least) defined as objects. The mapping of encoding | |||
92 | name to object is via the C<%Encode::Encoding> hash. Though you can | |||
93 | directly manipulate this hash, it is strongly encouraged to use this | |||
94 | base class module and add encode() and decode() methods. | |||
95 | ||||
96 | =head2 Methods you should implement | |||
97 | ||||
98 | You are strongly encouraged to implement methods below, at least | |||
99 | either encode() or decode(). | |||
100 | ||||
101 | =over 4 | |||
102 | ||||
103 | =item -E<gt>encode($string [,$check]) | |||
104 | ||||
105 | MUST return the octet sequence representing I<$string>. | |||
106 | ||||
107 | =over 2 | |||
108 | ||||
109 | =item * | |||
110 | ||||
111 | If I<$check> is true, it SHOULD modify I<$string> in place to remove | |||
112 | the converted part (i.e. the whole string unless there is an error). | |||
113 | If perlio_ok() is true, SHOULD becomes MUST. | |||
114 | ||||
115 | =item * | |||
116 | ||||
117 | If an error occurs, it SHOULD return the octet sequence for the | |||
118 | fragment of string that has been converted and modify $string in-place | |||
119 | to remove the converted part leaving it starting with the problem | |||
120 | fragment. If perlio_ok() is true, SHOULD becomes MUST. | |||
121 | ||||
122 | =item * | |||
123 | ||||
124 | If I<$check> is is false then C<encode> MUST make a "best effort" to | |||
125 | convert the string - for example, by using a replacement character. | |||
126 | ||||
127 | =back | |||
128 | ||||
129 | =item -E<gt>decode($octets [,$check]) | |||
130 | ||||
131 | MUST return the string that I<$octets> represents. | |||
132 | ||||
133 | =over 2 | |||
134 | ||||
135 | =item * | |||
136 | ||||
137 | If I<$check> is true, it SHOULD modify I<$octets> in place to remove | |||
138 | the converted part (i.e. the whole sequence unless there is an | |||
139 | error). If perlio_ok() is true, SHOULD becomes MUST. | |||
140 | ||||
141 | =item * | |||
142 | ||||
143 | If an error occurs, it SHOULD return the fragment of string that has | |||
144 | been converted and modify $octets in-place to remove the converted | |||
145 | part leaving it starting with the problem fragment. If perlio_ok() is | |||
146 | true, SHOULD becomes MUST. | |||
147 | ||||
148 | =item * | |||
149 | ||||
150 | If I<$check> is false then C<decode> should make a "best effort" to | |||
151 | convert the string - for example by using Unicode's "\x{FFFD}" as a | |||
152 | replacement character. | |||
153 | ||||
154 | =back | |||
155 | ||||
156 | =back | |||
157 | ||||
158 | If you want your encoding to work with L<encoding> pragma, you should | |||
159 | also implement the method below. | |||
160 | ||||
161 | =over 4 | |||
162 | ||||
163 | =item -E<gt>cat_decode($destination, $octets, $offset, $terminator [,$check]) | |||
164 | ||||
165 | MUST decode I<$octets> with I<$offset> and concatenate it to I<$destination>. | |||
166 | Decoding will terminate when $terminator (a string) appears in output. | |||
167 | I<$offset> will be modified to the last $octets position at end of decode. | |||
168 | Returns true if $terminator appears output, else returns false. | |||
169 | ||||
170 | =back | |||
171 | ||||
172 | =head2 Other methods defined in Encode::Encodings | |||
173 | ||||
174 | You do not have to override methods shown below unless you have to. | |||
175 | ||||
176 | =over 4 | |||
177 | ||||
178 | =item -E<gt>name | |||
179 | ||||
180 | Predefined As: | |||
181 | ||||
182 | sub name { return shift->{'Name'} } | |||
183 | ||||
184 | MUST return the string representing the canonical name of the encoding. | |||
185 | ||||
186 | =item -E<gt>mime_name | |||
187 | ||||
188 | Predefined As: | |||
189 | ||||
190 | sub mime_name{ | |||
191 | require Encode::MIME::Name; | |||
192 | return Encode::MIME::Name::get_mime_name(shift->name); | |||
193 | } | |||
194 | ||||
195 | MUST return the string representing the IANA charset name of the encoding. | |||
196 | ||||
197 | =item -E<gt>renew | |||
198 | ||||
199 | Predefined As: | |||
200 | ||||
201 | sub renew { | |||
202 | my $self = shift; | |||
203 | my $clone = bless { %$self } => ref($self); | |||
204 | $clone->{renewed}++; | |||
205 | return $clone; | |||
206 | } | |||
207 | ||||
208 | This method reconstructs the encoding object if necessary. If you need | |||
209 | to store the state during encoding, this is where you clone your object. | |||
210 | ||||
211 | PerlIO ALWAYS calls this method to make sure it has its own private | |||
212 | encoding object. | |||
213 | ||||
214 | =item -E<gt>renewed | |||
215 | ||||
216 | Predefined As: | |||
217 | ||||
218 | sub renewed { $_[0]->{renewed} || 0 } | |||
219 | ||||
220 | Tells whether the object is renewed (and how many times). Some | |||
221 | modules emit C<Use of uninitialized value in null operation> warning | |||
222 | unless the value is numeric so return 0 for false. | |||
223 | ||||
224 | =item -E<gt>perlio_ok() | |||
225 | ||||
226 | Predefined As: | |||
227 | ||||
228 | sub perlio_ok { | |||
229 | eval{ require PerlIO::encoding }; | |||
230 | return $@ ? 0 : 1; | |||
231 | } | |||
232 | ||||
233 | If your encoding does not support PerlIO for some reasons, just; | |||
234 | ||||
235 | sub perlio_ok { 0 } | |||
236 | ||||
237 | =item -E<gt>needs_lines() | |||
238 | ||||
239 | Predefined As: | |||
240 | ||||
241 | sub needs_lines { 0 }; | |||
242 | ||||
243 | If your encoding can work with PerlIO but needs line buffering, you | |||
244 | MUST define this method so it returns true. 7bit ISO-2022 encodings | |||
245 | are one example that needs this. When this method is missing, false | |||
246 | is assumed. | |||
247 | ||||
248 | =back | |||
249 | ||||
250 | =head2 Example: Encode::ROT13 | |||
251 | ||||
252 | package Encode::ROT13; | |||
253 | use strict; | |||
254 | use base qw(Encode::Encoding); | |||
255 | ||||
256 | __PACKAGE__->Define('rot13'); | |||
257 | ||||
258 | sub encode($$;$){ | |||
259 | my ($obj, $str, $chk) = @_; | |||
260 | $str =~ tr/A-Za-z/N-ZA-Mn-za-m/; | |||
261 | $_[1] = '' if $chk; # this is what in-place edit means | |||
262 | return $str; | |||
263 | } | |||
264 | ||||
265 | # Jr pna or ynml yvxr guvf; | |||
266 | *decode = \&encode; | |||
267 | ||||
268 | 1; | |||
269 | ||||
270 | =head1 Why the heck Encode API is different? | |||
271 | ||||
272 | It should be noted that the I<$check> behaviour is different from the | |||
273 | outer public API. The logic is that the "unchecked" case is useful | |||
274 | when the encoding is part of a stream which may be reporting errors | |||
275 | (e.g. STDERR). In such cases, it is desirable to get everything | |||
276 | through somehow without causing additional errors which obscure the | |||
277 | original one. Also, the encoding is best placed to know what the | |||
278 | correct replacement character is, so if that is the desired behaviour | |||
279 | then letting low level code do it is the most efficient. | |||
280 | ||||
281 | By contrast, if I<$check> is true, the scheme above allows the | |||
282 | encoding to do as much as it can and tell the layer above how much | |||
283 | that was. What is lacking at present is a mechanism to report what | |||
284 | went wrong. The most likely interface will be an additional method | |||
285 | call to the object, or perhaps (to avoid forcing per-stream objects | |||
286 | on otherwise stateless encodings) an additional parameter. | |||
287 | ||||
288 | It is also highly desirable that encoding classes inherit from | |||
289 | C<Encode::Encoding> as a base class. This allows that class to define | |||
290 | additional behaviour for all encoding objects. | |||
291 | ||||
292 | package Encode::MyEncoding; | |||
293 | use base qw(Encode::Encoding); | |||
294 | ||||
295 | __PACKAGE__->Define(qw(myCanonical myAlias)); | |||
296 | ||||
297 | to create an object with C<< bless {Name => ...}, $class >>, and call | |||
298 | define_encoding. They inherit their C<name> method from | |||
299 | C<Encode::Encoding>. | |||
300 | ||||
301 | =head2 Compiled Encodings | |||
302 | ||||
303 | For the sake of speed and efficiency, most of the encodings are now | |||
304 | supported via a I<compiled form>: XS modules generated from UCM | |||
305 | files. Encode provides the enc2xs tool to achieve that. Please see | |||
306 | L<enc2xs> for more details. | |||
307 | ||||
308 | =head1 SEE ALSO | |||
309 | ||||
310 | L<perlmod>, L<enc2xs> | |||
311 | ||||
312 | =begin future | |||
313 | ||||
314 | =over 4 | |||
315 | ||||
316 | =item Scheme 1 | |||
317 | ||||
318 | The fixup routine gets passed the remaining fragment of string being | |||
319 | processed. It modifies it in place to remove bytes/characters it can | |||
320 | understand and returns a string used to represent them. For example: | |||
321 | ||||
322 | sub fixup { | |||
323 | my $ch = substr($_[0],0,1,''); | |||
324 | return sprintf("\x{%02X}",ord($ch); | |||
325 | } | |||
326 | ||||
327 | This scheme is close to how the underlying C code for Encode works, | |||
328 | but gives the fixup routine very little context. | |||
329 | ||||
330 | =item Scheme 2 | |||
331 | ||||
332 | The fixup routine gets passed the original string, an index into | |||
333 | it of the problem area, and the output string so far. It appends | |||
334 | what it wants to the output string and returns a new index into the | |||
335 | original string. For example: | |||
336 | ||||
337 | sub fixup { | |||
338 | # my ($s,$i,$d) = @_; | |||
339 | my $ch = substr($_[0],$_[1],1); | |||
340 | $_[2] .= sprintf("\x{%02X}",ord($ch); | |||
341 | return $_[1]+1; | |||
342 | } | |||
343 | ||||
344 | This scheme gives maximal control to the fixup routine but is more | |||
345 | complicated to code, and may require that the internals of Encode be tweaked to | |||
346 | keep the original string intact. | |||
347 | ||||
348 | =item Other Schemes | |||
349 | ||||
350 | Hybrids of the above. | |||
351 | ||||
352 | Multiple return values rather than in-place modifications. | |||
353 | ||||
354 | Index into the string could be C<pos($str)> allowing C<s/\G...//>. | |||
355 | ||||
356 | =back | |||
357 | ||||
358 | =end future | |||
359 | ||||
360 | =cut |