← Index
Performance Profile   « block view • line view • sub view »
For t/test-parsing
  Run on Sun Nov 14 09:49:57 2010
Reported on Sun Nov 14 09:50:09 2010

File /usr/share/perl5/MARC/Charset/Code.pm
Statements Executed 20
Total Time 0.0009115 seconds
Subroutines — ordered by exclusive time
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
0000s0sMARC::Charset::Code::::BEGINMARC::Charset::Code::BEGIN
0000s0sMARC::Charset::Code::::char_valueMARC::Charset::Code::char_value
0000s0sMARC::Charset::Code::::charset_nameMARC::Charset::Code::charset_name
0000s0sMARC::Charset::Code::::charset_valueMARC::Charset::Code::charset_value
0000s0sMARC::Charset::Code::::default_charset_groupMARC::Charset::Code::default_charset_group
0000s0sMARC::Charset::Code::::get_escapeMARC::Charset::Code::get_escape
0000s0sMARC::Charset::Code::::marc8_hash_codeMARC::Charset::Code::marc8_hash_code
0000s0sMARC::Charset::Code::::marc_valueMARC::Charset::Code::marc_value
0000s0sMARC::Charset::Code::::to_stringMARC::Charset::Code::to_string
0000s0sMARC::Charset::Code::::utf8_hash_codeMARC::Charset::Code::utf8_hash_code
LineStmts.Exclusive
Time
Avg.Code
1package MARC::Charset::Code;
2
3327µs9µsuse strict;
# spent 10µs making 1 call to strict::import
4332µs10µsuse warnings;
# spent 28µs making 1 call to warnings::import
5342µs14µsuse base qw(Class::Accessor);
# spent 1.73ms making 1 call to base::import
6335µs12µsuse Carp qw(croak);
# spent 44µs making 1 call to Exporter::import
7333µs11µsuse Encode qw(encode_utf8);
# spent 44µs making 1 call to Exporter::import
83720µs240µsuse MARC::Charset::Constants qw(:all);
# spent 278µs making 1 call to Exporter::import
9
10114µs14µsMARC::Charset::Code
# spent 288µs making 1 call to Class::Accessor::mk_accessors
11 ->mk_accessors(qw(marc ucs name charset is_combining alt));
12
13=head1 NAME
14
15MARC::Charset::Code - represents a MARC-8/UTF-8 mapping
16
17=head1 SYNOPSIS
18
19=head1 DESCRIPTION
20
21Each mapping from a MARC-8 value to a UTF-8 value is represented by
22a MARC::Charset::Code object in a MARC::Charset::Table.
23
24=head1 METHODS
25
26=head2 new()
27
28The constructor.
29
30=head2 name()
31
32A descriptive name for the code point.
33
34=head2 marc()
35
36A string representing the MARC-8 bytes codes.
37
38=head2 ucs()
39
40A string representing the UCS code point in hex.
41
42=head2 charset_code()
43
44The MARC-8 character set code.
45
46=head2 is_combining()
47
48Returns true/false to tell if the character is a combining character.
49
50=head2 to_string()
51
52A stringified version of the object suitable for pretty printing.
53
54=head2 char_value()
55
56Returns the unicode character. Essentially just a helper around
57ucs().
58
59=cut
60
61sub char_value()
62{
63 return chr(hex(shift->ucs()));
64}
65
66=head2 marc_value()
67
68The string representing the MARC-8 encoding.
69
70=cut
71
72sub marc_value
73{
74 my $code = shift;
75 my $marc = $code->marc();
76 return chr(hex($marc)) unless $code->charset_name eq 'CJK';
77 return
78 chr(hex(substr($marc,0,2))) .
79 chr(hex(substr($marc,2,2))) .
80 chr(hex(substr($marc,4,2)));
81}
82
83
84=head2 charset_name()
85
86Returns the name of the character set, instead of the code.
87
88=cut
89
90sub charset_name()
91{
92 return MARC::Charset::Constants::charset_name(shift->charset_value());
93}
94
95=head2 to_string()
96
97Returns a stringified version of the object.
98
99=cut
100
101sub to_string
102{
103 my $self = shift;
104 my $str =
105 $self->name() . ': ' .
106 'charset_code=' . $self->charset() . ' ' .
107 'marc=' . $self->marc() . ' ' .
108 'ucs=' . $self->ucs() . ' ';
109
110 $str .= ' combining' if $self->is_combining();
111 return $str;
112}
113
114
115=head2 marc8_hash_code()
116
117Returns a hash code for this Code object for looking up the object using
118MARC8. First portion is the character set code and the second is the
119MARC-8 value.
120
121=cut
122
123sub marc8_hash_code
124{
125 my $self = shift;
126 return sprintf('%s:%s', $self->charset_value(), $self->marc_value());
127}
128
129
130=head2 utf8_hash_code()
131
132Returns a hash code for uniquely identifying a Code by it's UCS value.
133
134=cut
135
136sub utf8_hash_code
137{
138 return int(hex(shift->ucs()));
139}
140
141
142=head2 default_charset_group
143
144Returns 'G0' or 'G1' indicating where the character is typicalling used
145in the MARC-8 environment.
146
147=cut
148
149sub default_charset_group
150{
151 my $charset = shift->charset_value();
152
153 return 'G0'
154 if $charset eq ASCII_DEFAULT
155 or $charset eq GREEK_SYMBOLS
156 or $charset eq SUBSCRIPTS
157 or $charset eq SUPERSCRIPTS
158 or $charset eq BASIC_LATIN
159 or $charset eq BASIC_ARABIC
160 or $charset eq BASIC_CYRILLIC
161 or $charset eq BASIC_GREEK
162 or $charset eq BASIC_HEBREW
163 or $charset eq CJK;
164
165 return 'G1';
166}
167
168
169=head2 get_marc8_escape
170
171Returns an escape sequence to move to the Code from another marc-8 character
172set.
173
174=cut
175
176sub get_escape
177{
178 my $charset = shift->charset_value();
179
180 return ESCAPE . $charset
181 if $charset eq ASCII_DEFAULT
182 or $charset eq GREEK_SYMBOLS
183 or $charset eq SUBSCRIPTS
184 or $charset eq SUPERSCRIPTS;
185
186 return ESCAPE . SINGLE_G0_A . $charset
187 if $charset eq ASCII_DEFAULT
188 or $charset eq BASIC_LATIN
189 or $charset eq BASIC_ARABIC
190 or $charset eq BASIC_CYRILLIC
191 or $charset eq BASIC_GREEK
192 or $charset eq BASIC_HEBREW;
193
194 return ESCAPE . SINGLE_G1_A . $charset
195 if $charset eq EXTENDED_ARABIC
196 or $charset eq EXTENDED_LATIN
197 or $charset eq EXTENDED_CYRILLIC;
198
199 return ESCAPE . MULTI_G0_A . CJK
200 if $charset eq CJK;
201}
202
203=head2 charset_value
204
205Returns the charset value, not the hex sequence.
206
207=cut
208
209sub charset_value
210{
211 return chr(hex(shift->charset()));
212}
213
214
215
21619µs9µs1;