File | /usr/share/perl5/MARC/Charset/Code.pm |
Statements Executed | 20 |
Total Time | 0.0009115 seconds |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
0 | 0 | 0 | 0s | 0s | BEGIN | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | char_value | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | charset_name | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | charset_value | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | default_charset_group | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | get_escape | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | marc8_hash_code | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | marc_value | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | to_string | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | utf8_hash_code | MARC::Charset::Code::
Line | Stmts. | Exclusive Time | Avg. | Code |
---|---|---|---|---|
1 | package MARC::Charset::Code; | |||
2 | ||||
3 | 3 | 27µs | 9µs | use strict; # spent 10µs making 1 call to strict::import |
4 | 3 | 32µs | 10µs | use warnings; # spent 28µs making 1 call to warnings::import |
5 | 3 | 42µs | 14µs | use base qw(Class::Accessor); # spent 1.73ms making 1 call to base::import |
6 | 3 | 35µs | 12µs | use Carp qw(croak); # spent 44µs making 1 call to Exporter::import |
7 | 3 | 33µs | 11µs | use Encode qw(encode_utf8); # spent 44µs making 1 call to Exporter::import |
8 | 3 | 720µs | 240µs | use MARC::Charset::Constants qw(:all); # spent 278µs making 1 call to Exporter::import |
9 | ||||
10 | 1 | 14µs | 14µs | MARC::Charset::Code # spent 288µs making 1 call to Class::Accessor::mk_accessors |
11 | ->mk_accessors(qw(marc ucs name charset is_combining alt)); | |||
12 | ||||
13 | =head1 NAME | |||
14 | ||||
15 | MARC::Charset::Code - represents a MARC-8/UTF-8 mapping | |||
16 | ||||
17 | =head1 SYNOPSIS | |||
18 | ||||
19 | =head1 DESCRIPTION | |||
20 | ||||
21 | Each mapping from a MARC-8 value to a UTF-8 value is represented by | |||
22 | a MARC::Charset::Code object in a MARC::Charset::Table. | |||
23 | ||||
24 | =head1 METHODS | |||
25 | ||||
26 | =head2 new() | |||
27 | ||||
28 | The constructor. | |||
29 | ||||
30 | =head2 name() | |||
31 | ||||
32 | A descriptive name for the code point. | |||
33 | ||||
34 | =head2 marc() | |||
35 | ||||
36 | A string representing the MARC-8 bytes codes. | |||
37 | ||||
38 | =head2 ucs() | |||
39 | ||||
40 | A string representing the UCS code point in hex. | |||
41 | ||||
42 | =head2 charset_code() | |||
43 | ||||
44 | The MARC-8 character set code. | |||
45 | ||||
46 | =head2 is_combining() | |||
47 | ||||
48 | Returns true/false to tell if the character is a combining character. | |||
49 | ||||
50 | =head2 to_string() | |||
51 | ||||
52 | A stringified version of the object suitable for pretty printing. | |||
53 | ||||
54 | =head2 char_value() | |||
55 | ||||
56 | Returns the unicode character. Essentially just a helper around | |||
57 | ucs(). | |||
58 | ||||
59 | =cut | |||
60 | ||||
61 | sub char_value() | |||
62 | { | |||
63 | return chr(hex(shift->ucs())); | |||
64 | } | |||
65 | ||||
66 | =head2 marc_value() | |||
67 | ||||
68 | The string representing the MARC-8 encoding. | |||
69 | ||||
70 | =cut | |||
71 | ||||
72 | sub marc_value | |||
73 | { | |||
74 | my $code = shift; | |||
75 | my $marc = $code->marc(); | |||
76 | return chr(hex($marc)) unless $code->charset_name eq 'CJK'; | |||
77 | return | |||
78 | chr(hex(substr($marc,0,2))) . | |||
79 | chr(hex(substr($marc,2,2))) . | |||
80 | chr(hex(substr($marc,4,2))); | |||
81 | } | |||
82 | ||||
83 | ||||
84 | =head2 charset_name() | |||
85 | ||||
86 | Returns the name of the character set, instead of the code. | |||
87 | ||||
88 | =cut | |||
89 | ||||
90 | sub charset_name() | |||
91 | { | |||
92 | return MARC::Charset::Constants::charset_name(shift->charset_value()); | |||
93 | } | |||
94 | ||||
95 | =head2 to_string() | |||
96 | ||||
97 | Returns a stringified version of the object. | |||
98 | ||||
99 | =cut | |||
100 | ||||
101 | sub to_string | |||
102 | { | |||
103 | my $self = shift; | |||
104 | my $str = | |||
105 | $self->name() . ': ' . | |||
106 | 'charset_code=' . $self->charset() . ' ' . | |||
107 | 'marc=' . $self->marc() . ' ' . | |||
108 | 'ucs=' . $self->ucs() . ' '; | |||
109 | ||||
110 | $str .= ' combining' if $self->is_combining(); | |||
111 | return $str; | |||
112 | } | |||
113 | ||||
114 | ||||
115 | =head2 marc8_hash_code() | |||
116 | ||||
117 | Returns a hash code for this Code object for looking up the object using | |||
118 | MARC8. First portion is the character set code and the second is the | |||
119 | MARC-8 value. | |||
120 | ||||
121 | =cut | |||
122 | ||||
123 | sub marc8_hash_code | |||
124 | { | |||
125 | my $self = shift; | |||
126 | return sprintf('%s:%s', $self->charset_value(), $self->marc_value()); | |||
127 | } | |||
128 | ||||
129 | ||||
130 | =head2 utf8_hash_code() | |||
131 | ||||
132 | Returns a hash code for uniquely identifying a Code by it's UCS value. | |||
133 | ||||
134 | =cut | |||
135 | ||||
136 | sub utf8_hash_code | |||
137 | { | |||
138 | return int(hex(shift->ucs())); | |||
139 | } | |||
140 | ||||
141 | ||||
142 | =head2 default_charset_group | |||
143 | ||||
144 | Returns 'G0' or 'G1' indicating where the character is typicalling used | |||
145 | in the MARC-8 environment. | |||
146 | ||||
147 | =cut | |||
148 | ||||
149 | sub default_charset_group | |||
150 | { | |||
151 | my $charset = shift->charset_value(); | |||
152 | ||||
153 | return 'G0' | |||
154 | if $charset eq ASCII_DEFAULT | |||
155 | or $charset eq GREEK_SYMBOLS | |||
156 | or $charset eq SUBSCRIPTS | |||
157 | or $charset eq SUPERSCRIPTS | |||
158 | or $charset eq BASIC_LATIN | |||
159 | or $charset eq BASIC_ARABIC | |||
160 | or $charset eq BASIC_CYRILLIC | |||
161 | or $charset eq BASIC_GREEK | |||
162 | or $charset eq BASIC_HEBREW | |||
163 | or $charset eq CJK; | |||
164 | ||||
165 | return 'G1'; | |||
166 | } | |||
167 | ||||
168 | ||||
169 | =head2 get_marc8_escape | |||
170 | ||||
171 | Returns an escape sequence to move to the Code from another marc-8 character | |||
172 | set. | |||
173 | ||||
174 | =cut | |||
175 | ||||
176 | sub get_escape | |||
177 | { | |||
178 | my $charset = shift->charset_value(); | |||
179 | ||||
180 | return ESCAPE . $charset | |||
181 | if $charset eq ASCII_DEFAULT | |||
182 | or $charset eq GREEK_SYMBOLS | |||
183 | or $charset eq SUBSCRIPTS | |||
184 | or $charset eq SUPERSCRIPTS; | |||
185 | ||||
186 | return ESCAPE . SINGLE_G0_A . $charset | |||
187 | if $charset eq ASCII_DEFAULT | |||
188 | or $charset eq BASIC_LATIN | |||
189 | or $charset eq BASIC_ARABIC | |||
190 | or $charset eq BASIC_CYRILLIC | |||
191 | or $charset eq BASIC_GREEK | |||
192 | or $charset eq BASIC_HEBREW; | |||
193 | ||||
194 | return ESCAPE . SINGLE_G1_A . $charset | |||
195 | if $charset eq EXTENDED_ARABIC | |||
196 | or $charset eq EXTENDED_LATIN | |||
197 | or $charset eq EXTENDED_CYRILLIC; | |||
198 | ||||
199 | return ESCAPE . MULTI_G0_A . CJK | |||
200 | if $charset eq CJK; | |||
201 | } | |||
202 | ||||
203 | =head2 charset_value | |||
204 | ||||
205 | Returns the charset value, not the hex sequence. | |||
206 | ||||
207 | =cut | |||
208 | ||||
209 | sub charset_value | |||
210 | { | |||
211 | return chr(hex(shift->charset())); | |||
212 | } | |||
213 | ||||
214 | ||||
215 | ||||
216 | 1 | 9µs | 9µs | 1; |