File | /usr/share/perl5/MARC/Charset/Table.pm |
Statements Executed | 32 |
Total Time | 0.0012879 seconds |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 81µs | 95µs | _init | MARC::Charset::Table::
1 | 1 | 1 | 22µs | 117µs | new | MARC::Charset::Table::
1 | 1 | 1 | 14µs | 14µs | db_path | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | BEGIN | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | add_code | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | brand_new | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | db | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | get_code | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | lookup_by_marc8 | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | lookup_by_utf8 | MARC::Charset::Table::
Line | Stmts. | Exclusive Time | Avg. | Code |
---|---|---|---|---|
1 | package MARC::Charset::Table; | |||
2 | ||||
3 | =head1 NAME | |||
4 | ||||
5 | MARC::Charset::Table - character mapping db | |||
6 | ||||
7 | =head1 SYNOPSIS | |||
8 | ||||
9 | use MARC::Charset::Table; | |||
10 | use MARC::Charset::Constants qw(:all); | |||
11 | ||||
12 | # create the table object | |||
13 | my $table = MARC::Charset::Table->new(); | |||
14 | ||||
15 | # get a code using the marc8 character set code and the character | |||
16 | my $code = $table->lookup_by_marc8(CYRILLIC_BASIC, 'K'); | |||
17 | ||||
18 | # get a code using the utf8 value | |||
19 | $code = $table->lookup_by_utf8(chr(0x043A)); | |||
20 | ||||
21 | =head1 DESCRIPTION | |||
22 | ||||
23 | MARC::Charset::Table is a wrapper around the character mapping database, | |||
24 | which is implemented as a tied hash on disk. This database gets generated | |||
25 | by Makefile.PL on installation of MARC::Charset using | |||
26 | MARC::Charset::Compiler. | |||
27 | ||||
28 | The database is essentially a key/value mapping where a key is a | |||
29 | MARC-8 character set code + a MARC-8 character, or an integer representing the | |||
30 | UCS code point. These keys map to a serialized MARC::Charset::Code object. | |||
31 | ||||
32 | =cut | |||
33 | ||||
34 | 3 | 33µs | 11µs | use strict; # spent 11µs making 1 call to strict::import |
35 | 3 | 30µs | 10µs | use warnings; # spent 25µs making 1 call to warnings::import |
36 | 3 | 132µs | 44µs | use POSIX; # spent 7.29ms making 1 call to POSIX::import |
37 | 3 | 144µs | 48µs | use SDBM_File; # spent 6µs making 1 call to import |
38 | 3 | 138µs | 46µs | use MARC::Charset::Code; # spent 7µs making 1 call to import |
39 | 3 | 34µs | 11µs | use MARC::Charset::Constants qw(:all); # spent 244µs making 1 call to Exporter::import |
40 | 3 | 660µs | 220µs | use Storable qw(freeze thaw); # spent 88µs making 1 call to Exporter::import |
41 | ||||
42 | =head2 new() | |||
43 | ||||
44 | The consturctor. | |||
45 | ||||
46 | =cut | |||
47 | ||||
48 | sub new | |||
49 | # spent 117µs (22+95) within MARC::Charset::Table::new which was called
# once (22µs+95µs) at line 44 of /usr/share/perl5/MARC/Charset.pm | |||
50 | 4 | 19µs | 5µs | my $class = shift; |
51 | my $self = bless {}, ref($class) || $class; | |||
52 | $self->_init(O_RDONLY); # spent 95µs making 1 call to MARC::Charset::Table::_init | |||
53 | return $self; | |||
54 | } | |||
55 | ||||
56 | ||||
57 | =head2 add_code() | |||
58 | ||||
59 | Add a MARC::Charset::Code to the table. | |||
60 | ||||
61 | =cut | |||
62 | ||||
63 | ||||
64 | sub add_code | |||
65 | { | |||
66 | my ($self, $code) = @_; | |||
67 | ||||
68 | # the Code object is serialized | |||
69 | my $frozen = freeze($code); | |||
70 | ||||
71 | # to support lookup by marc8 and utf8 values we | |||
72 | # stash away the rule in the db using two keys | |||
73 | my $marc8_key = $code->marc8_hash_code(); | |||
74 | my $utf8_key = $code->utf8_hash_code(); | |||
75 | ||||
76 | # stash away the marc8 lookup key | |||
77 | $self->{db}->{$marc8_key} = $frozen; | |||
78 | ||||
79 | # stash away the utf8 lookup key (only if it's not already there!) | |||
80 | # this means that the sets that appear in the xml file will have | |||
81 | # precedence ascii/ansel | |||
82 | $self->{db}->{$utf8_key} = $frozen unless exists $self->{db}->{$utf8_key}; | |||
83 | } | |||
84 | ||||
85 | ||||
86 | =head2 get_code() | |||
87 | ||||
88 | Retrieve a code using a hash key. | |||
89 | ||||
90 | =cut | |||
91 | ||||
92 | sub get_code | |||
93 | { | |||
94 | my ($self, $key) = @_; | |||
95 | my $db = $self->db(); | |||
96 | my $frozen = $db->{$key}; | |||
97 | return thaw($frozen) if $frozen; | |||
98 | return undef; | |||
99 | } | |||
100 | ||||
101 | ||||
102 | =head2 lookup_by_marc8() | |||
103 | ||||
104 | Looks up MARC::Charset::Code entry using a character set code and a MARC-8 | |||
105 | value. | |||
106 | ||||
107 | use MARC::Charset::Constants qw(HEBREW); | |||
108 | $code = $table->lookup_by_marc8(HEBREW, chr(0x60)); | |||
109 | ||||
110 | =cut | |||
111 | ||||
112 | sub lookup_by_marc8 | |||
113 | { | |||
114 | my ($self, $charset, $marc8) = @_; | |||
115 | $charset = BASIC_LATIN if $charset eq ASCII_DEFAULT; | |||
116 | return $self->get_code(sprintf('%s:%s', $charset, $marc8)); | |||
117 | } | |||
118 | ||||
119 | ||||
120 | =head2 lookup_by_utf8() | |||
121 | ||||
122 | Looks up a MARC::Charset::Code object using a utf8 value. | |||
123 | ||||
124 | =cut | |||
125 | ||||
126 | sub lookup_by_utf8 | |||
127 | { | |||
128 | my ($self, $value) = @_; | |||
129 | return $self->get_code(ord($value)); | |||
130 | } | |||
131 | ||||
132 | ||||
133 | ||||
134 | ||||
135 | =head2 db() | |||
136 | ||||
137 | Returns a reference to a tied character database. MARC::Charset::Table | |||
138 | wraps access to the db, but you can get at it if you want. | |||
139 | ||||
140 | =cut | |||
141 | ||||
142 | sub db | |||
143 | { | |||
144 | return shift->{db}; | |||
145 | } | |||
146 | ||||
147 | ||||
148 | =head2 db_path() | |||
149 | ||||
150 | Returns the path to the character encoding database. Can be called | |||
151 | statically too: | |||
152 | ||||
153 | print MARC::Charset::Table->db_path(); | |||
154 | ||||
155 | =cut | |||
156 | ||||
157 | sub db_path | |||
158 | # spent 14µs within MARC::Charset::Table::db_path which was called
# once (14µs+0s) by MARC::Charset::Table::_init at line 187 | |||
159 | 3 | 7µs | 2µs | my $path = $INC{'MARC/Charset/Table.pm'}; |
160 | $path =~ s/\.pm$//; | |||
161 | return $path; | |||
162 | } | |||
163 | ||||
164 | ||||
165 | =head2 brand_new() | |||
166 | ||||
167 | An alternate constructor which removes the existing database and starts | |||
168 | afresh. Be careful with this one, it's really only used on MARC::Charset | |||
169 | installation. | |||
170 | ||||
171 | =cut | |||
172 | ||||
173 | sub brand_new | |||
174 | { | |||
175 | my $class = shift; | |||
176 | my $self = bless {}, ref($class) || $class; | |||
177 | $self->_init(O_CREAT|O_RDWR); | |||
178 | return $self; | |||
179 | } | |||
180 | ||||
181 | ||||
182 | # helper function for initializing table internals | |||
183 | ||||
184 | sub _init | |||
185 | # spent 95µs (81+14) within MARC::Charset::Table::_init which was called
# once (81µs+14µs) by MARC::Charset::Table::new at line 52 | |||
186 | 3 | 84µs | 28µs | my ($self,$opts) = @_; |
187 | tie my %db, 'SDBM_File', db_path(), $opts, 0644; # spent 14µs making 1 call to MARC::Charset::Table::db_path | |||
188 | $self->{db} = \%db; | |||
189 | } | |||
190 | ||||
191 | ||||
192 | ||||
193 | ||||
194 | ||||
195 | 1 | 8µs | 8µs | 1; |