Skip to content

Commit 3d2a8c3

Browse files
committed
Changed sbcs data to packed data
1 parent f63ce12 commit 3d2a8c3

File tree

3 files changed

+199
-196
lines changed

3 files changed

+199
-196
lines changed

lib/Web/Encoding/UnivCharDet/CharsetProber.pm

Lines changed: 37 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -234,26 +234,27 @@ sub reset ($) {
234234
$self->{probers} = [
235235
Web::Encoding::UnivCharDet::CharsetProber::Latin1->new, # [0]
236236
map { Web::Encoding::UnivCharDet::CharsetProber::SBCS->new ($_) }
237-
Web::Encoding::UnivCharDet::Defs::Windows_1250CzechModel, # [1]
238-
Web::Encoding::UnivCharDet::Defs::Win1251Model,
239-
Web::Encoding::UnivCharDet::Defs::Koi8rModel,
240-
Web::Encoding::UnivCharDet::Defs::Latin5Model,
241-
Web::Encoding::UnivCharDet::Defs::MacCyrillicModel,
242-
Web::Encoding::UnivCharDet::Defs::Ibm866Model,
243-
Web::Encoding::UnivCharDet::Defs::Ibm855Model,
244-
Web::Encoding::UnivCharDet::Defs::Latin7Model,
245-
Web::Encoding::UnivCharDet::Defs::Win1253Model,
246-
Web::Encoding::UnivCharDet::Defs::Latin5BulgarianModel,
247-
Web::Encoding::UnivCharDet::Defs::Win1251BulgarianModel,
248-
Web::Encoding::UnivCharDet::Defs::TIS620ThaiModel,
237+
$Web::Encoding::UnivCharDet::Defs::Windows_1250CzechModel, # [1]
238+
$Web::Encoding::UnivCharDet::Defs::Win1251Model,
239+
$Web::Encoding::UnivCharDet::Defs::Koi8rModel,
240+
$Web::Encoding::UnivCharDet::Defs::Latin5Model,
241+
$Web::Encoding::UnivCharDet::Defs::MacCyrillicModel,
242+
$Web::Encoding::UnivCharDet::Defs::Ibm866Model,
243+
$Web::Encoding::UnivCharDet::Defs::Ibm855Model,
244+
$Web::Encoding::UnivCharDet::Defs::Latin7Model,
245+
$Web::Encoding::UnivCharDet::Defs::Win1253Model,
246+
$Web::Encoding::UnivCharDet::Defs::Latin5BulgarianModel,
247+
$Web::Encoding::UnivCharDet::Defs::Win1251BulgarianModel,
248+
$Web::Encoding::UnivCharDet::Defs::TIS620ThaiModel,
249+
$Web::Encoding::UnivCharDet::Defs::MacRomanSpanishModel,
249250
];
250251
my $hebprober = Web::Encoding::UnivCharDet::CharsetProber::Hebrew->new;
251252
push @{$self->{probers}},
252253
$hebprober,
253254
Web::Encoding::UnivCharDet::CharsetProber::SBCS->new
254-
(Web::Encoding::UnivCharDet::Defs::Win1255Model, 0, $hebprober), # logical
255+
($Web::Encoding::UnivCharDet::Defs::Win1255Model, 0, $hebprober), # logical
255256
Web::Encoding::UnivCharDet::CharsetProber::SBCS->new
256-
(Web::Encoding::UnivCharDet::Defs::Win1255Model, 1, $hebprober); # visual
257+
($Web::Encoding::UnivCharDet::Defs::Win1255Model, 1, $hebprober); # visual
257258
$hebprober->set_model_probers
258259
($self->{probers}->[-2], $self->{probers}->[-1]);
259260

@@ -305,22 +306,21 @@ sub handle_data ($$) {
305306
my $old_prober_count = @{$self->{probers}};
306307
my @new_prober = (
307308
map { Web::Encoding::UnivCharDet::CharsetProber::SBCS->new ($_) }
308-
Web::Encoding::UnivCharDet::Defs::Windows_1252FrenchModel,
309-
Web::Encoding::UnivCharDet::Defs::Windows_1252SpanishModel,
310-
Web::Encoding::UnivCharDet::Defs::Windows_1252PortugueseModel,
311-
Web::Encoding::UnivCharDet::Defs::Windows_1252GermanModel,
312-
#Web::Encoding::UnivCharDet::Defs::Iso_8859_2HungarianModel,
313-
Web::Encoding::UnivCharDet::Defs::Iso_8859_2CroatianModel,
314-
Web::Encoding::UnivCharDet::Defs::Iso_8859_2PolishModel,
315-
Web::Encoding::UnivCharDet::Defs::Iso_8859_2CzechModell,
316-
#Web::Encoding::UnivCharDet::Defs::Windows_1250HungarianModel,
317-
Web::Encoding::UnivCharDet::Defs::Windows_1250CroatianModel,
318-
Web::Encoding::UnivCharDet::Defs::Windows_1250PolishModel,
319-
#Web::Encoding::UnivCharDet::Defs::Windows_1250CzechModel,
320-
Web::Encoding::UnivCharDet::Defs::MacRomanSpanishModel,
321-
322-
#Web::Encoding::UnivCharDet::Defs::Win1250HungarianModel,
323-
#Web::Encoding::UnivCharDet::Defs::Latin2HungarianModel,
309+
$Web::Encoding::UnivCharDet::Defs::Windows_1252FrenchModel,
310+
$Web::Encoding::UnivCharDet::Defs::Windows_1252SpanishModel,
311+
$Web::Encoding::UnivCharDet::Defs::Windows_1252PortugueseModel,
312+
$Web::Encoding::UnivCharDet::Defs::Windows_1252GermanModel,
313+
#$Web::Encoding::UnivCharDet::Defs::Iso_8859_2HungarianModel,
314+
$Web::Encoding::UnivCharDet::Defs::Iso_8859_2CroatianModel,
315+
$Web::Encoding::UnivCharDet::Defs::Iso_8859_2PolishModel,
316+
$Web::Encoding::UnivCharDet::Defs::Iso_8859_2CzechModell,
317+
#$Web::Encoding::UnivCharDet::Defs::Windows_1250HungarianModel,
318+
$Web::Encoding::UnivCharDet::Defs::Windows_1250CroatianModel,
319+
$Web::Encoding::UnivCharDet::Defs::Windows_1250PolishModel,
320+
#$Web::Encoding::UnivCharDet::Defs::Windows_1250CzechModel,
321+
322+
#$Web::Encoding::UnivCharDet::Defs::Win1250HungarianModel,
323+
#$Web::Encoding::UnivCharDet::Defs::Latin2HungarianModel,
324324
);
325325
push @{$self->{probers}}, @new_prober;
326326
$self->{active_num} += @new_prober;
@@ -418,7 +418,7 @@ sub NUM () { 251 }
418418

419419
sub new ($$;$$) {
420420
my $self = bless {}, $_[0];
421-
$self->{model} = $_[1];
421+
$self->{model} = $_[1] // die "No model";
422422
$self->{reversed} = $_[2];
423423
$self->{name_prober} = $_[3];
424424
$self->reset;
@@ -442,7 +442,8 @@ sub handle_data ($$) {
442442

443443
my $ss = $self->{model}->{freq_char_count} // SAMPLE_SIZE;
444444
for my $i (0..((length $_[1]) - 1)) {
445-
my $order = $self->{model}->{char_to_order_map}->[ord substr $_[1], $i, 1] || 0;
445+
my $order = (ord substr $self->{model}->{char_to_order_map},
446+
(ord substr $_[1], $i, 1), 1) || 0;
446447

447448
$self->{total_char}++;
448449
if ($order == ILL) {
@@ -456,11 +457,13 @@ sub handle_data ($$) {
456457
$self->{total_seqs}++;
457458
unless ($self->{reversed}) {
458459
++$self->{seq_counters}->[
459-
$self->{model}->{precedence_matrix}->[$self->{last_order} * $ss + $order]
460+
ord substr $self->{model}->{precedence_matrix},
461+
($self->{last_order} * $ss + $order), 1
460462
];
461463
} else {
462464
++$self->{seq_counters}->[
463-
$self->{model}->{precedence_matrix}->[$order * $ss + $self->{last_order}]
465+
ord substr $self->{model}->{precedence_matrix},
466+
($order * $ss + $self->{last_order}), 1
464467
];
465468
}
466469
} elsif ($self->{last_order} < SYMBOL_CAT_ORDER) {

0 commit comments

Comments
 (0)