@@ -251,8 +251,10 @@ sub reset ($;$) {
251251 ] : [
252252 Web::Encoding::UnivCharDet::CharsetProber::Latin1-> new, # [0]
253253 map { Web::Encoding::UnivCharDet::CharsetProber::SBCS-> new ($_ ) }
254- $Web::Encoding::UnivCharDet::Defs::Windows_1250CzechModel , # [1]
255- $Web::Encoding::UnivCharDet::Defs::MacRomanSpanishModel , # [2]
254+ # $Web::Encoding::UnivCharDet::Defs::Windows_1250CzechModel, # [1]
255+ $Web::Encoding::UnivCharDet::Defs::Windows_1250CentralModel , # [1]
256+ # $Web::Encoding::UnivCharDet::Defs::MacRomanSpanishModel, # [2]
257+ $Web::Encoding::UnivCharDet::Defs::MacintoshWesternModel , # [2]
256258 $Web::Encoding::UnivCharDet::Defs::Win1251Model ,
257259 $Web::Encoding::UnivCharDet::Defs::Koi8rModel ,
258260 $Web::Encoding::UnivCharDet::Defs::Iso_8859_5Model ,
@@ -340,49 +342,68 @@ sub handle_data ($$) {
340342 my $old_prober_count = @{$self -> {probers }};
341343 my @new_prober = $self -> {resolve_latin1_refs } ? (
342344 map { Web::Encoding::UnivCharDet::CharsetProber::SBCS-> new ($_ ) }
343- $Web::Encoding::UnivCharDet::Defs::Windows_1252FrenchModel ,
344- $Web::Encoding::UnivCharDet::Defs::Windows_1252SpanishModel ,
345- $Web::Encoding::UnivCharDet::Defs::Windows_1252PortugueseModel ,
346- $Web::Encoding::UnivCharDet::Defs::Windows_1252GermanModel ,
347- $Web::Encoding::UnivCharDet::Defs::Iso_8859_4EstonianModel ,
348- $Web::Encoding::UnivCharDet::Defs::Iso_8859_4LatvianModel ,
345+ $Web::Encoding::UnivCharDet::Defs::Windows_1252WesternModel ,
346+ $Web::Encoding::UnivCharDet::Defs::Windows_1252ScandinavianModel ,
347+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252FrenchModel,
348+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252SpanishModel,
349+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252PortugueseModel,
350+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252GermanModel,
351+ $Web::Encoding::UnivCharDet::Defs::Iso_8859_4BalticModel ,
352+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_4EstonianModel,
353+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_4LatvianModel,
349354 $Web::Encoding::UnivCharDet::Defs::Iso_8859_3EsperantoModel ,
350355 ) : (
351356 map { Web::Encoding::UnivCharDet::CharsetProber::SBCS-> new ($_ ) }
352- $Web::Encoding::UnivCharDet::Defs::Windows_1252FrenchModel ,
353- $Web::Encoding::UnivCharDet::Defs::Windows_1252SpanishModel ,
354- $Web::Encoding::UnivCharDet::Defs::Windows_1252PortugueseModel ,
355- $Web::Encoding::UnivCharDet::Defs::Windows_1252GermanModel ,
357+ $Web::Encoding::UnivCharDet::Defs::Windows_1252WesternModel ,
358+ $Web::Encoding::UnivCharDet::Defs::Windows_1252ScandinavianModel ,
359+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252FrenchModel,
360+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252SpanishModel,
361+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252PortugueseModel,
362+ # $Web::Encoding::UnivCharDet::Defs::Windows_1252GermanModel,
356363 # $Web::Encoding::UnivCharDet::Defs::Iso_8859_2HungarianModel,
357- $Web::Encoding::UnivCharDet::Defs::Iso_8859_2CroatianModel ,
358- $Web::Encoding::UnivCharDet::Defs::Iso_8859_2PolishModel ,
359- $Web::Encoding::UnivCharDet::Defs::Iso_8859_2CzechModell ,
364+ $Web::Encoding::UnivCharDet::Defs::Iso_8859_2CentralModel ,
365+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_2CroatianModel,
366+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_2PolishModel,
367+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_2CzechModell,
360368 # $Web::Encoding::UnivCharDet::Defs::Windows_1250HungarianModel,
361- $Web::Encoding::UnivCharDet::Defs::Windows_1250CroatianModel ,
362- $Web::Encoding::UnivCharDet::Defs::Windows_1250PolishModel ,
369+ # $Web::Encoding::UnivCharDet::Defs::Windows_1250CroatianModel,
370+ # $Web::Encoding::UnivCharDet::Defs::Windows_1250PolishModel,
363371 # $Web::Encoding::UnivCharDet::Defs::Windows_1250CzechModel,
364- $Web::Encoding::UnivCharDet::Defs::Windows_1257EstonianModel ,
365- $Web::Encoding::UnivCharDet::Defs::Windows_1257LithuanianModel ,
366- $Web::Encoding::UnivCharDet::Defs::Windows_1257LatvianModel ,
367- $Web::Encoding::UnivCharDet::Defs::Iso_8859_13LatvianModel ,
368- $Web::Encoding::UnivCharDet::Defs::Iso_8859_4EstonianModel ,
369- $Web::Encoding::UnivCharDet::Defs::Iso_8859_4LatvianModel ,
372+ $Web::Encoding::UnivCharDet::Defs::Windows_1257BalticModel ,
373+ # $Web::Encoding::UnivCharDet::Defs::Windows_1257EstonianModel,
374+ # $Web::Encoding::UnivCharDet::Defs::Windows_1257LithuanianModel,
375+ # $Web::Encoding::UnivCharDet::Defs::Windows_1257LatvianModel,
376+ $Web::Encoding::UnivCharDet::Defs::Iso_8859_13BalticModel ,
377+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_13LatvianModel,
378+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_4EstonianModel,
379+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_4LatvianModel,
370380 $Web::Encoding::UnivCharDet::Defs::Iso_8859_3EsperantoModel ,
371- $Web::Encoding::UnivCharDet::Defs::Iso_8859_10LithuanianModel ,
381+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_10LithuanianModel,
372382 $Web::Encoding::UnivCharDet::Defs::Iso_8859_15EstonianModel ,
373383 $Web::Encoding::UnivCharDet::Defs::Iso_8859_15FrenchModel ,
374- $Web::Encoding::UnivCharDet::Defs::Iso_8859_16RomanianModel ,
384+ # $Web::Encoding::UnivCharDet::Defs::Iso_8859_16RomanianModel,
375385 # $Web::Encoding::UnivCharDet::Defs::Iso_8859_9TurkishModel,
376386 $Web::Encoding::UnivCharDet::Defs::Windows_1254TurkishModel ,
377- # $Web::Encoding::UnivCharDet::Defs::Windows_1258VietnameseModel,
378- $Web::Encoding::UnivCharDet::Defs::Mac_CentraleuropeCzechModel ,
379- $Web::Encoding::UnivCharDet::Defs::Ibm852CzechModel ,
380- $Web::Encoding::UnivCharDet::Defs::Ibm852PolishModel ,
387+ $Web::Encoding::UnivCharDet::Defs::Ibm437WesternModel ,
388+ $Web::Encoding::UnivCharDet::Defs::Ibm850WesternModel ,
389+ $Web::Encoding::UnivCharDet::Defs::Ibm850ScandinavianModel ,
390+ $Web::Encoding::UnivCharDet::Defs::Ibm852CentralModel ,
391+ # $Web::Encoding::UnivCharDet::Defs::Ibm852CzechModel,
392+ # $Web::Encoding::UnivCharDet::Defs::Ibm852PolishModel,
393+ $Web::Encoding::UnivCharDet::Defs::Ibm775BalticModel ,
394+ $Web::Encoding::UnivCharDet::Defs::Ibm857TurkishModel ,
381395 $Web::Encoding::UnivCharDet::Defs::Ibm865DanishModel ,
382396 $Web::Encoding::UnivCharDet::Defs::Windows_1252IcelandicFaroeseModel ,
397+ $Web::Encoding::UnivCharDet::Defs::MacintoshScandinavianModel ,
398+ $Web::Encoding::UnivCharDet::Defs::X_Mac_CeCentralModel ,
399+ # $Web::Encoding::UnivCharDet::Defs::Mac_CentraleuropeCzechModel,
400+ $Web::Encoding::UnivCharDet::Defs::Iso_8859_4BalticModel ,
401+ $Web::Encoding::UnivCharDet::Defs::Iso_8859_10BalticModel ,
402+ $Web::Encoding::UnivCharDet::Defs::Iso_8859_16CentralModel ,
383403
384404 # $Web::Encoding::UnivCharDet::Defs::Win1250HungarianModel,
385405 # $Web::Encoding::UnivCharDet::Defs::Latin2HungarianModel,
406+ # $Web::Encoding::UnivCharDet::Defs::Windows_1258VietnameseModel,
386407 );
387408 push @{$self -> {probers }}, @new_prober ;
388409 $self -> {active_num } += @new_prober ;
@@ -465,7 +486,7 @@ sub dump_status ($) {
465486 $_ -> dump_status if defined $_ ;
466487 }
467488 for (@{$self -> {inactive_probers }}) {
468- print " [inactive ]" ;
489+ print " [in ]" ;
469490 $_ -> dump_status;
470491 }
471492} # dump_status
@@ -507,7 +528,10 @@ sub SYM () { 253 }
507528sub RET () { 252 }
508529sub NUM () { 251 }
509530sub CPY () { 250 }
510- sub SYMBOL_CAT_ORDER () { 249 }
531+ sub TMK () { 249 }
532+ sub ORD () { 248 }
533+ sub DLM () { 247 }
534+ sub SYMBOL_CAT_ORDER () { 246 }
511535
512536sub new ($$;$$) {
513537 my $self = bless {}, $_ [0];
@@ -574,13 +598,13 @@ sub handle_data ($$) {
574598 $self -> {seq_counters }-> [NEGATIVE_CAT]++;
575599 $self -> {total_seqs }++;
576600 }
577- } elsif ($order == SYM) {
601+ } elsif ($order == SYM or $order == DLM or $order == TMK or $order == ORD ) {
578602 $self -> {seq_counters }-> [SYM_CAT]++;
579603 if ($self -> {symbol_state } == 1) {
580604 $self -> {seq_counters }-> [CPY_CAT]++;
581605 }
582606 } elsif ($order == CPY) {
583- if ($self -> {last_order } == SYM or $self -> {last_order } == 255) {
607+ if ($self -> {last_order } == DLM or $self -> {last_order } == 255) {
584608 $self -> {seq_counters }-> [SYM_CAT]++;
585609 $self -> {symbol_state } = 1;
586610
@@ -1027,7 +1051,7 @@ sub dump_status ($) {
10271051 my $self = $_ [0];
10281052 $self -> get_confidence;
10291053 printf " MBCS [%s ] %s [%s ] (%s )\n " ,
1030- $self -> get_charset_name,
1054+ $self -> get_charset_name // ' ' ,
10311055 $self -> {resolve_latin1_refs } ? ' htmlrefs' : ' ' ,
10321056 $self -> get_confidence,
10331057 $self -> {state };
0 commit comments