Skip to content

Commit 7afb7f6

Browse files
committed
Update internal minimail functions (UTF-8 detection, winmail.dat fix)
1 parent 8c4f5fa commit 7afb7f6

File tree

1 file changed

+28
-11
lines changed

1 file changed

+28
-11
lines changed

textmail

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env perl
22
BEGIN { pop @INC if $INC[-1] eq '.' }
3-
use 5.006; # and v7
3+
use 5.014;
44
use warnings;
55
use strict;
66

@@ -566,6 +566,8 @@ sub formail # rfc2822 + mboxrd format (see http://www.qmail.org/man/man5/mbox.ht
566566
{
567567
my ($mail, $parent) = @_;
568568
my @lines = split /(?<=\n)/, $mail;
569+
# Needed to cope (badly) when message/rfc822 attachments incorrectly start with /^From / (thanks libpst)
570+
@lines = ('') unless @lines;
569571
formail(sub { shift @lines }, sub { $mail = shift }, $parent);
570572
return $mail;
571573
}
@@ -710,26 +712,37 @@ sub header
710712
{
711713
my ($m, $h) = @_;
712714
return () unless exists $m->{header} && exists $m->{header}->{lc $h};
713-
return map { s/\n\s+/ /g; $_ = header_display($_); /^$h:\s*(.*)\s*$/i; $1 } @{$m->{header}->{lc $h}};
715+
return map { s/\n\s+/ /g; header_display($_) =~ /^$h:\s*(.*)\s*$/i; $1 } @{$m->{header}->{lc $h}};
714716
}
715717

716-
my $encword = qr/=\?(us-ascii|iso-8859-\d)(?:\*\w+)?\?(q|b)\?([^? ]+)\?=/i; # encoded words to display (should really only decode ascii)
718+
my $encword = qr/=\?([^*?]+)(?:\*\w+)?\?(q|b)\?([^? ]+)\?=/i; # encoded words to display
717719
sub header_display # rfc2047, rfc2231
718720
{
721+
use Encode ();
719722
return join '',
720723
map { tr/ \t/ /s; $_ } # finally, squeeze multiple whitespace
721724
map { tr/\x00-\x08\x0b-\x1f\x7f//d; $_ } # strip control characters
722-
map { s/$encword/lc $2 eq 'q' ? join ' ', split '_', decode_quoted_printable($3), -1 : decode_base64($3)/ieg; $_ } # decode encoded words
725+
map { s/$encword/(defined Encode::find_encoding($1)) ? Encode::decode($1, (lc $2 eq 'q') ? decode_quoted_printable($3, 1) : decode_base64($3)) : $&/ieg; $_ } # decode encoded words if possible
723726
map { s/($encword)\s+($encword)/$1$5/g while /$encword\s+$encword/; $_ } # strip space between encoded words that we're about to decode
724727
map { s/\((?:\\[^\r\n]|[^\\()])*\)//g unless /^".*"$/; $_ } # strip (comments) outside "quoted strings"
725728
split /("(?:\\[^\r\n]|[^\\"])*")/, shift; # split on "quoted strings"
726729
}
727730
731+
sub charsetof
732+
{
733+
my $s = shift;
734+
return 'us-ascii' if !defined $s || $s =~ /^[\x00-\x7f]*$/;
735+
#return 'utf-8' if $s =~ /^(?:[\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf4][\x80-\xbf]{3})+$/; # This won't work until perl v5.38
736+
return 'utf-8' if defined eval { Encode::decode 'UTF-8', $s, Encode::FB_CROAK };
737+
return (defined $ENV{LANG} && $ENV{LANG} =~ /^.+\.(.+)$/) && $1 ne 'UTF-8' ? lc $1 : 'iso-8859-1'; # Make something up
738+
}
739+
728740
sub header_format # rfc2822, rfc2047
729741
{
730742
my ($h, $l, $c) = @_;
731743
$h =~ s/^\s+//, $h =~ s/\s+$//, $h =~ tr/ \t\n\r/ /s;
732-
$h = join ' ', map { /^".*"$/ ? $_ : !tr/\x80-\xff// ? $_ : tr/a-zA-Z0-9!*\/+-//c > length >> 1 ? join(' ', map { '=?' . ($c || 'iso-8859-1') . ($l ? "*$l" : '') . '?b?' . substr(encode_base64($_), 0, -1) . '?=' } (split /\n/, (s/([^\r\n]{38})/$1\n/g, $_))) : join(' ', map { '=?' . ($c || 'iso-8859-1') . ($l ? "*$l" : '') . '?q?' . substr(encode_quoted_printable($_), 0, -1) . '?=' } (split /\n/, (s/([^\r\n]{17})/$1\n/g, $_))) } map { /^[^\s"]*".*"[^\s"]*$/ ? $_ : split / / } split /(\S*"(?:\\[^\r\n]|[^\\"])*"\S*)/, $h;
744+
use Encode (); $h = Encode::encode('UTF-8', $h) if grep { ord > 255 } split //, $h;
745+
$h = join ' ', map { /^".*"$/ ? $_ : !tr/\x80-\xff// ? $_ : tr/a-zA-Z0-9!*\/+-//c > length >> 1 ? join(' ', map { '=?' . ($c || charsetof($h)) . ($l ? "*$l" : '') . '?b?' . substr(encode_base64($_), 0, -1) . '?=' } (split /\n/, (s/([^\r\n]{38})/$1\n/g, $_))) : join(' ', map { '=?' . ($c || charsetof($h)) . ($l ? "*$l" : '') . '?q?' . substr(encode_quoted_printable($_, 1), 0, -1) . '?=' } (split /\n/, (s/([^\r\n]{17})/$1\n/g, $_))) } map { /^[^\s"]*".*"[^\s"]*$/ ? $_ : split / / } split /(\S*"(?:\\[^\r\n]|[^\\"])*"\S*)/, $h;
733746
my ($f, $p, $lf) = ('', 0); $lf = length $f, $f .= ($lf && $lf + ($lf ? 1 : 0) + length($_) - $p > 78) ? ($p = $lf, "\n") : '', $f .= $f ? ' ' : '', $f .= $_ for map { /^\S*".*"\S*$/ ? $_ : grep { length } split / / } split /(\S*"(?:\\[^\r\n]|[^\\"\r\n])*"\S*)/, $h; # fold
734747
return $f . "\n";
735748
}
@@ -745,7 +758,7 @@ sub param # rfc2231, rfc2045
745758
{
746759
my ($n, $v) = ($1, $2);
747760
$v =~ s/^"//, $v =~ s/"$//, $v =~ s/\\(.)/$1/g if $v =~ /^".*"$/;
748-
$v =~ s/^(?:us-ascii|iso-8859-\d)'\w+'//i and $decode = 1;
761+
$v =~ s/^(?:us-ascii|utf-8|iso-8859-\d{1,2})'\w+'//i and $decode = 1;
749762
$v =~ s/%([\da-fA-f]{2})/chr hex $1/eg if $decode && substr($n, -1) eq '*';
750763
push @p, [lc $n, $v];
751764
}
@@ -790,7 +803,7 @@ sub body
790803
sub parts
791804
{
792805
my ($m, $p) = @_;
793-
return [@{$m->{mime_parts}}] unless defined $p;
806+
return exists $m->{mime_parts} ? [@{$m->{mime_parts}}] : [] unless defined $p;
794807
$m->{mime_parts} = [@{$p}];
795808
}
796809

@@ -800,7 +813,7 @@ sub newparam # rfc2231, rfc2045
800813
my $high = $v =~ tr/\x80-\xff//;
801814
my $ctrl = $v =~ tr/\x00-\x06\x0e-\x1f\x7f//;
802815
my $enc = $high || $ctrl ? '*' : '';
803-
$c = ('high' ? 'iso-8859-1' : 'us-ascii') if $enc && !$c;
816+
$c = charsetof($v) if $enc && !$c;
804817
$l = 'en' if $c && !$l;
805818
$v = "$c'$l'$v" if $enc;
806819
my @p; push @p, $_ while $_ = substr $v, 0, 40, '';
@@ -829,7 +842,7 @@ sub newmail # rfc2822, rfc2045, rfc2046, rfc2183 (also rfc3282, rfc3066, rfc2424
829842
($a{filename}) = $a{filename} =~ /([^\\\/]+)$/ if $a{filename};
830843
my $bound = $multi ? join '', map { substr $bchar, int(rand(length $bchar)), 1 } 0..30 : '';
831844
my $disp = $a{disposition} || ($type =~ /^(?:text\/|message\/rfc822)/i ? 'inline' : 'attachment');
832-
my $char = $a{charset} || ($a{body} && $a{body} =~ tr/\x80-\xff// ? 'iso-8859-1' : 'us-ascii');
845+
my $char = $a{charset} || charsetof($a{body});
833846
my $enc = $a{encoding} || ($multi || $msg ? '7bit' : $a{body} ? choose_encoding($a{body}) : '7bit');
834847
append_header($m, $a[$_] . ': ' . $a[$_ + 1]) for grep { $_ % 2 == 0 && $a[$_] =~ /^[A-Z]/ } 0..$#a;
835848
append_header($m, 'Date: ' . rfc822date(time)) if grep { /^(?:date|from|sender|reply-to)$/i } keys %a and !grep { /^date$/i } keys %a;
@@ -898,8 +911,10 @@ sub decode_base64 # MIME::Base64 (Gisle Aas)
898911
sub encode_quoted_printable
899912
{
900913
my $quoted = shift;
914+
my $qcode = shift;
901915
my $binary = ($quoted =~ tr/\x00-\x06\x0e-\x1f\x7f//) ? '' : '\r\n';
902916
$quoted =~ s/([^!-<>-~ \t$binary])/sprintf '=%02X', ord $1/eg;
917+
$quoted =~ s/([?_])/sprintf '=%02X', ord $1/eg if $qcode;
903918
$quoted =~ s/((?:[^\r\n]{73,75})(?=[=])|(?:[^\r\n]{75}(?=[ \t]))|(?:[^\r\n]{75})(?=[^\r\n]{2})|(?:[^\r\n]{75})(?=[^\r\n]$))/$1=\n/g;
904919
$quoted =~ s/([ \t])$/sprintf '=%02X', ord $1/emg;
905920
# Python and mutt both behave as though this is wrong
@@ -911,8 +926,10 @@ sub encode_quoted_printable
911926
sub decode_quoted_printable
912927
{
913928
my $quoted = shift;
914-
$quoted =~ tr/\x00-\x08\x0b-\x0c\x0e-\x19\x7f-\xff//d;
929+
my $qcode = shift;
930+
$quoted =~ tr/\x00-\x08\x0b-\x0c\x0e-\x1f\x7f-\xff//d;
915931
$quoted =~ s/=\n//g;
932+
$quoted =~ s/_/ /g if $qcode;
916933
$quoted =~ s/=([0-9A-Fa-f]{2})/chr hex $1/eg;
917934
return $quoted;
918935
}
@@ -1039,8 +1056,8 @@ sub winmail
10391056
return $name;
10401057
}
10411058
1042-
add_mimetypes();
10431059
my $m = shift;
1060+
add_mimetypes();
10441061
$pos = 0; $data = body($m); @attachment = (); $badtnef = 0;
10451062
my $signature = unpack 'V', substr($data, $pos, 4); $pos += 4;
10461063
return $m unless $signature == 0x223E9F78;

0 commit comments

Comments
 (0)