1
1
# !/usr/bin/env perl
2
2
BEGIN { pop @INC if $INC [-1] eq ' .' }
3
- use 5.006; # and v7
3
+ use 5.014;
4
4
use warnings;
5
5
use strict;
6
6
@@ -566,6 +566,8 @@ sub formail # rfc2822 + mboxrd format (see http://www.qmail.org/man/man5/mbox.ht
566
566
{
567
567
my ($mail , $parent ) = @_ ;
568
568
my @lines = split /(?<=\n)/, $mail ;
569
+ # Needed to cope (badly) when message/rfc822 attachments incorrectly start with /^From / (thanks libpst)
570
+ @lines = (' ' ) unless @lines ;
569
571
formail(sub { shift @lines }, sub { $mail = shift }, $parent );
570
572
return $mail ;
571
573
}
@@ -710,26 +712,37 @@ sub header
710
712
{
711
713
my ($m , $h ) = @_ ;
712
714
return () unless exists $m -> {header } && exists $m -> {header }-> {lc $h };
713
- return map { s /\n\s +/ / g ; $_ = header_display($_ ); /^$h :\s * (.* )\s *$ /i; $ 1 } @{$m -> {header }-> {lc $h }};
715
+ return map { s /\n\s +/ / g ; header_display($_ ) =~ / ^$h :\s *(.*)\s *$ /i ; $1 } @{$m -> {header }-> {lc $h }};
714
716
}
715
717
716
- my $encword = qr / =\? (us-ascii|iso-8859- \d )(?:\*\w +)?\? (q|b)\? ([^? ]+)\? =/ i ; # encoded words to display (should really only decode ascii)
718
+ my $encword = qr / =\? ([^*?]+ )(?:\*\w +)?\? (q|b)\? ([^? ]+)\? =/ i ; # encoded words to display
717
719
sub header_display # rfc2047, rfc2231
718
720
{
721
+ use Encode ();
719
722
return join ' ' ,
720
723
map { tr / \t/ /s ; $_ } # finally, squeeze multiple whitespace
721
724
map { tr / \x00-\x08\x0b-\x1f\x7f// d; $_ } # strip control characters
722
- map { s / $encword/ lc $2 eq 'q' ? join ' ', split '_', decode_quoted_printable($3 ), -1 : decode_base64($3 )/ ieg ; $_ } # decode encoded words
725
+ map { s / $encword/ (defined Encode::find_encoding( $1 )) ? Encode::decode( $1 , ( lc $2 eq 'q') ? decode_quoted_printable($3 , 1) : decode_base64($3 )) : $& / ieg ; $_ } # decode encoded words if possible
723
726
map { s / ($encword)\s +($encword)/ $1$5 / g while /$encword \s +$encword /; $_ } # strip space between encoded words that we're about to decode
724
727
map { s /\( (?:\\ [^\r\n ]|[^\\ ()])*\) // g unless / ^".*"$ / ; $_ } # strip (comments) outside "quoted strings"
725
728
split /(" (?:\\ [^\r\n ]|[^\\ " ])*" )/, shift; # split on " quoted strings"
726
729
}
727
730
731
+ sub charsetof
732
+ {
733
+ my $s = shift;
734
+ return 'us-ascii' if !defined $s || $s =~ /^[\x00 -\x7f ]*$/ ;
735
+ #return 'utf-8' if $s =~ /^(?:[\x00 -\x7f ]|[\xc2 -\xdf ][\x80 -\xbf ]|[\xe0 -\xef ][\x80 -\xbf ]{2}|[\xf0 -\xf4 ][\x80 -\xbf ]{3})+$/ ; # This won't work until perl v5.38
736
+ return 'utf-8' if defined eval { Encode::decode 'UTF-8', $s , Encode::FB_CROAK };
737
+ return (defined $ENV {LANG} && $ENV {LANG} =~ /^.+\. (.+)$/ ) && $1 ne 'UTF-8' ? lc $1 : 'iso-8859-1'; # Make something up
738
+ }
739
+
728
740
sub header_format # rfc2822, rfc2047
729
741
{
730
742
my ($h , $l , $c ) = @_ ;
731
743
$h =~ s/^\s +//, $h =~ s/\s +$/ /, $h =~ tr/ \t\n\r / /s;
732
- $h = join ' ', map { /^" .*" $/ ? $_ : !tr/\x80 -\xff // ? $_ : tr/a-zA-Z0-9!*\/ +-//c > length >> 1 ? join(' ', map { '=?' . ($c || 'iso-8859-1') . ($l ? " *$l " : '') . '?b?' . substr(encode_base64($_ ), 0, -1) . '?=' } (split /\n /, (s/([^\r\n ]{38})/$1 \n /g, $_ ))) : join(' ', map { '=?' . ($c || 'iso-8859-1') . ($l ? " *$l " : '') . '?q?' . substr(encode_quoted_printable($_ ), 0, -1) . '?=' } (split /\n /, (s/([^\r\n ]{17})/$1 \n /g, $_ ))) } map { /^[^\s " ]*" .*" [^\s " ]*$/ ? $_ : split / / } split /(\S *" (?:\\[^\r\n]|[^\\" ])*" \S*)/, $h ;
744
+ use Encode (); $h = Encode::encode('UTF-8', $h ) if grep { ord > 255 } split //, $h ;
745
+ $h = join ' ', map { /^" .*" $/ ? $_ : !tr/\x80 -\xff // ? $_ : tr/a-zA-Z0-9!*\/ +-//c > length >> 1 ? join(' ', map { '=?' . ($c || charsetof($h )) . ($l ? " *$l " : '') . '?b?' . substr(encode_base64($_ ), 0, -1) . '?=' } (split /\n /, (s/([^\r\n ]{38})/$1 \n /g, $_ ))) : join(' ', map { '=?' . ($c || charsetof($h )) . ($l ? " *$l " : '') . '?q?' . substr(encode_quoted_printable($_ , 1), 0, -1) . '?=' } (split /\n /, (s/([^\r\n ]{17})/$1 \n /g, $_ ))) } map { /^[^\s " ]*" .*" [^\s " ]*$/ ? $_ : split / / } split /(\S *" (?:\\[^\r\n]|[^\\" ])*" \S*)/, $h ;
733
746
my ($f , $p , $lf ) = (' ' , 0); $lf = length $f , $f .= ($lf && $lf + ($lf ? 1 : 0) + length ($_ ) - $p > 78) ? ($p = $lf , " \n " ) : ' ' , $f .= $f ? ' ' : ' ' , $f .= $_ for map { / ^\S *".*"\S *$ / ? $_ : grep { length } split / / } split /(\S*" (?:\\ [^\r\n ]|[^\\ " \r\n])*" \S *)/, $h ; # fold
734
747
return $f . " \n" ;
735
748
}
@@ -745,7 +758,7 @@ sub param # rfc2231, rfc2045
745
758
{
746
759
my ($n , $v ) = ($1 , $2 );
747
760
$v =~ s/^" //, $v =~ s / "$// , $v =~ s /\\ (.)/ $1 / g if $v =~ / ^".*"$ / ;
748
- $v =~ s / ^(?:us-ascii|iso-8859-\d )'\w +'// i and $decode = 1;
761
+ $v =~ s / ^(?:us-ascii|utf-8| iso-8859-\d {1,2} )'\w +'// i and $decode = 1;
749
762
$v =~ s / %([\d a-fA-f]{2})/ chr hex $1 / eg if $decode && substr ($n , -1) eq ' *' ;
750
763
push @p , [lc $n , $v ];
751
764
}
@@ -790,7 +803,7 @@ sub body
790
803
sub parts
791
804
{
792
805
my ($m , $p ) = @_ ;
793
- return [@{$m -> {mime_parts }}] unless defined $p ;
806
+ return exists $m -> { mime_parts } ? [@{$m -> {mime_parts }}] : [ ] unless defined $p ;
794
807
$m -> {mime_parts } = [@{$p }];
795
808
}
796
809
@@ -800,7 +813,7 @@ sub newparam # rfc2231, rfc2045
800
813
my $high = $v =~ tr / \x80-\xff// ;
801
814
my $ctrl = $v =~ tr / \x00-\x06\x0e-\x1f\x7f// ;
802
815
my $enc = $high || $ctrl ? ' *' : ' ' ;
803
- $c = ( ' high ' ? ' iso-8859-1 ' : ' us-ascii ' ) if $enc && !$c ;
816
+ $c = charsetof( $v ) if $enc && !$c ;
804
817
$l = ' en' if $c && !$l ;
805
818
$v = " $c '$l '$v " if $enc ;
806
819
my @p ; push @p , $_ while $_ = substr $v , 0, 40, ' ' ;
@@ -829,7 +842,7 @@ sub newmail # rfc2822, rfc2045, rfc2046, rfc2183 (also rfc3282, rfc3066, rfc2424
829
842
($a {filename }) = $a {filename } =~ / ([^\\\/ ]+)$ / if $a {filename };
830
843
my $bound = $multi ? join ' ' , map { substr $bchar , int (rand (length $bchar )), 1 } 0..30 : ' ' ;
831
844
my $disp = $a {disposition } || ($type =~ / ^(?:text\/ |message\/ rfc822)/i ? ' inline' : ' attachment' );
832
- my $char = $a {charset } || ($a {body } && $a { body } =~ tr / \x80-\xff // ? ' iso-8859-1 ' : ' us-ascii ' );
845
+ my $char = $a {charset } || charsetof ($a {body });
833
846
my $enc = $a {encoding } || ($multi || $msg ? ' 7bit' : $a {body } ? choose_encoding($a {body }) : ' 7bit' );
834
847
append_header($m , $a [$_ ] . ' : ' . $a [$_ + 1]) for grep { $_ % 2 == 0 && $a [$_ ] =~ / ^[A-Z]/ } 0..$#a ;
835
848
append_header($m , ' Date: ' . rfc822date(time )) if grep { / ^(?:date|from|sender|reply-to)$ /i } keys %a and !grep { / ^date$ /i } keys %a ;
@@ -898,8 +911,10 @@ sub decode_base64 # MIME::Base64 (Gisle Aas)
898
911
sub encode_quoted_printable
899
912
{
900
913
my $quoted = shift;
914
+ my $qcode = shift;
901
915
my $binary = ($quoted =~ tr/\x00 -\x06\x0e -\x1f\x7f //) ? '' : '\r\n ';
902
916
$quoted =~ s/([^!-<>-~ \t $binary ])/sprintf '=%02X', ord $1 /eg;
917
+ $quoted =~ s/([?_])/sprintf '=%02X', ord $1 /eg if $qcode ;
903
918
$quoted =~ s/((?:[^\r\n ]{73,75})(?=[=])|(?:[^\r\n ]{75}(?=[ \t ]))|(?:[^\r\n ]{75})(?=[^\r\n ]{2})|(?:[^\r\n ]{75})(?=[^\r\n ]$) )/$1 =\n /g;
904
919
$quoted =~ s/([ \t ])$/sprintf '=%02X', ord $1 /emg;
905
920
# Python and mutt both behave as though this is wrong
@@ -911,8 +926,10 @@ sub encode_quoted_printable
911
926
sub decode_quoted_printable
912
927
{
913
928
my $quoted = shift;
914
- $quoted =~ tr/\x00 -\x08\x0b -\x0c\x0e -\x19\x7f -\xff //d;
929
+ my $qcode = shift;
930
+ $quoted =~ tr/\x00 -\x08\x0b -\x0c\x0e -\x1f\x7f -\xff //d;
915
931
$quoted =~ s/=\n //g;
932
+ $quoted =~ s/_/ /g if $qcode ;
916
933
$quoted =~ s/=([0-9A-Fa-f]{2})/chr hex $1 /eg;
917
934
return $quoted ;
918
935
}
@@ -1039,8 +1056,8 @@ sub winmail
1039
1056
return $name ;
1040
1057
}
1041
1058
1042
- add_mimetypes();
1043
1059
my $m = shift;
1060
+ add_mimetypes();
1044
1061
$pos = 0; $data = body($m ); @attachment = (); $badtnef = 0;
1045
1062
my $signature = unpack 'V', substr($data , $pos , 4); $pos += 4;
1046
1063
return $m unless $signature == 0x223E9F78;
0 commit comments