Skip to content

Commit

Permalink
Basedata imports: handle unicode in xlsx files on Windows
Browse files Browse the repository at this point in the history
The Spreadsheet::Read path already handled it via
file handles, but the Excel::ValueReader::XLSX
approach needs file names, so we use Windows short paths.
  • Loading branch information
shawnlaffan committed Oct 23, 2023
1 parent 4fc2303 commit 51e1f3f
Showing 1 changed file with 22 additions and 6 deletions.
28 changes: 22 additions & 6 deletions lib/Biodiverse/Common.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,22 @@ sub guess_eol {
return $eol // "\n";
}

#  Get a file name suitable for code that needs a path string
#  on Windows, using the short path form of the name.
#
#  Arguments (named):
#    file_name - the path to convert.  Croaks if it is not defined.
#
#  Returns:
#    - file_name unchanged when ON_WINDOWS is false.
#    - otherwise the result of shortpathL for a file that
#      file_exists_aa reports as existing, or an empty string
#      when the file is not found.
#
#  No exception is raised when the result is empty, so callers
#  need to check for that themselves.
#  NOTE(review): shortpathL is presumably from Win32::LongPath - confirm
#  what it returns on failure.
sub get_shortpath_filename {
    my ($self, %args) = @_;

    my $file_name = $args{file_name};
    croak 'file_name not specified'
      if !defined $file_name;

    #  other platforms can use the name as given
    if (!ON_WINDOWS) {
        return $file_name;
    }

    #  default to an empty string when the file cannot be found
    my $short_path = '';
    if ($self->file_exists_aa($file_name)) {
        $short_path = shortpathL($file_name);
    }

    return $short_path;
}

sub get_file_handle {
my ($self, %args) = @_;

Expand Down Expand Up @@ -1804,12 +1820,12 @@ sub get_book_struct_from_spreadsheet_file {
# stringify any Path::Class etc objects
$file = "$file";

# First block is a faster read method but the Unicode bug means
# it does not "see" unicode file names on Windows.
# If the file does not exist then we fall back to the
# Spreadsheet::Read method to try generating a file handle.
if (1 and $file =~ /\.xlsx$/ and -e $file) {
$book = $self->get_book_struct_from_xlsx_file (filename => $file);
# Could set second condition as a fallback if first parse fails
# but it seems to work pretty well in practice.
if ($file =~ /\.xlsx$/ and $self->file_exists_aa($file)) {
# handle unicode on windows
my $f = $self->get_shortpath_filename (file_name => $file);
$book = $self->get_book_struct_from_xlsx_file (filename => $f);
}
elsif ($file =~ /\.(xlsx?|ods)$/) {
# we can use file handles for excel and ods
Expand Down

0 comments on commit 51e1f3f

Please sign in to comment.