Switch Henkei.client_read from IO.popen to Open3.capture2, add specs that read
spec/samples/pipe-error.png, and add a bin/console IRB launcher.

NOTE(review): the two `include ''` expectations in the '#html returns an empty
body' example match any string, so they assert nothing as written. The expected
markup appears to have been lost; confirm the intended values.

diff --git a/bin/console b/bin/console
new file mode 100755
index 0000000..9fd91d3
--- /dev/null
+++ b/bin/console
@@ -0,0 +1,8 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require 'bundler/setup'
+require 'henkei'
+
+require 'irb'
+IRB.start
diff --git a/lib/henkei.rb b/lib/henkei.rb
index c336b37..e533887 100644
--- a/lib/henkei.rb
+++ b/lib/henkei.rb
@@ -11,6 +11,8 @@
 require 'socket'
 require 'stringio'
 
+require 'open3'
+
 # Read text and metadata from files and documents using Apache Tika toolkit
 class Henkei # rubocop:disable Metrics/ClassLength
   GEM_PATH = File.dirname(File.dirname(__FILE__))
@@ -224,11 +226,7 @@ def self.java_path
   # Internal helper for calling to Tika library directly
   #
   def self.client_read(type, data)
-    IO.popen tika_command(type), 'r+' do |io|
-      io.write data
-      io.close_write
-      io.read
-    end
+    Open3.capture2(tika_command(type), stdin_data: data).first
   end
   private_class_method :client_read
 
diff --git a/spec/henkei_spec.rb b/spec/henkei_spec.rb
index 21cf8bf..87dcbb1 100644
--- a/spec/henkei_spec.rb
+++ b/spec/henkei_spec.rb
@@ -40,6 +40,16 @@
       )
       expect(mimetype.extensions).to include 'docx'
     end
+
+    context 'when passing in the `pipe-error.png` test file' do
+      let(:data) { File.read 'spec/samples/pipe-error.png' }
+
+      it 'returns an empty result' do
+        text = Henkei.read :text, data
+
+        expect(text).to eq ''
+      end
+    end
   end
 
   describe '.new' do
@@ -129,6 +139,23 @@
     specify '#metadata reads metadata' do
       expect(henkei.metadata['Content-Type']).to eq %w[application/vnd.apple.pages application/vnd.apple.pages]
     end
+
+    context 'when passing in the `pipe-error.png` test file' do
+      let(:henkei) { Henkei.new 'spec/samples/pipe-error.png' }
+
+      it '#text returns an empty result' do
+        expect(henkei.text).to eq ''
+      end
+
+      it '#html returns an empty body' do
+        expect(henkei.html).to include ''
+        expect(henkei.html).to include ''
+      end
+
+      it '#mimetype detects the PNG content type' do
+        expect(henkei.mimetype.content_type).to eq 'image/png'
+      end
+    end
   end
 
   context 'initialized with a given URI' do
diff --git a/spec/samples/pipe-error.png b/spec/samples/pipe-error.png
new file mode 100644
index 0000000..e2ce1a0
Binary files /dev/null and b/spec/samples/pipe-error.png differ