# Loads a play in XML form from a URI (or local path) and prints how many
# lines each speaker has, one "<line count> <speaker>" row per speaker.
class ShakespeareAnalyzer
  # Reads the whole document up front and keeps it in @file_content.
  #
  # @param uri [String, #open] remote URL (anything open-uri can open), an
  #   object that responds to #open, or a path to a local file
  # @raise [SystemCallError, OpenURI::HTTPError] when the source cannot be read
  def initialize(uri)
    @file_content = get_content_from_uri(uri)
  end

  # Prints the speaker report to standard output.
  def run
    print_speakers_sorted_by_line_count
  end

  private

  # Returns the full body of +uri+ as a String.
  #
  # Kernel#open is deliberately avoided: it treats a leading "|" as a shell
  # command and, on Ruby 3.0+, no longer opens URLs. The block form makes sure
  # the underlying IO/Tempfile is closed once the content has been read.
  def get_content_from_uri(uri)
    remote = openable_uri(uri)
    # proxy: nil (variable unset) tells open-uri not to use a proxy at all.
    return remote.open(proxy: ENV['http_proxy'], &:read) if remote

    File.open(uri, 'r', &:read)
  end

  # Returns an object open-uri can read from, or nil when +uri+ should be
  # treated as a plain file path (no openable scheme, or not a valid URI).
  def openable_uri(uri)
    return uri if uri.respond_to?(:open)

    parsed = URI.parse(uri)
    parsed if parsed.respond_to?(:open)
  rescue URI::InvalidURIError
    nil
  end

  def print_speakers_sorted_by_line_count
    xml_parser.speakers_sorted_by_line_count.each do |speaker, lines|
      puts "#{lines} #{speaker}"
    end
  end

  # Parser is built lazily and reused across calls.
  def xml_parser
    @_xml_parser ||= XmlParser.new(@file_content)
  end
end
# Computes per-speaker line counts for a play marked up as XML, where SPEECH
# elements under PLAY hold SPEAKER and LINE elements.
class XmlParser
  # @param xml [String, nil] raw XML document handed to Nokogiri.XML
  def initialize(xml)
    @doc = Nokogiri.XML(xml)
  end

  # @return [Array<Array(String, Integer)>] [speaker, line count] pairs,
  #   highest line count first (the relative order of ties is unspecified)
  def speakers_sorted_by_line_count
    speakers_with_line_count.sort_by { |_speaker, line_count| line_count }.reverse
  end

  private

  # Walks every speech once and credits its LINE count to each distinct
  # speaker named in that speech.
  #
  # Speaker names are only ever used as hash keys, never spliced into a
  # selector string, so names containing quotes cannot break the query.
  #
  # @return [Hash{String => Integer}] speaker name => total lines, keyed in
  #   order of first appearance
  def speakers_with_line_count
    @doc.css('PLAY SPEECH').each_with_object(Hash.new(0)) do |speech, totals|
      line_count = speech.css('LINE').count
      # uniq: a speaker listed twice in one speech still gets the lines once.
      speech.css('SPEAKER').map(&:text).uniq.each { |name| totals[name] += line_count }
    end
  end
end
# Runs the given block with $stdout temporarily replaced by an in-memory
# buffer and returns everything the block wrote to it.
#
# The previous $stdout is restored even when the block raises.
#
# @yield code whose standard output should be captured
# @return [String] text written to $stdout while the block ran
# @raise [LocalJumpError] when called without a block
def capture_stdout
  original_stdout = $stdout
  buffer = StringIO.new
  $stdout = buffer
  yield
  buffer.string
ensure
  $stdout = original_stdout
end
# Specs for XmlParser: document construction and the sorted speaker report.
describe XmlParser do
  describe '#initialize' do
    it 'instantiates @doc with Nokogiri XML parser' do
      doc = XmlParser.new(nil).instance_variable_get(:@doc)
      # Matcher form reports the actual class on failure instead of "false".
      expect(doc).to be_instance_of(Nokogiri::XML::Document)
    end
  end

  describe '#speakers_sorted_by_line_count' do
    it 'returns [speaker, line count] pairs sorted by descending line count' do
      fixture = File.expand_path('../test_files/test.xml', __FILE__)
      xml_parser = XmlParser.new(File.read(fixture))
      expect(xml_parser.speakers_sorted_by_line_count).to eq [
        ['FourLiner', 4],
        ['ThreeLiner', 3],
        ['TwoLiner', 2],
        ['Liner', 1]
      ]
    end
  end
end