From c4bfcc31d259ab40e3dd417340e04d90d39df3ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Wed, 11 Sep 2013 14:31:55 +0200 Subject: [PATCH 01/21] Add rspec to project * create Gemfile and add rspec gem * create empty spec dir * set some default options in .rspec --- .rspec | 3 +++ Gemfile | 4 ++++ 2 files changed, 7 insertions(+) create mode 100644 .rspec create mode 100644 Gemfile diff --git a/.rspec b/.rspec new file mode 100644 index 0000000..9fda407 --- /dev/null +++ b/.rspec @@ -0,0 +1,3 @@ +--format documentation +--no-profile +--color diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..000721b --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' +ruby '2.0.0' + +gem 'rspec' From 6e6ed1a7ba66292be6121643a3e2fe674b556277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Thu, 12 Sep 2013 14:21:58 +0200 Subject: [PATCH 02/21] Add possibility to download file from URI * URI content is stored in @file_content --- Gemfile | 1 + Gemfile.lock | 26 ++++++++++++++++++++++++++ lib/shakespeare_analyzer.rb | 15 +++++++++++++++ spec/shakespeare_analyzer_spec.rb | 12 ++++++++++++ spec/spec_helper.rb | 1 + spec/test_files/test_file.txt | 1 + 6 files changed, 56 insertions(+) create mode 100644 Gemfile.lock create mode 100644 lib/shakespeare_analyzer.rb create mode 100644 spec/shakespeare_analyzer_spec.rb create mode 100644 spec/spec_helper.rb create mode 100644 spec/test_files/test_file.txt diff --git a/Gemfile b/Gemfile index 000721b..770e30c 100644 --- a/Gemfile +++ b/Gemfile @@ -2,3 +2,4 @@ source 'https://rubygems.org' ruby '2.0.0' gem 'rspec' +gem 'webmock' diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..530ebff --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,26 @@ +GEM + remote: https://rubygems.org/ + specs: + addressable (2.3.5) + crack (0.4.1) + safe_yaml (~> 0.9.0) + diff-lcs (1.2.4) + rspec (2.14.1) + rspec-core (~> 2.14.0) + rspec-expectations (~> 2.14.0) + 
rspec-mocks (~> 2.14.0) + rspec-core (2.14.5) + rspec-expectations (2.14.2) + diff-lcs (>= 1.1.3, < 2.0) + rspec-mocks (2.14.3) + safe_yaml (0.9.5) + webmock (1.13.0) + addressable (>= 2.2.7) + crack (>= 0.3.2) + +PLATFORMS + ruby + +DEPENDENCIES + rspec + webmock diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb new file mode 100644 index 0000000..1f86518 --- /dev/null +++ b/lib/shakespeare_analyzer.rb @@ -0,0 +1,15 @@ +require 'net/http' + +class ShakespeareAnalyzer + attr_accessor :file_content + + def initialize(uri) + @file_content = get_content_from_uri(uri) + end + + private + + def get_content_from_uri(uri) + Net::HTTP.get_response(URI.parse(uri)).body + end +end diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb new file mode 100644 index 0000000..3b5000f --- /dev/null +++ b/spec/shakespeare_analyzer_spec.rb @@ -0,0 +1,12 @@ +require 'spec_helper' +require 'shakespeare_analyzer' + +describe ShakespeareAnalyzer do + describe '#initialize' do + it 'reads provided URI and stores its content to @file_content' do + stub_request(:get, 'http://www.example.com/test_file.txt').to_return(body: 'This is just a test file!') + analyzer = ShakespeareAnalyzer.new('http://www.example.com/test_file.txt') + expect(analyzer.file_content).to eq 'This is just a test file!' + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 0000000..1926339 --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1 @@ +require 'webmock/rspec' diff --git a/spec/test_files/test_file.txt b/spec/test_files/test_file.txt new file mode 100644 index 0000000..9629995 --- /dev/null +++ b/spec/test_files/test_file.txt @@ -0,0 +1 @@ +This is just a test file! 
From 8bf4aac2890d873404e7d63d39ddd74da1f23b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Thu, 12 Sep 2013 16:10:22 +0200 Subject: [PATCH 03/21] Add XmlParser and possibility to count lines by speaker --- Gemfile | 1 + Gemfile.lock | 4 ++++ lib/xml_parser.rb | 11 +++++++++++ spec/test_files/test.xml | 36 +++++++++++++++++++++++++++++++++++ spec/test_files/test_file.txt | 1 - spec/xml_parser_spec.rb | 14 ++++++++++++++ 6 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 lib/xml_parser.rb create mode 100644 spec/test_files/test.xml delete mode 100644 spec/test_files/test_file.txt create mode 100644 spec/xml_parser_spec.rb diff --git a/Gemfile b/Gemfile index 770e30c..4bcd54c 100644 --- a/Gemfile +++ b/Gemfile @@ -3,3 +3,4 @@ ruby '2.0.0' gem 'rspec' gem 'webmock' +gem 'nokogiri' diff --git a/Gemfile.lock b/Gemfile.lock index 530ebff..b84ea02 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -5,6 +5,9 @@ GEM crack (0.4.1) safe_yaml (~> 0.9.0) diff-lcs (1.2.4) + mini_portile (0.5.1) + nokogiri (1.6.0) + mini_portile (~> 0.5.0) rspec (2.14.1) rspec-core (~> 2.14.0) rspec-expectations (~> 2.14.0) @@ -22,5 +25,6 @@ PLATFORMS ruby DEPENDENCIES + nokogiri rspec webmock diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb new file mode 100644 index 0000000..ebd8e32 --- /dev/null +++ b/lib/xml_parser.rb @@ -0,0 +1,11 @@ +require 'nokogiri' + +class XmlParser + def initialize(xml) + @doc = Nokogiri.XML(xml) + end + + def lines_by_speaker(speaker) + @doc.css("PLAY SPEAKER:contains('#{speaker}') ~ LINE").count + end +end diff --git a/spec/test_files/test.xml b/spec/test_files/test.xml new file mode 100644 index 0000000..f9d4cba --- /dev/null +++ b/spec/test_files/test.xml @@ -0,0 +1,36 @@ + + + + + + + + OneLiner + Nothing more to say. + + + TwoLiner + Hello! + World! + + + ThreeLiners + One + + + + + + + ThreeLiner + Two + + + + + ThreeLiner + Three! 
+ + + + diff --git a/spec/test_files/test_file.txt b/spec/test_files/test_file.txt deleted file mode 100644 index 9629995..0000000 --- a/spec/test_files/test_file.txt +++ /dev/null @@ -1 +0,0 @@ -This is just a test file! diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb new file mode 100644 index 0000000..b3a5355 --- /dev/null +++ b/spec/xml_parser_spec.rb @@ -0,0 +1,14 @@ +require 'spec_helper' +require 'xml_parser' + +describe XmlParser do + describe '#lines_by_speaker' do + it 'it returns lines count by speaker' do + filename = File.dirname(__FILE__) + '/test_files/test.xml' + xml_parser = XmlParser.new(File.read(filename)) + expect(xml_parser.lines_by_speaker('OneLiner')).to be 1 + expect(xml_parser.lines_by_speaker('TwoLiner')).to be 2 + expect(xml_parser.lines_by_speaker('ThreeLiner')).to be 3 + end + end +end From 30afc47071443cd104240c31617146ad740696bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Thu, 12 Sep 2013 16:27:34 +0200 Subject: [PATCH 04/21] Add possibility to retrieve all speakers * with speaker method in XmlParser object --- lib/xml_parser.rb | 8 ++++++++ spec/test_files/test.xml | 9 +++++++++ spec/xml_parser_spec.rb | 8 ++++++++ 3 files changed, 25 insertions(+) diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb index ebd8e32..59f66ce 100644 --- a/lib/xml_parser.rb +++ b/lib/xml_parser.rb @@ -8,4 +8,12 @@ def initialize(xml) def lines_by_speaker(speaker) @doc.css("PLAY SPEAKER:contains('#{speaker}') ~ LINE").count end + + def speakers + speakers = [] + @doc.css('PLAY PERSONAE PERSONA').each do |speaker| + speakers << speaker.text + end + speakers + end end diff --git a/spec/test_files/test.xml b/spec/test_files/test.xml index f9d4cba..5812acb 100644 --- a/spec/test_files/test.xml +++ b/spec/test_files/test.xml @@ -2,6 +2,15 @@ + + Speakers + OneLiner + + TwoLiner + ThreeLiner + More Liners + + diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index b3a5355..4b61d52 100644 --- 
a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -11,4 +11,12 @@ expect(xml_parser.lines_by_speaker('ThreeLiner')).to be 3 end end + + describe '#speakers' do + it 'returs all speakers in play' do + filename = File.dirname(__FILE__) + '/test_files/test.xml' + xml_parser = XmlParser.new(File.read(filename)) + expect(xml_parser.speakers).to eq ['OneLiner', 'TwoLiner', 'ThreeLiner'] + end + end end From ef5eecefd4bbdf0fb2a159901496eeea2557dd08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 10:28:34 +0200 Subject: [PATCH 05/21] ShakespeareAnalyzer can print correct output parsed from XML file --- lib/shakespeare_analyzer.rb | 16 ++++++++++++++++ spec/shakespeare_analyzer_spec.rb | 21 +++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index 1f86518..09b08b1 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -1,4 +1,5 @@ require 'net/http' +require 'xml_parser' class ShakespeareAnalyzer attr_accessor :file_content @@ -7,9 +8,24 @@ def initialize(uri) @file_content = get_content_from_uri(uri) end + def print_speakers_sorted_by_line_count + speakers_sorted_by_line_count.each do |speaker, lines| + puts "#{lines} #{speaker}" + end + end + private def get_content_from_uri(uri) Net::HTTP.get_response(URI.parse(uri)).body end + + def speakers_sorted_by_line_count + speakers = {} + xml_parser = XmlParser.new(@file_content) + xml_parser.speakers.each do |speaker| + speakers[speaker] = xml_parser.lines_by_speaker(speaker) + end + speakers.sort_by { |_key, value| value }.reverse + end end diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb index 3b5000f..3a93c1a 100644 --- a/spec/shakespeare_analyzer_spec.rb +++ b/spec/shakespeare_analyzer_spec.rb @@ -1,5 +1,6 @@ require 'spec_helper' require 'shakespeare_analyzer' +require 'stringio' describe ShakespeareAnalyzer do describe '#initialize' do @@ -9,4 
+10,24 @@ expect(analyzer.file_content).to eq 'This is just a test file!' end end + + describe '#print_speakers_sorted_by_line_count' do + it 'prints list of speakers sorted by line count' do + test_file = File.dirname(__FILE__) + '/test_files/test.xml' + stub_request(:get, 'http://www.example.com/test.xml').to_return(body: File.read(test_file)) + analyzer = ShakespeareAnalyzer.new('http://www.example.com/test.xml') + output = capture_stdout { analyzer.print_speakers_sorted_by_line_count } + expect(output).to eq "3 ThreeLiner\n2 TwoLiner\n1 OneLiner\n" + end + end +end + +def capture_stdout &block + old_stdout = $stdout + fake_stdout = StringIO.new + $stdout = fake_stdout + block.call + fake_stdout.string +ensure + $stdout = old_stdout end From 10e57b9bb243adeb1f4d54eb38e550ad13adcaef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 12:47:34 +0200 Subject: [PATCH 06/21] Replace net/http with open-uri and add proxy support --- lib/shakespeare_analyzer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index 09b08b1..3d23bdc 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -1,5 +1,5 @@ -require 'net/http' require 'xml_parser' +require 'open-uri' class ShakespeareAnalyzer attr_accessor :file_content @@ -17,7 +17,7 @@ def print_speakers_sorted_by_line_count private def get_content_from_uri(uri) - Net::HTTP.get_response(URI.parse(uri)).body + open(uri, proxy: ENV['http_proxy']).read end def speakers_sorted_by_line_count From 22fd85a401265e897cc29dd31c3595493f48a568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 13:36:09 +0200 Subject: [PATCH 07/21] Change method how to find all speakers Declared PERSONAS don't match SPEAKER in SPEECHEs, for example Three Witches. vs. First Witch ... 
whole xml: http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml --- lib/xml_parser.rb | 4 ++-- spec/test_files/test.xml | 11 +---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb index 59f66ce..9234c83 100644 --- a/lib/xml_parser.rb +++ b/lib/xml_parser.rb @@ -11,9 +11,9 @@ def lines_by_speaker(speaker) def speakers speakers = [] - @doc.css('PLAY PERSONAE PERSONA').each do |speaker| + @doc.css('PLAY SPEAKER').each do |speaker| speakers << speaker.text end - speakers + speakers.uniq end end diff --git a/spec/test_files/test.xml b/spec/test_files/test.xml index 5812acb..8709cb8 100644 --- a/spec/test_files/test.xml +++ b/spec/test_files/test.xml @@ -2,15 +2,6 @@ - - Speakers - OneLiner - - TwoLiner - ThreeLiner - More Liners - - @@ -23,7 +14,7 @@ World! - ThreeLiners + ThreeLiner One From 4de95a0b9413b294611291697e8702b1f389d145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 13:58:46 +0200 Subject: [PATCH 08/21] shakespeare_analyzer.rb can be now run as script * run it with `ruby lib/shakespeare_analyzer.rb` --- lib/shakespeare_analyzer.rb | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index 3d23bdc..8c3394f 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -1,17 +1,12 @@ -require 'xml_parser' require 'open-uri' +require_relative 'xml_parser' class ShakespeareAnalyzer attr_accessor :file_content def initialize(uri) - @file_content = get_content_from_uri(uri) - end - - def print_speakers_sorted_by_line_count - speakers_sorted_by_line_count.each do |speaker, lines| - puts "#{lines} #{speaker}" - end + @file_content = get_content_from_uri(uri) + print_speakers_sorted_by_line_count end private @@ -28,4 +23,14 @@ def speakers_sorted_by_line_count end speakers.sort_by { |_key, value| value }.reverse end + + def 
print_speakers_sorted_by_line_count + speakers_sorted_by_line_count.each do |speaker, lines| + puts "#{lines} #{speaker}" + end + end +end + +if __FILE__ == $0 + ShakespeareAnalyzer.new('http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml') end From c992144c3e5bde5bf099173ffb770fc1e11f96ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 14:04:42 +0200 Subject: [PATCH 09/21] Add forgotten file from previous commit --- spec/shakespeare_analyzer_spec.rb | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb index 3a93c1a..d2fddce 100644 --- a/spec/shakespeare_analyzer_spec.rb +++ b/spec/shakespeare_analyzer_spec.rb @@ -9,14 +9,11 @@ analyzer = ShakespeareAnalyzer.new('http://www.example.com/test_file.txt') expect(analyzer.file_content).to eq 'This is just a test file!' end - end - describe '#print_speakers_sorted_by_line_count' do it 'prints list of speakers sorted by line count' do test_file = File.dirname(__FILE__) + '/test_files/test.xml' stub_request(:get, 'http://www.example.com/test.xml').to_return(body: File.read(test_file)) - analyzer = ShakespeareAnalyzer.new('http://www.example.com/test.xml') - output = capture_stdout { analyzer.print_speakers_sorted_by_line_count } + output = capture_stdout { ShakespeareAnalyzer.new('http://www.example.com/test.xml') } expect(output).to eq "3 ThreeLiner\n2 TwoLiner\n1 OneLiner\n" end end From e123d20343d936ebf2cfb554acb8a673fbfa1d12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 14:08:05 +0200 Subject: [PATCH 10/21] Add script output to repository Analyzed file: http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml --- output.txt | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 output.txt diff --git a/output.txt b/output.txt new file mode 100644 index 0000000..f26c8a5 --- /dev/null +++ b/output.txt @@ 
-0,0 +1,41 @@ +984 MACBETH +265 LADY MACBETH +221 MACDUFF +212 MALCOLM +135 ROSS +113 BANQUO +74 LENNOX +70 DUNCAN +62 First Witch +46 Porter +45 Doctor +41 LADY MACDUFF +39 HECATE +37 SIWARD +35 Sergeant +30 First Murderer +27 Third Witch +27 Second Witch +24 ALL +24 Lord +23 Gentlewoman +23 Messenger +21 ANGUS +20 Son +15 Second Murderer +12 MENTEITH +11 Old Man +11 CAITHNESS +10 DONALBAIN +8 Third Murderer +7 YOUNG SIWARD +5 Third Apparition +5 Servant +5 SEYTON +4 Second Apparition +3 Lords +2 Both Murderers +2 First Apparition +2 FLEANCE +1 Soldiers +1 ATTENDANT From 71030a360b7b969c3e4dc261b52468f31c337ada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 16:58:21 +0200 Subject: [PATCH 11/21] Extract xml_parser method --- lib/shakespeare_analyzer.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index 8c3394f..cfc6636 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -17,13 +17,16 @@ def get_content_from_uri(uri) def speakers_sorted_by_line_count speakers = {} - xml_parser = XmlParser.new(@file_content) xml_parser.speakers.each do |speaker| speakers[speaker] = xml_parser.lines_by_speaker(speaker) end speakers.sort_by { |_key, value| value }.reverse end + def xml_parser + XmlParser.new(@file_content) + end + def print_speakers_sorted_by_line_count speakers_sorted_by_line_count.each do |speaker, lines| puts "#{lines} #{speaker}" From 711de4cd221b00fe9660eb6a694712e072c9c289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 17:00:08 +0200 Subject: [PATCH 12/21] Remove baseless attr_accessor for @file_content --- lib/shakespeare_analyzer.rb | 2 -- spec/shakespeare_analyzer_spec.rb | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index cfc6636..f1b9e20 100644 --- a/lib/shakespeare_analyzer.rb +++ 
b/lib/shakespeare_analyzer.rb @@ -2,8 +2,6 @@ require_relative 'xml_parser' class ShakespeareAnalyzer - attr_accessor :file_content - def initialize(uri) @file_content = get_content_from_uri(uri) print_speakers_sorted_by_line_count diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb index d2fddce..f5d9264 100644 --- a/spec/shakespeare_analyzer_spec.rb +++ b/spec/shakespeare_analyzer_spec.rb @@ -7,7 +7,7 @@ it 'reads provided URI and stores its content to @file_content' do stub_request(:get, 'http://www.example.com/test_file.txt').to_return(body: 'This is just a test file!') analyzer = ShakespeareAnalyzer.new('http://www.example.com/test_file.txt') - expect(analyzer.file_content).to eq 'This is just a test file!' + expect(analyzer.instance_variable_get(:@file_content)).to eq 'This is just a test file!' end it 'prints list of speakers sorted by line count' do From 3558e6e9bd20c211f5ca120492498b92dda4890d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 17:02:03 +0200 Subject: [PATCH 13/21] Add xml_parser caching --- lib/shakespeare_analyzer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index f1b9e20..531bb44 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -22,7 +22,7 @@ def speakers_sorted_by_line_count end def xml_parser - XmlParser.new(@file_content) + @_xml_parser ||= XmlParser.new(@file_content) end def print_speakers_sorted_by_line_count From ec3c3b47e099ccc2b5e009965b0c29f376785dd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 17:13:53 +0200 Subject: [PATCH 14/21] Refactor XmlParser * refactor `speakers` method to one line * rename `lines_by_speaker` to `count_lines_by_speaker` --- lib/shakespeare_analyzer.rb | 2 +- lib/xml_parser.rb | 8 ++------ spec/xml_parser_spec.rb | 8 ++++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git 
a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index 531bb44..e03920f 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -16,7 +16,7 @@ def get_content_from_uri(uri) def speakers_sorted_by_line_count speakers = {} xml_parser.speakers.each do |speaker| - speakers[speaker] = xml_parser.lines_by_speaker(speaker) + speakers[speaker] = xml_parser.count_lines_by_speaker(speaker) end speakers.sort_by { |_key, value| value }.reverse end diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb index 9234c83..24e6874 100644 --- a/lib/xml_parser.rb +++ b/lib/xml_parser.rb @@ -5,15 +5,11 @@ def initialize(xml) @doc = Nokogiri.XML(xml) end - def lines_by_speaker(speaker) + def count_lines_by_speaker(speaker) @doc.css("PLAY SPEAKER:contains('#{speaker}') ~ LINE").count end def speakers - speakers = [] - @doc.css('PLAY SPEAKER').each do |speaker| - speakers << speaker.text - end - speakers.uniq + @doc.css('PLAY SPEAKER').map { |speaker| speaker.text }.uniq end end diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index 4b61d52..0f232eb 100644 --- a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -2,13 +2,13 @@ require 'xml_parser' describe XmlParser do - describe '#lines_by_speaker' do + describe '#count_lines_by_speaker' do it 'it returns lines count by speaker' do filename = File.dirname(__FILE__) + '/test_files/test.xml' xml_parser = XmlParser.new(File.read(filename)) - expect(xml_parser.lines_by_speaker('OneLiner')).to be 1 - expect(xml_parser.lines_by_speaker('TwoLiner')).to be 2 - expect(xml_parser.lines_by_speaker('ThreeLiner')).to be 3 + expect(xml_parser.count_lines_by_speaker('OneLiner')).to be 1 + expect(xml_parser.count_lines_by_speaker('TwoLiner')).to be 2 + expect(xml_parser.count_lines_by_speaker('ThreeLiner')).to be 3 end end From 8f4895fea6bbf8bc35a8a827dd278860893ed2da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 20:58:09 +0200 Subject: [PATCH 15/21] Add speeches with 
more speakers to XML test file --- spec/shakespeare_analyzer_spec.rb | 2 +- spec/test_files/test.xml | 6 ++++++ spec/xml_parser_spec.rb | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb index f5d9264..bd1f081 100644 --- a/spec/shakespeare_analyzer_spec.rb +++ b/spec/shakespeare_analyzer_spec.rb @@ -14,7 +14,7 @@ test_file = File.dirname(__FILE__) + '/test_files/test.xml' stub_request(:get, 'http://www.example.com/test.xml').to_return(body: File.read(test_file)) output = capture_stdout { ShakespeareAnalyzer.new('http://www.example.com/test.xml') } - expect(output).to eq "3 ThreeLiner\n2 TwoLiner\n1 OneLiner\n" + expect(output).to eq "4 FourLiner\n3 ThreeLiner\n2 TwoLiner\n1 OneLiner\n" end end end diff --git a/spec/test_files/test.xml b/spec/test_files/test.xml index 8709cb8..efc42c6 100644 --- a/spec/test_files/test.xml +++ b/spec/test_files/test.xml @@ -10,6 +10,7 @@ TwoLiner + FourLiner Hello! World! @@ -27,8 +28,13 @@ + ThreeLiner + FourLiner + Three! + + FourLiner Three! diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index 0f232eb..16cd5db 100644 --- a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -16,7 +16,7 @@ it 'returs all speakers in play' do filename = File.dirname(__FILE__) + '/test_files/test.xml' xml_parser = XmlParser.new(File.read(filename)) - expect(xml_parser.speakers).to eq ['OneLiner', 'TwoLiner', 'ThreeLiner'] + expect(xml_parser.speakers).to eq ['OneLiner', 'TwoLiner', 'FourLiner', 'ThreeLiner'] end end end From 87bbbf57abd88054a410d7a5f1ff72a92fa37242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 13 Sep 2013 21:10:06 +0200 Subject: [PATCH 16/21] Extract `speakers` method in `ShakespeareAnalyzer` Almost all methods are now one line long. 
--- lib/shakespeare_analyzer.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index e03920f..0ec40e5 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -13,22 +13,22 @@ def get_content_from_uri(uri) open(uri, proxy: ENV['http_proxy']).read end - def speakers_sorted_by_line_count - speakers = {} - xml_parser.speakers.each do |speaker| - speakers[speaker] = xml_parser.count_lines_by_speaker(speaker) + def print_speakers_sorted_by_line_count + speakers_sorted_by_line_count.each do |speaker, lines| + puts "#{lines} #{speaker}" end + end + + def speakers_sorted_by_line_count speakers.sort_by { |_key, value| value }.reverse end - def xml_parser - @_xml_parser ||= XmlParser.new(@file_content) + def speakers + Hash[*xml_parser.speakers.map { |s| [s, xml_parser.count_lines_by_speaker(s)] }.flatten] end - def print_speakers_sorted_by_line_count - speakers_sorted_by_line_count.each do |speaker, lines| - puts "#{lines} #{speaker}" - end + def xml_parser + @_xml_parser ||= XmlParser.new(@file_content) end end From b8c402561e9b4fc25e0cc246b659b9f63a7cecad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Sun, 15 Sep 2013 20:39:47 +0200 Subject: [PATCH 17/21] Fix bug in `count_lines_by_speaker` method `count_lines_by_speaker` is now looking for exact match in speaker's name. Example XML: A Line! AAA Line! 
before: count_lines_by_speaker('A') == 2 now: count_lines_by_speaker('A') == 1 --- lib/xml_parser.rb | 2 +- spec/shakespeare_analyzer_spec.rb | 2 +- spec/test_files/test.xml | 2 +- spec/xml_parser_spec.rb | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb index 24e6874..6c7f2ba 100644 --- a/lib/xml_parser.rb +++ b/lib/xml_parser.rb @@ -6,7 +6,7 @@ def initialize(xml) end def count_lines_by_speaker(speaker) - @doc.css("PLAY SPEAKER:contains('#{speaker}') ~ LINE").count + @doc.css("PLAY SPEECH:has(SPEAKER[text()='#{speaker}']) LINE").count end def speakers diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb index bd1f081..6a465cc 100644 --- a/spec/shakespeare_analyzer_spec.rb +++ b/spec/shakespeare_analyzer_spec.rb @@ -14,7 +14,7 @@ test_file = File.dirname(__FILE__) + '/test_files/test.xml' stub_request(:get, 'http://www.example.com/test.xml').to_return(body: File.read(test_file)) output = capture_stdout { ShakespeareAnalyzer.new('http://www.example.com/test.xml') } - expect(output).to eq "4 FourLiner\n3 ThreeLiner\n2 TwoLiner\n1 OneLiner\n" + expect(output).to eq "4 FourLiner\n3 ThreeLiner\n2 TwoLiner\n1 Liner\n" end end end diff --git a/spec/test_files/test.xml b/spec/test_files/test.xml index efc42c6..d909453 100644 --- a/spec/test_files/test.xml +++ b/spec/test_files/test.xml @@ -5,7 +5,7 @@ - OneLiner + Liner Nothing more to say. 
diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index 16cd5db..0fd1f6c 100644 --- a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -6,7 +6,7 @@ it 'it returns lines count by speaker' do filename = File.dirname(__FILE__) + '/test_files/test.xml' xml_parser = XmlParser.new(File.read(filename)) - expect(xml_parser.count_lines_by_speaker('OneLiner')).to be 1 + expect(xml_parser.count_lines_by_speaker('Liner')).to be 1 expect(xml_parser.count_lines_by_speaker('TwoLiner')).to be 2 expect(xml_parser.count_lines_by_speaker('ThreeLiner')).to be 3 end @@ -16,7 +16,7 @@ it 'returs all speakers in play' do filename = File.dirname(__FILE__) + '/test_files/test.xml' xml_parser = XmlParser.new(File.read(filename)) - expect(xml_parser.speakers).to eq ['OneLiner', 'TwoLiner', 'FourLiner', 'ThreeLiner'] + expect(xml_parser.speakers).to eq ['Liner', 'TwoLiner', 'FourLiner', 'ThreeLiner'] end end end From c544b4ffb89b69e60f8cb9cd5211650ddec5084b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Sun, 15 Sep 2013 20:44:34 +0200 Subject: [PATCH 18/21] Update output.txt (for MACBETH) --- output.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/output.txt b/output.txt index f26c8a5..ea84e66 100644 --- a/output.txt +++ b/output.txt @@ -1,7 +1,7 @@ -984 MACBETH +719 MACBETH 265 LADY MACBETH -221 MACDUFF 212 MALCOLM +180 MACDUFF 135 ROSS 113 BANQUO 74 LENNOX @@ -11,15 +11,15 @@ 45 Doctor 41 LADY MACDUFF 39 HECATE -37 SIWARD 35 Sergeant 30 First Murderer +30 SIWARD 27 Third Witch 27 Second Witch 24 ALL -24 Lord 23 Gentlewoman 23 Messenger +21 Lord 21 ANGUS 20 Son 15 Second Murderer @@ -30,12 +30,12 @@ 8 Third Murderer 7 YOUNG SIWARD 5 Third Apparition -5 Servant 5 SEYTON +5 Servant 4 Second Apparition 3 Lords -2 Both Murderers 2 First Apparition 2 FLEANCE -1 Soldiers +2 Both Murderers 1 ATTENDANT +1 Soldiers From 387ca94deb85b4460d2d67987e6a5c6b39bac8d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= 
Date: Fri, 20 Sep 2013 20:39:33 +0200 Subject: [PATCH 19/21] Extract part for running as script from lib/ to bin/ --- bin/shakespeare_analyzer.rb | 8 ++++++++ lib/shakespeare_analyzer.rb | 7 +++---- spec/shakespeare_analyzer_spec.rb | 5 ++++- 3 files changed, 15 insertions(+), 5 deletions(-) create mode 100755 bin/shakespeare_analyzer.rb diff --git a/bin/shakespeare_analyzer.rb b/bin/shakespeare_analyzer.rb new file mode 100755 index 0000000..6634f27 --- /dev/null +++ b/bin/shakespeare_analyzer.rb @@ -0,0 +1,8 @@ +#!/usr/bin/env ruby + +require_relative '../lib/shakespeare_analyzer.rb' + +DEFAULT_XML_URL = 'http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml' + +analyzer = ShakespeareAnalyzer.new(DEFAULT_XML_URL) +analyzer.run diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index 0ec40e5..fad9603 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -4,6 +4,9 @@ class ShakespeareAnalyzer def initialize(uri) @file_content = get_content_from_uri(uri) + end + + def run print_speakers_sorted_by_line_count end @@ -31,7 +34,3 @@ def xml_parser @_xml_parser ||= XmlParser.new(@file_content) end end - -if __FILE__ == $0 - ShakespeareAnalyzer.new('http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml') -end diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb index 6a465cc..072d6f4 100644 --- a/spec/shakespeare_analyzer_spec.rb +++ b/spec/shakespeare_analyzer_spec.rb @@ -9,11 +9,14 @@ analyzer = ShakespeareAnalyzer.new('http://www.example.com/test_file.txt') expect(analyzer.instance_variable_get(:@file_content)).to eq 'This is just a test file!' 
end + end + describe '#run' do it 'prints list of speakers sorted by line count' do test_file = File.dirname(__FILE__) + '/test_files/test.xml' stub_request(:get, 'http://www.example.com/test.xml').to_return(body: File.read(test_file)) - output = capture_stdout { ShakespeareAnalyzer.new('http://www.example.com/test.xml') } + analyzer = ShakespeareAnalyzer.new('http://www.example.com/test.xml') + output = capture_stdout { analyzer.run } expect(output).to eq "4 FourLiner\n3 ThreeLiner\n2 TwoLiner\n1 Liner\n" end end From e25e5a64643b52febbd9e21677acf70e62ef6444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 20 Sep 2013 21:13:55 +0200 Subject: [PATCH 20/21] Move logic from shakespeare_analyzer class to xml_parser class --- lib/shakespeare_analyzer.rb | 10 +--------- lib/xml_parser.rb | 14 ++++++++++++-- spec/xml_parser_spec.rb | 16 +++------------- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb index fad9603..87145b8 100644 --- a/lib/shakespeare_analyzer.rb +++ b/lib/shakespeare_analyzer.rb @@ -17,19 +17,11 @@ def get_content_from_uri(uri) end def print_speakers_sorted_by_line_count - speakers_sorted_by_line_count.each do |speaker, lines| + xml_parser.speakers_sorted_by_line_count.each do |speaker, lines| puts "#{lines} #{speaker}" end end - def speakers_sorted_by_line_count - speakers.sort_by { |_key, value| value }.reverse - end - - def speakers - Hash[*xml_parser.speakers.map { |s| [s, xml_parser.count_lines_by_speaker(s)] }.flatten] - end - def xml_parser @_xml_parser ||= XmlParser.new(@file_content) end diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb index 6c7f2ba..f6453c7 100644 --- a/lib/xml_parser.rb +++ b/lib/xml_parser.rb @@ -5,11 +5,21 @@ def initialize(xml) @doc = Nokogiri.XML(xml) end - def count_lines_by_speaker(speaker) - @doc.css("PLAY SPEECH:has(SPEAKER[text()='#{speaker}']) LINE").count + def speakers_sorted_by_line_count + 
speakers_with_line_count.sort_by { |_key, value| value }.reverse + end + + private + + def speakers_with_line_count + Hash[*speakers.map { |speaker| [speaker, count_lines_by_speaker(speaker)] }.flatten] end def speakers @doc.css('PLAY SPEAKER').map { |speaker| speaker.text }.uniq end + + def count_lines_by_speaker(speaker) + @doc.css("PLAY SPEECH:has(SPEAKER[text()='#{speaker}']) LINE").count + end end diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index 0fd1f6c..88009e6 100644 --- a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -2,21 +2,11 @@ require 'xml_parser' describe XmlParser do - describe '#count_lines_by_speaker' do - it 'it returns lines count by speaker' do + describe '#speakers_sorted_by_line_count' do + it 'returns hash of speakers with line count sorted by line count' do filename = File.dirname(__FILE__) + '/test_files/test.xml' xml_parser = XmlParser.new(File.read(filename)) - expect(xml_parser.count_lines_by_speaker('Liner')).to be 1 - expect(xml_parser.count_lines_by_speaker('TwoLiner')).to be 2 - expect(xml_parser.count_lines_by_speaker('ThreeLiner')).to be 3 - end - end - - describe '#speakers' do - it 'returs all speakers in play' do - filename = File.dirname(__FILE__) + '/test_files/test.xml' - xml_parser = XmlParser.new(File.read(filename)) - expect(xml_parser.speakers).to eq ['Liner', 'TwoLiner', 'FourLiner', 'ThreeLiner'] + expect(xml_parser.speakers_sorted_by_line_count.to_a).to eq({'FourLiner' => 4, 'ThreeLiner' => 3, 'TwoLiner' => 2, 'Liner' => 1}.to_a) end end end From b811d05a08f1b423f72f951e28c02b0d3b0bec0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20B=C3=B3na?= Date: Fri, 20 Sep 2013 21:29:57 +0200 Subject: [PATCH 21/21] Add missing test for XmlParser#initialize --- spec/xml_parser_spec.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index 88009e6..81248dd 100644 --- a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -2,6 +2,13 @@ 
require 'xml_parser' describe XmlParser do + describe '#initialize' do + it 'instantiates @doc with Nokogiri XML parser' do + doc = XmlParser.new(nil).instance_variable_get(:@doc) + expect(doc.instance_of?(Nokogiri::XML::Document)).to be true + end + end + describe '#speakers_sorted_by_line_count' do it 'returns hash of speakers with line count sorted by line count' do filename = File.dirname(__FILE__) + '/test_files/test.xml'