From 77971d8f72a1a1c8cd23cf10ce434f4c424b5b06 Mon Sep 17 00:00:00 2001 From: Yang Chung Date: Thu, 27 Feb 2014 16:46:28 -0800 Subject: [PATCH 1/5] initial commit --- Gemfile | 2 + Gemfile.lock | 19 ++++++ lib/shakespeare_analyzer.rb | 22 ++++++ lib/xml_parser.rb | 26 +++++++ spec/fixtures/sample.xml | 110 ++++++++++++++++++++++++++++++ spec/shakespeare_analyzer_spec.rb | 33 +++++++++ spec/xml_parser_spec.rb | 49 +++++++++++++ 7 files changed, 261 insertions(+) create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 lib/shakespeare_analyzer.rb create mode 100644 lib/xml_parser.rb create mode 100644 spec/fixtures/sample.xml create mode 100644 spec/shakespeare_analyzer_spec.rb create mode 100644 spec/xml_parser_spec.rb diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..c8a1447 --- /dev/null +++ b/Gemfile @@ -0,0 +1,2 @@ +gem 'nokogiri' +gem 'webmock' \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..94c53b9 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,19 @@ +GEM + specs: + addressable (2.3.5) + crack (0.4.1) + safe_yaml (~> 0.9.0) + mini_portile (0.5.2) + nokogiri (1.6.1) + mini_portile (~> 0.5.0) + safe_yaml (0.9.7) + webmock (1.11.0) + addressable (>= 2.2.7) + crack (>= 0.3.2) + +PLATFORMS + ruby + +DEPENDENCIES + nokogiri + webmock diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb new file mode 100644 index 0000000..5a8d9e5 --- /dev/null +++ b/lib/shakespeare_analyzer.rb @@ -0,0 +1,22 @@ +require 'xml_parser' + +class ShakespeareAnalyzer + + attr_reader :xml_doc + REMOTE_XML_URL = "http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml" + + def initialize + @xml_doc = XmlParser.new(REMOTE_XML_URL) + end + + def parse_speeches + @xml_doc.parse_speeches + end + + def print_speech_counts + @xml_doc.speech_count.each do |name, count| + puts "#{count} #{name}" + end + end + +end \ No newline at end of file diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb new file mode 100644 index 0000000..56482cb --- /dev/null +++ b/lib/xml_parser.rb @@ -0,0 +1,26 @@ +require 'nokogiri' +require 'open-uri' + +class XmlParser + + attr_reader :doc, :speech_count + + def initialize(url_or_path) + @doc = Nokogiri::XML(open(url_or_path)) + @speech_count = {} + end + + def count_lines(element) + node_set = element.xpath(".//LINE") + node_set.count + end + + def parse_speeches + @doc.xpath("//SPEECH").each do |speech| + speaker = speech.xpath(".//SPEAKER").text + @speech_count[speaker] ||= 0 + @speech_count[speaker] += count_lines(speech) + end + end + +end \ No newline at end of file diff --git a/spec/fixtures/sample.xml b/spec/fixtures/sample.xml new file mode 100644 index 0000000..2808851 --- /dev/null +++ b/spec/fixtures/sample.xml @@ -0,0 +1,110 @@ + +The Tragedy of Macbeth + +

+Text placed in the public domain by Moby Lexical Tools, 1992. +

+

SGML markup by Jon Bosak, 1992-1994.

+

XML version by Jon Bosak, 1996-1998.

+

+This work may be freely copied and distributed worldwide. +

+
+ +Dramatis Personae +DUNCAN, king of Scotland. + +MALCOLM +DONALBAIN +his sons. + + +MACBETH +BANQUO +generals of the king's army. + + +MACDUFF +LENNOX +ROSS +MENTEITH +ANGUS +CAITHNESS +noblemen of Scotland. + +FLEANCE, son to Banquo. + +SIWARD, Earl of Northumberland, general of the English forces. + +YOUNG SIWARD, his son. +SEYTON, an officer attending on Macbeth. +Boy, son to Macduff. +An English Doctor. +A Scotch Doctor. +A Soldier. +A Porter. +An Old Man. +LADY MACBETH +LADY MACDUFF +Gentlewoman attending on Lady Macbeth. +HECATE +Three Witches. +Apparitions. + +Lords, Gentlemen, Officers, Soldiers, Murderers, Attendants, and Messengers. + + +SCENE Scotland: England. +MACBETH + +ACT I + +SCENE I. A desert place. +Thunder and lightning. Enter three Witches + +First Witch +When shall we three meet again +In thunder, lightning, or in rain? + + +Second Witch +When the hurlyburly's done, +When the battle's lost and won. + + +Third Witch +That will be ere the set of sun. + + +First Witch +Where the place? + + +Second Witch +Upon the heath. + + +Third Witch +There to meet with Macbeth. + + +First Witch +I come, Graymalkin! + + +Second Witch +Paddock calls. + + +Third Witch +Anon. + + +ALL +Fair is foul, and foul is fair: +Hover through the fog and filthy air. + +Exeunt + + +
\ No newline at end of file diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb new file mode 100644 index 0000000..792db99 --- /dev/null +++ b/spec/shakespeare_analyzer_spec.rb @@ -0,0 +1,33 @@ +require_relative '../lib/shakespeare_analyzer' +require 'rspec' + +describe ShakespeareAnalyzer do + + it { ShakespeareAnalyzer::REMOTE_XML_URL.should_not be_nil } + + describe ".new" do + it "should instantiate with XmlParser instance" do + parser = double(XmlParser) + XmlParser.should_receive(:new).with(ShakespeareAnalyzer::REMOTE_XML_URL).and_return(parser) + sa = ShakespeareAnalyzer.new + sa.xml_doc.should == parser + end + end + + describe "#print_result" do + before do + @parser = double(XmlParser) + XmlParser.stub(:new).and_return(@parser) + @parser.stub(:speech_count).and_return({'Macbeth' => 543, 'Banquo' => 345, 'Duncan' => 220}) + end + + it "should print results" do + STDOUT.should_receive(:puts).with("543 Macbeth") + STDOUT.should_receive(:puts).with("345 Banquo") + STDOUT.should_receive(:puts).with("220 Duncan") + sa = ShakespeareAnalyzer.new + sa.print_speech_counts + end + end + +end \ No newline at end of file diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb new file mode 100644 index 0000000..e2fceb8 --- /dev/null +++ b/spec/xml_parser_spec.rb @@ -0,0 +1,49 @@ +require_relative '../lib/xml_parser' +require 'rspec' +require 'webmock/rspec' + +describe XmlParser do + + let(:xml_doc) { File.dirname(__FILE__) + '/fixtures/sample.xml' } + + before do + # stub_request(:get, XmlParser::REMOTE_XML_URL).to_return(:body => xml_doc, :status => 200) + end + + describe '.new' do + it "should call Nokogiri::XML open to parse remote XML file" do + parser = XmlParser.new(xml_doc) + parser.doc.should_not be_nil + parser.doc.is_a?(Nokogiri::XML::Document).should be_true + end + + it "should initialize speech_count instance variable with empty hash" do + parser = XmlParser.new(xml_doc) + parser.speech_count.should == {} + end + end + + describe '#count_lines' do + it "should count # of LINE children given the XML element" do + element = double() + node_set = double() + element.should_receive(:xpath).with(".//LINE").and_return(node_set) + node_set.should_receive(:count).and_return(5) + + parser = XmlParser.new(xml_doc) + count = parser.count_lines(element) + count.should == 5 + end + end + + describe '#parse_speeches' do + before do + @parser = XmlParser.new(xml_doc) + end + it "should get nodes with SPEECH name and iterate through them" do + @parser.parse_speeches + @parser.speech_count.should_not == {} + end + end + +end \ No newline at end of file From f5753c632205965346175ee7d23f164b957c0dda Mon Sep 17 00:00:00 2001 From: Yang Chung Date: Thu, 27 Feb 2014 21:52:23 -0800 Subject: [PATCH 2/5] Remove webmock and use only XmlParser class --- Gemfile | 5 ++-- Gemfile.lock | 9 +----- lib/shakespeare_analyzer.rb | 22 --------------- lib/xml_parser.rb | 10 +++++++ spec/shakespeare_analyzer_spec.rb | 33 ---------------------- spec/xml_parser_spec.rb | 47 ++++++++++++++++++++----------- 6 files changed, 45 insertions(+), 81 deletions(-) delete mode 100644 lib/shakespeare_analyzer.rb delete mode 100644 spec/shakespeare_analyzer_spec.rb diff --git a/Gemfile b/Gemfile index c8a1447..a077979 100644 --- a/Gemfile +++ b/Gemfile @@ -1,2 +1,3 @@ -gem 'nokogiri' -gem 'webmock' \ No newline at end of file +source 'https://rubygems.org' + +gem 'nokogiri' \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index 94c53b9..5df21ba 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,19 +1,12 @@ GEM + remote: https://rubygems.org/ specs: - addressable (2.3.5) - crack (0.4.1) - safe_yaml (~> 0.9.0) mini_portile (0.5.2) nokogiri (1.6.1) mini_portile (~> 0.5.0) - safe_yaml (0.9.7) - webmock (1.11.0) - addressable (>= 2.2.7) - crack (>= 0.3.2) PLATFORMS ruby DEPENDENCIES nokogiri - webmock diff --git a/lib/shakespeare_analyzer.rb b/lib/shakespeare_analyzer.rb deleted file mode 100644 index 5a8d9e5..0000000 --- a/lib/shakespeare_analyzer.rb +++ /dev/null @@ -1,22 +0,0 @@ -require 'xml_parser' - -class ShakespeareAnalyzer - - attr_reader :xml_doc - REMOTE_XML_URL = "http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml" - - def initialize - @xml_doc = XmlParser.new(REMOTE_XML_URL) - end - - def parse_speeches - @xml_doc.parse_speeches - end - - def print_speech_counts - @xml_doc.speech_count.each do |name, count| - puts "#{count} #{name}" - end - end - -end \ No newline at end of file diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb index 56482cb..40b17c4 100644 --- a/lib/xml_parser.rb +++ b/lib/xml_parser.rb @@ -23,4 +23,14 @@ def parse_speeches end end + def sort_speech_count + (@speech_count.sort_by {|k,v| v}).reverse + end + + def print_result + sort_speech_count.each do |result| + puts "#{result[1]} #{result[0]}" + end + end + end \ No newline at end of file diff --git a/spec/shakespeare_analyzer_spec.rb b/spec/shakespeare_analyzer_spec.rb deleted file mode 100644 index 792db99..0000000 --- a/spec/shakespeare_analyzer_spec.rb +++ /dev/null @@ -1,33 +0,0 @@ -require_relative '../lib/shakespeare_analyzer' -require 'rspec' - -describe ShakespeareAnalyzer do - - it { ShakespeareAnalyzer::REMOTE_XML_URL.should_not be_nil } - - describe ".new" do - it "should instantiate with XmlParser instance" do - parser = double(XmlParser) - XmlParser.should_receive(:new).with(ShakespeareAnalyzer::REMOTE_XML_URL).and_return(parser) - sa = ShakespeareAnalyzer.new - sa.xml_doc.should == parser - end - end - - describe "#print_result" do - before do - @parser = double(XmlParser) - XmlParser.stub(:new).and_return(@parser) - @parser.stub(:speech_count).and_return({'Macbeth' => 543, 'Banquo' => 345, 'Duncan' => 220}) - end - - it "should print results" do - STDOUT.should_receive(:puts).with("543 Macbeth") - STDOUT.should_receive(:puts).with("345 Banquo") - STDOUT.should_receive(:puts).with("220 Duncan") - sa = ShakespeareAnalyzer.new - sa.print_speech_counts - end - end - -end \ No newline at end of file diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index e2fceb8..67135d2 100644 --- a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -5,21 +5,17 @@ describe XmlParser do let(:xml_doc) { File.dirname(__FILE__) + '/fixtures/sample.xml' } - - before do - # stub_request(:get, XmlParser::REMOTE_XML_URL).to_return(:body => xml_doc, :status => 200) - end describe '.new' do it "should call Nokogiri::XML open to parse remote XML file" do - parser = XmlParser.new(xml_doc) - parser.doc.should_not be_nil - parser.doc.is_a?(Nokogiri::XML::Document).should be_true + parsed = XmlParser.new(xml_doc) + parsed.doc.should_not be_nil + parsed.doc.is_a?(Nokogiri::XML::Document).should be_true end it "should initialize speech_count instance variable with empty hash" do - parser = XmlParser.new(xml_doc) - parser.speech_count.should == {} + parsed = XmlParser.new(xml_doc) + parsed.speech_count.should == {} end end @@ -30,19 +26,38 @@ element.should_receive(:xpath).with(".//LINE").and_return(node_set) node_set.should_receive(:count).and_return(5) - parser = XmlParser.new(xml_doc) - count = parser.count_lines(element) + parsed = XmlParser.new(xml_doc) + count = parsed.count_lines(element) count.should == 5 end end describe '#parse_speeches' do - before do - @parser = XmlParser.new(xml_doc) - end it "should get nodes with SPEECH name and iterate through them" do - @parser.parse_speeches - @parser.speech_count.should_not == {} + parsed = XmlParser.new(xml_doc) + parsed.parse_speeches + parsed.speech_count.should_not == {} + end + end + + describe '#sort_speech_count' do + it "should returned speech_count hash sorted by speech count" do + parsed = XmlParser.new(xml_doc) + parsed.parse_speeches + result = parsed.sort_speech_count + result[0][1].should > result[-1][1] + end + end + + describe "#print_result" do + it "should print results" do + STDOUT.should_receive(:puts).with("543 Macbeth") + STDOUT.should_receive(:puts).with("345 Banquo") + STDOUT.should_receive(:puts).with("220 Duncan") + + parsed = XmlParser.new(xml_doc) + parsed.should_receive(:sort_speech_count).and_return([['Macbeth', 543], ['Banquo', 345], ['Duncan',220]]) + parsed.print_result end end From 18f0542f519b12296a8b6c41ba0498195cfebf53 Mon Sep 17 00:00:00 2001 From: Yang Chung Date: Thu, 27 Feb 2014 21:53:39 -0800 Subject: [PATCH 3/5] Use shakespeare_analyzer.rb to instantiate XmlParser and call print_result --- shakespeare_analyzer.rb | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 shakespeare_analyzer.rb diff --git a/shakespeare_analyzer.rb b/shakespeare_analyzer.rb new file mode 100644 index 0000000..8248eb6 --- /dev/null +++ b/shakespeare_analyzer.rb @@ -0,0 +1,5 @@ +require_relative 'lib/xml_parser' + +xml_doc = XmlParser.new("http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml") +xml_doc.parse_speeches +xml_doc.print_result \ No newline at end of file From fe6746cb32328b679d441860db49d401b8c81ce7 Mon Sep 17 00:00:00 2001 From: Yang Chung Date: Thu, 27 Feb 2014 21:56:06 -0800 Subject: [PATCH 4/5] Add a result file --- result.txt | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 result.txt diff --git a/result.txt b/result.txt new file mode 100644 index 0000000..d7666dd --- /dev/null +++ b/result.txt @@ -0,0 +1,42 @@ +718 MACBETH +265 LADY MACBETH +212 MALCOLM +180 MACDUFF +135 ROSS +113 BANQUO +73 LENNOX +70 DUNCAN +62 First Witch +46 Porter +45 Doctor +41 LADY MACDUFF +39 HECATE +35 Sergeant +30 First Murderer +30 SIWARD +27 Third Witch +27 Second Witch +24 ALL +23 Gentlewoman +23 Messenger +21 Lord +21 ANGUS +20 Son +15 Second Murderer +12 MENTEITH +11 Old Man +11 CAITHNESS +10 DONALBAIN +8 Third Murderer +7 YOUNG SIWARD +5 Third Apparition +5 SEYTON +5 Servant +4 Second Apparition +3 Lords +2 First Apparition +2 FLEANCE +2 Both Murderers +1 ATTENDANT +1 Soldiers +1 MACBETHLENNOX \ No newline at end of file From 832739b34ca48e7cd096e9cbcd6fe7fb4692d87c Mon Sep 17 00:00:00 2001 From: Yang Chung Date: Fri, 28 Feb 2014 00:00:52 -0800 Subject: [PATCH 5/5] Add get_speakers method to return speaker array --- lib/xml_parser.rb | 14 +++++++++----- result.txt | 7 +++---- spec/xml_parser_spec.rb | 12 ++++++++++++ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/lib/xml_parser.rb b/lib/xml_parser.rb index 40b17c4..b69edc7 100644 --- a/lib/xml_parser.rb +++ b/lib/xml_parser.rb @@ -11,15 +11,19 @@ def initialize(url_or_path) end def count_lines(element) - node_set = element.xpath(".//LINE") - node_set.count + element.xpath(".//LINE").count + end + + def get_speakers(element) + element.xpath(".//SPEAKER").map(&:text) end def parse_speeches @doc.xpath("//SPEECH").each do |speech| - speaker = speech.xpath(".//SPEAKER").text - @speech_count[speaker] ||= 0 - @speech_count[speaker] += count_lines(speech) + get_speakers(speech).each do |speaker| + @speech_count[speaker] ||= 0 + @speech_count[speaker] += count_lines(speech) + end end end diff --git a/result.txt b/result.txt index d7666dd..2fbbca0 100644 --- a/result.txt +++ b/result.txt @@ -1,10 +1,10 @@ -718 MACBETH +719 MACBETH 265 LADY MACBETH 212 MALCOLM 180 MACDUFF 135 ROSS 113 BANQUO -73 LENNOX +74 LENNOX 70 DUNCAN 62 First Witch 46 Porter @@ -38,5 +38,4 @@ 2 FLEANCE 2 Both Murderers 1 ATTENDANT -1 Soldiers -1 MACBETHLENNOX \ No newline at end of file +1 Soldiers \ No newline at end of file diff --git a/spec/xml_parser_spec.rb b/spec/xml_parser_spec.rb index 67135d2..c4da674 100644 --- a/spec/xml_parser_spec.rb +++ b/spec/xml_parser_spec.rb @@ -32,6 +32,18 @@ end end + describe '#get_speakers' do + it "should return speakers in an array in the given element" do + element = double() + node_set = double() + element.should_receive(:xpath).with(".//SPEAKER").and_return([node_set, node_set]) + node_set.should_receive(:text).twice.and_return("blah") + + parsed = XmlParser.new(xml_doc) + parsed.get_speakers(element).should == ["blah", "blah"] + end + end + describe '#parse_speeches' do it "should get nodes with SPEECH name and iterate through them" do parsed = XmlParser.new(xml_doc)