Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Gem server that the dependencies declared below are fetched from.
source 'https://rubygems.org'

# XML parsing library required by lib/xml_parser.rb.
# NOTE(review): spec/xml_parser_spec.rb also requires 'rspec' and
# 'webmock/rspec', which are not declared here — confirm how they are installed.
gem 'nokogiri'
12 changes: 12 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
GEM
remote: https://rubygems.org/
specs:
mini_portile (0.5.2)
nokogiri (1.6.1)
mini_portile (~> 0.5.0)

PLATFORMS
ruby

DEPENDENCIES
nokogiri
40 changes: 40 additions & 0 deletions lib/xml_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'nokogiri'
require 'open-uri'

class XmlParser

attr_reader :doc, :speech_count

# Opens and parses the XML document and starts with an empty speaker tally.
#
# @param url_or_path [String] location handed to Kernel#open; this file
#   requires 'open-uri' and the analyzer script passes an http URL, while the
#   spec passes a local fixture path
#
# NOTE(review): the IO object returned by open is never closed explicitly.
# NOTE(review): on Ruby 3.0+ open-uri reportedly no longer lets Kernel#open
#   handle URLs (URI.open is the replacement) — confirm the targeted Ruby
#   version before relying on URL input here.
def initialize(url_or_path)
@doc = Nokogiri::XML(open(url_or_path))
# Maps speaker name => running LINE count; populated by parse_speeches.
@speech_count = {}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could provide a default value for the hash, like so: Hash.new(0). This would let you simplify the code in parse_speeches.

end

# Counts the LINE elements found anywhere beneath the given node.
#
# @param element [#xpath] node to search, e.g. a SPEECH element
# @return [Integer] number of LINE descendants of +element+
def count_lines(element)
  line_nodes = element.xpath(".//LINE")
  line_nodes.count
end

# Collects the text of every SPEAKER element found beneath the given node.
#
# @param element [#xpath] node to search, e.g. a SPEECH element
# @return [Array<String>] one entry per SPEAKER match, in the order xpath
#   returns them; a speech with several SPEAKER tags yields several names
def get_speakers(element)
# The leading ".//" makes the query relative to +element+ and matches at any depth.
element.xpath(".//SPEAKER").map(&:text)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .// at the beginning of ".//SPEAKER" may not be necessary. I think it's automatically scoped to the current node.

end

# Walks every SPEECH element in the document and adds its LINE count to the
# running total of each speaker named in that speech.
#
# Results accumulate in @speech_count (speaker name => total lines). A speech
# with several SPEAKER elements credits its full line count to each of them.
#
# @return [Object] whatever the SPEECH node collection's #each returns
def parse_speeches
  @doc.xpath("//SPEECH").each do |speech|
    speakers = get_speakers(speech)
    next if speakers.empty?

    # The line count is the same for every speaker of this speech, so query
    # it once here instead of once per speaker.
    line_total = count_lines(speech)

    speakers.each do |speaker|
      @speech_count[speaker] ||= 0
      @speech_count[speaker] += line_total
    end
  end
end

# Orders the speaker tally from most lines to fewest.
#
# @return [Array<Array>] [speaker, line_count] pairs, highest count first
def sort_speech_count
# sort_by yields ascending [key, value] pairs; reverse flips them to descending.
(@speech_count.sort_by {|k,v| v}).reverse
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that somebody else's solution used {|k,v| -v}, avoiding the reverse. I think this way is clearer, though, and efficiency isn't a big concern here.

end

# Prints one "<line count> <speaker>" row per entry of sort_speech_count,
# in the order that method returns them.
#
# @return [Array] the collection returned by sort_speech_count
def print_result
  sort_speech_count.each do |speaker, line_count|
    puts "#{line_count} #{speaker}"
  end
end

end
41 changes: 41 additions & 0 deletions result.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
719 MACBETH
265 LADY MACBETH
212 MALCOLM
180 MACDUFF
135 ROSS
113 BANQUO
74 LENNOX
70 DUNCAN
62 First Witch
46 Porter
45 Doctor
41 LADY MACDUFF
39 HECATE
35 Sergeant
30 First Murderer
30 SIWARD
27 Third Witch
27 Second Witch
24 ALL
23 Gentlewoman
23 Messenger
21 Lord
21 ANGUS
20 Son
15 Second Murderer
12 MENTEITH
11 Old Man
11 CAITHNESS
10 DONALBAIN
8 Third Murderer
7 YOUNG SIWARD
5 Third Apparition
5 SEYTON
5 Servant
4 Second Apparition
3 Lords
2 First Apparition
2 FLEANCE
2 Both Murderers
1 ATTENDANT
1 Soldiers
5 changes: 5 additions & 0 deletions shakespeare_analyzer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
require_relative 'lib/xml_parser'

# Fetch the Macbeth XML, tally lines per speaker, and print the totals.
play_url = "http://www.ibiblio.org/xml/examples/shakespeare/macbeth.xml"
analyzer = XmlParser.new(play_url)
analyzer.parse_speeches
analyzer.print_result
110 changes: 110 additions & 0 deletions spec/fixtures/sample.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
<PLAY>
<TITLE>The Tragedy of Macbeth</TITLE>
<FM>
<P>
Text placed in the public domain by Moby Lexical Tools, 1992.
</P>
<P>SGML markup by Jon Bosak, 1992-1994.</P>
<P>XML version by Jon Bosak, 1996-1998.</P>
<P>
This work may be freely copied and distributed worldwide.
</P>
</FM>
<PERSONAE>
<TITLE>Dramatis Personae</TITLE>
<PERSONA>DUNCAN, king of Scotland.</PERSONA>
<PGROUP>
<PERSONA>MALCOLM</PERSONA>
<PERSONA>DONALBAIN</PERSONA>
<GRPDESCR>his sons.</GRPDESCR>
</PGROUP>
<PGROUP>
<PERSONA>MACBETH</PERSONA>
<PERSONA>BANQUO</PERSONA>
<GRPDESCR>generals of the king's army.</GRPDESCR>
</PGROUP>
<PGROUP>
<PERSONA>MACDUFF</PERSONA>
<PERSONA>LENNOX</PERSONA>
<PERSONA>ROSS</PERSONA>
<PERSONA>MENTEITH</PERSONA>
<PERSONA>ANGUS</PERSONA>
<PERSONA>CAITHNESS</PERSONA>
<GRPDESCR>noblemen of Scotland.</GRPDESCR>
</PGROUP>
<PERSONA>FLEANCE, son to Banquo.</PERSONA>
<PERSONA>
SIWARD, Earl of Northumberland, general of the English forces.
</PERSONA>
<PERSONA>YOUNG SIWARD, his son.</PERSONA>
<PERSONA>SEYTON, an officer attending on Macbeth.</PERSONA>
<PERSONA>Boy, son to Macduff.</PERSONA>
<PERSONA>An English Doctor.</PERSONA>
<PERSONA>A Scotch Doctor.</PERSONA>
<PERSONA>A Soldier.</PERSONA>
<PERSONA>A Porter.</PERSONA>
<PERSONA>An Old Man.</PERSONA>
<PERSONA>LADY MACBETH</PERSONA>
<PERSONA>LADY MACDUFF</PERSONA>
<PERSONA>Gentlewoman attending on Lady Macbeth.</PERSONA>
<PERSONA>HECATE</PERSONA>
<PERSONA>Three Witches.</PERSONA>
<PERSONA>Apparitions.</PERSONA>
<PERSONA>
Lords, Gentlemen, Officers, Soldiers, Murderers, Attendants, and Messengers.
</PERSONA>
</PERSONAE>
<SCNDESCR>SCENE Scotland: England.</SCNDESCR>
<PLAYSUBT>MACBETH</PLAYSUBT>
<ACT>
<TITLE>ACT I</TITLE>
<SCENE>
<TITLE>SCENE I. A desert place.</TITLE>
<STAGEDIR>Thunder and lightning. Enter three Witches</STAGEDIR>
<SPEECH>
<SPEAKER>First Witch</SPEAKER>
<LINE>When shall we three meet again</LINE>
<LINE>In thunder, lightning, or in rain?</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>Second Witch</SPEAKER>
<LINE>When the hurlyburly's done,</LINE>
<LINE>When the battle's lost and won.</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>Third Witch</SPEAKER>
<LINE>That will be ere the set of sun.</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>First Witch</SPEAKER>
<LINE>Where the place?</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>Second Witch</SPEAKER>
<LINE>Upon the heath.</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>Third Witch</SPEAKER>
<LINE>There to meet with Macbeth.</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>First Witch</SPEAKER>
<LINE>I come, Graymalkin!</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>Second Witch</SPEAKER>
<LINE>Paddock calls.</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>Third Witch</SPEAKER>
<LINE>Anon.</LINE>
</SPEECH>
<SPEECH>
<SPEAKER>ALL</SPEAKER>
<LINE>Fair is foul, and foul is fair:</LINE>
<LINE>Hover through the fog and filthy air.</LINE>
</SPEECH>
<STAGEDIR>Exeunt</STAGEDIR>
</SCENE>
</ACT>
</PLAY>
76 changes: 76 additions & 0 deletions spec/xml_parser_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
require_relative '../lib/xml_parser'
require 'rspec'
require 'webmock/rspec'

# Specs for XmlParser. Every example builds the parser from the local
# fixture at spec/fixtures/sample.xml rather than from a remote URL.
describe XmlParser do

# Filesystem path of the fixture handed to XmlParser.new in each example.
let(:xml_doc) { File.dirname(__FILE__) + '/fixtures/sample.xml' }

# Construction: the document is parsed and the tally starts out empty.
describe '.new' do
# NOTE(review): the description says "remote", but the input here is the local fixture path.
it "should call Nokogiri::XML open to parse remote XML file" do
parsed = XmlParser.new(xml_doc)
parsed.doc.should_not be_nil
parsed.doc.is_a?(Nokogiri::XML::Document).should be_true
end

it "should initialize speech_count instance variable with empty hash" do
parsed = XmlParser.new(xml_doc)
parsed.speech_count.should == {}
end
end

# Uses doubles, so this pins the exact XPath string ".//LINE" and the call to
# #count on the result; changing either in the implementation fails this example.
describe '#count_lines' do
it "should count # of LINE children given the XML element" do
element = double()
node_set = double()
element.should_receive(:xpath).with(".//LINE").and_return(node_set)
node_set.should_receive(:count).and_return(5)

parsed = XmlParser.new(xml_doc)
count = parsed.count_lines(element)
count.should == 5
end
end

# Uses doubles, so this pins the exact XPath string ".//SPEAKER" and one
# #text call per returned node.
describe '#get_speakers' do
it "should return speakers in an array in the given element" do
element = double()
# Despite its name, node_set stands in for a single node here; it is returned twice.
node_set = double()
element.should_receive(:xpath).with(".//SPEAKER").and_return([node_set, node_set])
node_set.should_receive(:text).twice.and_return("blah")

parsed = XmlParser.new(xml_doc)
parsed.get_speakers(element).should == ["blah", "blah"]
end
end

# Only checks that parsing the fixture leaves a non-empty tally; the
# individual counts are not asserted.
describe '#parse_speeches' do
it "should get nodes with SPEECH name and iterate through them" do
parsed = XmlParser.new(xml_doc)
parsed.parse_speeches
parsed.speech_count.should_not == {}
end
end

# Only compares the first and last counts of the sorted result; the order of
# the entries in between is not asserted.
describe '#sort_speech_count' do
it "should returned speech_count hash sorted by speech count" do
parsed = XmlParser.new(xml_doc)
parsed.parse_speeches
result = parsed.sort_speech_count
result[0][1].should > result[-1][1]
end
end

# Stubs sort_speech_count and expects one "<count> <name>" puts call on
# STDOUT for each stubbed pair.
describe "#print_result" do
it "should print results" do
STDOUT.should_receive(:puts).with("543 Macbeth")
STDOUT.should_receive(:puts).with("345 Banquo")
STDOUT.should_receive(:puts).with("220 Duncan")

parsed = XmlParser.new(xml_doc)
parsed.should_receive(:sort_speech_count).and_return([['Macbeth', 543], ['Banquo', 345], ['Duncan',220]])
parsed.print_result
end
end

end