This repository was archived by the owner on Mar 14, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoa_learningenglish.rb
72 lines (64 loc) · 1.82 KB
/
voa_learningenglish.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
require 'kindlecook'
require 'date'
# Recipe that walks the VOA Learning English "Level One" listing pages,
# saves each linked article, and groups the articles into one section per
# publication date.
#
# `fetch_html`, `save_article` and the class-level `cook` are not defined in
# this file; presumably they come from KindleCook — confirm against that class.
class VOALearningEnglish < KindleCook
  # Numeric id used in the "/z/<id>" listing URL; 4693 is the "Level One" listing.
  LISTING_ID = 4693

  # Values of the `p` query parameter (listing page numbers) that are visited.
  LISTING_PAGES = (0..4).freeze

  # Reads an optional cut-off date from the first command-line argument that
  # does not start with "-". When present it is parsed with Date.parse (any
  # parse error propagates) and echoed to stdout; otherwise @end_date is nil.
  #
  # NOTE(review): this does not call `super` — confirm KindleCook needs no
  # initialization of its own.
  def initialize
    date_arg = ARGV.reject { |arg| arg.start_with?("-") }.first
    @end_date = date_arg && Date.parse(date_arg)
    $stdout.puts "End date: #{@end_date}" if @end_date
  end

  # Base URL of the site; also used here to build the listing page URLs.
  def root_url
    "https://learningenglish.voanews.com"
  end

  # Returns the constant 2.
  # NOTE(review): presumably an interval consumed by KindleCook (e.g. a delay
  # between requests) — the unit and meaning are not visible in this file.
  def interval
    2
  end

  # Scrapes the listing pages and saves every listed article.
  #
  # Listing entries are processed in page order. A new section is started each
  # time an entry's date differs from the previous entry's date. Scanning stops
  # at the first entry dated before @end_date (when one was given).
  #
  # Also sets @title from the ".pg-title" element of the first listing page.
  #
  # @return [Array<Hash>] sections in reverse order of discovery, each shaped
  #   `{ title: "<date>", articles: [{ title: String, file: String }, ...] }`
  def prepare
    sections = []
    section = nil
    last_date = nil

    LISTING_PAGES.each do |page|
      html = fetch_html("#{root_url}/z/#{LISTING_ID}?p=#{page}")
      @title = html.at_css(".pg-title").text if page.zero?

      html.css(".content-body .content").each do |div|
        date = Date.parse(div.at_css(".date").text)

        # Non-local return: leaves both loops and the method at once as soon
        # as an entry older than the cut-off date is seen.
        return sections.reverse if @end_date && @end_date > date

        if last_date != date
          # Register the section right away; articles are appended to this
          # same hash afterwards, so the final result is unaffected.
          section = { title: date.to_s, articles: [] }
          sections.push(section)
          last_date = date
        end

        link = div.at_css("a")
        file_name = link["href"].split("/").last
        save_article(file_name) do |f|
          article = fetch_html(link["href"])
          f.write(article.at_css(".pg-title"))
          post = article.at_css(".content-offset")
          post.search(".embed-player-only").remove # remove mp3 player
          post.search("#comments").remove # remove comments
          f.write(post)
        end
        section[:articles].push({ title: link.at_css(".title").text, file: file_name })
      end
    end

    # Older first, newer last.
    sections.reverse
  end

  # Document metadata: the book title (built from the listing page title
  # captured in #prepare) and the author.
  #
  # @return [Hash{String => String}]
  def document
    {
      "title" => "VOA Learning English - #{@title}",
      "author" => "VOA Learning English",
    }
  end
end
# Script entry point: invokes the class-level `cook`, which is not defined in
# this file (presumably inherited from KindleCook — confirm there).
VOALearningEnglish.cook