-
Notifications
You must be signed in to change notification settings - Fork 3
/
scraper.rb
35 lines (34 loc) · 1.14 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# require 'scraperwiki'
# require "open-uri"
# require 'nokogiri'
# require File.dirname(__FILE__) + "/ruby_pdf_helper/scraper"
#
# info_url = "http://daps.planning.wa.gov.au/8.asp"
# url = "http://www.planning.wa.gov.au/daps/data/Current%20DAP%20Applications/Current%20DAP%20Applications.pdf"
#
# doc = Nokogiri::XML(PdfHelper.pdftoxml(open(url) {|f| f.read}))
#
# doc.search('page').each do |p|
# PdfHelper.extract_table_from_pdf_text(p.search('text')).each do |row|
# unless row[0] == 'No'
#
# description = row[4] ? row[4].gsub("\n", "") : nil
# record = {
# "council_reference" => row[0],
# "description" => description,
# "address" => row[4].split("\n").last + ", WA",
# "date_scraped" => Date.today.to_s,
# "info_url" => info_url,
# "comment_url" => info_url,
# }
# begin
# record["date_received"] = Date.strptime(row[5].gsub("//","/").strip, "%d/%m/%Y").to_s if row[5]
# rescue
# puts row[5]
# end
#
# ScraperWiki.save_sqlite(['council_reference'], record)
# end
# end
# end
puts "Scraper is broken. Code is commented out to reduce noise"