Skip to content

Commit

Permalink
Merge pull request #19 from takahashim/v09
Browse files Browse the repository at this point in the history
big (but stepwise) rewriting
  • Loading branch information
takahashim authored Mar 6, 2017
2 parents c5087c9 + fcd26a9 commit b7765e9
Show file tree
Hide file tree
Showing 88 changed files with 14,175 additions and 13,023 deletions.
53 changes: 3 additions & 50 deletions bin/aozora2html
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,6 @@ require 'aozora2html'
require 'optparse'
require "tempfile"

# override Aozora2Html#push_chars
#
# The original Aozora2Html#push_chars does not convert "'" into '&#39;'; that is
# the old behavior of CGI.escapeHTML().
#
class Aozora2Html
  # Pushes +obj+ onto the output one character at a time via push_char.
  #
  # * Array  - every element is pushed recursively.
  # * String - a one-character string has &, ", < and > replaced by their
  #   HTML entities first (the apostrophe is left alone); longer strings are
  #   pushed unescaped. Each resulting character goes to push_char.
  # * anything else is handed to push_char unchanged.
  def push_chars(obj)
    case obj
    when Array
      obj.each { |item| push_chars(item) }
    when String
      text =
        if obj.length == 1
          obj.gsub(/[&\"<>]/, {'&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;'})
        else
          obj
        end
      text.each_char { |ch| push_char(ch) }
    else
      push_char(obj)
    end
  end

  # Handles the text that follows a gaiji mark.
  #
  # When the next character in the stream is not the opening bracket, the
  # reference mark itself is returned. Otherwise the bracketed command is read
  # and resolved in this order: kuten2png result (if it differs from the
  # command), a Unicode-based Embed_Gaiji_tag (if the command carries a U+XXXX
  # code point and Embed_Gaiji_tag.use_unicode is set), and finally
  # escape_gaiji for a non-embeddable character.
  def dispatch_gaiji
    unless @stream.peek_char(0) == "[".encode("shift_jis")
      return "※".encode("shift_jis")
    end

    read_char
    # Can the character be embedded?
    command, _raw = read_to_nest("]".encode("shift_jis"))
    embedded = kuten2png(command)
    return embedded if embedded != command

    code_point = command.match(/U\+([0-9A-F]{4,5})/)
    if code_point && Embed_Gaiji_tag.use_unicode
      tag = Embed_Gaiji_tag.new(self, nil, nil, command)
      tag.unicode = code_point[1]
      tag
    else
      # Not embeddable
      escape_gaiji(command)
    end
  end
end

opt = OptionParser.new("Usage: aozora2html [options] <text file> [<html file>]\n")
opt.on('--gaiji-dir DIR', 'setting gaiji directory')
opt.on('--css-files FILES', 'setting css directory')
Expand All @@ -68,12 +21,12 @@ if options["css-files"]
end

if options["use-jisx0213"]
Embed_Gaiji_tag.use_jisx0213 = true
Accent_tag.use_jisx0213 = true
Aozora2Html::Tag::EmbedGaiji.use_jisx0213 = true
Aozora2Html::Tag::Accent.use_jisx0213 = true
end

if options["use-unicode"]
Embed_Gaiji_tag.use_unicode = true
Aozora2Html::Tag::EmbedGaiji.use_unicode = true
end

if ARGV.size < 1 || ARGV.size > 2
Expand Down
23 changes: 0 additions & 23 deletions lib/accent_tag.rb

This file was deleted.

4 changes: 0 additions & 4 deletions lib/aozora2html.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
require "aozora2html/version"
require "aozora2html/zip"
require "aozora2html/jis2ucs"
require 't2hs'
require 'embed_gaiji_tag'
require 'accent_tag'

## already defined in t2hs.rb
class Aozora2Html
Expand Down
91 changes: 91 additions & 0 deletions lib/aozora2html/accent_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# encoding: utf-8
require 'aozora2html/ruby_buffer'
class Aozora2Html

  # Parser that is invoked recursively so that accent special characters are
  # kept alive (turned into Accent tags) inside a bracketed span.
  class AccentParser < Aozora2Html

    # input   - the Jstream to read from (anything else raises ArgumentError)
    # endchar - stored in @endchar
    # chuuki  - table that receives the :accent flag once an accent is found
    # image   - stored in @images
    def initialize(input, endchar, chuuki, image)
      unless input.is_a?(Jstream)
        raise ArgumentError, "tag_parser must supply Jstream as input"
      end
      @stream = input
      @buffer = []
      @ruby_buf = Aozora2Html::RubyBuffer.new
      @chuuki_table = chuuki
      @images = image # items that record global state have to be shared
      @endchar = endchar # a line break cannot be crossed; no chance to emit <br />
      @closed = nil # flag checking for a forced bail-out at a line break
      @encount_accent = nil
    end

    # Flushes the ruby buffer and returns the collected output as an array.
    #
    # Without any accent the opening bracket is put back in front, and the
    # closing bracket is appended if the span was closed. A span that was
    # never closed gets a line break appended instead.
    def general_output
      @ruby_buf.dump(@buffer)
      @buffer.unshift("〔".encode("shift_jis")) unless @encount_accent
      if !@closed
        @buffer.push("<br />\r\n")
      elsif !@encount_accent
        @buffer.push("〕".encode("shift_jis"))
      end
      @buffer
    end

    # Consumes one unit of input: an accent sequence (looked up in
    # ACCENT_TABLE by up to three characters), a gaiji, a command, a
    # repetition mark, ruby text or a plain character. Throws :terminate at a
    # line break or at the closing bracket.
    def parse
      first = read_char
      by_first = Aozora2Html::ACCENT_TABLE[first]
      by_second = by_first && by_first[@stream.peek_char(0)]
      if by_second.is_a?(Hash)
        # Three-character sequence: the second level is another lookup table.
        by_third = by_second[@stream.peek_char(1)]
        if by_third
          first = Aozora2Html::Tag::Accent.new(self, *by_third)
          @encount_accent = true
          @chuuki_table[:accent] = true
          2.times { read_char }
        end
      elsif by_second
        # Two-character sequence.
        first = Aozora2Html::Tag::Accent.new(self, *by_second)
        @encount_accent = true
        read_char
        @chuuki_table[:accent] = true
      end

      case first
      when Aozora2Html::GAIJI_MARK
        first = dispatch_gaiji
      when "[".encode("shift_jis")
        first = dispatch_aozora_command
      when Aozora2Html::KU
        assign_kunoji
      when "《".encode("shift_jis")
        first = apply_ruby
      end

      if first == "\r\n"
        # Line ended before the closing bracket; warn only if an accent was seen.
        puts "警告(#{line_number}行目):アクセント分解の亀甲括弧の始めと終わりが、行中で揃っていません".encode("shift_jis") if @encount_accent
        throw :terminate
      elsif first == "〕".encode("shift_jis")
        @closed = true
        throw :terminate
      elsif first == RUBY_PREFIX
        @ruby_buf.dump(@buffer)
        @ruby_buf.protected = true
      elsif first != "" && first != nil
        illegal_char_check(first, line_number)
        push_chars(first)
      end
    end

    # Runs parse repeatedly until it throws :terminate, then returns the
    # result of general_output.
    def process
      catch(:terminate) do
        loop { parse }
      end
      general_output
    end
  end
end
16 changes: 16 additions & 0 deletions lib/aozora2html/error.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
require "aozora2html/i18n"

# Exception class
class Aozora2Html
  # Error raised by the converter.
  #
  # The raw text given to the constructor is kept in @message and is also
  # passed on to StandardError, so that to_s / inspect show it.
  class Error < StandardError

    # message - the raw error description
    def initialize(message)
      super
      @message = message
    end

    # Returns the error text.
    #
    # line - optional line number. When given, the text is formatted through
    #        I18n.t(:error_stop, line, @message). When omitted, the raw
    #        message is returned, which keeps the zero-argument
    #        Exception#message contract working (plain `e.message` used to
    #        raise ArgumentError).
    def message(line = nil)
      return @message if line.nil?

      I18n.t(:error_stop, line, @message)
    end
  end
end

139 changes: 139 additions & 0 deletions lib/aozora2html/header.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# encoding: utf-8
class Aozora2Html
  # Collects the header lines of a text (title, author, ...) and renders the
  # XHTML preamble and metadata block from them.
  class Header
    def initialize()
      @header = []
    end

    # Appends one header line.
    def push(line)
      @header.push(line)
    end

    # Returns an <h2> element for hash[attr], or "" when that entry is missing.
    # true_name, when given, is used as the class attribute instead of attr.
    def out_header_info(hash, attr, true_name = nil)
      found = hash[attr]
      return "" unless found

      "<h2 class=\"#{true_name || attr}\">#{found}</h2>\r\n"
    end

    # Classifies a header line.
    #
    # Returns :original when every character's hex byte code lies in one of
    # the ranges below (an empty string therefore counts as :original),
    # otherwise :editor / :henyaku / :translator according to the first
    # matching PAT_* pattern, or nil when none matches.
    def header_element_type(string)
      original = string.each_char.all? do |x|
        code = x.unpack("H*")[0]
        ("00" <= code && code <= "7f") || # 1byte
          ("8140" <= code && code <= "8258") || # 1-1, 3-25
          ("839f" <= code && code <= "8491") # 6-1, 7-81
      end

      if original
        :original
      elsif string.match(PAT_EDITOR)
        :editor
      elsif string.match(PAT_HENYAKU)
        :henyaku
      elsif string.match(PAT_TRANSLATOR)
        :translator
      end
    end

    # Stores string in header_info under the role detected by
    # header_element_type; anything that is not an editor, translator or
    # henyaku line is treated as the author. Returns the role symbol.
    def process_person(string, header_info)
      type = header_element_type(string)
      case type
      when :editor
        header_info[:editor] = string
      when :translator
        header_info[:translator] = string
      when :henyaku
        header_info[:henyaku] = string
      else
        type = :author
        header_info[:author] = string
      end
      type
    end

    # Builds the <title> element from the entries present in header_info.
    def build_title(header_info)
      keys = [:author, :translator, :editor, :henyaku,
              :title, :original_title,
              :subtitle, :original_subtitle]
      buf_str = keys.map { |item| header_info[item] }.compact.join(" ")
      "<title>#{buf_str}</title>"
    end

    # Interprets the collected header lines (the layout depends on how many
    # lines there are) and returns a hash of the recognised entries.
    # Raises Aozora2Html::Error when a 5- or 6-line header ends in a line
    # that is classified as a second author.
    def build_header_info
      header_info = {:title => @header[0]}
      case @header.length
      when 2
        process_person(@header[1], header_info)
      when 3
        if header_element_type(@header[1]) == :original
          header_info[:original_title] = @header[1]
          process_person(@header[2], header_info)
        elsif process_person(@header[2], header_info) == :author
          header_info[:subtitle] = @header[1]
        else
          header_info[:author] = @header[1]
        end
      when 4
        if header_element_type(@header[1]) == :original
          header_info[:original_title] = @header[1]
        else
          header_info[:subtitle] = @header[1]
        end
        if process_person(@header[3], header_info) == :author
          header_info[:subtitle] = @header[2]
        else
          header_info[:author] = @header[2]
        end
      when 5
        header_info[:original_title] = @header[1]
        header_info[:subtitle] = @header[2]
        header_info[:author] = @header[3]
        if process_person(@header[4], header_info) == :author
          raise Aozora2Html::Error, "parser encounted author twice"
        end
      when 6
        header_info[:original_title] = @header[1]
        header_info[:subtitle] = @header[2]
        header_info[:original_subtitle] = @header[3]
        header_info[:author] = @header[4]
        if process_person(@header[5], header_info) == :author
          raise Aozora2Html::Error, "parser encounted author twice"
        end
      end
      header_info
    end

    # Renders the XHTML preamble, <head> and metadata block as one string.
    def to_html
      header_info = build_header_info()

      # Build the <title> line
      html_title = build_title(header_info)

      # Output
      out_buf = []
      out_buf.push("<?xml version=\"1.0\" encoding=\"Shift_JIS\"?>\r\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\r\n \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"ja\" >\r\n<head>\r\n <meta http-equiv=\"Content-Type\" content=\"text/html;charset=Shift_JIS\" />\r\n <meta http-equiv=\"content-style-type\" content=\"text/css\" />\r\n")
      # Array() lets an unset (nil) or single-String $css_files work instead
      # of failing with NoMethodError.
      Array($css_files).each do |css|
        out_buf.push("\t<link rel=\"stylesheet\" type=\"text/css\" href=\"" + css + "\" />\r\n")
      end
      out_buf.push("\t#{html_title}\r\n <script type=\"text/javascript\" src=\"../../jquery-1.4.2.min.js\"></script>\r\n <link rel=\"Schema.DC\" href=\"http://purl.org/dc/elements/1.1/\" />\r\n <meta name=\"DC.Title\" content=\"#{header_info[:title]}\" />\r\n <meta name=\"DC.Creator\" content=\"#{header_info[:author]}\" />\r\n <meta name=\"DC.Publisher\" content=\"#{AOZORABUNKO}\" />\r\n</head>\r\n<body>\r\n<div class=\"metadata\">\r\n")
      out_buf.push("<h1 class=\"title\">#{header_info[:title]}</h1>\r\n" +
                   out_header_info(header_info, :original_title) +
                   out_header_info(header_info, :subtitle) +
                   out_header_info(header_info, :original_subtitle) +
                   out_header_info(header_info, :author) +
                   out_header_info(header_info, :editor) +
                   out_header_info(header_info, :translator) +
                   out_header_info(header_info, :henyaku, "editor-translator"))
      out_buf.push("<br />\r\n<br />\r\n</div>\r\n<div id=\"contents\" style=\"display:none\"></div><div class=\"main_text\">")
      out_buf.join("")
    end

  end
end
16 changes: 16 additions & 0 deletions lib/aozora2html/i18n.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# encoding: utf-8
class Aozora2Html
  # Message catalogue for the converter's user-facing messages.
  class I18n
    MSG = {
      tag_syntax_error: "注記を重ねる際の原則、「狭い範囲を先に、広い範囲を後に」が守られていません。リンク先の指針を参考に、書き方をあらためてください",
      undefined_header: "未定義な見出しです",
      use_crlf: "改行コードを、「CR+LF」にあらためてください",
      error_stop: "エラー(%d行目):%s. \r\n処理を停止します",
      invalid_font_size: "文字サイズの指定が不正です"
    }

    class << self
      # Looks up the message registered under msg, converts it to Shift_JIS
      # and fills its format directives with args.
      def t(msg, *args)
        template = MSG[msg].encode("shift_jis")
        format(template, *args)
      end
    end
  end
end
Loading

0 comments on commit b7765e9

Please sign in to comment.