From 50b305711d1a4401975843a88cd6315ea2d1f9df Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sat, 13 Jan 2024 22:34:40 +0900 Subject: [PATCH] Add Source#match? which returns only true/false instead of match result/nil. [Why] To suppress `@scanner.captures` calls when the result of `source.match` is not used. --- lib/rexml/parsers/baseparser.rb | 44 ++++++++++++++++----------------- lib/rexml/source.rb | 32 ++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 8cbde7c3..721059de 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -223,13 +223,13 @@ def pull_event return process_instruction when DOCTYPE_START base_error_message = "Malformed DOCTYPE" - @source.match(DOCTYPE_START, true) + @source.match?(DOCTYPE_START, true) @nsstack.unshift(curr_ns=Set.new) name = parse_name(base_error_message) - if @source.match(/\A\s*\[/um, true) + if @source.match?(/\A\s*\[/um, true) id = [nil, nil, nil] @document_status = :in_doctype - elsif @source.match(/\A\s*>/um, true) + elsif @source.match?(/\A\s*>/um, true) id = [nil, nil, nil] @document_status = :after_doctype else @@ -240,9 +240,9 @@ def pull_event # For backward compatibility id[1], id[2] = id[2], nil end - if @source.match(/\A\s*\[/um, true) + if @source.match?(/\A\s*\[/um, true) @document_status = :in_doctype - elsif @source.match(/\A\s*>/um, true) + elsif @source.match?(/\A\s*>/um, true) @document_status = :after_doctype else message = "#{base_error_message}: garbage after external ID" @@ -251,7 +251,7 @@ def pull_event end args = [:start_doctype, name, *id] if @document_status == :after_doctype - @source.match(/\A\s*/um, true) + @source.match?(/\A\s*/um, true) @stack << [ :end_doctype ] end return args @@ -322,8 +322,8 @@ def pull_event return [ :attlistdecl, element, pairs, contents ] when NOTATIONDECL_START base_error_message = "Malformed notation declaration" - unless @source.match(/\A\s*/um) + unless @source.match?(/\A\s*/um) message = "#{base_error_message}: name is missing" else message = "#{base_error_message}: invalid declaration name" @@ -334,19 +334,19 @@ def pull_event id = parse_id(base_error_message, accept_external_id: true, accept_public_id: true) - unless @source.match(/\A\s*>/um, true) + unless @source.match?(/\A\s*>/um, true) message = "#{base_error_message}: garbage before end >" raise REXML::ParseException.new(message, @source) end return [:notationdecl, name, *id] when DOCTYPE_END @document_status = :after_doctype - @source.match( DOCTYPE_END, true ) + @source.match?( DOCTYPE_END, true ) return [ :end_doctype ] end end if @document_status == :after_doctype - @source.match(/\A\s*/um, true) + @source.match?(/\A\s*/um, true) end begin @source.read if @source.buffer.size<2 @@ -417,7 +417,7 @@ def pull_event md = @source.match( TEXT_PATTERN, true ) text = md[1] if md[0].length == 0 - @source.match( /(\s+)/, true ) + @source.match?( /(\s+)/, true ) end return [ :text, text ] end @@ -499,7 +499,7 @@ def need_source_encoding_update?(xml_declaration_encoding) def parse_name(base_error_message) md = @source.match(/\A\s*#{NAME}/um, true) unless md - if @source.match(/\A\s*\S/um) + if @source.match?(/\A\s*\S/um) message = "#{base_error_message}: invalid name" else message = "#{base_error_message}: name is missing" @@ -541,34 +541,34 @@ def parse_id_invalid_details(accept_external_id:, accept_public_id:) public = /\A\s*PUBLIC/um system = /\A\s*SYSTEM/um - if (accept_external_id or accept_public_id) and @source.match(/#{public}/um) - if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um) + if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um) + if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um) return "public ID literal is missing" end - unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um) + unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um) return "invalid public ID literal" end if accept_public_id - if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) + if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) return "system ID literal is missing" end - unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) return "invalid system literal" end "garbage after system literal" else "garbage after public ID literal" end - elsif accept_external_id and @source.match(/#{system}/um) - if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um) + elsif accept_external_id and @source.match?(/#{system}/um) + if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um) return "system literal is missing" end - unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um) + unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um) return "invalid system literal" end "garbage after system literal" else - unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um) + unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um) return "invalid ID type" end "ID type is missing" diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 474b2aae..0c2ee539 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -77,6 +77,16 @@ def match(pattern, cons=false) @scanner.matched? ? [@scanner.matched, *@scanner.captures] : nil end + def match?(pattern, cons=false) + if cons + @scanner.scan(pattern) + else + @scanner.check(pattern) + end + + @scanner.matched? + end + # @return true if the Source is exhausted def empty? @scanner.eos? @@ -178,6 +188,28 @@ def match( pattern, cons=false ) @scanner.matched? ? [@scanner.matched, *@scanner.captures] : nil end + def match?( pattern, cons=false ) + if cons + @scanner.scan(pattern) + else + @scanner.check(pattern) + end + while !@scanner.matched? and @source + begin + @scanner << readline + if cons + @scanner.scan(pattern) + else + @scanner.check(pattern) + end + rescue + @source = nil + end + end + + @scanner.matched? + end + def empty? super and ( @source.nil? || @source.eof? ) end