From df9cf99300b889d71df77a7783fb039da81daed7 Mon Sep 17 00:00:00 2001 From: Todd Sundsted Date: Sun, 23 Nov 2025 07:21:00 -0500 Subject: [PATCH 1/2] Free parser context after use. --- src/xml.cr | 32 ++++++++++++++++++++++++-------- src/xml/libxml2.cr | 2 ++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/xml.cr b/src/xml.cr index de8f15aa21ed..cb0d904a2571 100644 --- a/src/xml.cr +++ b/src/xml.cr @@ -57,8 +57,12 @@ module XML def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Document raise XML::Error.new("Document is empty", 0) if string.empty? ctxt = LibXML.xmlNewParserCtxt - from_ptr(ctxt) do - LibXML.xmlCtxtReadMemory(ctxt, string, string.bytesize, nil, nil, options) + begin + from_ptr(ctxt) do + LibXML.xmlCtxtReadMemory(ctxt, string, string.bytesize, nil, nil, options) + end + ensure + LibXML.xmlFreeParserCtxt(ctxt) end end @@ -67,8 +71,12 @@ module XML # See `ParserOptions.default` for default options. def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Document ctxt = LibXML.xmlNewParserCtxt - from_ptr(ctxt) do - LibXML.xmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, nil, options) + begin + from_ptr(ctxt) do + LibXML.xmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, nil, options) + end + ensure + LibXML.xmlFreeParserCtxt(ctxt) end end @@ -78,8 +86,12 @@ module XML def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Document raise XML::Error.new("Document is empty", 0) if string.empty? ctxt = LibXML.htmlNewParserCtxt - from_ptr(ctxt) do - LibXML.htmlCtxtReadMemory(ctxt, string, string.bytesize, nil, "utf-8", options) + begin + from_ptr(ctxt) do + LibXML.htmlCtxtReadMemory(ctxt, string, string.bytesize, nil, "utf-8", options) + end + ensure + LibXML.htmlFreeParserCtxt(ctxt) end end @@ -88,8 +100,12 @@ module XML # See `HTMLParserOptions.default` for default options. 
def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Document ctxt = LibXML.htmlNewParserCtxt - from_ptr(ctxt) do - LibXML.htmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, "utf-8", options) + begin + from_ptr(ctxt) do + LibXML.htmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, "utf-8", options) + end + ensure + LibXML.htmlFreeParserCtxt(ctxt) end end diff --git a/src/xml/libxml2.cr b/src/xml/libxml2.cr index 67bd23984755..b70163ae14d6 100644 --- a/src/xml/libxml2.cr +++ b/src/xml/libxml2.cr @@ -175,10 +175,12 @@ lib LibXML fun xmlNewParserCtxt : ParserCtxt fun xmlCtxtReadIO(ParserCtxt, ioread : InputReadCallback, ioclose : InputCloseCallback, ioctx : Void*, url : UInt8*, encoding : UInt8*, options : XML::ParserOptions) : Doc* fun xmlCtxtReadMemory(ParserCtxt, buffer : UInt8*, size : Int, url : UInt8*, encoding : UInt8*, options : XML::ParserOptions) : Doc* + fun xmlFreeParserCtxt(ctxt : ParserCtxt) fun htmlNewParserCtxt : HTMLParserCtxt fun htmlCtxtReadMemory(HTMLParserCtxt, buffer : UInt8*, size : Int, url : UInt8*, encoding : UInt8*, options : XML::HTMLParserOptions) : Doc* fun htmlCtxtReadIO(HTMLParserCtxt, ioread : InputReadCallback, ioclose : InputCloseCallback, ioctx : Void*, url : UInt8*, encoding : UInt8*, options : XML::HTMLParserOptions) : Doc* + fun htmlFreeParserCtxt(ctxt : HTMLParserCtxt) fun xmlDocGetRootElement(doc : Doc*) : Node* fun xmlXPathNodeSetCreate(node : Node*) : NodeSet* From 36c769a1fd8c1822c345fc4014276f76fedcf249 Mon Sep 17 00:00:00 2001 From: Todd Sundsted Date: Mon, 24 Nov 2025 12:36:24 -0500 Subject: [PATCH 2/2] Refactor parser context management into helpers. 
--- src/xml.cr | 58 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/xml.cr b/src/xml.cr index cb0d904a2571..e286c94b2034 100644 --- a/src/xml.cr +++ b/src/xml.cr @@ -51,18 +51,35 @@ require "./xml/libxml2" # string # => "\n\n Jane\n Doe\n\n" # ``` module XML + # Wraps a block that uses an XML parser context. Allocates the + # context and ensures the context is freed after use. + private def self.parse_xml(&) + ctxt = LibXML.xmlNewParserCtxt + begin + from_ptr(ctxt) { yield ctxt } + ensure + LibXML.xmlFreeParserCtxt(ctxt) + end + end + + # Wraps a block that uses an HTML parser context. Allocates the + # context and ensures the context is freed after use. + private def self.parse_html(&) + ctxt = LibXML.htmlNewParserCtxt + begin + from_ptr(ctxt) { yield ctxt } + ensure + LibXML.htmlFreeParserCtxt(ctxt) + end + end + # Parses an XML document from *string* with *options* into an `XML::Node`. # # See `ParserOptions.default` for default options. def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Document raise XML::Error.new("Document is empty", 0) if string.empty? - ctxt = LibXML.xmlNewParserCtxt - begin - from_ptr(ctxt) do - LibXML.xmlCtxtReadMemory(ctxt, string, string.bytesize, nil, nil, options) - end - ensure - LibXML.xmlFreeParserCtxt(ctxt) + parse_xml do |ctxt| + LibXML.xmlCtxtReadMemory(ctxt, string, string.bytesize, nil, nil, options) end end @@ -70,13 +87,8 @@ module XML # # See `ParserOptions.default` for default options. 
def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Document - ctxt = LibXML.xmlNewParserCtxt - begin - from_ptr(ctxt) do - LibXML.xmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, nil, options) - end - ensure - LibXML.xmlFreeParserCtxt(ctxt) + parse_xml do |ctxt| + LibXML.xmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, nil, options) end end @@ -85,13 +97,8 @@ module XML # See `HTMLParserOptions.default` for default options. def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Document raise XML::Error.new("Document is empty", 0) if string.empty? - ctxt = LibXML.htmlNewParserCtxt - begin - from_ptr(ctxt) do - LibXML.htmlCtxtReadMemory(ctxt, string, string.bytesize, nil, "utf-8", options) - end - ensure - LibXML.htmlFreeParserCtxt(ctxt) + parse_html do |ctxt| + LibXML.htmlCtxtReadMemory(ctxt, string, string.bytesize, nil, "utf-8", options) end end @@ -99,13 +106,8 @@ module XML # # See `HTMLParserOptions.default` for default options. def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Document - ctxt = LibXML.htmlNewParserCtxt - begin - from_ptr(ctxt) do - LibXML.htmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, "utf-8", options) - end - ensure - LibXML.htmlFreeParserCtxt(ctxt) + parse_html do |ctxt| + LibXML.htmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, "utf-8", options) end end