From bc8862ca7c94ad7704fd134d7208223814825acc Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 8 Apr 2015 17:38:13 +0300 Subject: [PATCH] Fix #45, bump version. --- CHANGELOG.md | 3 + lib/{tree_construction => common}/doctype.js | 243 ++++++++++--------- lib/serialization/serializer.js | 19 +- lib/tree_adapters/htmlparser2.js | 16 +- lib/tree_construction/parser.js | 2 +- package.json | 2 +- test/fixtures/parser_test.js | 9 + 7 files changed, 154 insertions(+), 140 deletions(-) rename lib/{tree_construction => common}/doctype.js (90%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51c042497..a563e29c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 1.4.2 + * Fix: htmlparser2 tree adapter `DocumentType.data` property rendering (GH [#45](https://github.com/inikulin/parse5/issues/45)). + ## 1.4.1 * Fix: Location info handling for the implicitly generated `<html>` and `<body>` elements (GH [#44](https://github.com/inikulin/parse5/issues/44)). diff --git a/lib/tree_construction/doctype.js b/lib/common/doctype.js similarity index 90% rename from lib/tree_construction/doctype.js rename to lib/common/doctype.js index 413c05a6a..6cf3c0ac4 100644 --- a/lib/tree_construction/doctype.js +++ b/lib/common/doctype.js @@ -1,109 +1,134 @@ -'use strict'; - -//Const -var VALID_DOCTYPE_NAME = 'html', - QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd', - QUIRKS_MODE_PUBLIC_ID_PREFIXES = [ - "+//silmaril//dtd html pro v0r11 19970101//en", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en", - "-//as//dtd html 3.0 aswedit + extensions//en", - "-//ietf//dtd html 2.0 level 1//en", - "-//ietf//dtd html 2.0 level 2//en", - "-//ietf//dtd html 2.0 strict level 1//en", - "-//ietf//dtd html 2.0 strict level 2//en", - "-//ietf//dtd html 2.0 strict//en", - "-//ietf//dtd html 2.0//en", - "-//ietf//dtd html 2.1e//en", - "-//ietf//dtd html 3.0//en", - "-//ietf//dtd html 3.0//en//", - "-//ietf//dtd html 3.2 final//en", - "-//ietf//dtd html 3.2//en", - 
"-//ietf//dtd html 3//en", - "-//ietf//dtd html level 0//en", - "-//ietf//dtd html level 0//en//2.0", - "-//ietf//dtd html level 1//en", - "-//ietf//dtd html level 1//en//2.0", - "-//ietf//dtd html level 2//en", - "-//ietf//dtd html level 2//en//2.0", - "-//ietf//dtd html level 3//en", - "-//ietf//dtd html level 3//en//3.0", - "-//ietf//dtd html strict level 0//en", - "-//ietf//dtd html strict level 0//en//2.0", - "-//ietf//dtd html strict level 1//en", - "-//ietf//dtd html strict level 1//en//2.0", - "-//ietf//dtd html strict level 2//en", - "-//ietf//dtd html strict level 2//en//2.0", - "-//ietf//dtd html strict level 3//en", - "-//ietf//dtd html strict level 3//en//3.0", - "-//ietf//dtd html strict//en", - "-//ietf//dtd html strict//en//2.0", - "-//ietf//dtd html strict//en//3.0", - "-//ietf//dtd html//en", - "-//ietf//dtd html//en//2.0", - "-//ietf//dtd html//en//3.0", - "-//metrius//dtd metrius presentational//en", - "-//microsoft//dtd internet explorer 2.0 html strict//en", - "-//microsoft//dtd internet explorer 2.0 html//en", - "-//microsoft//dtd internet explorer 2.0 tables//en", - "-//microsoft//dtd internet explorer 3.0 html strict//en", - "-//microsoft//dtd internet explorer 3.0 html//en", - "-//microsoft//dtd internet explorer 3.0 tables//en", - "-//netscape comm. corp.//dtd html//en", - "-//netscape comm. 
corp.//dtd strict html//en", - "-//o'reilly and associates//dtd html 2.0//en", - "-//o'reilly and associates//dtd html extended 1.0//en", - "-//spyglass//dtd html 2.0 extended//en", - "-//sq//dtd html 2.0 hotmetal + extensions//en", - "-//sun microsystems corp.//dtd hotjava html//en", - "-//sun microsystems corp.//dtd hotjava strict html//en", - "-//w3c//dtd html 3 1995-03-24//en", - "-//w3c//dtd html 3.2 draft//en", - "-//w3c//dtd html 3.2 final//en", - "-//w3c//dtd html 3.2//en", - "-//w3c//dtd html 3.2s draft//en", - "-//w3c//dtd html 4.0 frameset//en", - "-//w3c//dtd html 4.0 transitional//en", - "-//w3c//dtd html experimental 19960712//en", - "-//w3c//dtd html experimental 970421//en", - "-//w3c//dtd w3 html//en", - "-//w3o//dtd w3 html 3.0//en", - "-//w3o//dtd w3 html 3.0//en//", - "-//webtechs//dtd mozilla html 2.0//en", - "-//webtechs//dtd mozilla html//en" - ], - QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [ - '-//w3c//dtd html 4.01 frameset//', - '-//w3c//dtd html 4.01 transitional//' - ], - QUIRKS_MODE_PUBLIC_IDS = [ - '-//w3o//dtd w3 html strict 3.0//en//', - '-/w3c/dtd html 4.0 transitional/en', - 'html' - ]; - -exports.isQuirks = function (name, publicId, systemId) { - if (name !== VALID_DOCTYPE_NAME) - return true; - - if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) - return true; - - if (publicId !== null) { - publicId = publicId.toLowerCase(); - - if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) - return true; - - var prefixes = QUIRKS_MODE_PUBLIC_ID_PREFIXES; - - if (systemId === null) - prefixes = prefixes.concat(QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES); - - for (var i = 0; i < prefixes.length; i++) { - if (publicId.indexOf(prefixes[i]) === 0) - return true; - } - } - - return false; -}; +'use strict'; + +//Const +var VALID_DOCTYPE_NAME = 'html', + QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd', + QUIRKS_MODE_PUBLIC_ID_PREFIXES = [ + "+//silmaril//dtd html pro v0r11 19970101//en", + 
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en", + "-//as//dtd html 3.0 aswedit + extensions//en", + "-//ietf//dtd html 2.0 level 1//en", + "-//ietf//dtd html 2.0 level 2//en", + "-//ietf//dtd html 2.0 strict level 1//en", + "-//ietf//dtd html 2.0 strict level 2//en", + "-//ietf//dtd html 2.0 strict//en", + "-//ietf//dtd html 2.0//en", + "-//ietf//dtd html 2.1e//en", + "-//ietf//dtd html 3.0//en", + "-//ietf//dtd html 3.0//en//", + "-//ietf//dtd html 3.2 final//en", + "-//ietf//dtd html 3.2//en", + "-//ietf//dtd html 3//en", + "-//ietf//dtd html level 0//en", + "-//ietf//dtd html level 0//en//2.0", + "-//ietf//dtd html level 1//en", + "-//ietf//dtd html level 1//en//2.0", + "-//ietf//dtd html level 2//en", + "-//ietf//dtd html level 2//en//2.0", + "-//ietf//dtd html level 3//en", + "-//ietf//dtd html level 3//en//3.0", + "-//ietf//dtd html strict level 0//en", + "-//ietf//dtd html strict level 0//en//2.0", + "-//ietf//dtd html strict level 1//en", + "-//ietf//dtd html strict level 1//en//2.0", + "-//ietf//dtd html strict level 2//en", + "-//ietf//dtd html strict level 2//en//2.0", + "-//ietf//dtd html strict level 3//en", + "-//ietf//dtd html strict level 3//en//3.0", + "-//ietf//dtd html strict//en", + "-//ietf//dtd html strict//en//2.0", + "-//ietf//dtd html strict//en//3.0", + "-//ietf//dtd html//en", + "-//ietf//dtd html//en//2.0", + "-//ietf//dtd html//en//3.0", + "-//metrius//dtd metrius presentational//en", + "-//microsoft//dtd internet explorer 2.0 html strict//en", + "-//microsoft//dtd internet explorer 2.0 html//en", + "-//microsoft//dtd internet explorer 2.0 tables//en", + "-//microsoft//dtd internet explorer 3.0 html strict//en", + "-//microsoft//dtd internet explorer 3.0 html//en", + "-//microsoft//dtd internet explorer 3.0 tables//en", + "-//netscape comm. corp.//dtd html//en", + "-//netscape comm. 
corp.//dtd strict html//en", + "-//o'reilly and associates//dtd html 2.0//en", + "-//o'reilly and associates//dtd html extended 1.0//en", + "-//spyglass//dtd html 2.0 extended//en", + "-//sq//dtd html 2.0 hotmetal + extensions//en", + "-//sun microsystems corp.//dtd hotjava html//en", + "-//sun microsystems corp.//dtd hotjava strict html//en", + "-//w3c//dtd html 3 1995-03-24//en", + "-//w3c//dtd html 3.2 draft//en", + "-//w3c//dtd html 3.2 final//en", + "-//w3c//dtd html 3.2//en", + "-//w3c//dtd html 3.2s draft//en", + "-//w3c//dtd html 4.0 frameset//en", + "-//w3c//dtd html 4.0 transitional//en", + "-//w3c//dtd html experimental 19960712//en", + "-//w3c//dtd html experimental 970421//en", + "-//w3c//dtd w3 html//en", + "-//w3o//dtd w3 html 3.0//en", + "-//w3o//dtd w3 html 3.0//en//", + "-//webtechs//dtd mozilla html 2.0//en", + "-//webtechs//dtd mozilla html//en" + ], + QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [ + '-//w3c//dtd html 4.01 frameset//', + '-//w3c//dtd html 4.01 transitional//' + ], + QUIRKS_MODE_PUBLIC_IDS = [ + '-//w3o//dtd w3 html strict 3.0//en//', + '-/w3c/dtd html 4.0 transitional/en', + 'html' + ]; + + +//Utils +function enquoteDoctypeId(id) { + var quote = id.indexOf('"') !== -1 ? 
'\'' : '"'; + + return quote + id + quote; +} + + +//API +exports.isQuirks = function (name, publicId, systemId) { + if (name !== VALID_DOCTYPE_NAME) + return true; + + if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) + return true; + + if (publicId !== null) { + publicId = publicId.toLowerCase(); + + if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) + return true; + + var prefixes = QUIRKS_MODE_PUBLIC_ID_PREFIXES; + + if (systemId === null) + prefixes = prefixes.concat(QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES); + + for (var i = 0; i < prefixes.length; i++) { + if (publicId.indexOf(prefixes[i]) === 0) + return true; + } + } + + return false; +}; + +exports.serializeContent = function (name, publicId, systemId) { + var str = '!DOCTYPE ' + name; + + if (publicId !== null) + str += ' PUBLIC ' + enquoteDoctypeId(publicId); + + else if (systemId !== null) + str += ' SYSTEM'; + + if (systemId !== null) + str += ' ' + enquoteDoctypeId(systemId); + + return str; +}; diff --git a/lib/serialization/serializer.js b/lib/serialization/serializer.js index ddf278b26..e80822e74 100644 --- a/lib/serialization/serializer.js +++ b/lib/serialization/serializer.js @@ -1,6 +1,7 @@ 'use strict'; var DefaultTreeAdapter = require('../tree_adapters/default'), + Doctype = require('../common/doctype'), Utils = require('../common/utils'), HTML = require('../common/html'); @@ -40,11 +41,7 @@ function escapeString(str, attrMode) { //Enquote doctype ID -function enquoteDoctypeId(id) { - var quote = id.indexOf('"') !== -1 ? 
'\'' : '"'; - return quote + id + quote; -} //Serializer @@ -178,17 +175,5 @@ Serializer.prototype._serializeDocumentTypeNode = function (node) { publicId = this.treeAdapter.getDocumentTypeNodePublicId(node), systemId = this.treeAdapter.getDocumentTypeNodeSystemId(node); - this.html += '<!DOCTYPE ' + name; - - if (publicId !== null) - this.html += ' PUBLIC ' + enquoteDoctypeId(publicId); - - else if (systemId !== null) - this.html += ' SYSTEM'; - - if (systemId !== null) - this.html += ' ' + enquoteDoctypeId(systemId); - - this.html += '>'; + this.html += '<' + Doctype.serializeContent(name, publicId, systemId) + '>'; }; diff --git a/lib/tree_adapters/htmlparser2.js b/lib/tree_adapters/htmlparser2.js index ff7c2de0f..ab10ee6bc 100644 --- a/lib/tree_adapters/htmlparser2.js +++ b/lib/tree_adapters/htmlparser2.js @@ -1,5 +1,7 @@ 'use strict'; +var Doctype = require('../common/doctype'); + //Conversion tables for DOM Level1 structure emulation var nodeTypes = { element: 1, @@ -119,18 +121,8 @@ var createTextNode = function (value) { //Tree mutation exports.setDocumentType = function (document, name, publicId, systemId) { - var data = '!DOCTYPE'; - - if (name) - data += ' ' + name; - - if (publicId) - data += ' PUBLIC "' + publicId + '"'; - - if (systemId) - data += ' "' + systemId + '"'; - - var doctypeNode = null; + var data = Doctype.serializeContent(name, publicId, systemId), + doctypeNode = null; for (var i = 0; i < document.children.length; i++) { if (document.children[i].type === 'directive' && document.children[i].name === '!doctype') { diff --git a/lib/tree_construction/parser.js b/lib/tree_construction/parser.js index 9c964b78f..d9c404829 100644 --- a/lib/tree_construction/parser.js +++ b/lib/tree_construction/parser.js @@ -4,8 +4,8 @@ var Tokenizer = require('../tokenization/tokenizer'), OpenElementStack = require('./open_element_stack'), FormattingElementList = require('./formatting_element_list'), LocationInfoMixin = require('./location_info_mixin'), - Doctype = require('./doctype'), DefaultTreeAdapter = require('../tree_adapters/default'), + Doctype = require('../common/doctype'), ForeignContent = require('../common/foreign_content'), Utils = require('../common/utils'), UNICODE = 
require('../common/unicode'), diff --git a/package.json b/package.json index 23b437d1b..4e98ee65b 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "parse5", "description": "WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node and io.js.", - "version": "1.4.1", + "version": "1.4.2", "author": "Ivan Nikulin (https://github.com/inikulin)", "contributors": [ "Sebastian Mayr (http://blog.smayr.name)", diff --git a/test/fixtures/parser_test.js b/test/fixtures/parser_test.js index 459a3c6c3..ed4506445 100644 --- a/test/fixtures/parser_test.js +++ b/test/fixtures/parser_test.js @@ -101,3 +101,12 @@ TestUtils.generateTestsForEachTreeAdapter(module.exports, function (_test, treeA }); +exports['Regression - HTML5 Legacy Doctype Misparsed with htmlparser2 tree adapter (GH-45)'] = function () { + var html = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html><head></head><body>Hi there!</body></html>', + parser = new Parser(parse5.TreeAdapters.htmlparser2), + document = parser.parse(html); + + assert.strictEqual(document.childNodes[0].data, '!DOCTYPE html SYSTEM "about:legacy-compat"'); +}; + +