Skip to content

Commit

Permalink
Fix #45, bump version.
Browse files Browse the repository at this point in the history
  • Loading branch information
inikulin committed Apr 8, 2015
1 parent 362b6a4 commit bc8862c
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 140 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 1.4.2
* Fix: htmlparser2 tree adapter `DocumentType.data` property rendering (GH [#45](https://github.com/inikulin/parse5/issues/45)).

## 1.4.1
* Fix: Location info handling for the implicitly generated `<html>` and `<body>` elements (GH [#44](https://github.com/inikulin/parse5/issues/44)).

Expand Down
243 changes: 134 additions & 109 deletions lib/tree_construction/doctype.js → lib/common/doctype.js
Original file line number Diff line number Diff line change
@@ -1,109 +1,134 @@
'use strict';

//Const
// The only doctype name that isQuirks does not immediately treat as quirks mode.
var VALID_DOCTYPE_NAME = 'html';

// System identifier that forces quirks mode; isQuirks compares it against the lower-cased system ID.
var QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';

// Lower-case public identifier prefixes. isQuirks reports quirks mode when the
// lower-cased public ID starts with any of these entries.
var QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
    "+//silmaril//dtd html pro v0r11 19970101//en",
    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
    "-//as//dtd html 3.0 aswedit + extensions//en",
    "-//ietf//dtd html 2.0 level 1//en",
    "-//ietf//dtd html 2.0 level 2//en",
    "-//ietf//dtd html 2.0 strict level 1//en",
    "-//ietf//dtd html 2.0 strict level 2//en",
    "-//ietf//dtd html 2.0 strict//en",
    "-//ietf//dtd html 2.0//en",
    "-//ietf//dtd html 2.1e//en",
    "-//ietf//dtd html 3.0//en",
    "-//ietf//dtd html 3.0//en//",
    "-//ietf//dtd html 3.2 final//en",
    "-//ietf//dtd html 3.2//en",
    "-//ietf//dtd html 3//en",
    "-//ietf//dtd html level 0//en",
    "-//ietf//dtd html level 0//en//2.0",
    "-//ietf//dtd html level 1//en",
    "-//ietf//dtd html level 1//en//2.0",
    "-//ietf//dtd html level 2//en",
    "-//ietf//dtd html level 2//en//2.0",
    "-//ietf//dtd html level 3//en",
    "-//ietf//dtd html level 3//en//3.0",
    "-//ietf//dtd html strict level 0//en",
    "-//ietf//dtd html strict level 0//en//2.0",
    "-//ietf//dtd html strict level 1//en",
    "-//ietf//dtd html strict level 1//en//2.0",
    "-//ietf//dtd html strict level 2//en",
    "-//ietf//dtd html strict level 2//en//2.0",
    "-//ietf//dtd html strict level 3//en",
    "-//ietf//dtd html strict level 3//en//3.0",
    "-//ietf//dtd html strict//en",
    "-//ietf//dtd html strict//en//2.0",
    "-//ietf//dtd html strict//en//3.0",
    "-//ietf//dtd html//en",
    "-//ietf//dtd html//en//2.0",
    "-//ietf//dtd html//en//3.0",
    "-//metrius//dtd metrius presentational//en",
    "-//microsoft//dtd internet explorer 2.0 html strict//en",
    "-//microsoft//dtd internet explorer 2.0 html//en",
    "-//microsoft//dtd internet explorer 2.0 tables//en",
    "-//microsoft//dtd internet explorer 3.0 html strict//en",
    "-//microsoft//dtd internet explorer 3.0 html//en",
    "-//microsoft//dtd internet explorer 3.0 tables//en",
    "-//netscape comm. corp.//dtd html//en",
    "-//netscape comm. corp.//dtd strict html//en",
    "-//o'reilly and associates//dtd html 2.0//en",
    "-//o'reilly and associates//dtd html extended 1.0//en",
    "-//spyglass//dtd html 2.0 extended//en",
    "-//sq//dtd html 2.0 hotmetal + extensions//en",
    "-//sun microsystems corp.//dtd hotjava html//en",
    "-//sun microsystems corp.//dtd hotjava strict html//en",
    "-//w3c//dtd html 3 1995-03-24//en",
    "-//w3c//dtd html 3.2 draft//en",
    "-//w3c//dtd html 3.2 final//en",
    "-//w3c//dtd html 3.2//en",
    "-//w3c//dtd html 3.2s draft//en",
    "-//w3c//dtd html 4.0 frameset//en",
    "-//w3c//dtd html 4.0 transitional//en",
    "-//w3c//dtd html experimental 19960712//en",
    "-//w3c//dtd html experimental 970421//en",
    "-//w3c//dtd w3 html//en",
    "-//w3o//dtd w3 html 3.0//en",
    "-//w3o//dtd w3 html 3.0//en//",
    "-//webtechs//dtd mozilla html 2.0//en",
    "-//webtechs//dtd mozilla html//en"
];

// Additional prefixes that isQuirks appends to the list above only when the system ID is null.
var QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//'
];

// Public identifiers that trigger quirks mode on an exact match with the lower-cased public ID.
var QUIRKS_MODE_PUBLIC_IDS = [
    '-//w3o//dtd w3 html strict 3.0//en//',
    '-/w3c/dtd html 4.0 transitional/en',
    'html'
];

exports.isQuirks = function (name, publicId, systemId) {
if (name !== VALID_DOCTYPE_NAME)
return true;

if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID)
return true;

if (publicId !== null) {
publicId = publicId.toLowerCase();

if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1)
return true;

var prefixes = QUIRKS_MODE_PUBLIC_ID_PREFIXES;

if (systemId === null)
prefixes = prefixes.concat(QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES);

for (var i = 0; i < prefixes.length; i++) {
if (publicId.indexOf(prefixes[i]) === 0)
return true;
}
}

return false;
};
'use strict';

//Const
// The only doctype name that isQuirks does not immediately treat as quirks mode.
var VALID_DOCTYPE_NAME = 'html';

// System identifier that forces quirks mode; isQuirks compares it against the lower-cased system ID.
var QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';

// Lower-case public identifier prefixes. isQuirks reports quirks mode when the
// lower-cased public ID starts with any of these entries.
var QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
    "+//silmaril//dtd html pro v0r11 19970101//en",
    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
    "-//as//dtd html 3.0 aswedit + extensions//en",
    "-//ietf//dtd html 2.0 level 1//en",
    "-//ietf//dtd html 2.0 level 2//en",
    "-//ietf//dtd html 2.0 strict level 1//en",
    "-//ietf//dtd html 2.0 strict level 2//en",
    "-//ietf//dtd html 2.0 strict//en",
    "-//ietf//dtd html 2.0//en",
    "-//ietf//dtd html 2.1e//en",
    "-//ietf//dtd html 3.0//en",
    "-//ietf//dtd html 3.0//en//",
    "-//ietf//dtd html 3.2 final//en",
    "-//ietf//dtd html 3.2//en",
    "-//ietf//dtd html 3//en",
    "-//ietf//dtd html level 0//en",
    "-//ietf//dtd html level 0//en//2.0",
    "-//ietf//dtd html level 1//en",
    "-//ietf//dtd html level 1//en//2.0",
    "-//ietf//dtd html level 2//en",
    "-//ietf//dtd html level 2//en//2.0",
    "-//ietf//dtd html level 3//en",
    "-//ietf//dtd html level 3//en//3.0",
    "-//ietf//dtd html strict level 0//en",
    "-//ietf//dtd html strict level 0//en//2.0",
    "-//ietf//dtd html strict level 1//en",
    "-//ietf//dtd html strict level 1//en//2.0",
    "-//ietf//dtd html strict level 2//en",
    "-//ietf//dtd html strict level 2//en//2.0",
    "-//ietf//dtd html strict level 3//en",
    "-//ietf//dtd html strict level 3//en//3.0",
    "-//ietf//dtd html strict//en",
    "-//ietf//dtd html strict//en//2.0",
    "-//ietf//dtd html strict//en//3.0",
    "-//ietf//dtd html//en",
    "-//ietf//dtd html//en//2.0",
    "-//ietf//dtd html//en//3.0",
    "-//metrius//dtd metrius presentational//en",
    "-//microsoft//dtd internet explorer 2.0 html strict//en",
    "-//microsoft//dtd internet explorer 2.0 html//en",
    "-//microsoft//dtd internet explorer 2.0 tables//en",
    "-//microsoft//dtd internet explorer 3.0 html strict//en",
    "-//microsoft//dtd internet explorer 3.0 html//en",
    "-//microsoft//dtd internet explorer 3.0 tables//en",
    "-//netscape comm. corp.//dtd html//en",
    "-//netscape comm. corp.//dtd strict html//en",
    "-//o'reilly and associates//dtd html 2.0//en",
    "-//o'reilly and associates//dtd html extended 1.0//en",
    "-//spyglass//dtd html 2.0 extended//en",
    "-//sq//dtd html 2.0 hotmetal + extensions//en",
    "-//sun microsystems corp.//dtd hotjava html//en",
    "-//sun microsystems corp.//dtd hotjava strict html//en",
    "-//w3c//dtd html 3 1995-03-24//en",
    "-//w3c//dtd html 3.2 draft//en",
    "-//w3c//dtd html 3.2 final//en",
    "-//w3c//dtd html 3.2//en",
    "-//w3c//dtd html 3.2s draft//en",
    "-//w3c//dtd html 4.0 frameset//en",
    "-//w3c//dtd html 4.0 transitional//en",
    "-//w3c//dtd html experimental 19960712//en",
    "-//w3c//dtd html experimental 970421//en",
    "-//w3c//dtd w3 html//en",
    "-//w3o//dtd w3 html 3.0//en",
    "-//w3o//dtd w3 html 3.0//en//",
    "-//webtechs//dtd mozilla html 2.0//en",
    "-//webtechs//dtd mozilla html//en"
];

// Additional prefixes that isQuirks appends to the list above only when the system ID is null.
var QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//'
];

// Public identifiers that trigger quirks mode on an exact match with the lower-cased public ID.
var QUIRKS_MODE_PUBLIC_IDS = [
    '-//w3o//dtd w3 html strict 3.0//en//',
    '-/w3c/dtd html 4.0 transitional/en',
    'html'
];


//Utils
/**
 * Wraps a doctype public/system identifier in quotes.
 *
 * @param {String} id - identifier text to wrap.
 * @returns {String} the identifier in double quotes, or in single quotes when
 *     the identifier itself contains a double quote.
 */
function enquoteDoctypeId(id) {
    if (id.indexOf('"') === -1)
        return '"' + id + '"';

    return '\'' + id + '\'';
}


//API
exports.isQuirks = function (name, publicId, systemId) {
if (name !== VALID_DOCTYPE_NAME)
return true;

if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID)
return true;

if (publicId !== null) {
publicId = publicId.toLowerCase();

if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1)
return true;

var prefixes = QUIRKS_MODE_PUBLIC_ID_PREFIXES;

if (systemId === null)
prefixes = prefixes.concat(QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES);

for (var i = 0; i < prefixes.length; i++) {
if (publicId.indexOf(prefixes[i]) === 0)
return true;
}
}

return false;
};

exports.serializeContent = function (name, publicId, systemId) {
var str = '!DOCTYPE ' + name;

if (publicId !== null)
str += ' PUBLIC ' + enquoteDoctypeId(publicId);

else if (systemId !== null)
str += ' SYSTEM';

if (systemId !== null)
str += ' ' + enquoteDoctypeId(systemId);

return str;
};
19 changes: 2 additions & 17 deletions lib/serialization/serializer.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict';

var DefaultTreeAdapter = require('../tree_adapters/default'),
Doctype = require('../common/doctype'),
Utils = require('../common/utils'),
HTML = require('../common/html');

Expand Down Expand Up @@ -40,11 +41,7 @@ function escapeString(str, attrMode) {


//Enquote doctype ID
/**
 * Wraps a doctype public/system identifier in quotes.
 *
 * @param {String} id - identifier text to wrap.
 * @returns {String} the identifier in double quotes, or in single quotes when
 *     the identifier itself contains a double quote.
 */
function enquoteDoctypeId(id) {
    if (id.indexOf('"') === -1)
        return '"' + id + '"';

    return '\'' + id + '\'';
}


//Serializer
Expand Down Expand Up @@ -178,17 +175,5 @@ Serializer.prototype._serializeDocumentTypeNode = function (node) {
publicId = this.treeAdapter.getDocumentTypeNodePublicId(node),
systemId = this.treeAdapter.getDocumentTypeNodeSystemId(node);

this.html += '<!DOCTYPE ' + name;

if (publicId !== null)
this.html += ' PUBLIC ' + enquoteDoctypeId(publicId);

else if (systemId !== null)
this.html += ' SYSTEM';

if (systemId !== null)
this.html += ' ' + enquoteDoctypeId(systemId);


this.html += '>';
this.html += '<' + Doctype.serializeContent(name, publicId, systemId) + '>';
};
16 changes: 4 additions & 12 deletions lib/tree_adapters/htmlparser2.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict';

var Doctype = require('../common/doctype');

//Conversion tables for DOM Level1 structure emulation
var nodeTypes = {
element: 1,
Expand Down Expand Up @@ -119,18 +121,8 @@ var createTextNode = function (value) {

//Tree mutation
exports.setDocumentType = function (document, name, publicId, systemId) {
var data = '!DOCTYPE';

if (name)
data += ' ' + name;

if (publicId)
data += ' PUBLIC "' + publicId + '"';

if (systemId)
data += ' "' + systemId + '"';

var doctypeNode = null;
var data = Doctype.serializeContent(name, publicId, systemId),
doctypeNode = null;

for (var i = 0; i < document.children.length; i++) {
if (document.children[i].type === 'directive' && document.children[i].name === '!doctype') {
Expand Down
2 changes: 1 addition & 1 deletion lib/tree_construction/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ var Tokenizer = require('../tokenization/tokenizer'),
OpenElementStack = require('./open_element_stack'),
FormattingElementList = require('./formatting_element_list'),
LocationInfoMixin = require('./location_info_mixin'),
Doctype = require('./doctype'),
DefaultTreeAdapter = require('../tree_adapters/default'),
Doctype = require('../common/doctype'),
ForeignContent = require('../common/foreign_content'),
Utils = require('../common/utils'),
UNICODE = require('../common/unicode'),
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "parse5",
"description": "WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node and io.js.",
"version": "1.4.1",
"version": "1.4.2",
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)",
"contributors": [
"Sebastian Mayr <sebmaster16@gmail.com> (http://blog.smayr.name)",
Expand Down
9 changes: 9 additions & 0 deletions test/fixtures/parser_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,12 @@ TestUtils.generateTestsForEachTreeAdapter(module.exports, function (_test, treeA
});


// GH-45: with the htmlparser2 tree adapter, the doctype node's `data` must keep
// the SYSTEM keyword when the doctype has a system ID but no public ID.
exports['Regression - HTML5 Legacy Doctype Misparsed with htmlparser2 tree adapter (GH-45)'] = function () {
    var source = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html><head></head><body>Hi there!</body></html>';
    var htmlparser2Parser = new Parser(parse5.TreeAdapters.htmlparser2);
    var doctypeNode = htmlparser2Parser.parse(source).childNodes[0];

    assert.strictEqual(doctypeNode.data, '!DOCTYPE html SYSTEM "about:legacy-compat"');
};


0 comments on commit bc8862c

Please sign in to comment.