diff --git a/Frameworks/Foundation/JSAttributedString.js b/Frameworks/Foundation/JSAttributedString.js
index b423226f..40a0e96e 100644
--- a/Frameworks/Foundation/JSAttributedString.js
+++ b/Frameworks/Foundation/JSAttributedString.js
@@ -18,6 +18,7 @@
 // #import "JSTextAttachment.js"
 // #import "JSFont.js"
 // #import "JSColor.js"
+// #import "JSHTMLTextParser.js"
 'use strict';
 
 (function(){
@@ -197,6 +198,48 @@ JSClass("JSAttributedString", JSObject, {
         this.initWithAttachment(attachment);
     },
 
+    // Initializes the string from HTML markup by running a JSHTMLTextParser over
+    // it and collecting the text and attributes the parser reports.
+    //
+    // html: the markup; null or undefined produces an empty string
+    // options: optional object; preserveWhitespace (enabled only by an exact
+    //     true) and baseURL (falsy values become null) are copied onto the
+    //     parser state before parsing
+    initWithHTML: function(html, options){
+        // null is treated like a missing argument so the reads below cannot throw
+        if (options === undefined || options === null){
+            options = {};
+        }
+        var string = "";
+        var run = JSAttributedStringRun(JSRange.Zero, {});
+        var runs = [run];
+        if (html !== null && html !== undefined){
+            var parser = JSHTMLTextParser.initWithHTML(html);
+            parser.delegate = {
+                htmlTextParserDidFindAttributedText: function(parser, text, attributes){
+                    if (run.range.length === 0){
+                        // the current run is still empty, so it takes on these attributes
+                        run.attributes = JSCopy(attributes);
+                        run.range.length += text.length;
+                    }else if (run.onlyContainsEqualAttributes(attributes)){
+                        run.range.length += text.length;
+                    }else{
+                        // copied like the empty-run case above, so that no run holds on
+                        // to an object the parser may still own
+                        run = JSAttributedStringRun(JSRange(run.range.end, text.length), JSCopy(attributes));
+                        runs.push(run);
+                    }
+                    string += text;
+                }
+            };
+            parser.state.preserveWhitespace = options.preserveWhitespace === true;
+            parser.state.baseURL = options.baseURL || null;
+            parser.parse();
+        }
+        this._string = string;
+        this._runs = runs;
+    },
+
     // MARK: - Getting the unattributed string value
 
     getString: function(){
diff --git a/Frameworks/Foundation/JSHTMLTextParser.js b/Frameworks/Foundation/JSHTMLTextParser.js
new file mode 100644
index 00000000..1810e6fd
--- /dev/null
+++ b/Frameworks/Foundation/JSHTMLTextParser.js
@@ -0,0 +1,600 @@
+// #import "JSObject.js"
+// #import "JSProtocol.js"
+// #import "JSFont.js"
+"use strict";
+
+JSProtocol("JSHTMLTextParserDelegate", JSProtocol, {
+
+    htmlTextParserDidFindAttributedText: function(parser, text, attributes){
+    },
+
+});
+
+(function(){
+
+JSClass("JSHTMLTextParser", JSObject, {
+
+    html: null,
+    offset: null,
+    length: null,
+    delegate: null,
+
+    // html: the markup to parse; with null or undefined, parse() returns immediately
+    initWithHTML: function(html){
+        this.html = html;
+        this.offset = 0;
+        // guarded so that a missing string reaches the null check in parse() instead
+        // of throwing here
+        this.length = (html !== null && html !== undefined) ? html.length : 0;
+        this.state = Object.create(JSHTMLTextParser.State);
+        this.state.textAttributes = {};
+        this.stack = [this.state];
+    },
+
+    // Walks the markup one character at a time, reporting start tags through
+    // beginElement() and end tags through endElement().  While
+    // state.ignoringUntilEndTag is set, everything up to the closing tag of
+    // state.elementName is skipped.
+    parse: function(){
+        var html = this.html;
+        var i = this.offset;
+        var l = this.length;
+        if (html === null || html === undefined){
+            return;
+        }
+        var elementName;
+        var entityName;
+        var attrName;
+        var attrValue;
+        var quote;
+        var elementAttributes;
+        var selfClosing = false;
+        while (i < l){
+            if (this.state.ignoringUntilEndTag){
+                if (html[i] === "<"){
+                    ++i;
+                    if (i < l && html[i] === "/"){
+                        ++i;
+                        if (html.substr(i, this.state.elementName.length).toLowerCase() === this.state.elementName){
+                            i += this.state.elementName.length;
+                            if (i < l && html[i] === ">"){
+                                ++i;
+                                this.popState();
+                            }
+                        }
+                    }
+                }else{
+                    ++i;
+                }
+            }else if (html[i] === "<"){
+                ++i;
+                if (i < l){
+                    if (isAlphabetic(html[i])){
+                        elementName = "";
+                        while (i < l && html[i] !== "/" && html[i] !== ">" && !isWhitespace(html[i])){
+                            if ((html[i] >= "A" && html[i] <= "Z")){
+                                elementName += html[i].toLowerCase();
+                            }else{
+                                elementName += html[i];
+                            }
+                            ++i;
+                        }
+                        elementAttributes = {};
+                        selfClosing = isVoidElement(elementName);
+                        if (i < l && html[i] === "/"){
+                            ++i;
+                        }
+                        while (i < l && html[i] !== ">"){
+                            while (i < l && isWhitespace(html[i])){
+                                ++i;
+                            }
+                            attrName = "";
+                            attrValue = "";
+                            while (i < l && html[i] !== "=" && html[i] !== "/" && html[i] !== ">" && !isWhitespace(html[i])){
+                                attrName += html[i];
+                                ++i;
+                            }
+                            while (i < l && isWhitespace(html[i])){
+                                ++i;
+                            }
+                            if (i < l && html[i] === "="){
+                                ++i;
+                                if (i < l && html[i] !== ">"){
+                                    if (html[i] === "\"" || html[i] === "'"){
+                                        quote = html[i];
+                                        ++i;
+                                        while (i < l && html[i] !== quote){
+                                            if (html[i] === "&"){
+                                                // Step over the "&" so entityName holds only what follows
+                                                // it (the fallback below re-adds the "&").  Without this
+                                                // step, a value like "a&b" never advances i and the loop
+                                                // cannot end, unless isEntityCharacter() accepts "&".
+                                                ++i;
+                                                entityName = "";
+                                                while (i < l && (isEntityCharacter(html[i]))){
+                                                    entityName += html[i];
+                                                    ++i;
+                                                }
+                                                if (i < l && html[i] === ";"){
+                                                    ++i;
+                                                    attrValue += decodedEntity(entityName);
+                                                }else{
+                                                    attrValue += "&" + entityName;
+                                                }
+                                            }else{
+                                                attrValue += html[i];
+                                                ++i;
+                                            }
+                                        }
+                                        ++i;
+                                    }else{
+                                        while (i < l && html[i] !== "/" && html[i] !== ">" && !isWhitespace(html[i])){
+                                            attrValue += html[i];
+                                            ++i;
+                                        }
+                                    }
+                                }
+                            }
+                            if (i < l && html[i] === "/"){
+                                ++i;
+                            }
+                            // trailing whitespace or a "/" before ">" produces no name at all
+                            if (attrName !== ""){
+                                elementAttributes[attrName] = attrValue;
+                            }
+                        }
+                        ++i;
+                        this.beginElement(elementName, elementAttributes, selfClosing);
+                    }else if (html[i] === "/"){
+                        ++i;
+                        elementName = "";
+                        // bounds check first: the markup may end right after "</"
+                        if (i < l && isAlphabetic(html[i])){
+                            while (i < l && html[i] !== "/" && html[i] !== ">" && !isWhitespace(html[i])){
+                                if ((html[i] >= "A" && html[i] <= "Z")){
+                                    elementName += html[i].toLowerCase();
+                                }else{
+                                    elementName += html[i];
+                                }
+                                ++i;
+                            }
+                        }
+                        while (i < l && html[i] !== ">"){
+                            ++i;
+                        }
+                        ++i;
+                        this.endElement(elementName);
+                    }else if (html[i] === "!" || html[i] === "?"){
+                        // valid comments or bogus markup...skipping
+                        //