-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcontent.go
88 lines (73 loc) · 2 KB
/
content.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package telegraph
// Copied from https://github.com/toby3d/telegraph/blob/master/content.go
// with some modifications.
import (
"bytes"
"errors"
"io"
"strings"
"golang.org/x/net/html"
)
// contentFormat parses HTML and converts the contents of its <body> into a
// slice of Node values.
//
// data may be a string, a []byte or an io.Reader holding the HTML; any other
// type yields an "invalid data type" error. A parse error from html.Parse is
// returned unchanged.
//
// The parser wraps the input in an html/head/body skeleton, so only the
// children of <body> are returned. Nodes that are neither text nor elements
// (for which domToNode returns nil) are left out. If no <body> element can be
// found, a single placeholder paragraph is returned instead.
func contentFormat(data interface{}) (n []Node, err error) {
	var dst *html.Node
	switch src := data.(type) {
	case string:
		dst, err = html.Parse(strings.NewReader(src))
	case []byte:
		dst, err = html.Parse(bytes.NewReader(src))
	case io.Reader:
		dst, err = html.Parse(src)
	default:
		return nil, errors.New("invalid data type")
	}
	if err != nil {
		return nil, err
	}

	// childElement returns the first direct child of parent that is an
	// element with the given tag name, or nil when there is none.
	childElement := func(parent *html.Node, tag string) *html.Node {
		if parent == nil {
			return nil
		}
		for c := parent.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.ElementNode && c.Data == tag {
				return c
			}
		}
		return nil
	}

	// The document's first child is not necessarily <html> (a doctype or a
	// comment may precede it), and <body> is not necessarily the second child
	// of <html> (whitespace or comments may sit between <head> and <body>),
	// so both are located by tag name rather than by position.
	body := childElement(childElement(dst, "html"), "body")
	if body == nil {
		return []Node{&NodeElement{
			Tag:      "p",
			Children: []Node{"没有内容"},
		}}, nil
	}

	for c := body.FirstChild; c != nil; c = c.NextSibling {
		// domToNode returns nil for nodes such as comments; skip them so the
		// result holds only strings and elements.
		if node := domToNode(c); node != nil {
			n = append(n, node)
		}
	}
	return n, nil
}
// domToNode recursively converts a parsed HTML DOM node into Node data.
//
// A text node is returned as its string content. An element node is returned
// as a *NodeElement whose Children are the converted child nodes. Only tags in
// the list below get their Tag set, and on those only the "href" and "src"
// attributes are copied into Attrs. An element with any other tag still
// produces a NodeElement (with an empty Tag) that carries its converted
// children. A nil input, or any other node type (comment, doctype, ...),
// yields nil.
func domToNode(domNode *html.Node) interface{} {
	if domNode == nil {
		return nil
	}
	switch domNode.Type {
	case html.TextNode:
		return domNode.Data
	case html.ElementNode:
		// Converted below.
	default:
		return nil
	}

	nodeElement := new(NodeElement)
	switch strings.ToLower(domNode.Data) {
	case "a", "aside", "b", "blockquote", "br", "code", "em", "figcaption", "figure", "h3", "h4", "hr", "i",
		"iframe", "img", "li", "ol", "p", "pre", "s", "strong", "u", "ul", "video":
		nodeElement.Tag = domNode.Data
		for _, attr := range domNode.Attr {
			switch strings.ToLower(attr.Key) {
			case "href", "src":
				// Accumulate into a single map so that an element carrying
				// both attributes keeps both of them.
				if nodeElement.Attrs == nil {
					nodeElement.Attrs = make(map[string]string, 2)
				}
				nodeElement.Attrs[attr.Key] = attr.Val
			}
		}
	}

	for child := domNode.FirstChild; child != nil; child = child.NextSibling {
		// Skip children that convert to nil (e.g. comments) so Children only
		// ever holds strings and *NodeElement values.
		if converted := domToNode(child); converted != nil {
			nodeElement.Children = append(nodeElement.Children, converted)
		}
	}
	return nodeElement
}