-
Notifications
You must be signed in to change notification settings - Fork 2
/
sanitizer.go
110 lines (90 loc) · 2.72 KB
/
sanitizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package htmlsanitizer
import (
"bytes"
"io"
"net/url"
)
// DefaultURLSanitizer is the default, strict URL sanitizer.
//
// rawURL is parsed with url.Parse and accepted only when all of the
// following hold:
//   - parsing succeeds;
//   - the URL is not opaque (forms such as "javascript:..." or "mailto:..."
//     are rejected);
//   - the scheme is "http", "https" or empty. An empty scheme covers
//     relative references (abc, abc?xxx=1, abc#123), absolute paths
//     (/abc, /abc?xxx=1, /abc#123) and scheme-less references that carry a
//     host (//example.com/abc).
//
// On acceptance it returns the re-serialized URL and true; otherwise it
// returns the empty string and false.
func DefaultURLSanitizer(rawURL string) (sanitzed string, ok bool) {
	parsed, err := url.Parse(rawURL)
	// parsed is nil when err is non-nil, so the error test must come first.
	if err != nil || parsed.Opaque != "" {
		return "", false
	}

	// Anything other than http(s) or a scheme-less reference is refused.
	if scheme := parsed.Scheme; scheme != "" && scheme != "http" && scheme != "https" {
		return "", false
	}

	return parsed.String(), true
}
// HTMLSanitizer is an allowlist-based sanitizer for arbitrary HTML content.
// It is described by its author as super fast, with O(n) time complexity;
// NOTE(review): the sanitizing logic is not part of this declaration, so
// that claim cannot be confirmed from here.
type HTMLSanitizer struct {
// AllowList is embedded by pointer. NewHTMLSanitizer sets it to
// DefaultAllowList.Clone().
*AllowList
// URLSanitizer is the func used to sanitize every URLAttr value.
// It returns the sanitized URL and a bool indicating whether the
// current attribute is acceptable. If it is not acceptable, the
// current attribute is ignored.
// If the func is nil, DefaultURLSanitizer is used instead.
URLSanitizer func(rawURL string) (sanitzed string, ok bool)
}
// NewHTMLSanitizer returns a fresh HTMLSanitizer whose AllowList is the
// result of DefaultAllowList.Clone(). URLSanitizer is left nil, so
// DefaultURLSanitizer is used for URL attributes.
func NewHTMLSanitizer() *HTMLSanitizer {
	s := new(HTMLSanitizer)
	s.AllowList = DefaultAllowList.Clone()
	return s
}
// urlSanitizer sanitizes rawURL with the user-supplied f.URLSanitizer when
// one is set, and with DefaultURLSanitizer otherwise.
func (f *HTMLSanitizer) urlSanitizer(rawURL string) (sanitzed string, ok bool) {
	sanitize := f.URLSanitizer
	if sanitize == nil {
		sanitize = DefaultURLSanitizer
	}
	return sanitize(rawURL)
}
// NewWriter returns a new io.Writer, bound to this sanitizer's settings,
// that writes sanitized HTML content to w.
func (f *HTMLSanitizer) NewWriter(w io.Writer) io.Writer {
	sw := new(writer)
	sw.HTMLSanitizer = f
	sw.w = w
	return sw
}
// Sanitize sanitizes the HTML in data and returns the sanitized HTML.
//
// The data is written in a single call to a writer obtained from
// f.NewWriter, backed by an in-memory buffer. If that write fails, Sanitize
// returns nil and the error.
func (f *HTMLSanitizer) Sanitize(data []byte) ([]byte, error) {
	var out bytes.Buffer

	sw := f.NewWriter(&out)
	_, err := sw.Write(data)
	if err != nil {
		return nil, err
	}

	return out.Bytes(), nil
}
// SanitizeString sanitizes the HTML string data and returns the sanitized
// HTML as a string, together with any error reported by Sanitize.
func (f *HTMLSanitizer) SanitizeString(data string) (string, error) {
	out, err := f.Sanitize([]byte(data))
	// Converting a nil slice yields "", so no explicit nil check is needed.
	return string(out), err
}
// defaultHTMLSanitizer is the package-level sanitizer, created once by
// NewHTMLSanitizer, that backs the package-level NewWriter, Sanitize and
// SanitizeString functions.
var defaultHTMLSanitizer = NewHTMLSanitizer()
// NewWriter returns a new io.Writer, created by the package-level default
// sanitizer, that writes sanitized HTML content to w. The default
// sanitizer's allowlist is obtained from DefaultAllowList.Clone() when the
// package is initialized.
func NewWriter(w io.Writer) io.Writer {
return defaultHTMLSanitizer.NewWriter(w)
}
// Sanitize sanitizes the HTML data with the package-level default
// sanitizer, whose allowlist is obtained from DefaultAllowList.Clone() when
// the package is initialized. It returns whatever that sanitizer's Sanitize
// method returns.
func Sanitize(data []byte) ([]byte, error) {
return defaultHTMLSanitizer.Sanitize(data)
}
// SanitizeString sanitizes the HTML string with the package-level default
// sanitizer, whose allowlist is obtained from DefaultAllowList.Clone() when
// the package is initialized. It returns whatever that sanitizer's
// SanitizeString method returns.
func SanitizeString(data string) (string, error) {
return defaultHTMLSanitizer.SanitizeString(data)
}