From ce71109b4d0ab8a876ffd99c4e4dda9918fdf849 Mon Sep 17 00:00:00 2001 From: guonaihong Date: Tue, 2 Feb 2021 19:23:45 +0800 Subject: [PATCH] bugfix #1 #2 --- README.md | 2 +- parser.go | 72 +++++++++++++------- parser_issue_fix_test.go | 140 +++++++++++++++++++++++++++++++++++++++ parser_req_test.go | 120 +++++++++++++++++++++++++++++++++ setting.go | 12 +++- 5 files changed, 319 insertions(+), 27 deletions(-) create mode 100644 parser_issue_fix_test.go diff --git a/README.md b/README.md index ba12aac..776abcb 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ make example.run ``` ### return value * err != nil 错误 -* sucess== len(data) 所有数据成功解析 +* sucess == len(data) 所有数据成功解析 * sucess < len(data) 只解析部分数据,未解析的数据需再送一次 ### 吞吐量 diff --git a/parser.go b/parser.go index 7373785..4c2b865 100644 --- a/parser.go +++ b/parser.go @@ -91,14 +91,14 @@ func (p *Parser) Init(t ReqOrRsp) { // 响应行 // https://tools.ietf.org/html/rfc7230#section-3.1.2 状态行 // status-line = HTTP-version SP status-code SP reason-phrase CRLF -// 注意: -// 调用必须保证status-line的数据包是完整的,不需要担心读不全status-line的情况基本不会发生 -// (mtu 大约是1530左右,而status-line不会超过1个mtu)。 // 请求行 // https://tools.ietf.org/html/rfc7230#section-3.1.1 // method SP request-target SP HTTP-version CRLF +// 设计思路修改 +// 为了适应流量解析的场景,状态机的状态会更碎一点 + func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) { currState := p.currState @@ -107,9 +107,11 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) i := 0 c := byte(0) + for ; i < len(buf); i++ { c = buf[i] - next: + + reExec: switch currState { case startReqOrRsp: if c == 'H' { @@ -120,11 +122,12 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) continue } currState = startReq - goto next + fallthrough case startReq: if token[c] == 0 { return 0, ErrReqMethod } + currState = reqMethod if setting.MessageBegin != nil { setting.MessageBegin() @@ -157,9 +160,9 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) case reqURLAfterSP: if c != ' ' && c != '\t' { - currState = reqHTTP + currState = reqHTTPVersion } - case reqHTTP: + case reqHTTPVersion: if c == '\r' { currState = reqRequestLineAlomstDone } @@ -218,7 +221,7 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) } currState = rspStatus - goto next + goto reExec case rspStatus: start := i @@ -234,6 +237,7 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) return 0, ErrRspStatusLine } + //TODO单独状态 switch { case buf[end] == '\r' && buf[end+1] == '\n': i = end + 1 @@ -258,6 +262,8 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) if int32(len(buf[i:])) > p.maxHeaderSize { return 0, ErrHeaderOverflow } + + p.currState = headerField return i, nil } @@ -286,13 +292,11 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) currState = headerValueDiscardWs case headerValueDiscardWs: // 只跳过一个' ' or '\t' + currState = headerValue if c == ' ' || c == '\t' { - currState = headerValue continue } - currState = headerValue - // 解析http value case headerValue: end := bytes.IndexAny(buf[i:], "\r\n") @@ -300,6 +304,7 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) if int32(len(buf[i:])) > p.maxHeaderSize { return 0, ErrHeaderOverflow } + p.currState = headerValueDiscardWs return i, nil } @@ -326,15 +331,28 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) p.hasTransferEncoding = true } - // TODO 这里的\r\n 可以单独拎一个状态出来 i += end - switch { - case buf[i] == '\r' && buf[i+1] == '\n': - i++ - case buf[i] == '\r' || buf[i] == '\n': + + c = buf[i] + currState = headerValueStartOWS + // 进入header value的OWS + fallthrough + case headerValueStartOWS: + if c == '\r' { + currState = headerValueOWS + continue } + // 不是'\r'的情况,继续往下判断 + fallthrough + case headerValueOWS: currState = headerField + if c == '\n' { + continue + } + + // 不是'\n'也许是headerField的数据 + goto reExec case headerDone: if c != '\n' { @@ -447,12 +465,15 @@ func (p *Parser) Execute(setting *Setting, buf []byte) (success int, err error) currState = chunkedDataDone case chunkedDataDone: currState = chunkedSizeStart - //case messageAlmostDone: - // currState = messageDone - //case messageDone: - // if setting.MessageComplete != nil { - // setting.MessageComplete() - // } + case messageDone: + // 规范的chunked包是以\r\n结尾的 + if c == '\r' || c == '\n' { + continue + } + + currState = startReqOrRsp + p.Reset() + goto reExec } } @@ -467,7 +488,7 @@ func (p *Parser) SetMaxHeaderSize(size int32) { } func (p *Parser) Reset() { - //p.currState = + p.currState = startReqOrRsp p.headerCurrState = hGeneral p.major = 0 p.minor = 0 @@ -478,6 +499,11 @@ func (p *Parser) Reset() { p.hasTransferEncoding = false } +// debug专用 +func (p *Parser) Status() string { + return stateTab[p.currState] +} + func (p *Parser) Eof() bool { return p.currState == messageDone } diff --git a/parser_issue_fix_test.go b/parser_issue_fix_test.go new file mode 100644 index 0000000..6345938 --- /dev/null +++ b/parser_issue_fix_test.go @@ -0,0 +1,140 @@ +package httparser + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_Isuse1(t *testing.T) { + var data = []byte( + "POST /joyent/http-parser HTTP/1.1\r\n" + + "Host: github.com\r\n" + + "DNT: 1\r\n" + + "Accept-Encoding: gzip, deflate, sdch\r\n" + + "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" + + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/39.0.2171.65 Safari/537.36\r\n" + + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," + + "image/webp,*/*;q=0.8\r\n" + + "Referer: https://github.com/joyent/http-parser\r\n" + + "Connection: keep-alive\r\n" + + "Transfer-Encoding: chunked\r\n" + + "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n" + + + "POST /joyent/http-parser HTTP/1.1\r\n" + + "Host: github.com\r\n" + + "DNT: 1\r\n" + + "Accept-Encoding: gzip, deflate, sdch\r\n" + + "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" + + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/39.0.2171.65 Safari/537.36\r\n" + + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," + + "image/webp,*/*;q=0.8\r\n" + + "Referer: https://github.com/joyent/http-parser\r\n" + + "Connection: keep-alive\r\n" + + "Transfer-Encoding: chunked\r\n" + + "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n") + + body := []byte{} + var setting = Setting{ + MessageBegin: func() { + fmt.Println("---- begin") + }, + URL: func(buf []byte) { + }, + Status: func([]byte) { + // 响应包才需要用到 + }, + HeaderField: func(buf []byte) { + }, + HeaderValue: func(buf []byte) { + }, + HeadersComplete: func() { + }, + Body: func(buf []byte) { + body = append(body, buf...) + }, + MessageComplete: func() { + }, + } + + p := New(REQUEST) + fmt.Printf("req_len=%d\n", len(data)/2) + // 一个POST 518,一共两个POST,第一次解析600字节,第二次解析剩余的 + data1, data2 := data[:600], data[600:] + _, err := p.Execute(&setting, data1) + if err != nil { + panic(err.Error()) + } + + _, err = p.Execute(&setting, data2) + if err != nil { + panic(err.Error()) + } + + assert.Equal(t, body, []byte("hello worldhello world")) + p.Reset() + +} + +func Test_Issue2(t *testing.T) { + + var data = []byte( + "POST /joyent/http-parser HTTP/1.1\r\n" + + "Host: github.com\r\n" + + "DNT: 1\r\n" + + "Accept-Encoding: gzip, deflate, sdch\r\n" + + "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" + + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/39.0.2171.65 Safari/537.36\r\n" + + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," + + "image/webp,*/*;q=0.8\r\n" + + "Referer: https://github.com/joyent/http-parser\r\n" + + "Connection: keep-alive\r\n" + + "Transfer-Encoding: chunked\r\n" + + "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n") + + var body []byte + var setting = Setting{ + MessageBegin: func() { + }, + URL: func(buf []byte) { + }, + Status: func([]byte) { + }, + HeaderField: func(buf []byte) { + }, + HeaderValue: func(buf []byte) { + }, + HeadersComplete: func() { + }, + Body: func(buf []byte) { + body = append(body, buf...) + }, + MessageComplete: func() { + fmt.Println("---- complete") + }, + } + + p := New(REQUEST) + fmt.Printf("req_len=%d\n", len(data)) + // 一个POST 518,一共两个POST,第一次解析600字节,第二次解析剩余的 + data1, data2 := data[:300], data[300:] + sucess, err := p.Execute(&setting, data1) + if err != nil { + panic(err.Error()) + } + + sucess, err = p.Execute(&setting, append(data1[sucess:], data2...)) + if err != nil { + panic(err.Error()) + } + + p.Reset() + +} diff --git a/parser_req_test.go b/parser_req_test.go index 0c99b73..da6f024 100644 --- a/parser_req_test.go +++ b/parser_req_test.go @@ -221,3 +221,123 @@ func Test_ParserResponse_RequestBody2(t *testing.T) { //fmt.Printf("##:%s", stateTab[p.currState]) } + +//https://github.com/antlabs/httparser/issues/1 +func Test_ParserRequest_chunked_segment(t *testing.T) { + var data = []byte( + "POST /joyent/http-parser HTTP/1.1\r\n" + + "Host: github.com\r\n" + + "DNT: 1\r\n" + + "Accept-Encoding: gzip, deflate, sdch\r\n" + + "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" + + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/39.0.2171.65 Safari/537.36\r\n" + + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," + + "image/webp,*/*;q=0.8\r\n" + + "Referer: https://github.com/joyent/http-parser\r\n" + + "Connection: keep-alive\r\n" + + "Transfer-Encoding: chunked\r\n" + + "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n" + + "POST /joyent/http-parser HTTP/1.1\r\n" + + "Host: github.com\r\n" + + "DNT: 1\r\n" + + "Accept-Encoding: gzip, deflate, sdch\r\n" + + "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" + + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/39.0.2171.65 Safari/537.36\r\n" + + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," + + "image/webp,*/*;q=0.8\r\n" + + "Referer: https://github.com/joyent/http-parser\r\n" + + "Connection: keep-alive\r\n" + + "Transfer-Encoding: chunked\r\n" + + "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n") + + var body []byte + var setting = Setting{ + MessageBegin: func() { + //fmt.Println("---- begin") + }, + URL: func(buf []byte) { + }, + Status: func([]byte) { + // 响应包才需要用到 + }, + HeaderField: func(buf []byte) { + }, + HeaderValue: func(buf []byte) { + }, + HeadersComplete: func() { + }, + Body: func(buf []byte) { + //fmt.Printf("###:%s\n", buf) + body = append(body, buf...) + }, + MessageComplete: func() { + //fmt.Println("---- complete") + }, + } + + p := New(REQUEST) + + for size := 120; size < 2*len(data); size++ { + + // 双缓冲buffer + // 左边放溢出的,右边放本次读入数据, 这么设计可以减少内存拷贝 + doubleBuffer := make([]byte, size*2) + + left := size + body = []byte{} + totalSentBuf := []byte{} //存放送入Execute的总数据 + + for leftSize := 0; leftSize < len(data); leftSize += size { + //计算最小右边 边际 + right := min(int32(leftSize+size), int32(len(data))) + + //拷贝右边数据 + n := copy(doubleBuffer[size:], data[leftSize:right]) //模拟从异步io里面填充一块buffer + + // 把溢出数据包含进来 + // 左边放需要重新解析数据,右边放新塞的buffer + currSentData := doubleBuffer[left : size+n] + + //解析 + success, err := p.Execute(&setting, currSentData) + if err != nil { + //panic(err.Error() + fmt.Sprintf(" size:%d", size)) + } + + if success != len(currSentData) { + // 测试用, 把送入解析器的buffer累加起来,最后验证下数据送得对不对 + totalSentBuf = append(totalSentBuf, currSentData[:success]...) + + left = size - (len(currSentData) - success) + n = copy(doubleBuffer[left:], currSentData[success:]) + if n >= size { + panic("abnormal") + } + + } else { + // 测试用 + totalSentBuf = append(totalSentBuf, currSentData...) + + left = size + } + + } + + b := assert.Equal(t, string(data), string(totalSentBuf)) + if !b { + return + } + + b = assert.Equal(t, body, []byte("hello worldhello world")) + if !b { + return + } + } + + p.Reset() + +} diff --git a/setting.go b/setting.go index 4a16a89..7bc9521 100644 --- a/setting.go +++ b/setting.go @@ -41,7 +41,7 @@ const ( // 请求URL后面的SP reqURLAfterSP // - reqHTTP + reqHTTPVersion // request-line \r的位置 reqRequestLineAlomstDone // response状态 @@ -65,6 +65,10 @@ const ( headerValueDiscardWs // 进入http value headerValue + // 刚开始进入http value后面的OWS + headerValueStartOWS + // 快要离开http value后面的OWS + headerValueOWS // 进入http body httpBody // 开始进入到chunked 数字解析 @@ -94,8 +98,8 @@ var stateTab = []string{ reqMethodAfterSP: "reqMethodAfterSP", reqURL: "reqURL", reqURLAfterSP: "reqURLAfterSP", - reqHTTP: "reqHTTP", - reqRequestLineAlomstDone: "reqRequestLineAlomstDon", + reqHTTPVersion: "reqHTTPVersion", + reqRequestLineAlomstDone: "reqRequestLineAlomstDone", startRsp: "startRsp", rspHTTP: "rspHTTP", rspHTTPVersionNum: "rspHTTPVersionNum", @@ -106,6 +110,8 @@ var stateTab = []string{ headerField: "headerField", headerValueDiscardWs: "headerValueDiscardWs", headerValue: "headerValue", + headerValueStartOWS: "headerValueStartOWS", + headerValueOWS: "headerValueOWS", httpBody: "httpBody", chunkedSizeStart: "chunkedSizeStart", chunkedSize: "chunkedSize",