Skip to content

Commit

Permalink
refactor(tools/string): speed up strip (whitespace) (#13168)
Browse files Browse the repository at this point in the history
### Summary

With a simple microbenchmark:
```lua
ngx.update_time()
local s = ngx.now()
for i = 1, 100000 do
    local a = strip("           \t \ndogestr   \f\t\r ")
end
ngx.update_time()
local e = ngx.now()

print("took: ", (e * 1000) - (s * 1000), " ms")
```

I get these results:
Current: `took: 57 ms`
     PR: `took: 7 ms`

Signed-off-by: Aapo Talvensaari <aapo.talvensaari@gmail.com>
(cherry picked from commit 582d5ac)
  • Loading branch information
bungle committed Jun 12, 2024
1 parent f25a663 commit d08029e
Showing 1 changed file with 57 additions and 15 deletions.
72 changes: 57 additions & 15 deletions kong/tools/string.lua
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,20 @@
local pl_stringx = require "pl.stringx"


local type = type
local ipairs = ipairs
local tostring = tostring
local lower = string.lower
local fmt = string.format
local find = string.find
local gsub = string.gsub
local type = type
local ipairs = ipairs
local tostring = tostring
local lower = string.lower
local sub = string.sub
local fmt = string.format
local find = string.find
local gsub = string.gsub
local byte = string.byte


local SPACE_BYTE = byte(" ")
local TAB_BYTE = byte("\t")
local CR_BYTE = byte("\r")


local _M = {}
Expand All @@ -31,16 +38,52 @@ _M.split = pl_stringx.split

--- Strips leading and trailing whitespace from a string.
-- A `nil` argument yields the empty string. Any other non-string argument is
-- converted with `tostring` before stripping.
-- NOTE(review): this listing looks like a flattened diff; the lines using
-- `str` appear to be the previous implementation and the lines using `value`
-- the replacement. Confirm against the repository before editing.
-- @function strip
-- @param value the value to strip (parameter named `str` in the `str` lines)
-- @return the stripped string, or "" when nothing but whitespace remains
_M.strip = function(str)
if str == nil then
_M.strip = function(value)
if value == nil then
return ""
end
str = tostring(str)
if #str > 200 then
return str:gsub("^%s+", ""):reverse():gsub("^%s+", ""):reverse()
else
return str:match("^%s*(.-)%s*$")

-- TODO: do we want to operate on non-string values (kept for backward compatibility)?
if type(value) ~= "string" then
value = tostring(value) or ""
end

-- nothing to scan in an empty string
if value == "" then
return ""
end

local len = #value
local s = 1 -- position of the leftmost non-whitespace char
-- scan forward; s advances once per leading whitespace byte and the loop
-- stops at the first byte that is not whitespace
for i = 1, len do
local b = byte(value, i)
-- whitespace here means a space, or any byte in the contiguous range
-- TAB_BYTE..CR_BYTE (byte values 9 to 13: \t, \n, \v, \f, \r)
if b == SPACE_BYTE or (b >= TAB_BYTE and b <= CR_BYTE) then
s = s + 1
else
break
end
end

-- every byte was whitespace
if s > len then
return ""
end

local e = len -- position of the rightmost non-whitespace char
-- when s == e the only remaining byte is already known to be non-whitespace,
-- so the backward scan is skipped
if s < e then
-- scan backward; e retreats once per trailing whitespace byte
for i = e, 1, -1 do
local b = byte(value, i)
if b == SPACE_BYTE or (b >= TAB_BYTE and b <= CR_BYTE) then
e = e - 1
else
break
end
end
end

-- only build a substring when something was trimmed; otherwise the input
-- string is returned as is
if s ~= 1 or e ~= len then
value = sub(value, s, e)
end

return value
end


Expand Down Expand Up @@ -187,4 +230,3 @@ _M.replace_dashes_lower = replace_dashes_lower


return _M

0 comments on commit d08029e

Please sign in to comment.