Skip to content

Commit

Permalink
Speed up Data.ByteString.Short.unpack (haskell#526)
Browse files Browse the repository at this point in the history
* Add benchmarks for pack/unpack (ShortByteString)

* Add benchmarks for lazy folds

* Improve performance of pack/unpack/folds in ShortByteString

* Use `GHC.Exts.build (unpackFoldr sbs)` for unpack and fix laziness

* Don't inline 'break' with zero arguments

Co-authored-by: Matthew Craven <clyring@gmail.com>

Co-authored-by: Matthew Craven <clyring@gmail.com>
  • Loading branch information
2 people authored and Bodigrim committed Nov 26, 2022
1 parent 421c561 commit ad1dfbf
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 13 deletions.
53 changes: 42 additions & 11 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
-- Not all architectures are forgiving of unaligned accesses; whitelist ones
-- which are known not to trap (either to the kernel for emulation, or crash).
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
Expand Down Expand Up @@ -268,7 +267,6 @@ import Prelude

import qualified Data.ByteString.Internal as BS

import qualified Data.Foldable as Foldable
import qualified Data.List as List
import qualified GHC.Exts
import qualified Language.Haskell.TH.Lib as TH
Expand Down Expand Up @@ -339,15 +337,15 @@ instance Read ShortByteString where
instance GHC.Exts.IsList ShortByteString where
type Item ShortByteString = Word8
fromList = packBytes
toList = unpackBytes
toList = unpack

-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ShortByteString where
fromString = packChars

instance Data ShortByteString where
gfoldl f z txt = z packBytes `f` unpackBytes txt
gfoldl f z txt = z packBytes `f` unpack txt
toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr"
gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString"
Expand Down Expand Up @@ -546,7 +544,20 @@ pack = packBytes

-- | /O(n)/. Convert a 'ShortByteString' into a list.
unpack :: ShortByteString -> [Word8]
unpack = unpackBytes
unpack sbs = GHC.Exts.build (unpackFoldr sbs)
{-# INLINE unpack #-}

--
-- Have unpack fuse with good list consumers
--
unpackFoldr :: ShortByteString -> (Word8 -> a -> a) -> a -> a
unpackFoldr sbs k z = foldr k z sbs
{-# INLINE [0] unpackFoldr #-}

{-# RULES
"ShortByteString unpack-list" [1] forall bs .
unpackFoldr bs (:) [] = unpackBytes bs
#-}

packChars :: [Char] -> ShortByteString
packChars = \cs -> packLenBytes (List.length cs) (List.map BS.c2w cs)
Expand Down Expand Up @@ -581,6 +592,7 @@ unpackChars sbs = unpackAppendCharsLazy sbs []
unpackBytes :: ShortByteString -> [Word8]
unpackBytes sbs = unpackAppendBytesLazy sbs []


-- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
-- takes just shy of 4k which seems like a reasonable amount.
-- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes)
Expand Down Expand Up @@ -613,15 +625,15 @@ unpackAppendBytesLazy sbs = go 0 (length sbs)
unpackAppendCharsStrict :: ShortByteString -> Int -> Int -> [Char] -> [Char]
unpackAppendCharsStrict !sbs off len = go (off-1) (off-1 + len)
where
go !sentinal !i !acc
go !sentinal !i acc
| i == sentinal = acc
| otherwise = let !c = indexCharArray (asBA sbs) i
in go sentinal (i-1) (c:acc)

unpackAppendBytesStrict :: ShortByteString -> Int -> Int -> [Word8] -> [Word8]
unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len)
where
go !sentinal !i !acc
go !sentinal !i acc
| i == sentinal = acc
| otherwise = let !w = indexWord8Array (asBA sbs) i
in go sentinal (i-1) (w:acc)
Expand Down Expand Up @@ -912,13 +924,27 @@ foldl' f v = List.foldl' f v . unpack
--
-- @since 0.11.3.0
foldr :: (Word8 -> a -> a) -> a -> ShortByteString -> a
foldr f v = List.foldr f v . unpack
foldr k v = \sbs ->
let l = length sbs
ba = asBA sbs
w = indexWord8Array ba
go !n | n >= l = v
| otherwise = k (w n) (go (n + 1))
in go 0
{-# INLINE foldr #-}

-- | 'foldr'' is like 'foldr', but strict in the accumulator.
--
-- @since 0.11.3.0
foldr' :: (Word8 -> a -> a) -> a -> ShortByteString -> a
foldr' k v = Foldable.foldr' k v . unpack
foldr' k v = \sbs ->
let l = length sbs
ba = asBA sbs
w = indexWord8Array ba
go !ix !v' | ix < 0 = v'
| otherwise = go (ix - 1) (k (w ix) v')
in go (l - 1) v
{-# INLINE foldr' #-}

-- | 'foldl1' is a variant of 'foldl' that has no starting value
-- argument, and thus must be applied to non-empty 'ShortByteString's.
Expand Down Expand Up @@ -1104,7 +1130,8 @@ breakEnd p = \sbs -> splitAt (findFromEndUntil p sbs) sbs
--
-- @since 0.11.3.0
break :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
break = \p -> \sbs -> case findIndexOrLength p sbs of n -> (take n sbs, drop n sbs)
break p = \sbs -> case findIndexOrLength p sbs of n -> (take n sbs, drop n sbs)
{-# INLINE break #-}

-- | Similar to 'Prelude.span',
-- returns the longest (possibly empty) prefix of elements
Expand Down Expand Up @@ -1297,6 +1324,7 @@ unfoldrN i f = \x0 ->
Just (w, x'') -> do
writeWord8Array mba n' w
go' x'' (n'+1)
{-# INLINE unfoldrN #-}



Expand Down Expand Up @@ -1456,6 +1484,7 @@ filter k = \sbs -> let l = length sbs
go' (br+1) (bw+1)
else
go' (br+1) bw
{-# INLINE filter #-}

-- | /O(n)/ The 'find' function takes a predicate and a ShortByteString,
-- and returns the first element in matching the predicate, or 'Nothing'
Expand All @@ -1468,6 +1497,7 @@ find :: (Word8 -> Bool) -> ShortByteString -> Maybe Word8
find f = \sbs -> case findIndex f sbs of
Just n -> Just (sbs `index` n)
_ -> Nothing
{-# INLINE find #-}

-- | /O(n)/ The 'partition' function takes a predicate a ShortByteString and returns
-- the pair of ShortByteStrings with elements which do and do not satisfy the
Expand Down Expand Up @@ -1549,6 +1579,7 @@ findIndex k = \sbs ->
| k (w n) = Just n
| otherwise = go (n + 1)
in go 0
{-# INLINE findIndex #-}


-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the
Expand Down
19 changes: 17 additions & 2 deletions bench/BenchShort.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

module BenchShort (benchShort) where

import Control.DeepSeq (force)
import Data.Foldable (foldMap)
import Data.Maybe (listToMaybe)
import Data.Monoid
Expand Down Expand Up @@ -107,6 +108,9 @@ w = fromIntegral
hashWord8 :: Word8 -> Word8
hashWord8 = fromIntegral . hashInt . fromIntegral

foldInputs' :: [[Word8]]
foldInputs' = force (S.unpack <$> foldInputs)

foldInputs :: [S.ShortByteString]
foldInputs = map (\k -> S.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16]

Expand Down Expand Up @@ -188,8 +192,12 @@ benchShort = bgroup "ShortByteString"
]
, bgroup "folds"
[ bgroup "strict"
[ bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $
[ bgroup "foldl" $ map (\s -> bench (show $ S.length s) $
nf (S.foldl (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs
, bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $
nf (S.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs
, bgroup "foldr" $ map (\s -> bench (show $ S.length s) $
nf (S.foldr (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs
, bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $
nf (S.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs
, bgroup "foldr1'" $ map (\s -> bench (show $ S.length s) $
Expand Down Expand Up @@ -231,5 +239,12 @@ benchShort = bgroup "ShortByteString"
, bench "FindIndex/inlined" $ nf (S.findIndex (== nl)) absurdlong
, bench "FindIndex/non-inlined" $ nf (S.findIndex (nilEq nl)) absurdlong
]
, bgroup "ShortByteString conversions" $
[ bgroup "unpack" $ map (\s -> bench (show $ S.length s) $
nf (\x -> S.unpack x) s) foldInputs
, bgroup "pack" $ map (\s -> bench (show $ length s) $
nf S.pack s) foldInputs'
, bench "unpack and get last element" $ nf (\x -> last . S.unpack $ x) absurdlong
, bench "unpack and get first 120 elements" $ nf (\x -> take 120 . S.unpack $ x) absurdlong
]
]

0 comments on commit ad1dfbf

Please sign in to comment.