From 4c33cfb1f19310fb9ac4c15e0a64ab33aeb6aa23 Mon Sep 17 00:00:00 2001 From: Simon Hengel Date: Mon, 25 Sep 2023 13:39:19 +0700 Subject: [PATCH] Remove code duplication Use `unsafeDrop` and friends instead of explicitly constructing values with `Data.ByteString.Internal.BS`. - All those primitives are marked with `INLINE`. - This does not change the generated core. --- Changelog.md | 6 ++++ Data/ByteString.hs | 58 +++++++++++++++++--------------- Data/ByteString/Char8.hs | 12 +++---- Data/ByteString/Lazy.hs | 4 +-- Data/ByteString/Lazy/Internal.hs | 2 +- Data/ByteString/Lazy/ReadInt.hs | 11 +++--- Data/ByteString/Lazy/ReadNat.hs | 11 +++--- Data/ByteString/Unsafe.hs | 17 ++++++++++ 8 files changed, 74 insertions(+), 47 deletions(-) diff --git a/Changelog.md b/Changelog.md index 69e4fbb24..9c506da89 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,9 @@ +[0.12.1.0]: + +* API additions: + * New primitives `Data.ByteString.Unsafe.unsafeTakeEnd` and + `Data.ByteString.Unsafe.unsafeDropEnd` + [0.12.0.2] — August 2023 * Bug fixes: diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 1e39ae67f..ce4c94fb4 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -423,19 +423,19 @@ head (BS x l) -- -- This is a partial function, consider using 'uncons' instead. tail :: HasCallStack => ByteString -> ByteString -tail (BS p l) - | l <= 0 = errorEmptyList "tail" - | otherwise = BS (plusForeignPtr p 1) (l-1) +tail ps + | length ps <= 0 = errorEmptyList "tail" + | otherwise = unsafeDrop 1 ps {-# INLINE tail #-} -- | /O(1)/ Extract the 'head' and 'tail' of a ByteString, returning 'Nothing' -- if it is empty. uncons :: ByteString -> Maybe (Word8, ByteString) -uncons (BS x l) +uncons ps@(BS x l) | l <= 0 = Nothing | otherwise = Just (accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peek p, - BS (plusForeignPtr x 1) (l-1)) + unsafeDrop 1 ps) {-# INLINE uncons #-} -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. @@ -454,17 +454,17 @@ last ps@(BS x l) -- -- This is a partial function, consider using 'unsnoc' instead. init :: HasCallStack => ByteString -> ByteString -init ps@(BS p l) +init ps | null ps = errorEmptyList "init" - | otherwise = BS p (l-1) + | otherwise = unsafeDropEnd 1 ps {-# INLINE init #-} -- | /O(1)/ Extract the 'init' and 'last' of a ByteString, returning 'Nothing' -- if it is empty. unsnoc :: ByteString -> Maybe (ByteString, Word8) -unsnoc (BS x l) +unsnoc ps@(BS x l) | l <= 0 = Nothing - | otherwise = Just (BS x (l-1), + | otherwise = Just (unsafeDropEnd 1 ps, accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1)) {-# INLINE unsnoc #-} @@ -921,10 +921,10 @@ unfoldrN i f x0 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. take :: Int -> ByteString -> ByteString -take n ps@(BS x l) +take n ps@(BS _ l) | n <= 0 = empty | n >= l = ps - | otherwise = BS x n + | otherwise = unsafeTake n ps {-# INLINE take #-} -- | /O(1)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. @@ -939,19 +939,19 @@ take n ps@(BS x l) -- -- @since 0.11.1.0 takeEnd :: Int -> ByteString -> ByteString -takeEnd n ps@(BS x len) +takeEnd n ps@(BS _ len) | n >= len = ps | n <= 0 = empty - | otherwise = BS (plusForeignPtr x (len - n)) n + | otherwise = unsafeTakeEnd n ps {-# INLINE takeEnd #-} -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ -- elements, or 'empty' if @n > 'length' xs@. drop :: Int -> ByteString -> ByteString -drop n ps@(BS x l) +drop n ps@(BS _ l) | n <= 0 = ps | n >= l = empty - | otherwise = BS (plusForeignPtr x n) (l-n) + | otherwise = unsafeDrop n ps {-# INLINE drop #-} -- | /O(1)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. @@ -966,18 +966,18 @@ drop n ps@(BS x l) -- -- @since 0.11.1.0 dropEnd :: Int -> ByteString -> ByteString -dropEnd n ps@(BS x len) +dropEnd n ps@(BS _ len) | n <= 0 = ps | n >= len = empty - | otherwise = BS x (len - n) + | otherwise = unsafeDropEnd n ps {-# INLINE dropEnd #-} -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. splitAt :: Int -> ByteString -> (ByteString, ByteString) -splitAt n ps@(BS x l) +splitAt n ps@(BS _ l) | n <= 0 = (empty, ps) | n >= l = (ps, empty) - | otherwise = (BS x n, BS (plusForeignPtr x n) (l-n)) + | otherwise = (unsafeTake n ps, unsafeDrop n ps) {-# INLINE splitAt #-} -- | Similar to 'Prelude.takeWhile', @@ -1151,18 +1151,17 @@ splitWith _ (BS _ 0) = [] splitWith predicate (BS fp len) = splitWith0 0 len fp where splitWith0 !off' !len' !fp' = accursedUnutterablePerformIO $ - splitLoop fp 0 off' len' fp' + splitLoop 0 off' len' fp' - splitLoop :: ForeignPtr Word8 - -> Int -> Int -> Int + splitLoop :: Int -> Int -> Int -> ForeignPtr Word8 -> IO [ByteString] - splitLoop p idx2 off' len' fp' = go idx2 + splitLoop idx2 off' len' fp' = go idx2 where go idx' | idx' >= len' = return [BS (plusForeignPtr fp' off') idx'] | otherwise = do - w <- peekFpByteOff p (off'+idx') + w <- peekFpByteOff fp (off'+idx') if predicate w then return (BS (plusForeignPtr fp' off') idx' : splitWith0 (off'+idx'+1) (len'-idx'-1) fp') @@ -1188,19 +1187,22 @@ splitWith predicate (BS fp len) = splitWith0 0 len fp -- split :: Word8 -> ByteString -> [ByteString] split _ (BS _ 0) = [] -split w (BS x l) = loop 0 +split w ps@(BS x l) = loop 0 where loop !n = let q = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> memchr (p `plusPtr` n) w (fromIntegral (l-n)) in if q == nullPtr - then [BS (plusForeignPtr x n) (l-n)] + then [unsafeDrop n ps] else let i = q `minusPtr` unsafeForeignPtrToPtr x - in BS (plusForeignPtr x n) (i-n) : loop (i+1) + in unsafeSlice n i ps : loop (i+1) {-# INLINE split #-} +unsafeSlice :: Int -> Int -> ByteString -> ByteString +unsafeSlice a b (BS x _) = BS (plusForeignPtr x a) (b - a) +{-# INLINE unsafeSlice #-} -- | The 'group' function takes a ByteString and returns a list of -- ByteStrings such that the concatenation of the result is equal to the @@ -1716,7 +1718,7 @@ inits bs = NE.toList $! initsNE bs -- @since 0.11.4.0 initsNE :: ByteString -> NonEmpty ByteString -- see Note [Avoid NonEmpty combinators] -initsNE (BS x len) = empty :| [BS x n | n <- [1..len]] +initsNE ps = empty :| [unsafeTake n ps | n <- [1..length ps]] -- | /O(n)/ Returns all final segments of the given 'ByteString', longest first. tails :: ByteString -> [ByteString] diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index 1d4d9accd..94deb8742 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -874,12 +874,12 @@ unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) -- > break isSpace == breakSpace -- breakSpace :: ByteString -> (ByteString,ByteString) -breakSpace (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do +breakSpace ps@(BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do i <- firstspace p 0 l return $! case () of {_ - | i == 0 -> (empty, BS x l) - | i == l -> (BS x l, empty) - | otherwise -> (BS x i, BS (plusForeignPtr x i) (l-i)) + | i == 0 -> (empty, ps) + | i == l -> (ps, empty) + | otherwise -> B.splitAt i ps } {-# INLINE breakSpace #-} @@ -897,9 +897,9 @@ firstspace !ptr !n !m -- -- @since 0.10.12.0 dropSpace :: ByteString -> ByteString -dropSpace (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do +dropSpace ps@(BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do i <- firstnonspace p 0 l - return $! if i == l then empty else BS (plusForeignPtr x i) (l-i) + return $! if i == l then empty else B.unsafeDrop i ps {-# INLINE dropSpace #-} firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index ca34f1aa1..1e4acae8c 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -1462,8 +1462,8 @@ initsNE = (Empty :|) . inits' id inits' :: (ByteString -> ByteString) -> ByteString -> [ByteString] -- inits' f bs === map f (tail (inits bs)) inits' _ Empty = [] - inits' f (Chunk c@(S.BS x len) cs) - = [f (S.BS x n `Chunk` Empty) | n <- [1..len]] + inits' f (Chunk c cs) + = [f (S.unsafeTake n c `Chunk` Empty) | n <- [1..S.length c]] ++ inits' (f . Chunk c) cs -- | /O(n)/ Returns all final segments of the given 'ByteString', longest first. diff --git a/Data/ByteString/Lazy/Internal.hs b/Data/ByteString/Lazy/Internal.hs index 9b3cd76ac..22ec13d6a 100644 --- a/Data/ByteString/Lazy/Internal.hs +++ b/Data/ByteString/Lazy/Internal.hs @@ -331,7 +331,7 @@ toStrict = \cs -> goLen0 cs cs -- closures which would result in unnecessary closure allocation. where -- It's still possible that the result is empty - goLen0 _ Empty = S.BS S.nullForeignPtr 0 + goLen0 _ Empty = S.empty goLen0 cs0 (Chunk c cs) = goLen1 cs0 c cs -- It's still possible that the result is a single chunk diff --git a/Data/ByteString/Lazy/ReadInt.hs b/Data/ByteString/Lazy/ReadInt.hs index 234bfe9fe..036721a0d 100644 --- a/Data/ByteString/Lazy/ReadInt.hs +++ b/Data/ByteString/Lazy/ReadInt.hs @@ -27,6 +27,7 @@ module Data.ByteString.Lazy.ReadInt ) where import qualified Data.ByteString.Internal as BI +import Data.ByteString.Unsafe #ifdef BYTESTRING_STRICT import Data.ByteString #else @@ -34,7 +35,7 @@ import Data.ByteString.Lazy import Data.ByteString.Lazy.Internal #endif import Data.Bits (FiniteBits, isSigned) -import Data.ByteString.Internal (pattern BS, plusForeignPtr) +import Data.ByteString.Internal (pattern BS) import Data.Int import Data.Word import Foreign.ForeignPtr (ForeignPtr) @@ -177,18 +178,18 @@ _readDecimal !r = consume where consume :: ByteString -> Word64 -> Maybe (a, ByteString) #ifdef BYTESTRING_STRICT - consume (BS fp len) a = case _digits q r fp len a of + consume ps@(BS fp len) a = case _digits q r fp len a of Result used acc | used == len -> convert acc empty | otherwise - -> convert acc $ BS (fp `plusForeignPtr` used) (len - used) + -> convert acc $ unsafeDrop used ps _ -> Nothing #else -- All done consume Empty acc = convert acc Empty -- Process next chunk - consume (Chunk (BS fp len) cs) acc + consume (Chunk ps@(BS fp len) cs) acc = case _digits q r fp len acc of Result used acc' | used == len @@ -197,7 +198,7 @@ _readDecimal !r = consume | otherwise -- ran into a non-digit -> convert acc' $ - Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs + Chunk (unsafeDrop used ps) cs _ -> Nothing #endif convert :: Word64 -> ByteString -> Maybe (a, ByteString) diff --git a/Data/ByteString/Lazy/ReadNat.hs b/Data/ByteString/Lazy/ReadNat.hs index 3a6030fcc..b45951467 100644 --- a/Data/ByteString/Lazy/ReadNat.hs +++ b/Data/ByteString/Lazy/ReadNat.hs @@ -21,6 +21,7 @@ module Data.ByteString.Lazy.ReadNat ) where import qualified Data.ByteString.Internal as BI +import Data.ByteString.Unsafe #ifdef BYTESTRING_STRICT import Data.ByteString #else @@ -28,7 +29,7 @@ import Data.ByteString.Lazy import Data.ByteString.Lazy.Internal #endif import Data.Bits (finiteBitSize) -import Data.ByteString.Internal (pattern BS, plusForeignPtr) +import Data.ByteString.Internal (pattern BS) import Data.Word import Foreign.ForeignPtr (ForeignPtr) import Foreign.Ptr (Ptr, minusPtr, plusPtr) @@ -127,7 +128,7 @@ _readDecimal = consume :: [Natural] -> Int -> Word -> ByteString -> (Natural, ByteString) #ifdef BYTESTRING_STRICT - consume ns cnt acc (BS fp len) = + consume ns cnt acc ps@(BS fp len) = -- Having read one digit, we're about to read the 2nd -- So the digit count up to 'safeLog' starts at 2. case natdigits fp len acc cnt ns of @@ -136,18 +137,18 @@ _readDecimal = -> convert acc' cnt' ns' $ empty | otherwise -> convert acc' cnt' ns' $ - BS (fp `plusForeignPtr` used) (len - used) + unsafeDrop used ps #else -- All done consume ns cnt acc Empty = convert acc cnt ns Empty -- Process next chunk - consume ns cnt acc (Chunk (BS fp len) cs) + consume ns cnt acc (Chunk ps@(BS fp len) cs) = case natdigits fp len acc cnt ns of Result used acc' cnt' ns' | used == len -- process more chunks -> consume ns' cnt' acc' cs | otherwise -- ran into a non-digit - -> let c = Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs + -> let c = Chunk (unsafeDrop used ps) cs in convert acc' cnt' ns' c #endif convert !acc !cnt !ns rest = diff --git a/Data/ByteString/Unsafe.hs b/Data/ByteString/Unsafe.hs index f470ab77a..4b57f9cea 100644 --- a/Data/ByteString/Unsafe.hs +++ b/Data/ByteString/Unsafe.hs @@ -25,7 +25,9 @@ module Data.ByteString.Unsafe ( unsafeLast, unsafeIndex, unsafeTake, + unsafeTakeEnd, unsafeDrop, + unsafeDropEnd, -- * Low level interaction with CStrings -- ** Using ByteStrings with functions for CStrings @@ -113,12 +115,27 @@ unsafeTake :: Int -> ByteString -> ByteString unsafeTake n (BS x l) = assert (0 <= n && n <= l) $ BS x n {-# INLINE unsafeTake #-} +-- | A variety of 'takeEnd' which omits the checks on @n@ so there is an +-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. +-- +-- @since 0.12.1.0 +unsafeTakeEnd :: Int -> ByteString -> ByteString +unsafeTakeEnd n (BS x l) = assert (0 <= n && n <= l) $ BS (plusForeignPtr x (l-n)) n +{-# INLINE unsafeTakeEnd #-} + -- | A variety of 'drop' which omits the checks on @n@ so there is an -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. unsafeDrop :: Int -> ByteString -> ByteString unsafeDrop n (BS x l) = assert (0 <= n && n <= l) $ BS (plusForeignPtr x n) (l-n) {-# INLINE unsafeDrop #-} +-- | A variety of 'dropEnd' which omits the checks on @n@ so there is an +-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@. +-- +-- @since 0.12.1.0 +unsafeDropEnd :: Int -> ByteString -> ByteString +unsafeDropEnd n (BS x l) = assert (0 <= n && n <= l) $ BS x (l-n) +{-# INLINE unsafeDropEnd #-} -- | /O(1)/ 'unsafePackAddressLen' provides constant-time construction of -- 'ByteString's, which is ideal for string literals. It packs a sequence