diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index f0d79d83..aff418d5 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -112,24 +112,24 @@ module System.OsPath.MODULE_NAME.Internal {- HLINT ignore "Use fewer imports" -} import Prelude (Char, Bool(..), Maybe(..), (.), (&&), (<=), not, fst, maybe, (||), (==), ($), otherwise, fmap, mempty, (>=), (/=), (++), snd) +import Data.Bifunctor (first) import Data.Semigroup ((<>)) import qualified Prelude as P -import Data.Maybe(isJust) +import Data.Maybe(fromMaybe, isJust) import qualified Data.List as L #ifndef OS_PATH import Data.String (fromString) import System.Environment(getEnv) -import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, takeWhile, take, all, elem, any, span) +import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, take, all, elem, any, span) import Data.Char(toLower, toUpper, isAsciiLower, isAsciiUpper) -import Data.List(stripPrefix, isSuffixOf, uncons) +import Data.List(stripPrefix, isSuffixOf, uncons, dropWhileEnd) #define CHAR Char #define STRING String #define FILEPATH FilePath #else import Prelude (fromIntegral) import Control.Exception ( SomeException, evaluate, try, displayException ) -import Data.Bifunctor (first) import Control.DeepSeq (force) import GHC.IO (unsafePerformIO) import qualified Data.Char as C @@ -290,13 +290,24 @@ getSearchPath = fmap splitSearchPath (getEnv "PATH") -- > splitExtension "file.txt/boris.ext" == ("file.txt/boris",".ext") -- > splitExtension "file/path.txt.bob.fred" == ("file/path.txt.bob",".fred") -- > splitExtension "file/path.txt/" == ("file/path.txt/","") + +-- A naive implementation would be to use @splitFileName_@ first, +-- then break filename into basename and extension, then recombine dir and basename. +-- This is way too expensive, see @splitFileName_@ comment for discussion. +-- +-- Instead we speculatively split on the extension separator first, then check +-- whether results are well-formed. splitExtension :: FILEPATH -> (STRING, STRING) -splitExtension x = if null nameDot - then (x, mempty) - else (dir <> init nameDot, singleton extSeparator <> ext) - where - (dir,file) = splitFileName_ x - (nameDot,ext) = breakEnd isExtSeparator file +splitExtension x + -- Imagine x = "no-dots", then nameDot = "" + | null nameDot = (x, mempty) + -- Imagine x = "\\shared.with.dots\no-dots" + | isWindows && null (dropDrive nameDot) = (x, mempty) + -- Imagine x = "dir.with.dots/no-dots" + | any isPathSeparator ext = (x, mempty) + | otherwise = (init nameDot, extSeparator `cons` ext) + where + (nameDot, ext) = breakEnd isExtSeparator x -- | Get the extension of a file, returns @\"\"@ for no extension, @.ext@ otherwise. -- @@ -358,7 +369,7 @@ addExtension file xs = case uncons xs of Just (x, _) -> joinDrive a res where res = if isExtSeparator x then b <> xs - else b <> singleton extSeparator <> xs + else b <> (extSeparator `cons` xs) (a,b) = splitDrive file @@ -383,7 +394,7 @@ isExtensionOf :: STRING -> FILEPATH -> Bool isExtensionOf ext = \fp -> case uncons ext of Just (x, _) | x == _period -> isSuffixOf ext . takeExtensions $ fp - _ -> isSuffixOf (singleton _period <> ext) . takeExtensions $ fp + _ -> isSuffixOf (_period `cons` ext) . takeExtensions $ fp -- | Drop the given extension from a FILEPATH, and the @\".\"@ preceding it. -- Returns 'Nothing' if the FILEPATH does not have the given extension, or @@ -403,7 +414,7 @@ isExtensionOf ext = \fp -> case uncons ext of -- > stripExtension "" x == Just x stripExtension :: STRING -> FILEPATH -> Maybe FILEPATH stripExtension ext path = case uncons ext of - Just (x, _) -> let dotExt = if isExtSeparator x then ext else singleton _period <> ext + Just (x, _) -> let dotExt = if isExtSeparator x then ext else _period `cons` ext in stripSuffix dotExt path Nothing -> Just path @@ -506,19 +517,21 @@ readDriveUNC bs = case unpack bs of {- c:\ -} readDriveLetter :: STRING -> Maybe (FILEPATH, FILEPATH) -readDriveLetter bs = case unpack bs of - (x:c:y:xs) - | c == _colon && isLetter x && isPathSeparator y -> Just $ addSlash (pack [x,_colon]) (pack (y:xs)) - (x:c:xs) - | c == _colon && isLetter x -> Just (pack [x,_colon], pack xs) - _ -> Nothing +readDriveLetter bs = case uncons2 bs of + Nothing -> Nothing + Just (x, c, ys) + | isLetter x, c == _colon -> Just $ case uncons ys of + Just (y, _) + | isPathSeparator y -> addSlash (pack [x,_colon]) ys + _ -> (pack [x,_colon], ys) + | otherwise -> Nothing {- \\sharename\ -} readDriveShare :: STRING -> Maybe (FILEPATH, FILEPATH) readDriveShare bs = case unpack bs of (s1:s2:xs) | isPathSeparator s1 && isPathSeparator s2 -> let (a, b) = readDriveShareName (pack xs) - in Just (singleton s1 <> singleton s2 <> a,b) + in Just (s1 `cons` (s2 `cons` a), b) _ -> Nothing {- assume you have already seen \\ -} @@ -594,7 +607,7 @@ splitFileName x = if null path else (path, file) where (path, file) = splitFileName_ x - dotSlash = singleton _period <> singleton _slash + dotSlash = _period `cons` singleton _slash -- version of splitFileName where, if the FILEPATH has no directory -- component, the returned directory is "" rather than "./". This @@ -602,11 +615,45 @@ splitFileName x = if null path -- directory to make a valid FILEPATH, and having a "./" appear would -- look strange and upset simple equality properties. See -- e.g. replaceFileName. +-- +-- A naive implementation is +-- +-- splitFileName_ fp = (drv <> dir, file) +-- where +-- (drv, pth) = splitDrive fp +-- (dir, file) = breakEnd isPathSeparator pth +-- +-- but it is undesirable for two reasons: +-- * splitDrive is very slow on Windows, +-- * we unconditionally allocate 5 FilePath objects where only 2 would normally suffice. +-- +-- In the implementation below we first speculatively split the input by the last path +-- separator. In the vast majority of cases this is already the answer, except +-- two exceptional cases explained below. +-- splitFileName_ :: FILEPATH -> (STRING, STRING) -splitFileName_ fp = (drv <> dir, file) +splitFileName_ fp + -- If dirSlash is empty, @fp@ is either a genuine filename without any dir, + -- or just a Windows drive name without slash like "c:". + -- Run readDriveLetter to figure out. + | isWindows + , null dirSlash + = fromMaybe (mempty, fp) (readDriveLetter fp) + -- Another Windows quirk is that @fp@ could have been a shared drive "\\share" + -- or UNC location "\\?\UNC\foo", where path separator is a part of the drive name. + -- We can test this by trying dropDrive and falling back to splitDrive. + | isWindows + , Just (s1, _s2, bs') <- uncons2 dirSlash + , isPathSeparator s1 + -- If bs' is empty, then s2 as the last character of dirSlash must be a path separator, + -- so we are in the middle of shared drive. + -- Otherwise, since s1 is a path separator, we might be in the middle of UNC path. + , null bs' || maybe False (null . snd) (readDriveUNC dirSlash) + = (fp, mempty) + | otherwise + = (dirSlash, file) where - (drv, pth) = splitDrive fp - (dir, file) = breakEnd isPathSeparator pth + (dirSlash, file) = breakEnd isPathSeparator fp -- | Set the filename. -- @@ -736,7 +783,7 @@ combineAlways a b | null a = b [a1, a2] | isWindows , isLetter a1 , a2 == _colon -> a <> b - _ -> a <> singleton pathSeparator <> b + _ -> a <> (pathSeparator `cons` b) -- | Combine two paths with a path separator. @@ -1068,7 +1115,7 @@ makeValid path | isPosix = map (\x -> if x == _nul then _underscore else x) path | isJust (readDriveShare drv) && all isPathSeparator drv = take 2 drv <> fromString "drive" | isJust (readDriveUNC drv) && not (hasTrailingPathSeparator drv) = - makeValid (drv <> singleton pathSeparator <> pth) + makeValid (drv <> (pathSeparator `cons` pth)) | otherwise = joinDrive drv $ validElements $ validCHARs pth where @@ -1129,18 +1176,9 @@ isAbsolute = not . isRelative #ifndef OS_PATH ----------------------------------------------------------------------------- --- dropWhileEnd (>2) [1,2,3,4,1,2,3,4] == [1,2,3,4,1,2]) --- Note that Data.List.dropWhileEnd is only available in base >= 4.5. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = reverse . dropWhile p . reverse - --- takeWhileEnd (>2) [1,2,3,4,1,2,3,4] == [3,4]) -takeWhileEnd :: (a -> Bool) -> [a] -> [a] -takeWhileEnd p = reverse . takeWhile p . reverse - -- spanEnd (>2) [1,2,3,4,1,2,3,4] = ([1,2,3,4,1,2], [3,4]) spanEnd :: (a -> Bool) -> [a] -> ([a], [a]) -spanEnd p xs = (dropWhileEnd p xs, takeWhileEnd p xs) +spanEnd p = L.foldr (\x (pref, suff) -> if null pref && p x then (pref, x : suff) else (x : pref, suff)) ([], []) -- breakEnd (< 2) [1,2,3,4,1,2,3,4] == ([1,2,3,4,1],[2,3,4]) breakEnd :: (a -> Bool) -> [a] -> ([a], [a]) @@ -1152,11 +1190,16 @@ breakEnd p = spanEnd (not . p) stripSuffix :: Eq a => [a] -> [a] -> Maybe [a] stripSuffix xs ys = reverse P.<$> stripPrefix (reverse xs) (reverse ys) +cons :: a -> [a] -> [a] +cons = (:) unsnoc :: [a] -> Maybe ([a], a) -unsnoc [] = Nothing -unsnoc xs = Just (init xs, last xs) +unsnoc = L.foldr (\x -> Just . maybe ([], x) (first (x :))) Nothing +uncons2 :: [a] -> Maybe (a, a, [a]) +uncons2 [] = Nothing +uncons2 [_] = Nothing +uncons2 (x : y : zs) = Just (x, y, zs) _period, _quotedbl, _backslash, _slash, _question, _U, _N, _C, _colon, _semicolon, _US, _less, _greater, _bar, _asterisk, _nul, _space, _underscore :: Char _period = '.' diff --git a/System/OsPath/Data/ByteString/Short.hs b/System/OsPath/Data/ByteString/Short.hs index 176ae012..f3a666ff 100644 --- a/System/OsPath/Data/ByteString/Short.hs +++ b/System/OsPath/Data/ByteString/Short.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NoImplicitPrelude #-} -- | -- Module : System.OsPath.Data.ByteString.Short @@ -81,6 +82,7 @@ module System.OsPath.Data.ByteString.Short ( last, tail, uncons, + uncons2, head, init, unsnoc, @@ -173,3 +175,17 @@ module System.OsPath.Data.ByteString.Short ( ) where import Data.ByteString.Short.Internal +import System.OsPath.Data.ByteString.Short.Internal + +import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise) +import Data.Word (Word8) + +uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString) +uncons2 = \sbs -> + let l = length sbs + nl = l - 2 + in if | l <= 1 -> Nothing + | otherwise -> let h = indexWord8Array (asBA sbs) 0 + h' = indexWord8Array (asBA sbs) 1 + t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl + in Just (h, h', t) diff --git a/System/OsPath/Data/ByteString/Short/Internal.hs b/System/OsPath/Data/ByteString/Short/Internal.hs index b2428503..493b447b 100644 --- a/System/OsPath/Data/ByteString/Short/Internal.hs +++ b/System/OsPath/Data/ByteString/Short/Internal.hs @@ -20,6 +20,7 @@ module System.OsPath.Data.ByteString.Short.Internal where import Control.Monad.ST import Control.Exception (assert, throwIO) +import Data.Bits (Bits(..)) import Data.ByteString.Short.Internal (ShortByteString(..), length) #if !MIN_VERSION_base(4,11,0) import Data.Semigroup @@ -284,15 +285,21 @@ writeWord16Array (MBA# mba#) (I# i#) (W16# w#) = ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of s' -> (# s', () #)) +indexWord8Array :: BA + -> Int -- ^ Word8 index + -> Word8 +indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#) + -- | This isn't strictly Word16 array read. Instead it's two Word8 array reads -- to avoid endianness issues due to primops doing automatic alignment based -- on host platform. We expect the byte array to be LE always. indexWord16Array :: BA -> Int -- ^ Word8 index (not Word16) -> Word16 -indexWord16Array (BA# ba#) (I# i#) = - case (# indexWord8Array# ba# i#, indexWord8Array# ba# (i# +# 1#) #) of - (# lsb#, msb# #) -> W16# (decodeWord16LE# (# lsb#, msb# #)) +indexWord16Array ba i = fromIntegral lsb .|. (fromIntegral msb `shiftL` 8) + where + lsb = indexWord8Array ba i + msb = indexWord8Array ba (i + 1) #if !MIN_VERSION_base(4,16,0) diff --git a/System/OsPath/Data/ByteString/Short/Word16.hs b/System/OsPath/Data/ByteString/Short/Word16.hs index f6e591bc..9fdec459 100644 --- a/System/OsPath/Data/ByteString/Short/Word16.hs +++ b/System/OsPath/Data/ByteString/Short/Word16.hs @@ -46,6 +46,7 @@ module System.OsPath.Data.ByteString.Short.Word16 ( last, tail, uncons, + uncons2, head, init, unsnoc, @@ -260,6 +261,18 @@ uncons = \(assertEven -> sbs) -> t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl in Just (h, t) +-- | /O(n)/ Extract first two elements and the rest of a ByteString, +-- returning Nothing if it is shorter than two elements. +uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString) +uncons2 = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 4 + in if | l <= 2 -> Nothing + | otherwise -> let h = indexWord16Array (asBA sbs) 0 + h' = indexWord16Array (asBA sbs) 2 + t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl + in Just (h, h', t) + -- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16. -- An exception will be thrown in the case of an empty ShortByteString. head :: HasCallStack => ShortByteString -> Word16