Skip to content

Commit

Permalink
Merge remote-tracking branch 'github/pr/183'
Browse files Browse the repository at this point in the history
  • Loading branch information
hasufell committed Feb 24, 2023
2 parents bb0e5cd + 5370bb4 commit 9d4edc2
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 41 deletions.
119 changes: 81 additions & 38 deletions System/FilePath/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -112,24 +112,24 @@ module System.OsPath.MODULE_NAME.Internal

{- HLINT ignore "Use fewer imports" -}
import Prelude (Char, Bool(..), Maybe(..), (.), (&&), (<=), not, fst, maybe, (||), (==), ($), otherwise, fmap, mempty, (>=), (/=), (++), snd)
import Data.Bifunctor (first)
import Data.Semigroup ((<>))
import qualified Prelude as P
import Data.Maybe(isJust)
import Data.Maybe(fromMaybe, isJust)
import qualified Data.List as L

#ifndef OS_PATH
import Data.String (fromString)
import System.Environment(getEnv)
import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, takeWhile, take, all, elem, any, span)
import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, take, all, elem, any, span)
import Data.Char(toLower, toUpper, isAsciiLower, isAsciiUpper)
import Data.List(stripPrefix, isSuffixOf, uncons)
import Data.List(stripPrefix, isSuffixOf, uncons, dropWhileEnd)
#define CHAR Char
#define STRING String
#define FILEPATH FilePath
#else
import Prelude (fromIntegral)
import Control.Exception ( SomeException, evaluate, try, displayException )
import Data.Bifunctor (first)
import Control.DeepSeq (force)
import GHC.IO (unsafePerformIO)
import qualified Data.Char as C
Expand Down Expand Up @@ -290,13 +290,24 @@ getSearchPath = fmap splitSearchPath (getEnv "PATH")
-- > splitExtension "file.txt/boris.ext" == ("file.txt/boris",".ext")
-- > splitExtension "file/path.txt.bob.fred" == ("file/path.txt.bob",".fred")
-- > splitExtension "file/path.txt/" == ("file/path.txt/","")

-- A naive implementation would be to use @splitFileName_@ first,
-- then break filename into basename and extension, then recombine dir and basename.
-- This is way too expensive, see @splitFileName_@ comment for discussion.
--
-- Instead we speculatively split on the extension separator first, then check
-- whether results are well-formed.
splitExtension :: FILEPATH -> (STRING, STRING)
splitExtension x = if null nameDot
then (x, mempty)
else (dir <> init nameDot, singleton extSeparator <> ext)
where
(dir,file) = splitFileName_ x
(nameDot,ext) = breakEnd isExtSeparator file
splitExtension x
-- Imagine x = "no-dots", then nameDot = ""
| null nameDot = (x, mempty)
-- Imagine x = "\\shared.with.dots\no-dots"
| isWindows && null (dropDrive nameDot) = (x, mempty)
-- Imagine x = "dir.with.dots/no-dots"
| any isPathSeparator ext = (x, mempty)
| otherwise = (init nameDot, extSeparator `cons` ext)
where
(nameDot, ext) = breakEnd isExtSeparator x

-- | Get the extension of a file, returns @\"\"@ for no extension, @.ext@ otherwise.
--
Expand Down Expand Up @@ -358,7 +369,7 @@ addExtension file xs = case uncons xs of
Just (x, _) -> joinDrive a res
where
res = if isExtSeparator x then b <> xs
else b <> singleton extSeparator <> xs
else b <> (extSeparator `cons` xs)

(a,b) = splitDrive file

Expand All @@ -383,7 +394,7 @@ isExtensionOf :: STRING -> FILEPATH -> Bool
isExtensionOf ext = \fp -> case uncons ext of
Just (x, _)
| x == _period -> isSuffixOf ext . takeExtensions $ fp
_ -> isSuffixOf (singleton _period <> ext) . takeExtensions $ fp
_ -> isSuffixOf (_period `cons` ext) . takeExtensions $ fp

-- | Drop the given extension from a FILEPATH, and the @\".\"@ preceding it.
-- Returns 'Nothing' if the FILEPATH does not have the given extension, or
Expand All @@ -403,7 +414,7 @@ isExtensionOf ext = \fp -> case uncons ext of
-- > stripExtension "" x == Just x
stripExtension :: STRING -> FILEPATH -> Maybe FILEPATH
stripExtension ext path = case uncons ext of
Just (x, _) -> let dotExt = if isExtSeparator x then ext else singleton _period <> ext
Just (x, _) -> let dotExt = if isExtSeparator x then ext else _period `cons` ext
in stripSuffix dotExt path
Nothing -> Just path

Expand Down Expand Up @@ -506,19 +517,21 @@ readDriveUNC bs = case unpack bs of

{- c:\ -}
readDriveLetter :: STRING -> Maybe (FILEPATH, FILEPATH)
readDriveLetter bs = case unpack bs of
(x:c:y:xs)
| c == _colon && isLetter x && isPathSeparator y -> Just $ addSlash (pack [x,_colon]) (pack (y:xs))
(x:c:xs)
| c == _colon && isLetter x -> Just (pack [x,_colon], pack xs)
_ -> Nothing
readDriveLetter bs = case uncons2 bs of
Nothing -> Nothing
Just (x, c, ys)
| isLetter x, c == _colon -> Just $ case uncons ys of
Just (y, _)
| isPathSeparator y -> addSlash (pack [x,_colon]) ys
_ -> (pack [x,_colon], ys)
| otherwise -> Nothing

{- \\sharename\ -}
readDriveShare :: STRING -> Maybe (FILEPATH, FILEPATH)
readDriveShare bs = case unpack bs of
(s1:s2:xs) | isPathSeparator s1 && isPathSeparator s2 ->
let (a, b) = readDriveShareName (pack xs)
in Just (singleton s1 <> singleton s2 <> a,b)
in Just (s1 `cons` (s2 `cons` a), b)
_ -> Nothing

{- assume you have already seen \\ -}
Expand Down Expand Up @@ -594,19 +607,53 @@ splitFileName x = if null path
else (path, file)
where
(path, file) = splitFileName_ x
dotSlash = singleton _period <> singleton _slash
dotSlash = _period `cons` singleton _slash

-- version of splitFileName where, if the FILEPATH has no directory
-- component, the returned directory is "" rather than "./". This
-- is used in cases where we are going to combine the returned
-- directory to make a valid FILEPATH, and having a "./" appear would
-- look strange and upset simple equality properties. See
-- e.g. replaceFileName.
--
-- A naive implementation is
--
-- splitFileName_ fp = (drv <> dir, file)
-- where
-- (drv, pth) = splitDrive fp
-- (dir, file) = breakEnd isPathSeparator pth
--
-- but it is undesirable for two reasons:
-- * splitDrive is very slow on Windows,
-- * we unconditionally allocate 5 FilePath objects where only 2 would normally suffice.
--
-- In the implementation below we first speculatively split the input by the last path
-- separator. In the vast majority of cases this is already the answer, except
-- two exceptional cases explained below.
--
splitFileName_ :: FILEPATH -> (STRING, STRING)
splitFileName_ fp = (drv <> dir, file)
splitFileName_ fp
-- If dirSlash is empty, @fp@ is either a genuine filename without any dir,
-- or just a Windows drive name without slash like "c:".
-- Run readDriveLetter to figure out.
| isWindows
, null dirSlash
= fromMaybe (mempty, fp) (readDriveLetter fp)
-- Another Windows quirk is that @fp@ could have been a shared drive "\\share"
-- or UNC location "\\?\UNC\foo", where path separator is a part of the drive name.
-- We can test this by trying dropDrive and falling back to splitDrive.
| isWindows
, Just (s1, _s2, bs') <- uncons2 dirSlash
, isPathSeparator s1
-- If bs' is empty, then s2 as the last character of dirSlash must be a path separator,
-- so we are in the middle of shared drive.
-- Otherwise, since s1 is a path separator, we might be in the middle of UNC path.
, null bs' || maybe False (null . snd) (readDriveUNC dirSlash)
= (fp, mempty)
| otherwise
= (dirSlash, file)
where
(drv, pth) = splitDrive fp
(dir, file) = breakEnd isPathSeparator pth
(dirSlash, file) = breakEnd isPathSeparator fp

-- | Set the filename.
--
Expand Down Expand Up @@ -736,7 +783,7 @@ combineAlways a b | null a = b
[a1, a2] | isWindows
, isLetter a1
, a2 == _colon -> a <> b
_ -> a <> singleton pathSeparator <> b
_ -> a <> (pathSeparator `cons` b)


-- | Combine two paths with a path separator.
Expand Down Expand Up @@ -1068,7 +1115,7 @@ makeValid path
| isPosix = map (\x -> if x == _nul then _underscore else x) path
| isJust (readDriveShare drv) && all isPathSeparator drv = take 2 drv <> fromString "drive"
| isJust (readDriveUNC drv) && not (hasTrailingPathSeparator drv) =
makeValid (drv <> singleton pathSeparator <> pth)
makeValid (drv <> (pathSeparator `cons` pth))
| otherwise = joinDrive drv $ validElements $ validCHARs pth

where
Expand Down Expand Up @@ -1129,18 +1176,9 @@ isAbsolute = not . isRelative
#ifndef OS_PATH

-----------------------------------------------------------------------------
-- dropWhileEnd (>2) [1,2,3,4,1,2,3,4] == [1,2,3,4,1,2])
-- Note that Data.List.dropWhileEnd is only available in base >= 4.5.
dropWhileEnd :: (a -> Bool) -> [a] -> [a]
dropWhileEnd p = reverse . dropWhile p . reverse

-- takeWhileEnd (>2) [1,2,3,4,1,2,3,4] == [3,4])
takeWhileEnd :: (a -> Bool) -> [a] -> [a]
takeWhileEnd p = reverse . takeWhile p . reverse

-- spanEnd (>2) [1,2,3,4,1,2,3,4] = ([1,2,3,4,1,2], [3,4])
spanEnd :: (a -> Bool) -> [a] -> ([a], [a])
spanEnd p xs = (dropWhileEnd p xs, takeWhileEnd p xs)
spanEnd p = L.foldr (\x (pref, suff) -> if null pref && p x then (pref, x : suff) else (x : pref, suff)) ([], [])

-- breakEnd (< 2) [1,2,3,4,1,2,3,4] == ([1,2,3,4,1],[2,3,4])
breakEnd :: (a -> Bool) -> [a] -> ([a], [a])
Expand All @@ -1152,11 +1190,16 @@ breakEnd p = spanEnd (not . p)
stripSuffix :: Eq a => [a] -> [a] -> Maybe [a]
stripSuffix xs ys = reverse P.<$> stripPrefix (reverse xs) (reverse ys)

cons :: a -> [a] -> [a]
cons = (:)

unsnoc :: [a] -> Maybe ([a], a)
unsnoc [] = Nothing
unsnoc xs = Just (init xs, last xs)
unsnoc = L.foldr (\x -> Just . maybe ([], x) (first (x :))) Nothing

uncons2 :: [a] -> Maybe (a, a, [a])
uncons2 [] = Nothing
uncons2 [_] = Nothing
uncons2 (x : y : zs) = Just (x, y, zs)

_period, _quotedbl, _backslash, _slash, _question, _U, _N, _C, _colon, _semicolon, _US, _less, _greater, _bar, _asterisk, _nul, _space, _underscore :: Char
_period = '.'
Expand Down
16 changes: 16 additions & 0 deletions System/OsPath/Data/ByteString/Short.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NoImplicitPrelude #-}
-- |
-- Module : System.OsPath.Data.ByteString.Short
Expand Down Expand Up @@ -81,6 +82,7 @@ module System.OsPath.Data.ByteString.Short (
last,
tail,
uncons,
uncons2,
head,
init,
unsnoc,
Expand Down Expand Up @@ -173,3 +175,17 @@ module System.OsPath.Data.ByteString.Short (
) where

import Data.ByteString.Short.Internal
import System.OsPath.Data.ByteString.Short.Internal

import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise)
import Data.Word (Word8)

uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString)
uncons2 = \sbs ->
let l = length sbs
nl = l - 2
in if | l <= 1 -> Nothing
| otherwise -> let h = indexWord8Array (asBA sbs) 0
h' = indexWord8Array (asBA sbs) 1
t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl
in Just (h, h', t)
13 changes: 10 additions & 3 deletions System/OsPath/Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ module System.OsPath.Data.ByteString.Short.Internal where

import Control.Monad.ST
import Control.Exception (assert, throwIO)
import Data.Bits (Bits(..))
import Data.ByteString.Short.Internal (ShortByteString(..), length)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup
Expand Down Expand Up @@ -284,15 +285,21 @@ writeWord16Array (MBA# mba#) (I# i#) (W16# w#) =
ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of
s' -> (# s', () #))

indexWord8Array :: BA
-> Int -- ^ Word8 index
-> Word8
indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#)

-- | This isn't strictly Word16 array read. Instead it's two Word8 array reads
-- to avoid endianness issues due to primops doing automatic alignment based
-- on host platform. We expect the byte array to be LE always.
indexWord16Array :: BA
-> Int -- ^ Word8 index (not Word16)
-> Word16
indexWord16Array (BA# ba#) (I# i#) =
case (# indexWord8Array# ba# i#, indexWord8Array# ba# (i# +# 1#) #) of
(# lsb#, msb# #) -> W16# (decodeWord16LE# (# lsb#, msb# #))
indexWord16Array ba i = fromIntegral lsb .|. (fromIntegral msb `shiftL` 8)
where
lsb = indexWord8Array ba i
msb = indexWord8Array ba (i + 1)

#if !MIN_VERSION_base(4,16,0)

Expand Down
13 changes: 13 additions & 0 deletions System/OsPath/Data/ByteString/Short/Word16.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ module System.OsPath.Data.ByteString.Short.Word16 (
last,
tail,
uncons,
uncons2,
head,
init,
unsnoc,
Expand Down Expand Up @@ -260,6 +261,18 @@ uncons = \(assertEven -> sbs) ->
t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl
in Just (h, t)

-- | /O(n)/ Extract first two elements and the rest of a ByteString,
-- returning Nothing if it is shorter than two elements.
uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString)
uncons2 = \(assertEven -> sbs) ->
let l = BS.length sbs
nl = l - 4
in if | l <= 2 -> Nothing
| otherwise -> let h = indexWord16Array (asBA sbs) 0
h' = indexWord16Array (asBA sbs) 2
t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl
in Just (h, h', t)

-- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16.
-- An exception will be thrown in the case of an empty ShortByteString.
head :: HasCallStack => ShortByteString -> Word16
Expand Down

0 comments on commit 9d4edc2

Please sign in to comment.