@@ -45,6 +45,7 @@ module Streamly.Internal.Unicode.Stream
4545 , decodeUtf8ArraysD
4646 , decodeUtf8ArraysLenientD
4747 , foldUtf8With
48+ , escapeFoldUtf8With
4849
4950 -- * Transformation
5051 , stripStart
@@ -813,9 +814,13 @@ oDecode table state codep byte = do
813814data Fp m s = FreshPoint ! CodePoint ! DecodeState (m s )
814815
815816{-# INLINE_NORMAL foldUtf8WithE #-}
816- foldUtf8WithE :: Monad m => CodingFailureMode -> Fold m Char container -> Fold m Word8 container
817+ foldUtf8WithE
818+ :: Monad m
819+ => CodingFailureMode
820+ -> Fold m Char container
821+ -> Fold m Word8 container
817822foldUtf8WithE cfm (FL. Fold arrayStep arrayInit arrayExtract) =
818- let A. Array p _ _ = utf8d
823+ let A. Array p _ = utf8d
819824 ! ptr = (unsafeForeignPtrToPtr p)
820825 in FL. Fold (step' ptr) (return $ FreshPoint 0 0 arrayInit) extract
821826 where
@@ -877,6 +882,111 @@ foldUtf8WithE cfm (FL.Fold arrayStep arrayInit arrayExtract) =
877882 aSt <- arrayState
878883 if statePtr /= 0 then inputUnderflow aSt else arrayExtract aSt
879884
885+ data EscapeState m s
886+ = EscapeState (m s )
887+ | NotEscapeState ! CodePoint ! DecodeState (m s )
888+
889+ {-# INLINE_NORMAL escapeFoldUtf8WithE #-}
890+ escapeFoldUtf8WithE
891+ :: Monad m
892+ => CodingFailureMode
893+ -> Word8
894+ -> (Word8 -> Maybe Word8 )
895+ -> Fold m Char container
896+ -> Fold m Word8 container
897+ escapeFoldUtf8WithE cfm escape trans (FL. Fold arrayStep arrayInit arrayExtract) =
898+ let A. Array p _ = utf8d
899+ ! ptr = (unsafeForeignPtrToPtr p)
900+ in FL. Fold (step' ptr) (return $ NotEscapeState 0 0 arrayInit) extract
901+ where
902+ {-# INLINE transliterateOrError #-}
903+ transliterateOrError e arrayState =
904+ case cfm of
905+ ErrorOnCodingFailure -> error e
906+ TransliterateCodingFailure ->
907+ return $ NotEscapeState 0 0 (arrayStep arrayState replacementChar)
908+
909+ {-# INLINE transliterateOrError1 #-}
910+ transliterateOrError1 table statePtr codepointPtr e arrayState x =
911+ case cfm of
912+ ErrorOnCodingFailure -> error e
913+ TransliterateCodingFailure -> do
914+ aS <- arrayState
915+ aSt <- arrayStep aS replacementChar
916+ if x <= 0x7f
917+ then return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
918+ else
919+ let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
920+ in
921+ case sv of
922+ 12 -> transliterateOrError e aSt
923+ 0 -> return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
924+ _ -> return $ NotEscapeState cp sv (return aSt)
925+
926+ step' _ (NotEscapeState _ _ arrayState) c
927+ | c == escape = return $ EscapeState arrayState
928+
929+ step' table (NotEscapeState codepointPtr statePtr arrayState) x =
930+ if statePtr == 0 && x <= 0x7f
931+ then do
932+ aSt <- arrayState
933+ return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
934+ else
935+ let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
936+ in
937+ case sv of
938+ 12 ->
939+ transliterateOrError1
940+ table
941+ statePtr
942+ codepointPtr
943+ " Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
944+ arrayState
945+ x
946+ 0 -> do
947+ aSt <- arrayState
948+ return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
949+ _ -> return $ NotEscapeState cp sv arrayState
950+
951+ step' _ (EscapeState arrayState) c = do
952+ aSt <- arrayState
953+ case trans c of
954+ Just x ->
955+ return $
956+ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
957+ Nothing ->
958+ transliterateOrError
959+ " Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
960+ aSt
961+
962+ {-# INLINE inputUnderflow #-}
963+ inputUnderflow arrayState =
964+ case cfm of
965+ ErrorOnCodingFailure ->
966+ error " Streamly.Internal.Data.Stream.StreamD.escapeFoldUtf8With: Input Underflow"
967+ TransliterateCodingFailure -> do
968+ aSt <- arrayStep arrayState replacementChar
969+ arrayExtract aSt
970+
971+ extract (NotEscapeState _ statePtr arrayState) = do
972+ aSt <- arrayState
973+ if statePtr /= 0
974+ then inputUnderflow aSt
975+ else arrayExtract aSt
976+
977+ extract (EscapeState arrayState) = do
978+ aSt <- arrayState
979+ inputUnderflow aSt
980+
880981{-# INLINE foldUtf8With #-}
881982foldUtf8With :: Monad m => Fold m Char container -> Fold m Word8 container
882983foldUtf8With = foldUtf8WithE ErrorOnCodingFailure
984+
985+ {-# INLINE escapeFoldUtf8With #-}
986+ escapeFoldUtf8With ::
987+ Monad m
988+ => Word8
989+ -> (Word8 -> Maybe Word8 )
990+ -> Fold m Char container
991+ -> Fold m Word8 container
992+ escapeFoldUtf8With = escapeFoldUtf8WithE ErrorOnCodingFailure
0 commit comments