Skip to content

Commit 5e09286

Browse files
author
pranaysashank
committed
Properly escape characters in JSON strings.
1 parent 4f9fb94 commit 5e09286

File tree

3 files changed

+136
-9
lines changed

3 files changed

+136
-9
lines changed

src/Streamly/Internal/Data/Json/Stream.hs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import Streamly.Internal.Data.Array (Array)
3131
import Streamly.Internal.Data.Fold.Types (Fold(..))
3232
import Streamly.Internal.Data.Strict (Tuple' (..))
3333
import qualified Streamly.Internal.Data.Parser as PR
34+
import qualified Streamly.Internal.Data.Parser.ParserK.Types as K
3435
import qualified Streamly.Internal.Data.Parser.ParserD as P
3536
import qualified Streamly.Internal.Data.Array as A
3637
import qualified Streamly.Internal.Data.Fold as IFL
@@ -67,6 +68,22 @@ instance Enum a => Hashable (A.Array a) where
6768
hashWithSalt salt arr = fromIntegral $ runIdentity $
6869
IUF.fold A.read (IFL.rollingHashWithSalt $ fromIntegral salt) arr
6970

71+
{-# INLINE jsonEscapes #-}
72+
jsonEscapes :: Word8 -> Maybe Word8
73+
jsonEscapes 34 = Just 34 -- \" -> "
74+
jsonEscapes 92 = Just 92 -- \\ -> \
75+
jsonEscapes 47 = Just 47 -- \/ -> /
76+
jsonEscapes 98 = Just 8 -- \b -> BS
77+
jsonEscapes 102 = Just 12 -- \f -> FF
78+
jsonEscapes 110 = Just 10 -- \n -> LF
79+
jsonEscapes 114 = Just 13 -- \r -> CR
80+
jsonEscapes 116 = Just 9 -- \t -> TAB
81+
jsonEscapes _ = Nothing
82+
83+
{-# INLINE escapeFoldUtf8With #-}
84+
escapeFoldUtf8With :: Monad m => Fold m Char container -> Fold m Word8 container
85+
escapeFoldUtf8With = Uni.escapeFoldUtf8With 92 jsonEscapes
86+
7087
type JsonString = Array Char
7188

7289
type JsonArray = Array Value
@@ -168,13 +185,13 @@ parseJsonString = do
168185
w <- P.peek
169186
case w of
170187
DOUBLE_QUOTE -> skip 1 >> return s
171-
BACKSLASH -> (fmap (s <>) escapeParseJsonString) <* skip 1
188+
BACKSLASH -> fmap (s <>) escapeParseJsonString
172189
_ -> do
173190
P.die $ [(chr . fromIntegral) w] ++ " : String without end."
174191

175192
{-# INLINE escapeParseJsonString #-}
176193
escapeParseJsonString :: MonadCatch m => Parser m Word8 JsonString
177-
escapeParseJsonString = P.scan startState go (Uni.foldUtf8With A.unsafeWrite)
194+
escapeParseJsonString = P.scan startState go (escapeFoldUtf8With A.unsafeWrite)
178195
where
179196
startState = False
180197
go s a
@@ -236,15 +253,15 @@ parseJsonArray = do
236253
{-# INLINE parseJsonEOF #-}
237254
parseJsonEOF :: MonadCatch m => PR.Parser m Word8 Value
238255
parseJsonEOF =
239-
P.toParserK $ do
256+
K.toParserK $ do
240257
v <- parseJsonValue
241258
skipSpace
242259
P.eof
243260
return v
244261

245262
{-# INLINE parseJson #-}
246263
parseJson :: MonadCatch m => PR.Parser m Word8 Value
247-
parseJson = P.toParserK $ parseJsonValue
264+
parseJson = K.toParserK $ parseJsonValue
248265

249266
{-
250267

src/Streamly/Internal/Data/Parser.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ dieM = K.toParserK . D.dieM
293293

294294
{-# INLINE peekMaybe #-}
295295
peekMaybe :: MonadCatch m => Parser m a (Maybe a)
296-
peekMaybe = D.toParserK D.peekMaybe
296+
peekMaybe = K.toParserK D.peekMaybe
297297

298298
-------------------------------------------------------------------------------
299299
-- Failing Parsers
@@ -520,7 +520,7 @@ sliceSepByP _cond = undefined -- K.toParserK . D.sliceSepByP cond
520520
--
521521
{-# INLINE scan #-}
522522
scan :: MonadCatch m => s -> (s -> a -> Maybe s) -> Fold m a b -> Parser m a b
523-
scan s f fl = D.toParserK $ D.scan s f fl
523+
scan s f fl = K.toParserK $ D.scan s f fl
524524

525525
-- | @sepBy fl p sep@ collects zero or more stream elements separated by @sep@.
526526
--
@@ -538,7 +538,7 @@ sepBy :: MonadCatch m
538538
-> Parser m a b
539539
-> Parser m a sep
540540
-> Parser m a c
541-
sepBy fl pa = D.toParserK . D.sepBy fl (D.fromParserK pa) . D.fromParserK
541+
sepBy fl pa = K.toParserK . D.sepBy fl (K.fromParserK pa) . K.fromParserK
542542

543543
-- Note: Keep this consistent with S.splitOn. In fact we should eliminate
544544
-- S.splitOn in favor of the parser.

src/Streamly/Internal/Unicode/Stream.hs

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ module Streamly.Internal.Unicode.Stream
4545
, decodeUtf8ArraysD
4646
, decodeUtf8ArraysLenientD
4747
, foldUtf8With
48+
, escapeFoldUtf8With
4849

4950
-- * Transformation
5051
, stripStart
@@ -813,9 +814,13 @@ oDecode table state codep byte = do
813814
data Fp m s = FreshPoint !CodePoint !DecodeState (m s)
814815

815816
{-# INLINE_NORMAL foldUtf8WithE #-}
816-
foldUtf8WithE :: Monad m => CodingFailureMode -> Fold m Char container -> Fold m Word8 container
817+
foldUtf8WithE
818+
:: Monad m
819+
=> CodingFailureMode
820+
-> Fold m Char container
821+
-> Fold m Word8 container
817822
foldUtf8WithE cfm (FL.Fold arrayStep arrayInit arrayExtract) =
818-
let A.Array p _ _ = utf8d
823+
let A.Array p _ = utf8d
819824
!ptr = (unsafeForeignPtrToPtr p)
820825
in FL.Fold (step' ptr) (return $ FreshPoint 0 0 arrayInit) extract
821826
where
@@ -877,6 +882,111 @@ foldUtf8WithE cfm (FL.Fold arrayStep arrayInit arrayExtract) =
877882
aSt <- arrayState
878883
if statePtr /= 0 then inputUnderflow aSt else arrayExtract aSt
879884

885+
data EscapeState m s
886+
= EscapeState (m s)
887+
| NotEscapeState !CodePoint !DecodeState (m s)
888+
889+
{-# INLINE_NORMAL escapeFoldUtf8WithE #-}
890+
escapeFoldUtf8WithE
891+
:: Monad m
892+
=> CodingFailureMode
893+
-> Word8
894+
-> (Word8 -> Maybe Word8)
895+
-> Fold m Char container
896+
-> Fold m Word8 container
897+
escapeFoldUtf8WithE cfm escape trans (FL.Fold arrayStep arrayInit arrayExtract) =
898+
let A.Array p _ = utf8d
899+
!ptr = (unsafeForeignPtrToPtr p)
900+
in FL.Fold (step' ptr) (return $ NotEscapeState 0 0 arrayInit) extract
901+
where
902+
{-# INLINE transliterateOrError #-}
903+
transliterateOrError e arrayState =
904+
case cfm of
905+
ErrorOnCodingFailure -> error e
906+
TransliterateCodingFailure ->
907+
return $ NotEscapeState 0 0 (arrayStep arrayState replacementChar)
908+
909+
{-# INLINE transliterateOrError1 #-}
910+
transliterateOrError1 table statePtr codepointPtr e arrayState x =
911+
case cfm of
912+
ErrorOnCodingFailure -> error e
913+
TransliterateCodingFailure -> do
914+
aS <- arrayState
915+
aSt <- arrayStep aS replacementChar
916+
if x <= 0x7f
917+
then return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
918+
else
919+
let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
920+
in
921+
case sv of
922+
12 -> transliterateOrError e aSt
923+
0 -> return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
924+
_ -> return $ NotEscapeState cp sv (return aSt)
925+
926+
step' _ (NotEscapeState _ _ arrayState) c
927+
| c == escape = return $ EscapeState arrayState
928+
929+
step' table (NotEscapeState codepointPtr statePtr arrayState) x =
930+
if statePtr == 0 && x <= 0x7f
931+
then do
932+
aSt <- arrayState
933+
return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
934+
else
935+
let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
936+
in
937+
case sv of
938+
12 ->
939+
transliterateOrError1
940+
table
941+
statePtr
942+
codepointPtr
943+
"Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
944+
arrayState
945+
x
946+
0 -> do
947+
aSt <- arrayState
948+
return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
949+
_ -> return $ NotEscapeState cp sv arrayState
950+
951+
step' _ (EscapeState arrayState) c = do
952+
aSt <- arrayState
953+
case trans c of
954+
Just x ->
955+
return $
956+
NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
957+
Nothing ->
958+
transliterateOrError
959+
"Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
960+
aSt
961+
962+
{-# INLINE inputUnderflow #-}
963+
inputUnderflow arrayState =
964+
case cfm of
965+
ErrorOnCodingFailure ->
966+
error "Streamly.Internal.Data.Stream.StreamD.escapeFoldUtf8With: Input Underflow"
967+
TransliterateCodingFailure -> do
968+
aSt <- arrayStep arrayState replacementChar
969+
arrayExtract aSt
970+
971+
extract (NotEscapeState _ statePtr arrayState) = do
972+
aSt <- arrayState
973+
if statePtr /= 0
974+
then inputUnderflow aSt
975+
else arrayExtract aSt
976+
977+
extract (EscapeState arrayState) = do
978+
aSt <- arrayState
979+
inputUnderflow aSt
980+
880981
{-# INLINE foldUtf8With #-}
881982
foldUtf8With :: Monad m => Fold m Char container -> Fold m Word8 container
882983
foldUtf8With = foldUtf8WithE ErrorOnCodingFailure
984+
985+
{-# INLINE escapeFoldUtf8With #-}
986+
escapeFoldUtf8With ::
987+
Monad m
988+
=> Word8
989+
-> (Word8 -> Maybe Word8)
990+
-> Fold m Char container
991+
-> Fold m Word8 container
992+
escapeFoldUtf8With = escapeFoldUtf8WithE ErrorOnCodingFailure

0 commit comments

Comments
 (0)