Skip to content

Commit ab92028

Browse files
authored
Support commas in structure strings (#15)
1 parent 99858c2 commit ab92028

File tree

6 files changed

+105
-23
lines changed

6 files changed

+105
-23
lines changed

package.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
name: parthenon
22
version: 0.1.1
3-
synopsis: Athena Structure Parser
3+
synopsis: AWS Athena Parser
44
description:
5-
Please see the README on GitHub at <https://github.com/AntoineGagne/parthenon#readme>
5+
A parser for AWS Athena schemas and terms.
6+
category: Development
67
homepage: https://github.com/AntoineGagne/parthenon-hs
78
github: "AntoineGagne/parthenon-hs"
89
license: BSD3

parthenon.cabal

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ cabal-version: 2.0
66

77
name: parthenon
88
version: 0.1.1
9-
synopsis: Athena Structure Parser
10-
description: Please see the README on GitHub at <https://github.com/AntoineGagne/parthenon#readme>
9+
synopsis: AWS Athena Parser
10+
description: A parser for AWS Athena schemas and terms.
11+
category: Development
1112
homepage: https://github.com/AntoineGagne/parthenon-hs
1213
bug-reports: https://github.com/AntoineGagne/parthenon-hs/issues
1314
author: Antoine Gagné

src/Parthenon/Decoder.hs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@ module Parthenon.Decoder
66
bigInt,
77
array,
88
struct,
9-
specialString,
9+
structString,
1010
Athena (..),
1111
)
1212
where
1313

1414
import Control.Monad.Combinators
1515
import Data.Functor (($>))
1616
import Data.Text (Text)
17+
import qualified Data.Text as Text
1718
import Data.Void
1819
import Parthenon.Types (Athena (..))
1920
import Text.Megaparsec
@@ -31,7 +32,7 @@ struct entries' =
3132
struct' = between leftBrace rightBrace entries
3233

3334
entries :: Parser [(Text, Athena)]
34-
entries = (choice decoders <|> unknownDecoder) `sepBy` comma
35+
entries = (try (choice decoders) <|> unknownDecoder) `sepBy` comma
3536

3637
decoders :: [Parser (Text, Athena)]
3738
decoders = map decoder entries'
@@ -47,7 +48,7 @@ struct entries' =
4748
unknownDecoder = do
4849
key' <- key
4950
_ <- equal
50-
schema' <- try string
51+
schema' <- try structString
5152
pure (key', schema')
5253

5354
key :: Parser Text
@@ -62,8 +63,24 @@ array decoder' = null' <|> (AArray <$> array')
6263
array' :: Parser [Athena]
6364
array' = between leftSquare rightSquare (decoder' `sepBy` comma)
6465

65-
specialString :: Parser Athena
66-
specialString = null'
-- | Decode a string value that appears inside a struct, where the value
-- itself may contain commas.
--
-- The value has no delimiter of its own, so its length is determined by
-- looking ahead in the remaining input for the next @=@ and the next @}@.
structString :: Parser Athena
structString = null' <|> AString <$> characters
  where
    characters :: Parser Text
    characters = do
      input <- getInput
      takeP (Just "characters") (valueLength input)

    -- Number of leading characters of the remaining input that make up the
    -- value.
    valueLength :: Text -> Int
    valueLength input =
      case (Text.findIndex (== '=') input, Text.findIndex (== '}') input) of
        -- No closing brace ahead: take nothing.
        (_, Nothing) -> 0
        (Just m, Just n')
          | m < n' ->
              -- An '=' comes before the closing brace, so another entry
              -- follows. The value stops just before the last comma that
              -- precedes that '='. When there is no such comma the prefix
              -- is empty, so clamp to 0 instead of passing -1 to 'takeP'.
              let (taken', _) = Text.breakOnEnd "," (Text.take m input)
               in max 0 (Text.length taken' - 1)
        -- The closing brace comes first (or there is no '=' at all): the
        -- value runs right up to the brace. The original took one character
        -- too few when an '=' appeared after the brace, e.g. for
        -- @foo}, c=bar}@ it took @fo@ instead of @foo@.
        (_, Just n') -> n'
6784

6885
string :: Parser Athena
6986
string = null' <|> (AString <$> characters)

src/Parthenon/Schema.hs

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,26 +34,50 @@ struct = do
3434
keyValue = do
3535
key <- characters
3636
_ <- symbol ":"
37-
decoder <- encoder
37+
decoder <- structEncoder
3838
pure (key, decoder)
3939

-- | Parse the type of a struct field and return the decoder for it.
--
-- The alternatives are tried in the order listed. A @string@ field is
-- decoded with 'Decoder.structString' rather than 'Decoder.string'.
structEncoder :: Parser (Parser Athena)
structEncoder = try fieldType
  where
    fieldType :: Parser (Parser Athena)
    fieldType =
      integer <|> bigInt <|> boolean <|> double <|> string <|> struct <|> array

    string :: Parser (Parser Athena)
    string = Decoder.structString <$ symbol "string"
54+
4055
encoder :: Parser (Parser Athena)
41-
encoder = try (integer <|> bigInt <|> boolean <|> double <|> string <|> struct <|> array)
56+
encoder =
57+
try
58+
( integer
59+
<|> bigInt
60+
<|> boolean
61+
<|> double
62+
<|> string
63+
<|> struct
64+
<|> array
65+
)
4266
where
4367
string :: Parser (Parser Athena)
4468
string = symbol "string" $> Decoder.string
4569

46-
integer :: Parser (Parser Athena)
47-
integer = symbol "int" $> Decoder.integer
70+
integer :: Parser (Parser Athena)
71+
integer = symbol "int" $> Decoder.integer
4872

49-
bigInt :: Parser (Parser Athena)
50-
bigInt = symbol "bigint" $> Decoder.bigInt
73+
bigInt :: Parser (Parser Athena)
74+
bigInt = symbol "bigint" $> Decoder.bigInt
5175

52-
double :: Parser (Parser Athena)
53-
double = symbol "double" $> Decoder.double
76+
double :: Parser (Parser Athena)
77+
double = symbol "double" $> Decoder.double
5478

55-
boolean :: Parser (Parser Athena)
56-
boolean = symbol "boolean" $> Decoder.boolean
79+
boolean :: Parser (Parser Athena)
80+
boolean = symbol "boolean" $> Decoder.boolean
5781

5882
betweenAngleBrackets :: Parser a -> Parser a
5983
betweenAngleBrackets = between leftAngle rightAngle

test/Parthenon/DecoderSpec.hs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,10 @@ spec = parallel $ do
189189
)
190190
]
191191
)
192-
xit "can decode a struct with string that contain commas, spaces and special characters" $
192+
it "can decode a struct with string that contain commas, spaces and special characters" $
193193
parseMaybe
194194
( Decoder.struct
195-
[ ("a", Decoder.string)
195+
[ ("a", Decoder.structString)
196196
]
197197
)
198198
"{a = Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36}"
@@ -203,10 +203,10 @@ spec = parallel $ do
203203
)
204204
]
205205
)
206-
xit "can decode a struct with multiple fields with string that contain commas, spaces and special characters" $
206+
it "can decode a struct with multiple fields with string that contain commas, spaces and special characters" $
207207
parseMaybe
208208
( Decoder.struct
209-
[ ("a", Decoder.string),
209+
[ ("a", Decoder.structString),
210210
("b", Decoder.integer)
211211
]
212212
)

test/Parthenon/SchemaSpec.hs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,45 @@ spec = parallel $ do
7373
)
7474
]
7575
)
76+
it "can decode a struct with string that contains commas" $
77+
parseMaybe
78+
"struct<b:string>"
79+
"{b=foo bar, jane smith}"
80+
`shouldBe` Just
81+
( AStruct
82+
[ ("b", AString "foo bar, jane smith")
83+
]
84+
)
85+
it "can decode a struct with string that contains commas and unknown keys" $
86+
parseMaybe
87+
"struct<b:string>"
88+
"{a=123, b=foo bar, jane smith, c=123, d=true, e=-2.0}"
89+
`shouldBe` Just
90+
( AStruct
91+
[ ("a", AString "123"),
92+
("b", AString "foo bar, jane smith"),
93+
("c", AString "123"),
94+
("d", AString "true"),
95+
("e", AString "-2.0")
96+
]
97+
)
98+
it "can decode a nested struct with string that contains commas and unknown keys" $
99+
parseMaybe
100+
"struct<a:struct<b:string>>"
101+
"{a={a=123, b=foo bar, jane smith, c=123, d=true, e=-2.0}}"
102+
`shouldBe` Just
103+
( AStruct
104+
[ ( "a",
105+
AStruct
106+
[ ("a", AString "123"),
107+
("b", AString "foo bar, jane smith"),
108+
("c", AString "123"),
109+
("d", AString "true"),
110+
("e", AString "-2.0")
111+
]
112+
)
113+
]
114+
)
76115

77116
parseMaybe :: Text -> Text -> Maybe Athena
78117
parseMaybe rawSchema input = do

0 commit comments

Comments
 (0)