diff --git a/qc/Main.hs b/qc/Main.hs index 0e86d3d..6b997fb 100644 --- a/qc/Main.hs +++ b/qc/Main.hs @@ -6,12 +6,12 @@ import Test.QuickCheck import Text.ABNF.ABNF.Canonicalizer (dedupNonterminals) import Text.ABNF.ABNF.Parser (parseABNF) import Text.ABNF.ABNF.Types +import Text.ABNF.ABNF.CFG (normalizeRules, eliminateGroups) import qualified Data.Map.Strict as Map import qualified Data.Text as Text import qualified Data.Text.Lazy as LText import Data.List (partition) import Text.Pretty.Simple (pShow) -import Numeric.Natural (Natural) instance Arbitrary (Rule ('Parsed 'Raw)) where arbitrary = Rule <$> arbitraryRuleName <*> arbitrary <*> arbitrary @@ -31,9 +31,6 @@ instance Arbitrary (Repetition ('Parsed 'Raw)) where , RepetitionSingle <$> arbitrary ] -instance Arbitrary Natural where - arbitrary = fromIntegral <$> (choose (0, 100) :: Gen Int) - instance Arbitrary Repeat where arbitrary = do lower <- fromIntegral <$> (choose (0, 5) :: Gen Int) @@ -147,9 +144,43 @@ arbitraryABNFBody = oneof return $ Text.pack "\"" <> lit <> Text.pack "\"" ] +-- | Property: After normalization and group elimination, +-- no ProductSpec should have an empty list of repetitions +prop_no_empty_productspecs :: Property +prop_no_empty_productspecs = prettyProp $ \(Rules rules) -> + let inputSize = length rules + result = case dedupNonterminals rules of + Nothing -> True -- Malformed ABNF, property holds vacuously + Just ruleMap -> + let normalized = normalizeRules ruleMap + eliminated = eliminateGroups normalized + passing = all checkRule (Map.elems eliminated) + in passing + in result + where + checkRule :: Rule ('CFG 'GroupElimination) -> Bool + checkRule (Rule name _ sumSpec) = checkSumSpec sumSpec + + checkSumSpec :: SumSpec ('CFG 'GroupElimination) -> Bool + checkSumSpec (SumSpec prods) = all checkProductSpec prods + + checkProductSpec :: ProductSpec ('CFG 'GroupElimination) -> Bool + checkProductSpec (ProductSpec []) = False -- Empty ProductSpec is invalid! + checkProductSpec (ProductSpec reps) = all checkRepetition reps + + checkRepetition :: Repetition ('CFG 'GroupElimination) -> Bool + checkRepetition (RepetitionSingle elem) = checkElement elem + + checkElement :: Element ('CFG 'GroupElimination) -> Bool + checkElement (RuleElement' _) = True + checkElement (LiteralElement _) = True + checkElement EmptyElement = True + main :: IO () main = do putStrLn "Testing prop_dedup_no_adds:" quickCheck prop_dedup_no_adds putStrLn "\nTesting prop_hanging_adds_detection:" quickCheck prop_hanging_adds_detection + putStrLn "\nTesting prop_no_empty_productspecs:" + quickCheck prop_no_empty_productspecs diff --git a/src/Text/ABNF/ABNF/CFG.hs b/src/Text/ABNF/ABNF/CFG.hs index 3b9ca7f..38609af 100644 --- a/src/Text/ABNF/ABNF/CFG.hs +++ b/src/Text/ABNF/ABNF/CFG.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE GADTs #-} module Text.ABNF.ABNF.CFG where @@ -13,7 +14,8 @@ type RuleMap stage = Map.Map Text.Text (Rule stage) -- | State for tracking fresh names and new rules during normalization data NormalizeState = NormalizeState - { nextFresh :: Natural + { currentRule :: Text.Text + , ruleCounters :: Map.Map Text.Text Natural -- Per-rule counters , newRules :: RuleMap ('CFG 'RepetitionElimination) } @@ -21,9 +23,10 @@ data NormalizeState = NormalizeState freshName :: State NormalizeState Text.Text freshName = do st <- get - let n = nextFresh st - put $ st { nextFresh = n + 1 } - return $ Text.pack ("_rep" ++ show n) + let rule = currentRule st + let n = Map.findWithDefault 0 rule (ruleCounters st) + put $ st { ruleCounters = Map.insert rule (n + 1) (ruleCounters st) } + return $ "_" <> rule <> "_rep" <> Text.pack (show n) -- | Add a new rule to the state addRule :: Text.Text -> Rule ('CFG 'RepetitionElimination) -> State NormalizeState () @@ -35,12 +38,13 @@ addRule name rule = do -- All RepetitionRep instances are converted to RepetitionSingle with generated rules normalizeRules :: RuleMap ('Parsed 'Dedup) -> RuleMap ('CFG 'RepetitionElimination) normalizeRules inputRules = - let initialState = NormalizeState { nextFresh = 0, newRules = Map.empty } + let initialState = NormalizeState { currentRule = "", ruleCounters = Map.empty, newRules = Map.empty } (normalizedRules, finalState) = runState (traverse normalizeRule inputRules) initialState in Map.union normalizedRules (newRules finalState) normalizeRule :: Rule ('Parsed 'Dedup) -> State NormalizeState (Rule ('CFG 'RepetitionElimination)) normalizeRule (Rule ident def spec) = do + modify $ \st -> st { currentRule = ident } -- Set context for generated names normalizedSpec <- normalizeSumSpec spec return $ Rule ident def normalizedSpec @@ -50,6 +54,9 @@ normalizeSumSpec (SumSpec prods) = do return $ SumSpec normalizedProds normalizeProductSpec :: ProductSpec ('Parsed 'Dedup) -> State NormalizeState (ProductSpec ('CFG 'RepetitionElimination)) +normalizeProductSpec (ProductSpec []) = do + -- Empty ProductSpec should become epsilon + return $ ProductSpec [RepetitionSingle EmptyElement] normalizeProductSpec (ProductSpec reps) = do normalizedReps <- concat <$> mapM normalizeRepetition reps return $ ProductSpec normalizedReps @@ -60,6 +67,10 @@ normalizeRepetition (RepetitionSingle elem) = do return [RepetitionSingle normalizedElem] -- Exactly N times: Repeat n (Just 0) -> n copies +-- Special case: exactly 0 times = empty +normalizeRepetition (RepetitionRep (Repeat 0 (Just 0)) _elem) = do + return [RepetitionSingle EmptyElement] + normalizeRepetition (RepetitionRep (Repeat n (Just 0)) elem) = do normalizedElem <- normalizeElement elem return $ replicate (fromIntegral n) (RepetitionSingle normalizedElem) @@ -81,7 +92,9 @@ normalizeRepetition (RepetitionRep (Repeat n (Just m)) elem) = do normalizedElem <- normalizeElement elem name <- freshName -- Create alternations: empty | elem | elem^2 | ... | elem^m - let alternatives = map (\k -> ProductSpec $ replicate k (RepetitionSingle normalizedElem)) + let alternatives = map (\k -> if k == 0 + then ProductSpec [RepetitionSingle EmptyElement] + else ProductSpec $ replicate k (RepetitionSingle normalizedElem)) [0..fromIntegral m] let rule = Rule name Equals $ SumSpec alternatives addRule name rule @@ -131,7 +144,8 @@ normalizeGroup (Group spec) = Group <$> normalizeSumSpec spec -- | State for tracking fresh names during group elimination data GroupEliminationState = GroupEliminationState - { nextFreshGroup :: Natural + { currentGroupRule :: Text.Text + , groupCounters :: Map.Map Text.Text Natural -- Per-rule counters , newGroupRules :: RuleMap ('CFG 'GroupElimination) } @@ -139,9 +153,10 @@ data GroupEliminationState = GroupEliminationState freshGroupName :: State GroupEliminationState Text.Text freshGroupName = do st <- get - let n = nextFreshGroup st - put $ st { nextFreshGroup = n + 1 } - return $ Text.pack ("_grp" ++ show n) + let rule = currentGroupRule st + let n = Map.findWithDefault 0 rule (groupCounters st) + put $ st { groupCounters = Map.insert rule (n + 1) (groupCounters st) } + return $ "_" <> rule <> "_grp" <> Text.pack (show n) -- | Add a new group rule to the state addGroupRule :: Text.Text -> Rule ('CFG 'GroupElimination) -> State GroupEliminationState () @@ -149,15 +164,22 @@ addGroupRule name rule = do st <- get put $ st { newGroupRules = Map.insert name rule (newGroupRules st) } +-- | Find an existing rule with the given spec, if one exists +findExistingGroup :: SumSpec ('CFG 'GroupElimination) -> State GroupEliminationState (Maybe Text.Text) +findExistingGroup spec = do + gets (Map.foldlWithKey' (\acc name (Rule _ _ ruleSpec) -> + if ruleSpec == spec then Just name else acc) Nothing . newGroupRules) + -- | Eliminate all groups by converting them to fresh non-terminals eliminateGroups :: RuleMap ('CFG 'RepetitionElimination) -> RuleMap ('CFG 'GroupElimination) eliminateGroups inputRules = - let initialState = GroupEliminationState { nextFreshGroup = 0, newGroupRules = Map.empty } + let initialState = GroupEliminationState { currentGroupRule = "", groupCounters = Map.empty, newGroupRules = Map.empty } (eliminatedRules, finalState) = runState (traverse eliminateGroupsRule inputRules) initialState in Map.union eliminatedRules (newGroupRules finalState) eliminateGroupsRule :: Rule ('CFG 'RepetitionElimination) -> State GroupEliminationState (Rule ('CFG 'GroupElimination)) eliminateGroupsRule (Rule ident def spec) = do + modify $ \st -> st { currentGroupRule = ident } -- Set context for generated names eliminatedSpec <- eliminateGroupsSumSpec spec return $ Rule ident def eliminatedSpec @@ -182,12 +204,12 @@ eliminateGroupsElement (RuleElement' name) = return $ RuleElement' name eliminateGroupsElement (LiteralElement lit) = return $ LiteralElement lit eliminateGroupsElement EmptyElement = return EmptyElement eliminateGroupsElement (GroupElement (Group spec)) = do - -- Eliminate groups within the group's spec first eliminatedSpec <- eliminateGroupsSumSpec spec - -- Generate a fresh name for this group - name <- freshGroupName - -- Create a new rule: name = eliminatedSpec - let rule = Rule name Equals eliminatedSpec - addGroupRule name rule - -- Replace the group with a reference to the new rule - return $ RuleElement' name + existing <- findExistingGroup eliminatedSpec + case existing of + Just name -> return $ RuleElement' name + Nothing -> do + name <- freshGroupName + let rule = Rule name Equals eliminatedSpec + addGroupRule name rule + return $ RuleElement' name diff --git a/src/Text/ABNF/PrettyPrinter.hs b/src/Text/ABNF/PrettyPrinter.hs index e4c11da..8a78170 100644 --- a/src/Text/ABNF/PrettyPrinter.hs +++ b/src/Text/ABNF/PrettyPrinter.hs @@ -57,7 +57,7 @@ instance Pretty (Element stage) where prettyShow (RuleElement rule) = prettyShow rule prettyShow (GroupElement group) = "(" ++ prettyShow group ++ ")" prettyShow (LiteralElement lit) = prettyShow lit - prettyShow EmptyElement = "" + prettyShow EmptyElement = "ε" instance Pretty (Group stage) where prettyShow (Group sumSpec) = prettyShow sumSpec diff --git a/test/CFGTest.hs b/test/CFGTest.hs deleted file mode 100644 index 006db96..0000000 --- a/test/CFGTest.hs +++ /dev/null @@ -1,114 +0,0 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE OverloadedStrings #-} - -module Main where - -import qualified Data.Map.Strict as Map -import Text.ABNF.ABNF.Types -import Text.ABNF.ABNF.CFG - --- Test: 3*5DIGIT should become 3 required DIGIT + rule for 0-2 more DIGIT -testRange :: Rule ('Parsed 'Dedup) -testRange = Rule "test-rule" Equals $ SumSpec - [ ProductSpec - [ RepetitionRep (Repeat 3 (Just 2)) (LiteralElement (CharLit "X")) - ] - ] - --- Test: 2*DIGIT should become 2 required DIGIT + recursive rule -testAtLeast :: Rule ('Parsed 'Dedup) -testAtLeast = Rule "test-atleast" Equals $ SumSpec - [ ProductSpec - [ RepetitionRep (Repeat 2 Nothing) (LiteralElement (CharLit "Y")) - ] - ] - --- Test: 3DIGIT should become 3 copies -testExact :: Rule ('Parsed 'Dedup) -testExact = Rule "test-exact" Equals $ SumSpec - [ ProductSpec - [ RepetitionRep (Repeat 3 (Just 0)) (LiteralElement (CharLit "Z")) - ] - ] - --- Test: [DIGIT] (optional) should become alternation -testOptional :: Rule ('Parsed 'Dedup) -testOptional = Rule "test-optional" Equals $ SumSpec - [ ProductSpec - [ RepetitionRep (Repeat 0 (Just 1)) (LiteralElement (CharLit "W")) - ] - ] - --- Test: (X / Y) should become a fresh non-terminal -testGroup :: Rule ('Parsed 'Dedup) -testGroup = Rule "test-group" Equals $ SumSpec - [ ProductSpec - [ RepetitionSingle (GroupElement (Group (SumSpec - [ ProductSpec [RepetitionSingle (LiteralElement (CharLit "X"))] - , ProductSpec [RepetitionSingle (LiteralElement (CharLit "Y"))] - ]))) - ] - ] - --- Test: nested groups (A / (B / C)) -testNestedGroup :: Rule ('Parsed 'Dedup) -testNestedGroup = Rule "test-nested" Equals $ SumSpec - [ ProductSpec - [ RepetitionSingle (GroupElement (Group (SumSpec - [ ProductSpec [RepetitionSingle (LiteralElement (CharLit "A"))] - , ProductSpec [RepetitionSingle (GroupElement (Group (SumSpec - [ ProductSpec [RepetitionSingle (LiteralElement (CharLit "B"))] - , ProductSpec [RepetitionSingle (LiteralElement (CharLit "C"))] - ])))] - ]))) - ] - ] - -main :: IO () -main = do - putStrLn "=== Testing Repetition Elimination ===" - let inputRules = Map.fromList - [ ("test-rule", testRange) - , ("test-atleast", testAtLeast) - , ("test-exact", testExact) - , ("test-optional", testOptional) - ] - - let normalized = normalizeRules inputRules - - putStrLn $ "Input rules: " ++ show (Map.size inputRules) - putStrLn $ "Output rules: " ++ show (Map.size normalized) - putStrLn "" - - putStrLn "Normalized rules:" - mapM_ (\(name, rule) -> do - putStrLn $ " " ++ show name - putStrLn $ " " ++ show rule - putStrLn "" - ) (Map.toList normalized) - - putStrLn "✓ All repetitions have been eliminated!" - putStrLn "" - - putStrLn "=== Testing Group Elimination ===" - let groupInputRules = Map.fromList - [ ("test-group", testGroup) - , ("test-nested", testNestedGroup) - ] - - let normalizedGroups = normalizeRules groupInputRules - let groupsEliminated = eliminateGroups normalizedGroups - - putStrLn $ "Input rules: " ++ show (Map.size groupInputRules) - putStrLn $ "After repetition elimination: " ++ show (Map.size normalizedGroups) - putStrLn $ "After group elimination: " ++ show (Map.size groupsEliminated) - putStrLn "" - - putStrLn "Group-eliminated rules:" - mapM_ (\(name, rule) -> do - putStrLn $ " " ++ show name - putStrLn $ " " ++ show rule - putStrLn "" - ) (Map.toList groupsEliminated) - - putStrLn "✓ All groups have been eliminated!"