Skip to content

Commit 836e373

Browse files
committed
Data.PrioTrie: use Text instead of ByteString
Because the user is entering text, not bytes, and because it's faster for unicode text (comparing one unicode character at a time instead of one byte at a time).
1 parent 5f06831 commit 836e373

File tree

3 files changed

+78
-66
lines changed

3 files changed

+78
-66
lines changed

src/server/Data/PrioTrie.hs

+53-38
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
{-# HLINT ignore "Use camelCase" #-}
44
{-# LANGUAGE TupleSections #-}
55
{-# LANGUAGE DeriveGeneric #-}
6+
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
67
-- TODO: document
78
module Data.PrioTrie
89
( PrioTrie
@@ -12,21 +13,22 @@ module Data.PrioTrie
1213
)
1314
where
1415

15-
import qualified Data.ByteString as BS
1616
import qualified Data.List.NonEmpty as NE
1717
import qualified Data.IntMap.Strict as IMap
1818
import qualified Data.Map.Strict as Map
19-
import Data.Word (Word8)
2019
import qualified Data.Set as Set
2120
import Data.Maybe (fromJust)
2221
import Data.List (sortOn)
2322
import Data.Ord (Down(Down))
2423
import Control.DeepSeq (NFData)
2524
import GHC.Generics (Generic, Generic1)
25+
import qualified Data.Char
26+
import Data.Bifunctor (first)
27+
import qualified Data.Text as T
2628

2729
data PrioTrie prio a
2830
= PrioTrie_Node
29-
!(IMap.IntMap (PrioTrie prio a))
31+
!(CharMap (PrioTrie prio a))
3032
-- ^ Children
3133
!(NE.NonEmpty (prio, a))
3234
-- ^ List containing /all/ items that match the prefix reached through the 'IntMap'.
@@ -36,32 +38,32 @@ data PrioTrie prio a
3638

3739
instance (NFData prio, NFData a) => NFData (PrioTrie prio a)
3840

39-
-- | Construct a 'PrioTrie' from a list of items and their priorities, deriving each 'BS.ByteString' key from the item
41+
-- | Construct a 'PrioTrie' from a list of items and their priorities, deriving each 'T.Text' key from the item
4042
fromListDeriveKey
4143
:: forall prio a.
4244
Ord prio
4345
=> (NE.NonEmpty (prio, a) -> NE.NonEmpty (prio, a))
4446
-- ^ Modify the sorted list stored at each 'PrioTrie_Node'.
4547
-- Used to e.g. limit the number of items in the list.
46-
-> (a -> BS.ByteString)
47-
-- ^ Derive a 'BS.ByteString' from the item (@a@) stored in the 'PrioTrie'
48+
-> (a -> T.Text)
49+
-- ^ Derive a 'T.Text' from the item (@a@) stored in the 'PrioTrie'
4850
-> NE.NonEmpty (prio, a)
4951
-> PrioTrie prio a
5052
fromListDeriveKey modify deriveKey lst =
5153
fromList modify $ NE.map (\prioAndA -> (deriveKey $ snd prioAndA, prioAndA)) lst
5254

53-
-- | Construct a 'PrioTrie' from a list of items and their priorities with the given 'BS.ByteString' as key.
55+
-- | Construct a 'PrioTrie' from a list of items and their priorities with the given 'T.Text' as key.
5456
fromList
5557
:: forall prio a.
5658
Ord prio
5759
=> (NE.NonEmpty (prio, a) -> NE.NonEmpty (prio, a))
5860
-- ^ Modify the sorted list stored at each 'PrioTrie_Node'.
5961
-- Used to e.g. limit the number of items in the list.
60-
-> NE.NonEmpty (BS.ByteString, (prio, a))
62+
-> NE.NonEmpty (T.Text, (prio, a))
6163
-> PrioTrie prio a
6264
fromList modify lst =
6365
let
64-
initMap :: Map.Map BS.ByteString (NE.NonEmpty (prio, a)) -- unsorted!
66+
initMap :: Map.Map T.Text (NE.NonEmpty (prio, a)) -- unsorted!
6567
-- ByteString -> [(144, Data.Internal.ByteString), (37, Data.Internal.Lazy.ByteString)]
6668
-- String -> [(12344, Data.String.String)]
6769
-- Tree -> [(13, Data.Tree.Tree), (7, Data.Tree.Special.Tree)]
@@ -71,63 +73,76 @@ fromList modify lst =
7173
(map (fmap NE.singleton) (NE.toList lst))
7274

7375
mkUnconsMap
74-
:: Map.Map BS.ByteString (NE.NonEmpty (prio, a))
75-
-> Map.Map (Word8, BS.ByteString) (NE.NonEmpty (prio, a))
76+
:: Map.Map T.Text (NE.NonEmpty (prio, a))
77+
-> Map.Map (Char, T.Text) (NE.NonEmpty (prio, a))
7678
mkUnconsMap map' =
7779
Map.mapKeys fromJust $ -- safe because all 'Nothing' keys have been removed
7880
Map.withoutKeys
79-
(Map.mapKeys BS.uncons map')
81+
(Map.mapKeys T.uncons map')
8082
(Set.singleton Nothing)
8183

8284
mkPrefixToList
83-
:: Map.Map (Word8, BS.ByteString) (NE.NonEmpty (prio, a)) -- output of 'mkUnconsMap'
84-
-> Map.Map Word8 (NE.NonEmpty (BS.ByteString, NE.NonEmpty (prio, a))) -- input to 'fromList' for each Word8-prefix
85+
:: Map.Map (Char, T.Text) (NE.NonEmpty (prio, a)) -- output of 'mkUnconsMap'
86+
-> Map.Map Char (NE.NonEmpty (T.Text, NE.NonEmpty (prio, a))) -- input to 'fromList' for each Char-prefix
8587
mkPrefixToList =
8688
Map.fromListWith (<>) -- TODO: performance
87-
. map (\((w8, bs), ne) -> (w8, NE.singleton (bs, ne)))
89+
. map (\((char, txt), ne) -> (char, NE.singleton (txt, ne)))
8890
. Map.toList
8991

90-
mkIntMap
91-
:: Map.Map Word8 (NE.NonEmpty (BS.ByteString, NE.NonEmpty (prio, a))) -- output of 'mkPrefixToList'
92-
-> IMap.IntMap (PrioTrie prio a) -- first argument to 'PrioTrie_Node'
93-
mkIntMap =
94-
IMap.fromList
92+
mkCharMap
93+
:: Map.Map Char (NE.NonEmpty (T.Text, NE.NonEmpty (prio, a))) -- output of 'mkPrefixToList'
94+
-> CharMap (PrioTrie prio a) -- first argument to 'PrioTrie_Node'
95+
mkCharMap =
96+
fromListCharMap
9597
. Map.toList
9698
. Map.map (fromList modify)
97-
. Map.map (NE.fromList . concatMap (\(bs, ne) -> map (bs,) (NE.toList ne)) . NE.toList)
98-
. Map.mapKeys fromIntegral
99+
. Map.map (NE.fromList . concatMap (\(txt, ne) -> map (txt,) (NE.toList ne)) . NE.toList)
99100

100-
intMap :: IMap.IntMap (PrioTrie prio a)
101-
intMap =
102-
mkIntMap $ mkPrefixToList $ mkUnconsMap initMap
101+
charMap :: CharMap (PrioTrie prio a)
102+
charMap =
103+
mkCharMap $ mkPrefixToList $ mkUnconsMap initMap
103104

104105
-- the non-empty input list sorted by priority (descending)
105106
nonEmpty = modify $
106107
NE.fromList . sortOn (Down . fst) $ map snd (NE.toList lst)
107108

108-
in PrioTrie_Node intMap nonEmpty
109+
in PrioTrie_Node charMap nonEmpty
109110

110111
-- | Convert a 'PrioTrie' to a list of keys and items (along with priority)
111112
-- given the @deriveKey@ function used to construct the 'PrioTrie' using 'fromListDeriveKey'
112113
toListDeriveKey
113-
:: (a -> BS.ByteString)
114-
-- ^ Derive a 'BS.ByteString' from the item (@a@) stored in the 'PrioTrie'
114+
:: (a -> T.Text)
115+
-- ^ Derive a 'T.Text' from the item (@a@) stored in the 'PrioTrie'
115116
-> PrioTrie prio a
116-
-> NE.NonEmpty (BS.ByteString, (prio, a))
117+
-> NE.NonEmpty (T.Text, (prio, a))
117118
toListDeriveKey deriveKey (PrioTrie_Node _ neList) =
118119
NE.map (\prioAndA -> (deriveKey $ snd prioAndA, prioAndA)) neList
119120

120121
prefixLookup
121122
:: PrioTrie prio a
122-
-> BS.ByteString
123+
-> T.Text
123124
-> Maybe (NE.NonEmpty (prio, a))
124-
prefixLookup initTrie initBs =
125-
let go (PrioTrie_Node intMap neList) bs =
126-
case BS.uncons bs of
127-
Nothing -> -- end of 'bs' reached
125+
prefixLookup initTrie prefix =
126+
let go (PrioTrie_Node charMap neList) prefix' =
127+
case T.uncons prefix' of
128+
Nothing -> -- end of prefix reached
128129
Just neList
129-
Just (w8, bsRem) -> --
130-
case IMap.lookup (fromIntegral w8) intMap of
131-
Just trie' -> go trie' bsRem
130+
Just (char, prefixRem) -> --
131+
case lookupCharMap char charMap of
132+
Just trie' -> go trie' prefixRem
132133
Nothing -> Nothing
133-
in go initTrie initBs
134+
in go initTrie prefix
135+
136+
-- ####################
137+
-- ## Helpers ##
138+
-- ####################
139+
140+
-- | A map from a unicode character ('Char') to something
141+
newtype CharMap a = CharMap { unCharMap :: IMap.IntMap a }
142+
deriving (Eq, Show, Functor, Generic, Generic1, NFData)
143+
144+
lookupCharMap :: Char -> CharMap a -> Maybe a
145+
lookupCharMap c = IMap.lookup (Data.Char.ord c) . unCharMap
146+
147+
fromListCharMap :: [(Char, a)] -> CharMap a
148+
fromListCharMap = CharMap . IMap.fromList . map (first Data.Char.ord)

src/server/Server/Pages/Typeahead.hs

+4-6
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@ import qualified Data.Graph.Digraph as DG
2020
import qualified Data.Map.Strict as Map
2121
import Data.Tuple (swap)
2222
import qualified Data.List.NonEmpty as NE
23-
import qualified Data.ByteString as BS
2423
import Control.Monad.Except (throwError)
25-
import qualified Data.Text.Encoding as TE
2624
import qualified Control.Exception as Ex
2725
import qualified Control.DeepSeq
2826
import qualified Control.Monad.ST as ST
@@ -47,7 +45,7 @@ mkHandler mLimit graph = do
4745
mPrioTrie <- mkPrioTrie mLimit graph
4846
prioTrie <- maybe (fail "empty input graph in Typeahead handler") pure mPrioTrie
4947
let initialSuggestions = suggestions prioTrie ""
50-
lookupFunction prefix = fmap snd <$> Data.PrioTrie.prefixLookup prioTrie (TE.encodeUtf8 prefix)
48+
lookupFunction prefix = fmap snd <$> Data.PrioTrie.prefixLookup prioTrie prefix
5149
pure (handler prioTrie, lookupFunction, initialSuggestions)
5250

5351
-- | For each vertex (type), count the number of different packages that export a function which operates on this vertex (type).
@@ -91,8 +89,8 @@ mkPrioTrie mLimit graph = do
9189
limit = maybe id (\l -> NE.fromList . NE.take (fromIntegral l)) mLimit
9290

9391
-- | Match what the user enters with this string
94-
deriveKey :: FunGraph.FullyQualifiedType -> BS.ByteString
95-
deriveKey = TE.encodeUtf8 . FunGraph.renderFullyQualifiedTypeUnqualified
92+
deriveKey :: FunGraph.FullyQualifiedType -> T.Text
93+
deriveKey = FunGraph.renderFullyQualifiedTypeUnqualified
9694

9795
handler
9896
:: Data.PrioTrie.PrioTrie Word FunGraph.FullyQualifiedType
@@ -113,7 +111,7 @@ suggestions prioTrie prefix = do
113111
forM_ mSuggestions $ \suggestionsLst ->
114112
forM_ suggestionsLst $ \(_, fqt) -> suggestionOption_ [] fqt
115113
where
116-
mSuggestions = Data.PrioTrie.prefixLookup prioTrie (TE.encodeUtf8 prefix)
114+
mSuggestions = Data.PrioTrie.prefixLookup prioTrie prefix
117115

118116
suggestionOption_ :: [Attribute] -> FunGraph.FullyQualifiedType -> Html ()
119117
suggestionOption_ extraAttrs fqt =

test/prop/Spec/PrioTrie.hs

+21-22
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ module Spec.PrioTrie (spec, setup) where
44
import qualified Control.DeepSeq
55
import qualified Control.Exception as Ex
66
import qualified Control.Monad.ST as ST
7-
import qualified Data.ByteString as BS
8-
import qualified Data.ByteString.Char8
7+
import qualified Data.Text as T
98
import qualified Data.List.NonEmpty as NE
109
import qualified Data.PrioTrie
1110
import qualified Data.Set as Set
@@ -36,7 +35,7 @@ spec
3635
spec productionPrioTrie =
3736
Tasty.testGroup "Data.PrioTrie"
3837
[ Tasty.testGroup "Positive"
39-
[ let genPrefix = const $ Data.ByteString.Char8.pack <$> arbitrary
38+
[ let genPrefix = const $ T.pack <$> arbitrary
4039
in Tasty.testGroup "Arbitrary prefix" $
4140
map (setMaxRatio 100)
4241
[ Tasty.testGroup "Arbitrary PrioTrie" $
@@ -52,7 +51,7 @@ spec productionPrioTrie =
5251
]
5352
, let genPrefix deriveKey prioTrie =
5453
let prioItems = snd <$> Data.PrioTrie.toListDeriveKey deriveKey prioTrie
55-
mkArbitraryPrefix bs = (`BS.take` bs) <$> Test.QuickCheck.choose (0, BS.length bs)
54+
mkArbitraryPrefix txt = (`T.take` txt) <$> Test.QuickCheck.choose (0, T.length txt)
5655
in Test.QuickCheck.oneof $ map (mkArbitraryPrefix . deriveKey . snd) $ NE.toList prioItems
5756
in Tasty.testGroup "Chosen prefix"
5857
[ Tasty.testGroup "Arbitrary PrioTrie" $
@@ -86,7 +85,7 @@ spec productionPrioTrie =
8685
let prioItems = NE.fromList $ Test.QuickCheck.getNonEmpty (fmap Test.QuickCheck.getLarge <$> prioAndAList)
8786
pure $ Data.PrioTrie.fromListDeriveKey id arbitraryPrioItemsDeriveKey prioItems
8887

89-
arbitraryPrioItemsDeriveKey = Data.ByteString.Char8.pack . show
88+
arbitraryPrioItemsDeriveKey = T.pack . show
9089

9190
productionPrioItemsDeriveKey = Server.Pages.Typeahead.deriveKey
9291

@@ -104,9 +103,9 @@ apply =
104103

105104
allProperties
106105
:: (Show triePrio, Show trieItem, Eq triePrio, Eq trieItem, Ord triePrio, Ord trieItem)
107-
=> (trieItem -> Data.ByteString.Char8.ByteString) -- ^ "Derive key"-function
106+
=> (trieItem -> T.Text) -- ^ "Derive key"-function
108107
-> TestArgument (Data.PrioTrie.PrioTrie triePrio trieItem)
109-
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen BS.ByteString) -- ^ Prefix generator
108+
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen T.Text) -- ^ Prefix generator
110109
-> [Tasty.TestTree]
111110
allProperties deriveKey ePrioTrie mkPrefixGen =
112111
[ deriveKeyAppliedToResultHasTheGivenPrefix deriveKey ePrioTrie mkPrefixGen
@@ -119,10 +118,10 @@ genericProperty
119118
:: (Show triePrio, Show trieItem, TQC.Testable prop)
120119
=> String -- ^ Property name
121120
-> TestArgument (Data.PrioTrie.PrioTrie triePrio trieItem)
122-
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen BS.ByteString) -- ^ Prefix generator
121+
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen T.Text) -- ^ Prefix generator
123122
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> a) -- ^ Pre-computed value available to the assertion function
124123
-> ( a
125-
-> BS.ByteString -- Generated prefix
124+
-> T.Text -- Generated prefix
126125
-> Maybe (NE.NonEmpty (triePrio, trieItem)) -- Return value of 'Data.PrioTrie.prefixLookup'
127126
-> prop
128127
) -- ^ Assertion function
@@ -135,9 +134,9 @@ genericProperty name ePrioTrie mkPrefixGen assertValue assert =
135134
assert (assertValue prioTrie) prefix $ Data.PrioTrie.prefixLookup prioTrie prefix
136135

137136
deriveKeyAppliedToResultHasTheGivenPrefix
138-
:: (Show triePrio, Show trieItem) => (trieItem -> Data.ByteString.Char8.ByteString) -- ^ "Derive key"-function
137+
:: (Show triePrio, Show trieItem) => (trieItem -> T.Text) -- ^ "Derive key"-function
139138
-> TestArgument (Data.PrioTrie.PrioTrie triePrio trieItem)
140-
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen BS.ByteString) -- ^ Prefix generator
139+
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen T.Text) -- ^ Prefix generator
141140
-> Tasty.TestTree
142141
deriveKeyAppliedToResultHasTheGivenPrefix deriveKey ePrioTrie mkPrefixGen =
143142
genericProperty
@@ -151,15 +150,15 @@ deriveKeyAppliedToResultHasTheGivenPrefix deriveKey ePrioTrie mkPrefixGen =
151150
let results = fromMaybe TQC.discard mResults in
152151
all
153152
(\(_, item) ->
154-
prefix `BS.isPrefixOf` deriveKey item
153+
prefix `T.isPrefixOf` deriveKey item
155154
)
156155
results
157156

158157
resultAndPriorityActuallyExistsInPrioTrie
159158
:: (Show triePrio, Show trieItem, Eq triePrio, Eq trieItem, Ord trieItem, Ord triePrio)
160-
=> (trieItem -> Data.ByteString.Char8.ByteString) -- ^ "Derive key"-function
159+
=> (trieItem -> T.Text) -- ^ "Derive key"-function
161160
-> TestArgument (Data.PrioTrie.PrioTrie triePrio trieItem)
162-
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen BS.ByteString) -- ^ Prefix generator
161+
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen T.Text) -- ^ Prefix generator
163162
-> Tasty.TestTree
164163
resultAndPriorityActuallyExistsInPrioTrie deriveKey ePrioTrie mkPrefixGen =
165164
genericProperty
@@ -179,9 +178,9 @@ resultAndPriorityActuallyExistsInPrioTrie deriveKey ePrioTrie mkPrefixGen =
179178

180179
noValidResultsLeftOut
181180
:: (Show triePrio, Show trieItem, Eq triePrio, Eq trieItem, Ord triePrio, Ord trieItem)
182-
=> (trieItem -> Data.ByteString.Char8.ByteString) -- ^ "Derive key"-function
181+
=> (trieItem -> T.Text) -- ^ "Derive key"-function
183182
-> TestArgument (Data.PrioTrie.PrioTrie triePrio trieItem)
184-
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen BS.ByteString) -- ^ Prefix generator
183+
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen T.Text) -- ^ Prefix generator
185184
-> Tasty.TestTree
186185
noValidResultsLeftOut deriveKey ePrioTrie mkPrefixGen =
187186
genericProperty
@@ -200,14 +199,14 @@ noValidResultsLeftOut deriveKey ePrioTrie mkPrefixGen =
200199
resultSet = Set.fromList $ NE.toList results
201200
validResultSet =
202201
Set.map (\(triePrio, (trieItem, _)) -> (triePrio, trieItem))
203-
$ Set.filter (\(_, (_, key)) -> prefix `BS.isPrefixOf` key) prioItemsWithKeySet
202+
$ Set.filter (\(_, (_, key)) -> prefix `T.isPrefixOf` key) prioItemsWithKeySet
204203
in resultSet == validResultSet
205204

206205
resultPrioritiesAreDecreasing
207206
:: (Show triePrio, Show trieItem, Eq triePrio, Eq trieItem, Ord triePrio)
208-
=> (trieItem -> Data.ByteString.Char8.ByteString) -- ^ "Derive key"-function
207+
=> (trieItem -> T.Text) -- ^ "Derive key"-function
209208
-> TestArgument (Data.PrioTrie.PrioTrie triePrio trieItem)
210-
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen BS.ByteString) -- ^ Prefix generator
209+
-> (Data.PrioTrie.PrioTrie triePrio trieItem -> TQC.Gen T.Text) -- ^ Prefix generator
211210
-> Tasty.TestTree
212211
resultPrioritiesAreDecreasing _ ePrioTrie mkPrefixGen =
213212
genericProperty
@@ -248,15 +247,15 @@ resultPrioritiesAreDecreasing _ ePrioTrie mkPrefixGen =
248247

249248
prefixLookupReturnsNothingForNonPrefix
250249
:: (Show triePrio, Show trieItem)
251-
=> (trieItem -> BS.ByteString)
250+
=> (trieItem -> T.Text)
252251
-> TestArgument (Data.PrioTrie.PrioTrie triePrio trieItem)
253252
-> Tasty.TestTree
254253
prefixLookupReturnsNothingForNonPrefix deriveKey ePrioTrie =
255254
let mkPrefixGen prioTrie =
256255
let prioItems = snd <$> Data.PrioTrie.toListDeriveKey deriveKey prioTrie
257256
keys = NE.map (deriveKey . snd) prioItems
258-
in Test.QuickCheck.suchThat (Data.ByteString.Char8.pack <$> arbitrary) $ \bs ->
259-
not $ any (BS.isPrefixOf bs) keys
257+
in Test.QuickCheck.suchThat (T.pack <$> arbitrary) $ \txt ->
258+
not $ any (T.isPrefixOf txt) keys
260259
in
261260
genericProperty
262261
"'prefixLookup' returns 'Nothing' for non-prefix"

0 commit comments

Comments
 (0)