diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d7060e2..51801a4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,4 +11,14 @@ It works like this: * double-check the `CHANGELOG.md`... -1. check the github action for release and then check the subsequent release cut and the packages attached \ No newline at end of file +1. check the github action for release and then check the subsequent release cut and the packages attached + +## Tests + +Use doctests for simple unit tests (you can even use doctests for property tests), but use +QuickCheck tests in `test/` to test a module's public functions/exports. + +The purpose of the actual tests in `tests/` is not to test the minutia of how a goal is +accomplished (all the private functions and such behind a public interface/function), but +instead to test that the public interface actually accomplishes what is desirable. For a +good example of this, see the test(s) for the search functionality. \ No newline at end of file diff --git a/bore.cabal b/bore.cabal index faaaa40..560b12b 100644 --- a/bore.cabal +++ b/bore.cabal @@ -5,7 +5,7 @@ cabal-version: 2.2 -- see: https://github.com/sol/hpack name: bore -version: 0.17.0.0 +version: 0.18.0.0 synopsis: Build gopherholes. description: Static site builder, but for gopherholes. Manage phlogs with tags, use the Markdown renderer and Mustache templating system. category: Network @@ -155,3 +155,60 @@ executable bore , xml-conduit-writer , yaml default-language: GHC2021 + +test-suite bore-test + type: exitcode-stdio-1.0 + main-is: Spec.hs + other-modules: + Paths_bore + autogen-modules: + Paths_bore + hs-source-dirs: + test + ghc-options: -threaded -rtsopts -with-rtsopts=-N -Wall -Werror -Wall -threaded -rtsopts -with-rtsopts=-N + build-depends: + QuickCheck + , aeson + , attoparsec + , base + , bore + , bytestring + , commonmark + , containers + , data-default + , directory + , doctest + , edit-distance + , fast-logger + , filepath + , filepath-bytestring + , filepattern + , frontmatter + , fsnotify + , fuzzy-dates + , hashable + , hashmap + , hourglass + , mtl + , mustache + , neat-interpolation + , network-uri + , optparse-applicative + , parsec + , raw-strings-qq + , socket + , spacecookie + , split + , systemd + , temporary + , text + , time + , tomland + , unix + , unordered-containers + , vector + , word-wrap + , xml-conduit + , xml-conduit-writer + , yaml + default-language: GHC2021 diff --git a/package.yaml b/package.yaml index cd15197..49be362 100644 --- a/package.yaml +++ b/package.yaml @@ -92,3 +92,16 @@ executables: - DeriveAnyClass - OverloadedRecordDot default-language: GHC2021 + +tests: + bore-test: + main: Spec.hs + source-dirs: test + ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N + build-depends: + - bore + - QuickCheck + - text + - temporary + - doctest + default-language: GHC2021 \ No newline at end of file diff --git a/src/Bore/FileLayout.hs b/src/Bore/FileLayout.hs index 8dd7520..82a80da 100644 --- a/src/Bore/FileLayout.hs +++ b/src/Bore/FileLayout.hs @@ -7,15 +7,23 @@ Also includes tools for dealing with file paths and dealing with the file system rudimentary file operations that are only specific to project layout, in other words, do things according to project layout rules, and nothing more specific. +Also handles some things to do with Gopher selectors. + -} module Bore.FileLayout where import Control.Monad (filterM, when, forM_) import System.Directory (listDirectory, doesDirectoryExist, copyFile, createDirectoryIfMissing, canonicalizePath, removeDirectoryRecursive, removeFile) -import System.FilePath ((), makeRelative, takeFileName, takeDirectory) +import System.FilePath ((), makeRelative, takeFileName, takeDirectory, isValid, isRelative, dropExtension) import qualified Data.Text as Text import qualified Data.Text.IO as TextIO +import Data.List (sortOn) +import Data.Ord (Down(..)) +import Bore.Text.Clean (cleanText) +import Data.Text.Encoding (encodeUtf8) +import qualified Data.ByteString as B +import qualified Data.Text as T {- | Directory which "gets left alone" during the build process, i.e., it won't get cleared out of output. @@ -244,4 +252,70 @@ resetOutputDirectory outputDir = do isDir <- doesDirectoryExist itemPath if isDir then removeDirectoryRecursive itemPath - else removeFile itemPath \ No newline at end of file + else removeFile itemPath + +-- | Check if what's supposed to be a filename is safe and sane. +-- +-- Example: +-- >>> isSafeFilename "file.txt" +-- True +-- >>> isSafeFilename "../file.txt" +-- False +-- >>> isSafeFilename "file.txt/" +-- False +-- >>> isSafeFilename "file.txt/.." +-- False +-- >>> isSafeFilename "." +-- False +-- >>> isSafeFilename "/dir/file.txt" +-- False +-- >>> isSafeFilename "file" +-- True +isSafeFilename :: FilePath -> Bool +isSafeFilename name = + isValid name -- Checks if it contains only valid characters + && not (null name) -- Ensures it’s not empty + && not (null $ takeFileName name) -- Ensures it’s not just a directory + && takeFileName name == name -- ensure it doesn't have a directory traversal + && isRelative name -- Ensures it's a relative path + && name /= "." && name /= ".." -- Disallows "." and ".." + && filenameByteLength name <= 255 -- Ensures it's not too long + +-- | Get the words/tokens from a selector, and normalize them. +-- +-- Example: +-- >>> selectorWords "hello_world-foo bar" +-- ["hello","world","foo","bar"] +selectorWords :: FilePath -> [Text.Text] +selectorWords = pathWords . Text.toLower . cleanText . Text.pack + +-- | Get the byte length of a filename in UTF-8 encoding +filenameByteLength :: String -> Int +filenameByteLength filename = B.length . encodeUtf8 $ T.pack filename + +-- | Get the words/tokens from a path. +-- +-- Also enforces a maximum length. +-- +-- Differs from `selectorWords` in that it's not focused on cleaning the text of any +-- ineappropriate characters. +-- +-- Example: +-- >>> pathWords "hello_world-foo bar" +-- ["hello","world","foo","bar"] +-- >>> pathWords "hello/word/foo.gopher.txt" +-- ["hello","word","foo"] +pathWords :: Text.Text -> [Text.Text] +pathWords = Text.split (`elem` splitChars) . dropExtensionSpecial + where + splitChars = [' ', '_', '-', '.', '/'] + sortedExtensions = sortOn (Down . Text.length) (map Text.pack onlyParse) + -- Drop the onlyParse extensions if it exists, otherwise just use dropExtension from FilePath + dropExtensionSpecial :: Text.Text -> Text.Text + dropExtensionSpecial fp = + let + fpNoExt = dropExtension (Text.unpack fp) -- Remove last extension if no specific match + in + case filter (`Text.isSuffixOf` fp) sortedExtensions of + (ext:_) -> Text.dropEnd (Text.length ext) fp + [] -> Text.pack fpNoExt \ No newline at end of file diff --git a/src/Bore/SpacecookieClone/Search.hs b/src/Bore/SpacecookieClone/Search.hs index cfb01db..11ef150 100644 --- a/src/Bore/SpacecookieClone/Search.hs +++ b/src/Bore/SpacecookieClone/Search.hs @@ -1,5 +1,6 @@ -- TODO: Am I not stripping punctuation from keywords and tokens? That's a big deal. Also -- need to ensure a standard function for doing so like `searchableText`. +-- TODO: improvements: exclude words like and, is, etc. {- | Text file search and rank functionality. Designed for use with the Bore SpacecookieClone, for the Gopher Protocol, in order to @@ -34,6 +35,16 @@ import Text.EditDistance (defaultEditCosts, levenshteinDistance) -- | The relevancy score of a document as it pertains to some set of keywords. type RankScore = Float +-- | The minimum length of a keyword to be considered for fuzzy matching. This is very +-- English-centric. For example, this would be foolish for Chinese, although it could use +-- pinyin, romanization of various logographic languages or whatever. +minFuzzyKeywordLength :: Int +minFuzzyKeywordLength = 4 + +-- | The minimum likeness percentage for a fuzzy match to be considered. +minimumFuzzyLikeness :: Float +minimumFuzzyLikeness = 75 + -- | Define the size of the context window (number of words before and after) contextWindowSize :: Int contextWindowSize = 5 @@ -42,9 +53,9 @@ contextWindowSize = 5 scoreThreshold :: Float scoreThreshold = 200 --- | Weight constant for proximity ordered bonus. -weightProximityOrdered :: Float -weightProximityOrdered = 100 +-- | Weight constant for ordered proximity bonus +weightOrderedProximity :: Float +weightOrderedProximity = 1000 -- | Weight constant for fuzzy match score. -- @@ -58,7 +69,7 @@ weightFrequency = 2.0 -- | Weight for each exact match. weightExactMatch :: Float -weightExactMatch = 2.0 +weightExactMatch = 10.0 -- | Weight for keywords exactly appearing in selector (path). weightSelectorExact :: Float @@ -66,7 +77,7 @@ weightSelectorExact = 100.0 -- | Weight for keywords fuzzy appearing in selector (path). weightSelectorFuzzy :: Float -weightSelectorFuzzy = 1.0 +weightSelectorFuzzy = weightFuzzyMatch -- | Data structure to hold context snippet along with its start and end indices data ContextSnippet = ContextSnippet @@ -85,18 +96,20 @@ rankDocument -- ^ Keywords to search for. -> Text -- ^ Document to rank/score. - -> (RankScore, [ContextSnippet]) - -- ^ The score along with the "contexts" where matches occur. + -> (String, RankScore, [ContextSnippet]) + -- ^ The highlighted selector, the score, and the "contexts" where matches occur (in the body). rankDocument selector keywords content = let contentWords = T.words . toLower $ cleanText content - (totalScore, keywordMatches) = computeTotalScore selector keywords contentWords + (highlightedSelector, totalScore, keywordMatches) = computeTotalScore selector keywords contentWords contexts = extractKeywordContexts keywordMatches contentWords - in (totalScore, contexts) + in (highlightedSelector, totalScore, contexts) -- | Find all keyword *exact* matches in the document and their positions -- | Find keyword matches (both exact and fuzzy) in the document and their positions. -- -- Finds all exact matches, but if there are none, try to find the best fuzzy match. +-- +-- Will skip fuzzy matches if the keyword is too short. findKeywordMatches :: [Text] -> [Text] @@ -107,7 +120,14 @@ findKeywordMatches keywords contentWords = concatMap findMatchesForKeyword keywo findMatchesForKeyword :: Text -> [(Text, Int, Float)] findMatchesForKeyword keyword = case [(keyword, idx, 100) | idx <- findIndices (== keyword) contentWords] of - [] -> [let (likeness, idx) = bestFuzzyMatch keyword contentWords in (keyword, idx, likeness)] + [] -> + if T.length keyword >= minFuzzyKeywordLength + then + filter (\(_, _, likeness) -> likeness >= minimumFuzzyLikeness) + [ let (likeness, idx) = bestFuzzyMatch keyword contentWords in (keyword, idx, likeness) + ] + else + [] exactMatches -> exactMatches @@ -125,31 +145,65 @@ findKeywordMatches keywords contentWords = concatMap findMatchesForKeyword keywo -- >>> keywords = ["I", "like", "tags"] -- >>> fst $ computeTotalScore keywords tokens -- 300.0 -computeTotalScore :: FilePath -> [Text] -> [Text] -> (RankScore, [(Text, Int)]) +computeTotalScore + :: FilePath + -> [Text] + -> [Text] + -> (String, RankScore, [(Text, Int)]) + -- ^ The new selector with matches highlighted, the final score, and the keyword matches (with their indexes). computeTotalScore selector keywords contentWords = let (keywordMatches, exactMatchScore) = computeMatchScore keywords contentWords + (selectorHighlighted, selectorScore) = computeSelectorScore keywords selector finalScore = sum - [ computeSelectorScore keywords selector - , keywordProximity keywordMatches + [ selectorScore + , keywordOrderedProximity keywordMatches , computeFrequencyScore keywords contentWords --, computeFuzzyMatchScore keywords contentWords , exactMatchScore ] in - (finalScore, keywordMatches) + (selectorHighlighted, finalScore, keywordMatches) -- | The score for keywords (fuzzy) appearing in the selector (path). -- -- Currently has no weighted difference between exact and fuzzy matches. I think this is -- fine since only one fuzzy match (the best) ever gets considered. -computeSelectorScore :: [Text] -> FilePath -> RankScore +-- +-- Example: +-- >>> computeSelectorScore ["hello", "world"] "test/this/hello_world-foo bar.foo" +-- 20000.0 +--- >>> computeSelectorScore ["gopher"] "test/this/hello_world-foo bar.gopher.txt" +-- 16.666672 +--- >>> computeSelectorScore ["gopher"] "test/this/hello_gopher-foo bar.gopher.txt" +-- 10000.0 +-- >>> computeSelectorScore ["world"] "hello/world/foo.txt" +-- 10000.0 +computeSelectorScore + :: [Text] + -> FilePath + -> (String, RankScore) + -- ^ The new selector with matches highlighted and the score. computeSelectorScore keywords selector = let - selectorWords = splitWords . toLower $ cleanText (T.pack selector) - likenessSum = sum [if likeness == 100 then likeness * weightSelectorExact else likeness * weightSelectorFuzzy | (_, _, likeness) <- findKeywordMatches keywords selectorWords] + selectorWords' = selectorWords selector + wordLikeness = + [ (word, indx, if likeness == 100 then likeness * weightSelectorExact else likeness * weightSelectorFuzzy) + | (word, indx, likeness) <- findKeywordMatches keywords selectorWords' + ] + likenessSum = sum $ map (\(_, _, likeness) -> likeness) wordLikeness + -- Highlight matches directly within `selector` using start positions + highlightedSelector = + foldr + (\(word, indx, _) acc -> + let (prefix, rest) = splitAt (T.length (T.unwords $ take indx selectorWords')) acc + (toHighlight, suffix) = splitAt (T.length word) rest + in prefix <> "[" <> toHighlight <> "]" <> suffix + ) + selector + wordLikeness in - likenessSum + (highlightedSelector, likenessSum) -- Change this to do the fuzzy match and just do it off the batt and give bonus for 100% match. computeMatchScore :: [Text] -> [Text] -> ([(Text, Int)], RankScore) @@ -172,22 +226,27 @@ computeFrequencyScore keywords contentWords = -- | Calculate proximity score for different keywords based on their positions. The closer -- the keywords are to each other, the higher the score. A significant bonus is applied for -- keywords appearing in the same order with small gaps. -keywordProximity :: [(Text, Int)] -> RankScore -keywordProximity keywordMatches = +-- +-- Assumes @@keywordMatches@@ is supplied in the order keywords were given. +-- +-- Example: +-- >>> keywordOrderedProximity [("hello", 0), ("world", 2), ("foo", 5)] +-- 28.57143 +keywordOrderedProximity :: [(Text, Int)] -> RankScore +keywordOrderedProximity keywordMatches = let positions = map snd keywordMatches -- Calculate proximity between different keywords keywordPairs = zip positions (tail positions) -- Consecutive positions for order bonus + -- Will naturally be the distance between the keywords in the order they were given. distances = map (\(i, j) -> abs (i - j)) keywordPairs - -- Apply bonus if keywords are in the same order and close proximity note this also - -- gives the same bonus for cases like searching for "hello world" and "hello hello - -- hello world" - orderBonus = - if length keywordPairs > 1 - then (if all (uncurry (<)) keywordPairs then 1 else 0) * weightProximityOrdered - else 0 - in if null distances - then 0 - else max 0 (50 - fromIntegral (minimum distances)) + orderBonus -- Closer proximity scores higher, add order bonus + -- Calculate the average distance if there are distances available. + avgDistanceBonus = + if length keywordMatches <= 1 + then 0 + else + let average = fromIntegral (sum distances) / fromIntegral (length distances) + in (1 / (average + 1)) + in avgDistanceBonus * weightOrderedProximity -- | Count the number of times a keyword appears in the content keywordFrequency :: [Text] -> Text -> Int @@ -268,7 +327,12 @@ addContext acc cs = else acc ++ [cs] -- | Function to process multiple documents -searchDocuments :: [Text] -> AbsolutePath -> AbsolutePath -> IO [(FilePath, Bool, RankScore, [Text])] +searchDocuments + :: [Text] + -> AbsolutePath + -> AbsolutePath + -> IO [(FilePath, String, Bool, RankScore, [Text])] + -- ^ The file path, the highlighted selector, whether it's a menu, the score, and the context snippets. searchDocuments keywords sourceDirectoryAbsolutePath absoluteOutputPath = do docPaths <- getTxtFiles absoluteOutputPath docs <- @@ -283,10 +347,10 @@ searchDocuments keywords sourceDirectoryAbsolutePath absoluteOutputPath = do map ( \(fp, isMenu, content) -> let relativePath = makeRelative absoluteOutputPath fp - (score, contexts) = rankDocument relativePath keywords content + (highlightedSelector, score, contexts) = rankDocument relativePath keywords content nonOverlappingContexts = combineContexts contexts contextTexts = map csText nonOverlappingContexts - in (fp, isMenu, score, contextTexts) + in (fp, highlightedSelector, isMenu, score, contextTexts) ) docs @@ -349,14 +413,19 @@ removeGophermapSyntax = getSearchResults :: Text -> AbsolutePath -> AbsolutePath -> IO GopherResponse getSearchResults query sourceDirectoryAbsolutePath absoluteOutputPath = do documentResults <- searchDocuments (T.words query) sourceDirectoryAbsolutePath absoluteOutputPath - let prunedResults = filter (\(_, _, s, _) -> s >= scoreThreshold) documentResults - pure $ searchResponse absoluteOutputPath query $ L.sortOn (\(_, _, s, _) -> negate s) prunedResults + let prunedResults = filter (\(_, _, _, s, _) -> s >= scoreThreshold) documentResults + pure $ searchResponse absoluteOutputPath query $ L.sortOn (\(_, _, _, s, _) -> negate s) prunedResults makeInfoLine :: B.ByteString -> GopherMenuItem makeInfoLine t = Item InfoLine t "" Nothing Nothing -- Function to generate a GopherResponse for search results -searchResponse :: AbsolutePath -> Text -> [(FilePath, Bool, RankScore, [Text])] -> GopherResponse +searchResponse + :: AbsolutePath + -> Text + -> [(FilePath, String, Bool, RankScore, [Text])] + -- ^ The selector (?) or filepath (?), the highlighted selector, whether it's a menu, the score, and the context snippets. + -> GopherResponse searchResponse absoluteOutputPath query files = let actualResults = concatMap (makeSearchResult absoluteOutputPath) files preamble = @@ -369,8 +438,12 @@ searchResponse absoluteOutputPath query files = in MenuResponse $ preamble : actualResults -- Build a search result for a file -makeSearchResult :: AbsolutePath -> (FilePath, Bool, RankScore, [Text]) -> [GopherMenuItem] -makeSearchResult absoluteOutputPath (fp, isMenu, score, contexts) = +makeSearchResult + :: AbsolutePath + -> (FilePath, String, Bool, RankScore, [Text]) + -- ^ The selector (?) or filepath (?), the highlighted selector, whether it's a menu, the score, and the context snippets. + -> [GopherMenuItem] +makeSearchResult absoluteOutputPath (fp, highlightedSelector, isMenu, score, contexts) = makeLink selector : scoreLine : [makeSummary (T.intercalate " ... " contexts)] where selector = "/" makeRelative absoluteOutputPath fp @@ -379,7 +452,7 @@ makeSearchResult absoluteOutputPath (fp, isMenu, score, contexts) = makeLink selector' = Item (if isMenu then Directory else File) - (C8.pack selector') + (C8.pack highlightedSelector) (C8.pack selector') Nothing Nothing diff --git a/src/Bore/Text/Clean.hs b/src/Bore/Text/Clean.hs index 3b676cb..c8d389c 100644 --- a/src/Bore/Text/Clean.hs +++ b/src/Bore/Text/Clean.hs @@ -8,14 +8,6 @@ import Data.Text (Text) import qualified Data.Text as T import Data.Char (isPunctuation, isAlphaNum, isSpace, generalCategory, GeneralCategory(..)) --- | Like words, except works not only on spaces, but also on underscores and hyphens. --- --- Example: --- >>> splitWords "hello_world-foo bar" --- ["hello","world","foo","bar"] -splitWords :: Text -> [Text] -splitWords = T.split (\c -> c == ' ' || c == '_' || c == '-') - -- | Remove repeating punctuation and keep only desired characters (letters, numbers, punctuation, and spaces). cleanText :: Text -> Text cleanText = removeRepeatingPunctuation . removeNonStandardChars @@ -44,10 +36,3 @@ collapsePunctuation = map collapseGroup collapseGroup grp | isPunctuation (T.head grp) = T.take 1 grp -- If group is all punctuation, keep just one | otherwise = grp -- Otherwise keep the group as is - --- Example usage: -main :: IO () -main = do - let inputText = "Hello!!! How are you?? 这是一个测试! @#$%^&*()" - let cleanedText = cleanText inputText - putStrLn $ T.unpack cleanedText -- Expected output: "Hello! How are you? 这是一个测试!" diff --git a/test/Spec.hs b/test/Spec.hs index e69de29..e7ded30 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -0,0 +1,132 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +-- TODO: generate filenames in same way like using text generation too. hav ea filepath newtype. just three.... exclude keywords. +{- | Test suite for the search module. + +The approach to testing the search functionality is to simplly test the ranking of the +search results, ensuring it ranks documents the way I'd desire and expect. + +-} + +module Main (main) where + +import Bore.FileLayout +import Bore.SpacecookieClone.Search + +import Control.Monad +import Data.Text (Text) +import Data.Text qualified as T +import Data.Text.Encoding qualified as TE +import Data.Text.IO qualified as TIO +import Network.Gopher +import System.FilePath +import System.IO.Temp (withSystemTempDirectory) +--import Test.DocTest (doctest) +import Test.QuickCheck +import qualified Data.Set as Set + +defaultFileNames :: [FilePath] +defaultFileNames = ["all_keywords.txt", "sprinkled_keywords.txt", "no_keywords.txt"] + +noKeywordsMessage :: Text +noKeywordsMessage = "This document does not contain any of the keywords we are searching for." + +-- TODO: +-- needs to exclude filenames too! or intentionally match filename keywords and adjust for +-- such in the property! that would be a good test! could easily do this by using keyword +-- in filename and otherwise adding the filenames to nokeywords! +forbiddenKeywords :: [Text] +forbiddenKeywords = (T.words "This document does not contain any of the keywords we are searching for.") ++ (concatMap selectorWords defaultFileNames) + +-- FIXME: put keywords into file name too but nothing in document, but still make that first. +-- Property to test if getSearchResults ranks documents as expected +prop_getSearchResultsRanking :: Keywords -> Property +prop_getSearchResultsRanking (Keywords keywords) = ioProperty $ do + withSystemTempDirectory "test-search" $ \dir -> do + -- Define file names and contents + -- FIXME/TODO just add one keyword match. also how about a keyword in the file name? + -- Fixme: ensure keyword safety with `isSafeFilename` somehow + let + fileNamedAfterKeywords = T.unpack $ T.intercalate "_" keywords <> ".txt" + (fileNames :: [FilePath]) = [fileNamedAfterKeywords] ++ defaultFileNames + let expectedResults = init fileNames -- in that order, too + let contiguousKeywords = T.unwords keywords + -- what about SPARSE keywords coming after this? + let sprinkledOrderedKeywords = "This is some text with " <> T.intercalate " and " keywords <> " scattered." + -- could even do one that's out-of-order! but above should test proximity + + -- Write files with specified contents + let fileContents = [noKeywordsMessage, contiguousKeywords, sprinkledOrderedKeywords, noKeywordsMessage] + forM_ (zip fileNames fileContents) $ \(fileName, content) -> do + TIO.writeFile (dir fileName) content + + -- Call getSearchResults + response <- getSearchResults (T.unwords keywords) dir dir + + -- Extract file paths in the returned results (ordered by rank) + let rankedFiles = case response of + MenuResponse items -> [takeFileName (T.unpack (TE.decodeUtf8 selector)) | Item File _ selector _ _ <- items] + _ -> [] + let items = case response of + MenuResponse i -> i + _ -> [] + putStrLn . show $ (zip fileNames fileContents) + _ <- forM (items) $ \f -> putStrLn . show $ f + return $ rankedFiles === expectedResults + +-- FIXME: use isValid to ensure filenames are safe, also should check length! +-- Generate random text data +instance Arbitrary Text where + arbitrary = sized $ \n -> do + -- Define the character generator (lowercase letters) + let charGen = elements ['a'..'z'] + let forbiddenSet = Set.fromList forbiddenKeywords + -- Create a generator for Text with the desired properties + suchThat + (do + -- Generate a length between minLen and minLen + 10 + len <- choose (2, 2 + n) + -- Generate a list of 'len' lowercase letters + chars <- vectorOf len charGen + -- Convert the list of chars into Text + return $ T.pack chars + ) + -- Predicate to ensure the generated Text is not in forbiddenWords + (\txt -> txt `Set.notMember` forbiddenSet) + +newtype Keywords = Keywords [Text] + deriving (Show) + +instance Arbitrary Keywords where + arbitrary = sized $ \n -> do + numKeywords <- chooseInt (2, min 10 (2 + n)) -- Generate between 2 and 10 keywords + keywords <- vectorOf numKeywords arbitrary + let combinedKeywords = T.unpack $ T.intercalate "_" keywords + if isSafeFilename combinedKeywords + then return $ Keywords keywords + else arbitrary -- Retry if the generated keywords don't form a valid filename + +-- I know it looks weird, but I believe this is what has to happen. +-- See: https://hackage.haskell.org/package/QuickCheck-2.15.0.1/docs/Test-QuickCheck.html#v:quickCheckAll +return [] + +runTests :: IO Bool +runTests = $quickCheckAll + +-- Run doctests independently +--runDoctests :: IO () +--runDoctests = doctest ["src"] + +-- Main function that runs both QuickCheck and Doctests +main :: IO () +main = do + --putStrLn "Running doctests..." + --runDoctests + + putStrLn "Running QuickCheck properties..." + allTestsPassed <- runTests + if allTestsPassed + then putStrLn "All tests passed." + else putStrLn "Some tests failed." \ No newline at end of file