Skip to content

Commit

Permalink
cool, sloppy quicktest of search + algo tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
someodd committed Oct 26, 2024
1 parent 5810fcb commit d73b7a3
Show file tree
Hide file tree
Showing 7 changed files with 402 additions and 58 deletions.
12 changes: 11 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,14 @@ It works like this:

* double-check the `CHANGELOG.md`...

1. check the github action for release and then check the subsequent release cut and the packages attached
1. check the github action for release and then check the subsequent release cut and the packages attached

## Tests

Use doctests for simple unit tests (you can even use doctests for property tests), but use
QuickCheck tests in `test/` to test a module's public functions/exports.

The purpose of the actual tests in `tests/` is not to test the minutia of how a goal is
accomplished (all the private functions and such behind a public interface/function), but
instead to test that the public interface actually accomplishes what is desirable. For a
good example of this, see the test(s) for the search functionality.
59 changes: 58 additions & 1 deletion bore.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cabal-version: 2.2
-- see: https://github.com/sol/hpack

name: bore
version: 0.17.0.0
version: 0.18.0.0
synopsis: Build gopherholes.
description: Static site builder, but for gopherholes. Manage phlogs with tags, use the Markdown renderer and Mustache templating system.
category: Network
Expand Down Expand Up @@ -155,3 +155,60 @@ executable bore
, xml-conduit-writer
, yaml
default-language: GHC2021

test-suite bore-test
type: exitcode-stdio-1.0
main-is: Spec.hs
other-modules:
Paths_bore
autogen-modules:
Paths_bore
hs-source-dirs:
test
ghc-options: -threaded -rtsopts -with-rtsopts=-N -Wall -Werror -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
QuickCheck
, aeson
, attoparsec
, base
, bore
, bytestring
, commonmark
, containers
, data-default
, directory
, doctest
, edit-distance
, fast-logger
, filepath
, filepath-bytestring
, filepattern
, frontmatter
, fsnotify
, fuzzy-dates
, hashable
, hashmap
, hourglass
, mtl
, mustache
, neat-interpolation
, network-uri
, optparse-applicative
, parsec
, raw-strings-qq
, socket
, spacecookie
, split
, systemd
, temporary
, text
, time
, tomland
, unix
, unordered-containers
, vector
, word-wrap
, xml-conduit
, xml-conduit-writer
, yaml
default-language: GHC2021
13 changes: 13 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,16 @@ executables:
- DeriveAnyClass
- OverloadedRecordDot
default-language: GHC2021

tests:
bore-test:
main: Spec.hs
source-dirs: test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
- bore
- QuickCheck
- text
- temporary
- doctest
default-language: GHC2021
78 changes: 76 additions & 2 deletions src/Bore/FileLayout.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,23 @@ Also includes tools for dealing with file paths and dealing with the file system
rudimentary file operations that are only specific to project layout, in other words, do
things according to project layout rules, and nothing more specific.
Also handles some things to do with Gopher selectors.
-}

module Bore.FileLayout where

import Control.Monad (filterM, when, forM_)
import System.Directory (listDirectory, doesDirectoryExist, copyFile, createDirectoryIfMissing, canonicalizePath, removeDirectoryRecursive, removeFile)
import System.FilePath ((</>), makeRelative, takeFileName, takeDirectory)
import System.FilePath ((</>), makeRelative, takeFileName, takeDirectory, isValid, isRelative, dropExtension)
import qualified Data.Text as Text
import qualified Data.Text.IO as TextIO
import Data.List (sortOn)
import Data.Ord (Down(..))
import Bore.Text.Clean (cleanText)
import Data.Text.Encoding (encodeUtf8)
import qualified Data.ByteString as B
import qualified Data.Text as T

{- | Directory which "gets left alone" during the build process, i.e., it won't get
cleared out of output.
Expand Down Expand Up @@ -244,4 +252,70 @@ resetOutputDirectory outputDir = do
isDir <- doesDirectoryExist itemPath
if isDir
then removeDirectoryRecursive itemPath
else removeFile itemPath
else removeFile itemPath

-- | Check if what's supposed to be a filename is safe and sane.
--
-- Example:
-- >>> isSafeFilename "file.txt"
-- True
-- >>> isSafeFilename "../file.txt"
-- False
-- >>> isSafeFilename "file.txt/"
-- False
-- >>> isSafeFilename "file.txt/.."
-- False
-- >>> isSafeFilename "."
-- False
-- >>> isSafeFilename "/dir/file.txt"
-- False
-- >>> isSafeFilename "file"
-- True
isSafeFilename :: FilePath -> Bool
isSafeFilename name =
isValid name -- Checks if it contains only valid characters
&& not (null name) -- Ensures it’s not empty
&& not (null $ takeFileName name) -- Ensures it’s not just a directory
&& takeFileName name == name -- ensure it doesn't have a directory traversal
&& isRelative name -- Ensures it's a relative path
&& name /= "." && name /= ".." -- Disallows "." and ".."
&& filenameByteLength name <= 255 -- Ensures it's not too long

-- | Get the words/tokens from a selector, and normalize them.
--
-- Example:
-- >>> selectorWords "hello_world-foo bar"
-- ["hello","world","foo","bar"]
selectorWords :: FilePath -> [Text.Text]
selectorWords = pathWords . Text.toLower . cleanText . Text.pack

-- | Get the byte length of a filename in UTF-8 encoding
filenameByteLength :: String -> Int
filenameByteLength filename = B.length . encodeUtf8 $ T.pack filename

-- | Get the words/tokens from a path.
--
-- Also enforces a maximum length.
--
-- Differs from `selectorWords` in that it's not focused on cleaning the text of any
-- ineappropriate characters.
--
-- Example:
-- >>> pathWords "hello_world-foo bar"
-- ["hello","world","foo","bar"]
-- >>> pathWords "hello/word/foo.gopher.txt"
-- ["hello","word","foo"]
pathWords :: Text.Text -> [Text.Text]
pathWords = Text.split (`elem` splitChars) . dropExtensionSpecial
where
splitChars = [' ', '_', '-', '.', '/']
sortedExtensions = sortOn (Down . Text.length) (map Text.pack onlyParse)
-- Drop the onlyParse extensions if it exists, otherwise just use dropExtension from FilePath
dropExtensionSpecial :: Text.Text -> Text.Text
dropExtensionSpecial fp =
let
fpNoExt = dropExtension (Text.unpack fp) -- Remove last extension if no specific match
in
case filter (`Text.isSuffixOf` fp) sortedExtensions of
(ext:_) -> Text.dropEnd (Text.length ext) fp
[] -> Text.pack fpNoExt
Loading

0 comments on commit d73b7a3

Please sign in to comment.