From b2614d6ab6366a7c174f45c2fd5753a751f2646b Mon Sep 17 00:00:00 2001
From: Juri <1773075+2mol@users.noreply.github.com>
Date: Sat, 11 Jun 2022 17:19:54 +0200
Subject: [PATCH] vendorize pdfinfo (#55)

Co-authored-by: 2mol <2mol@users.noreply.github.com>
---
 pboy.cabal     |   5 +-
 src/Lib.hs     |   6 +-
 src/PDFInfo.hs | 180 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 187 insertions(+), 4 deletions(-)
 create mode 100644 src/PDFInfo.hs

diff --git a/pboy.cabal b/pboy.cabal
index d7c3553..4c7c315 100644
--- a/pboy.cabal
+++ b/pboy.cabal
@@ -27,6 +27,7 @@ library
   other-modules:
       Config
     , Lib
+    , PDFInfo
     , Paths_pboy
   hs-source-dirs:
       src
@@ -39,10 +40,12 @@ library
     , either
     , microlens
     , microlens-th
-    , pdfinfo
+    , mtl
     , process
+    , process-extras
     , text
     , time
+    , time-locale-compat
     , titlecase
     , vector
     , vty
diff --git a/src/Lib.hs b/src/Lib.hs
index caa4dc7..be32c73 100644
--- a/src/Lib.hs
+++ b/src/Lib.hs
@@ -29,7 +29,7 @@ import qualified System.Directory as Dir
 import           System.FilePath ((</>))
 import qualified System.FilePath as FilePath
 import qualified System.Process as P
-import qualified Text.PDF.Info as PDFI
+import qualified PDFInfo
 
 
 data FileInfo = FileInfo
@@ -70,7 +70,7 @@ isPdf fileInfo =
 
 fileNameSuggestions :: FilePath -> IO (Text, [Text])
 fileNameSuggestions file = do
-    pdfInfo <- PDFI.pdfInfo file
+    pdfInfo <- PDFInfo.pdfInfo file
 
     topLines <- getTopLines file
 
@@ -87,7 +87,7 @@ fileNameSuggestions file = do
 
         maybeTitle =
             Either.rightToMaybe pdfInfo
-                >>= PDFI.pdfInfoTitle
+                >>= PDFInfo.pdfInfoTitle
                 & fmap sanitize
                 >>= boolToMaybe lengthCheck
 
diff --git a/src/PDFInfo.hs b/src/PDFInfo.hs
new file mode 100644
index 0000000..d5f4070
--- /dev/null
+++ b/src/PDFInfo.hs
@@ -0,0 +1,180 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# OPTIONS -Wall #-}
+
+module PDFInfo
+    (-- * Reading PDF info
+      pdfInfo
+    , PDFInfo(..)
+    , PDFSize(..)
+    , PDFEncryptionInfo(..)
+    , PDFInfoError(..)
+    -- * Internals
+    , ParsePDFInfo
+    , runParse
+    , parse
+    , parseSize
+    , parseDate
+    , parseEncrypted
+    , readRight)
+    where
+
+import           Control.Applicative
+import           Control.Arrow
+import           Control.Exception as E
+import           Control.Monad.Except
+
+import           Data.Char (isSpace)
+import           Data.Text (Text)
+import qualified Data.Text as T
+import           Data.Time (UTCTime, parseTimeM)
+import           Data.Time.Locale.Compat (defaultTimeLocale)
+import           Prelude
+import           System.Exit
+import           System.Process.Text
+
+-- | A type representing the output from the pdfinfo command.
+data PDFInfo = PDFInfo {
+    pdfInfoTitle        :: !(Maybe Text)    -- ^ Title
+  , pdfInfoSubject      :: !(Maybe Text)    -- ^ Subject
+  , pdfInfoAuthor       :: !(Maybe Text)    -- ^ Author: E.g. Chris Done
+  , pdfInfoCreator      :: !(Maybe Text)    -- ^ Creator: E.g. Microsoft Office Word 2007
+  , pdfInfoProducer     :: !(Maybe Text)    -- ^ Producer: E.g. Microsoft Office Word 2007
+  , pdfInfoCreationDate :: !(Maybe UTCTime) -- ^ Creation Date
+  , pdfInfoModDate      :: !(Maybe UTCTime) -- ^ Modification Date
+  , pdfInfoTagged       :: !(Maybe Bool)    -- ^ Tagged?
+  , pdfInfoPages        :: !(Maybe Integer) -- ^ Pages: E.g. 238
+  , pdfInfoEncrypted    :: !(Maybe PDFEncryptionInfo) -- ^ Encryption information
+  , pdfInfoPageSize     :: !(Maybe PDFSize) -- ^ Page: E.g. 595.32 x 841.92 pts (A4)
+  , pdfInfoFileSize     :: !(Maybe Integer) -- ^ File: E.g. 4061737 bytes
+  , pdfInfoOptimized    :: !(Maybe Bool)    -- ^ Optimized?
+  , pdfInfoPDFVersion   :: !(Maybe Double)  -- ^ PDF: E.g. 1.5
+  } deriving Show
+
+-- | Possible things that can go wrong while reading the info.
+data PDFInfoError
+  = ParseError !String        -- ^ Couldn't parse a property value.
+  | ProcessFailure !Text      -- ^ Process exited with this stderr.
+  | ProcessError !IOException -- ^ Error to do with the pdfinfo process.
+  | NoMessage                 -- ^ No message given.
+  | SomeError String          -- ^ Some nonspecific error.
+  deriving Show
+
+-- | Size of the PDF in pts.
+data PDFSize = PDFSize { pdfSizeW :: !Float, pdfSizeH :: !Float }
+  deriving (Eq,Show)
+
+-- | Encryption and restricted permissions
+data PDFEncryptionInfo
+  -- | Not encrypted
+  = PDFNoEncryption
+  -- | Encrypted with possible permission restrictions
+  | PDFEncryption {
+      pdfCanPrint            :: !(Maybe Bool) -- ^ Can the file be printed?
+    , pdfCanCopy             :: !(Maybe Bool) -- ^ Can the file be copied?
+    , pdfCanChange           :: !(Maybe Bool) -- ^ Can the file be changed?
+    , pdfCanAddNotes         :: !(Maybe Bool) -- ^ Can notes be added?
+    , pdfEncryptionAlgorithm :: !(Maybe Text) -- ^ Encryption algorithm: e.g. unknown, RC4, AES, AES-256
+    }
+  deriving (Eq,Show)
+
+-- instance Error PDFInfoError where noMsg = NoMessage; strMsg = SomeError
+newtype ParsePDFInfo a = ParsePDFInfo { runParse :: Either PDFInfoError a }
+  deriving (Monad,Functor,MonadError PDFInfoError)
+instance Applicative ParsePDFInfo where (<*>) = ap; pure = pure
+
+-- | Run pdfinfo on the given file. Handles IO exceptions to do with
+-- running the process.
+pdfInfo :: MonadIO m => FilePath -> m (Either PDFInfoError PDFInfo)
+pdfInfo path = liftIO $ loadInfo `E.catch` ioErrorHandler where
+  loadInfo = do (code,out,err) <- readProcessWithExitCode "pdfinfo" ["-enc","UTF-8",path] ""
+                case code of
+                  ExitSuccess -> return (parse out)
+                  ExitFailure{} -> return (Left (ProcessFailure err))
+  ioErrorHandler = return . Left . ProcessError
+
+-- | Parse PDFInfo's output.
+parse :: Text -> Either PDFInfoError PDFInfo
+parse out = runParse $
+  PDFInfo <$> string props "Title"
+          <*> string props "Subject"
+          <*> string props "Author"
+          <*> string props "Creator"
+          <*> string props "Producer"
+          <*> date "CreationDate"
+          <*> date "ModDate"
+          <*> bool props "Tagged"
+          <*> integer "Pages"
+          <*> encrypted "Encrypted"
+          <*> size "Page size"
+          <*> integer "File size"
+          <*> bool props "Optimized"
+          <*> floating "PDF version"
+    where date = get parseDate
+          size = get parseSize
+          encrypted = get parseEncrypted
+          floating = readIt
+          integer = readIt
+          readIt :: Read a => Text -> ParsePDFInfo (Maybe a)
+          readIt = get readRight
+          props = map split . T.lines $ out
+          get = withProps props
+
+type Props = [(Text,Text)]
+
+-- | Look up a name in a finite map of "properties" and apply a (parsing)
+-- function.
+withProps :: Props -> (Text -> ParsePDFInfo a) -> Text -> ParsePDFInfo (Maybe a)
+withProps properties f name =
+  case lookup name properties of
+    Just ok -> catchError (Just <$> (f $ T.strip ok))
+                          (\_ -> return Nothing)
+    Nothing -> return Nothing
+
+split :: Text -> (Text, Text)
+split = second (T.drop 1) . T.span (/=':')
+
+string :: Props -> Text -> ParsePDFInfo (Maybe Text)
+string props = withProps props return
+
+bool :: Props -> Text -> ParsePDFInfo (Maybe Bool)
+bool props = withProps props $ \yes -> return $ yes == "yes"
+
+-- | Parse a page size. This is loosely defined.
+parseSize :: Text -> ParsePDFInfo PDFSize
+parseSize s =
+  case T.words s of
+    ((readRight -> Right x):"x":(readRight -> Right y):_) ->
+        return $ PDFSize x y
+    _ -> throwError $ ParseError $ "Unable to read size: " ++ show s
+
+-- | Parse a date according to pdfinfo's format.
+parseDate :: Text -> ParsePDFInfo UTCTime
+parseDate s =
+  case parseTimeM True defaultTimeLocale "%a %b %e %H:%M:%S %Y" (T.unpack s) of
+    Just ok -> return ok
+    Nothing -> throwError $ ParseError $ "Unable to parse date: " ++ show s
+
+-- | Parse encryption information according to pdfinfo's format.
+parseEncrypted :: Text -> ParsePDFInfo PDFEncryptionInfo
+parseEncrypted s =
+  case T.break isSpace s of
+    ("yes",rest) ->
+      PDFEncryption <$> bool props "print"
+                    <*> bool props "copy"
+                    <*> bool props "change"
+                    <*> bool props "addNotes"
+                    <*> string props "algorithm"
+      where
+        props = map split $ T.words $ T.filter (flip notElem ['(',')']) rest
+    ("no",_) -> return PDFNoEncryption
+    _ -> throwError $ ParseError $ "Unable to parse encryption: " ++ show s
+
+-- | Read a value, maybe, allow misc trailing data.
+readRight :: (MonadError PDFInfoError m,Read a) => Text -> m a
+readRight s =
+  case reads (T.unpack s) of
+    [(v,_)] -> return v
+    _ -> throwError $ ParseError $ "Couldn't read value: " ++ show s