From 2a5e321964d984397c9139201d2be29158fd1e03 Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sat, 20 Apr 2019 14:23:21 +0200 Subject: [PATCH 1/9] fix performance on large pdf, fix folder creation --- Changelog.txt | 4 ++++ README.md | 6 +++--- Roadmap.md | 7 ++++--- src/Config.hs | 4 ++-- src/Lib.hs | 15 +++++++++------ 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index 5520c7b..4b506ce 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,5 +1,9 @@ Changelog +1.2, 2019-04-20 + - faster performance on large pdfs by only parsing the first couple of pages. + - won't create the default directories anymore, until the first file is moved. + 1.1, 2019-04-19 - "first start" screen, showing the config file location. - help screen. diff --git a/README.md b/README.md index bad93f0..914a7ab 100644 --- a/README.md +++ b/README.md @@ -61,13 +61,13 @@ $ nix-env -if https://github.com/2mol/pboy/tarball/master # Config -TODO: config file location & format changed +Paperboy creates a `pboy.ini` in your XDG config directory. This is probably in `~/.config/pboy/pboy.ini`, the welcome or help screen will tell you. Use this to change your library and incoming folders, as well as to specify whether you want to move the imported files or just copy them. -Paperboy creates a `.pboy.toml` in your home directory. Use this to change your library and incoming folders, as well as to specify whether you want to move the imported files or just copy them. +**Note**: The config file location & format changed in version 1.1. Sorry to existing users, but the update should be trivial. Simply tweak the inbox and library folders in the new config. You can then delete `~/.pboy.toml`. # Current Limitations -For large files, `pdftotext` can take quite a long time to parse the entire document, which is stupid because we're only using the first couple of lines for file name suggestions. +Paperboy doesn't do anything fancy with providing renaming patterns yet. For example, some people requested to be able to specify a format like `author-document_name-date.pdf`, others have asked if they could compose multiple suggestions into one. I haven't figured out a way to do this while keeping the UI simple and straightforward, so the idea needs a bit of design work first. # Contribute diff --git a/Roadmap.md b/Roadmap.md index 06b68cb..159468e 100644 --- a/Roadmap.md +++ b/Roadmap.md @@ -3,8 +3,8 @@ ## Future releases -- [ ] don't create any folders unless the first file import is triggered. -- [ ] improve performance of parsing large pdfs +- [x] don't create any folders unless the first file import is triggered. +- [x] improve performance of parsing large pdfs - [ ] ability to mark documents as 'to-read'. - [ ] warn when importing an already existing filename. - [ ] refresh if any files move outside of the application. @@ -45,10 +45,11 @@ want: maybe: +- recursively list both inbox and library. - try out circleCI - move away from ghr for releases and use inbuilt travis uploads instead. - use https://github.com/tfausak/github-release - subfolders. - tag files to sync to phone (or kindle). - try various open commands with `asum` from `Data.Foldable` -- option to switch between underscores and spaces. \ No newline at end of file +- option to switch between underscores and spaces. diff --git a/src/Config.hs b/src/Config.hs index 533b389..931f53d 100644 --- a/src/Config.hs +++ b/src/Config.hs @@ -64,7 +64,7 @@ defaultConfig = readConfigData defaultConfigData createConfig :: Path Abs File -> IO () createConfig cpath = do - _ <- Path.createDirIfMissing True (Path.parent cpath) + _ <- Path.ensureDir (Path.parent cpath) TIO.writeFile (Path.fromAbsFile cpath) configContent where configContent = @@ -109,7 +109,7 @@ configSpec = & C.comment [ "The folder to watch for incoming files." , "Paths are relative to your home directory, but absolute paths are valid too." - , "I will watch multiple folders if you give me a comma-separated list" + , "I will watch multiple folders if you give me a comma-separated list." ] libraryDirD .= C.field "library" C.string diff --git a/src/Lib.hs b/src/Lib.hs index b6827a8..ed8830b 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -39,10 +39,12 @@ data FileInfo = FileInfo listFiles :: Path Abs Dir -> IO [FileInfo] listFiles path = do - Path.ensureDir path - files <- snd <$> Path.listDir path - fileInfos <- mapM getFileInfo files - pure $ filter isPdf fileInfos + dirExists <- Path.doesDirExist path + if dirExists then do + files <- snd <$> Path.listDir path + fileInfos <- mapM getFileInfo files + pure $ filter isPdf fileInfos + else pure [] sortFileInfoByDate :: [FileInfo] -> [FileInfo] @@ -98,7 +100,7 @@ fileNameSuggestions file = do getTopLines :: Path Abs File -> IO [Text] getTopLines file = do plainTextContent <- - E.try (P.readProcess "pdftotext" [Path.fromAbsFile file, "-"] "") + E.try (P.readProcess "pdftotext" [Path.fromAbsFile file, "-", "-f", "1", "-l", "4"] "") :: IO (Either SomeException String) let topLines = @@ -158,7 +160,8 @@ finalFileName text = fileFile :: Config -> Text -> Path Abs File -> IO () fileFile conf newFileName file = do - newFile <- Path.parseRelFile (T.unpack newFileName ++ Path.fileExtension file) + _ <- Path.ensureDir (conf ^. Config.libraryDir) + newFile <- Path.parseRelFile (T.unpack newFileName <> Path.fileExtension file) let newFilePath = conf ^. Config.libraryDir newFile From de304998ce5d10a095773904a6c517c91142ab0b Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sat, 20 Apr 2019 14:36:55 +0200 Subject: [PATCH 2/9] readme fixes and updates --- README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 914a7ab..16a7337 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ If you're a Homebrew user, you can install the latest version and its dependenci $ brew install 2mol/tools/pboy ``` -## Get the binary release +## Linux/Mac binary release Download the archive for your operating system from [https://github.com/2mol/pboy/releases](https://github.com/2mol/pboy/releases). Extract and install it with @@ -39,11 +39,17 @@ $ mv pboy ~/.local/bin/ For the latter to work, `~/.local/bin/` needs to exist and be in your `PATH`. Alternatively, put it in `/usr/local/bin`. -## Stack +## Linux, any distro + +I am still looking to package Paperboy for Debian/Ubuntu, Arch/Manjaro, Fedora, Doge Linux, or whatever else people install these days. + +Any pointers or help with regards to generate `.deb`, `.rpm`, AUR `PKGBUILD`, etc is appreciated. Ideally this could be mostly automated in CI, in the end Paperboy is just a single binary with a dependency or two. How do other packages do it? If you got a good example or link, open a GitHub issue! + +## Cabal/Stack Make sure you have `poppler` installed, which will provide both `pdftotext` and `pdfinfo`. On Linux, install `poppler` with your package manager of choice. If you are on Mac and using Homebrew you can do `brew install poppler`. -Assuming you have stack, the following will compile, then install the `pboy` executable in your `.local/bin`: +Assuming you have cabal or stack, the following will compile, then install the `pboy` executable in your `.local/bin`: ``` $ git clone git@github.com:2mol/pboy.git @@ -51,6 +57,8 @@ $ cd pboy $ stack install ``` +Replace `stack install` with `cabal new-install` at your leisure. + ## Nix If you have Nix, then you can install `pboy` with a single command: From c54c75ec2bb5a0fba84197e446a378e2215fb479 Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sat, 20 Apr 2019 14:37:26 +0200 Subject: [PATCH 3/9] a word --- Changelog.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index 4b506ce..e3bf494 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -2,7 +2,7 @@ Changelog 1.2, 2019-04-20 - faster performance on large pdfs by only parsing the first couple of pages. - - won't create the default directories anymore, until the first file is moved. + - won't create the default directories until the first file is moved. 1.1, 2019-04-19 - "first start" screen, showing the config file location. From 7cdbcdbb533b2ba8049f1685320a5a87c0eb17fb Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sat, 20 Apr 2019 14:48:20 +0200 Subject: [PATCH 4/9] 1.2 --- pboy.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pboy.cabal b/pboy.cabal index 60fb8a5..b937b1c 100644 --- a/pboy.cabal +++ b/pboy.cabal @@ -1,7 +1,7 @@ cabal-version: 2.4 name: pboy -version: 1.1 +version: 1.2 synopsis: a small .pdf management utility description: Please see the README on Github at homepage: https://github.com/2mol/pboy#readme From 967f6edda8329a1de8484c7f1ef2d38fd1733b4c Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sat, 20 Apr 2019 14:50:56 +0200 Subject: [PATCH 5/9] license and roadmap --- Roadmap.md | 11 +++++++++-- pboy.cabal | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Roadmap.md b/Roadmap.md index 159468e..bda3387 100644 --- a/Roadmap.md +++ b/Roadmap.md @@ -3,8 +3,6 @@ ## Future releases -- [x] don't create any folders unless the first file import is triggered. -- [x] improve performance of parsing large pdfs - [ ] ability to mark documents as 'to-read'. - [ ] warn when importing an already existing filename. - [ ] refresh if any files move outside of the application. @@ -12,6 +10,13 @@ ## Done +1.2 + +- [x] don't create any folders unless the first file import is triggered. +- [x] improve performance of parsing large pdfs + +1.1 + - [x] show version number in the UI. - [x] use xdg path for writing config file instead of home direcory. - [x] use http://hackage.haskell.org/package/path for filepaths. @@ -26,6 +31,8 @@ - [x] ability to specify multiple folders as inboxes. - [x] help screen +pre 1.1 + - [x] use nix for CI and releases. - [x] compiled releases for Mac & Linux so that people other than Haskellers with 24Gb worth of stack/GHC installs can actually use this. - [x] homebrew for mac diff --git a/pboy.cabal b/pboy.cabal index b937b1c..50878f5 100644 --- a/pboy.cabal +++ b/pboy.cabal @@ -15,6 +15,7 @@ build-type: Simple extra-source-files: README.md Changelog.txt + LICENSE source-repository head type: git From e6906ce7a7c5f294c44222a1731bfce687fc9ba0 Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sat, 20 Apr 2019 15:18:12 +0200 Subject: [PATCH 6/9] travis fun again --- .travis.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index af4ba85..2bb0f95 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,14 +2,6 @@ matrix: include: - language: nix sudo: true - os: osx - - script: nix-build - - - language: nix - sudo: true - os: linux - script: nix-build - language: generic From 5ce24ac0838e9e517db42862d6ba4e1ad5c12a92 Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sat, 20 Apr 2019 15:53:18 +0200 Subject: [PATCH 7/9] circleci+stack --- .circleci/config.yml | 23 +++++++++++++++++++++++ stack.yaml | 9 ++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..1d97f22 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,23 @@ +version: 2.1 +jobs: + build: + docker: + - image: fpco/stack-build:lts + steps: + - checkout + - restore_cache: + name: Restore Cached Dependencies + keys: + - pboy-{{ checksum "pboy.cabal" }}-{{ checksum "stack.yaml" }} + - run: + name: Install executable + command: stack install + - save_cache: + name: Cache Dependencies + key: pboy-{{ checksum "pboy.cabal" }}-{{ checksum "stack.yaml" }} + paths: + - "/root/.stack" + - ".stack-work" + - store_artifacts: + path: ~/.local/bin/pboy + destination: pboy \ No newline at end of file diff --git a/stack.yaml b/stack.yaml index b35c893..bd41c99 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,3 +1,10 @@ -resolver: nightly-2019-03-25 +resolver: lts-13.17 packages: - . + +extra-deps: +- brick-0.47@sha256:4936c50acfdf09620dad5217fb384fc0d59626f75abed8b48250b419ec2ab623 +- config-ini-0.2.4.0@sha256:38a6d484d471c6fac81445de2eac8c4e8c82760962fca5491ae1c3bfca9c4047 +- data-clist-0.1.2.2@sha256:4d70add0a200a178853cd37c6469101bac3c36aebb3aa9c503ff225211b1a8c9 +- text-zipper-0.10.1@sha256:8b73a97a3717a17df9b0a722b178950c476ff2268ca5c583e99d010c94af849e +- word-wrap-0.4.1@sha256:f72233b383ef569c557bfd9812cbb8e306c415ce509082c0bd15ee51c0239ccc \ No newline at end of file From db2ce5805df2f50f573b23b44568e28ca1dbb5aa Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sun, 21 Apr 2019 12:47:30 +0200 Subject: [PATCH 8/9] credit to my reddit friends for performance issue --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 16a7337..3b6ec49 100644 --- a/README.md +++ b/README.md @@ -85,5 +85,6 @@ You're very welcome to suggest new features or open issues. See the Roadmap http - [brick](https://github.com/jtdaugherty/brick) is a lovely way to a write a command-line UI. - [nmattia](https://github.com/nmattia) did the work to get Paperboy to build with [nix](https://github.com/NixOS/nix) and patiently explained some of the basics to me. +- OsugiSakae and bri-an on reddit helped me with an annoying performance issue! The name 'Paperboy' is a reference to [this game](https://en.wikipedia.org/wiki/Paperboy_(video_game)), which I had for the NES and never quite mastered. From 3ab6978a9b3a291082a489fdbddcf691a7ea6cd6 Mon Sep 17 00:00:00 2001 From: 2mol <2mol@users.noreply.github.com> Date: Sun, 21 Apr 2019 13:52:32 +0200 Subject: [PATCH 9/9] publication date --- Changelog.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index e3bf494..c66c3c7 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,6 +1,6 @@ Changelog -1.2, 2019-04-20 +1.2, 2019-04-21 - faster performance on large pdfs by only parsing the first couple of pages. - won't create the default directories until the first file is moved.