Skip to content

Commit f130b33

Browse files
authored
Fix glob performance on large monorepos (purescript#1244)
The final patch goes back to the approach we had before purescript#1234, where we'd compose the glob-matching functions instead of keeping a list of ignores, and recompute the matchers when needed. The patch in purescript#1234 was not optimised, as the matchers were being recreated for every file encountered, and optimising that in the first two commits of this PR got us down to 2x of the performance pre-1234. That is unfortunately still not acceptable, so I reintroduced the function-composition approach, which is still prone to blowing the stack - the change here should reduce that risk, since instead of composing every line of gitignores as a separate matcher in the chain, we instead nest them in a single or block. That should dramatically reduce the size of the call chain.
1 parent 5634733 commit f130b33

File tree

4 files changed

+69
-43
lines changed

4 files changed

+69
-43
lines changed

src/Spago/Command/Build.purs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,9 @@ run opts = do
153153
if Array.null pedanticPkgs || opts.depsOnly then
154154
pure true
155155
else do
156-
logInfo $ "Looking for unused and undeclared transitive dependencies..."
156+
logInfo "Looking for unused and undeclared transitive dependencies..."
157157
eitherGraph <- Graph.runGraph globs opts.pursArgs
158+
logDebug "Decoded the output of `purs graph` successfully. Analyzing dependencies..."
158159
eitherGraph # either (prepareToDie >>> (_ $> false)) \graph -> do
159160
env <- ask
160161
checkResults <- map Array.fold $ for pedanticPkgs \(Tuple selected options) -> do

src/Spago/Glob.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import mm from 'micromatch';
22
import * as fsWalk from '@nodelib/fs.walk';
33

4-
export const testGlob = glob => mm.matcher(glob.include, {ignore: glob.ignore});
4+
export const testGlob = glob => mm.matcher(glob.include, { ignore: glob.ignore });
55

66
export const fsWalkImpl = Left => Right => respond => options => path => () => {
77
const entryFilter = entry => options.entryFilter(entry)();
@@ -13,4 +13,3 @@ export const fsWalkImpl = Left => Right => respond => options => path => () => {
1313
};
1414

1515
export const isFile = dirent => dirent.isFile();
16-

src/Spago/Glob.purs

Lines changed: 60 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,13 @@ import Control.Monad.Maybe.Trans (runMaybeT)
77
import Control.Monad.Trans.Class (lift)
88
import Data.Array as Array
99
import Data.Filterable (filter)
10-
import Data.Foldable (any, fold)
10+
import Data.Foldable (any, traverse_)
1111
import Data.String as String
12-
import Data.Traversable (traverse_)
12+
import Data.String as String.CodePoint
1313
import Effect.Aff as Aff
1414
import Effect.Ref as Ref
1515
import Node.FS.Sync as SyncFS
1616
import Node.Path as Path
17-
import Record as Record
18-
import Type.Proxy (Proxy(..))
1917

2018
type Glob =
2119
{ ignore :: Array String
@@ -30,8 +28,10 @@ splitGlob { ignore, include } = (\a -> { ignore, include: [ a ] }) <$> include
3028
type Entry = { name :: String, path :: String, dirent :: DirEnt }
3129
type FsWalkOptions = { entryFilter :: Entry -> Effect Boolean, deepFilter :: Entry -> Effect Boolean }
3230

31+
-- https://nodejs.org/api/fs.html#class-fsdirent
3332
foreign import data DirEnt :: Type
3433
foreign import isFile :: DirEnt -> Boolean
34+
3535
foreign import fsWalkImpl
3636
:: (forall a b. a -> Either a b)
3737
-> (forall a b. b -> Either a b)
@@ -40,32 +40,32 @@ foreign import fsWalkImpl
4040
-> String
4141
-> Effect Unit
4242

43-
gitignoreGlob :: String -> String -> Glob
44-
gitignoreGlob base =
43+
gitignoreFileToGlob :: FilePath -> String -> Glob
44+
gitignoreFileToGlob base =
4545
String.split (String.Pattern "\n")
4646
>>> map String.trim
4747
>>> Array.filter (not <<< or [ String.null, isComment ])
4848
>>> partitionMap
4949
( \line -> do
50-
let
51-
resolve a = Path.concat [ base, a ]
52-
pat a = withForwardSlashes $ resolve $ unpackPattern a
50+
let pattern lin = withForwardSlashes $ Path.concat [ base, gitignorePatternToGlobPattern lin ]
5351
case String.stripPrefix (String.Pattern "!") line of
54-
Just negated -> Left $ pat negated
55-
Nothing -> Right $ pat line
52+
Just negated -> Left $ pattern negated
53+
Nothing -> Right $ pattern line
5654
)
57-
>>> Record.rename (Proxy @"left") (Proxy @"ignore")
58-
>>> Record.rename (Proxy @"right") (Proxy @"include")
55+
>>> (\{ left, right } -> { ignore: left, include: right })
5956

6057
where
6158
isComment = isJust <<< String.stripPrefix (String.Pattern "#")
62-
leadingSlash = String.stripPrefix (String.Pattern "/")
63-
trailingSlash = String.stripSuffix (String.Pattern "/")
59+
dropSuffixSlash str = fromMaybe str $ String.stripSuffix (String.Pattern "/") str
60+
dropPrefixSlash str = fromMaybe str $ String.stripPrefix (String.Pattern "/") str
61+
62+
leadingSlash str = String.codePointAt 0 str == Just (String.CodePoint.codePointFromChar '/')
63+
trailingSlash str = String.codePointAt (String.length str - 1) str == Just (String.CodePoint.codePointFromChar '/')
6464

65-
unpackPattern :: String -> String
66-
unpackPattern pattern
67-
| Just a <- trailingSlash pattern = unpackPattern a
68-
| Just a <- leadingSlash pattern = a <> "/**"
65+
gitignorePatternToGlobPattern :: String -> String
66+
gitignorePatternToGlobPattern pattern
67+
| trailingSlash pattern = gitignorePatternToGlobPattern $ dropSuffixSlash pattern
68+
| leadingSlash pattern = dropPrefixSlash pattern <> "/**"
6969
| otherwise = "**/" <> pattern <> "/**"
7070

7171
fsWalk :: String -> Array String -> Array String -> Aff (Array Entry)
@@ -74,41 +74,63 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do
7474

7575
-- Pattern for directories which can be outright ignored.
7676
-- This will be updated whenver a .gitignore is found.
77-
ignoreMatcherRef :: Ref Glob <- Ref.new { ignore: [], include: ignorePatterns }
77+
ignoreMatcherRef :: Ref (String -> Boolean) <- Ref.new (testGlob { ignore: [], include: ignorePatterns })
7878

7979
-- If this Ref contains `true` because this Aff has been canceled, then deepFilter will always return false.
8080
canceled <- Ref.new false
8181

8282
let
83-
entryGitignore :: Entry -> Effect Unit
84-
entryGitignore entry =
85-
try (SyncFS.readTextFile UTF8 entry.path)
86-
>>= traverse_ \gitignore ->
87-
let
88-
base = Path.relative cwd $ Path.dirname entry.path
89-
glob = gitignoreGlob base gitignore
90-
pats = splitGlob glob
91-
patOk g = not $ any (testGlob g) includePatterns
92-
newPats = filter patOk pats
93-
in
94-
void $ Ref.modify (_ <> fold newPats) $ ignoreMatcherRef
83+
-- Update the ignoreMatcherRef with the patterns from a .gitignore file
84+
updateIgnoreMatcherWithGitignore :: Entry -> Effect Unit
85+
updateIgnoreMatcherWithGitignore entry = do
86+
let
87+
gitignorePath = entry.path
88+
-- directory of this .gitignore relative to the directory being globbed
89+
base = Path.relative cwd (Path.dirname gitignorePath)
90+
91+
try (SyncFS.readTextFile UTF8 entry.path) >>= traverse_ \gitignore -> do
92+
let
93+
gitignored = testGlob <$> (splitGlob $ gitignoreFileToGlob base gitignore)
94+
95+
-- Do not add `.gitignore` patterns that explicitly ignore the files
96+
-- we're searching for;
97+
--
98+
-- ex. if `includePatterns` is [".spago/p/aff-1.0.0/**/*.purs"],
99+
-- and `gitignored` is ["node_modules", ".spago"],
100+
-- then add "node_modules" to `ignoreMatcher` but not ".spago"
101+
wouldConflictWithSearch matcher = any matcher includePatterns
102+
103+
newMatchers = or $ filter (not <<< wouldConflictWithSearch) gitignored
104+
105+
-- Another possible approach could be to keep a growing array of patterns and
106+
-- regenerate the matcher on every gitignore. We have tried that (see #1234),
107+
-- and turned out to be 2x slower. (see #1242, and #1244)
108+
-- Composing functions is faster, but there's the risk of blowing the stack
109+
-- (see #1231) - when this was introduced in #1210, every match from the
110+
-- gitignore file would be `or`ed to the previous matcher, which would create
111+
-- a very long (linear) call chain - in this latest iteration we are `or`ing the
112+
-- new matchers together, then the whole thing with the previous matcher.
113+
-- This is still prone to stack issues, but we now have a tree so it should
114+
-- not be as dramatic.
115+
addMatcher currentMatcher = or [ currentMatcher, newMatchers ]
116+
117+
Ref.modify_ addMatcher ignoreMatcherRef
95118

96119
-- Should `fsWalk` recurse into this directory?
97120
deepFilter :: Entry -> Effect Boolean
98121
deepFilter entry = fromMaybe false <$> runMaybeT do
99122
isCanceled <- lift $ Ref.read canceled
100123
guard $ not isCanceled
101-
shouldIgnore <- lift $ testGlob <$> Ref.read ignoreMatcherRef
124+
shouldIgnore <- lift $ Ref.read ignoreMatcherRef
102125
pure $ not $ shouldIgnore $ Path.relative cwd entry.path
103126

104127
-- Should `fsWalk` retain this entry for the result array?
105128
entryFilter :: Entry -> Effect Boolean
106129
entryFilter entry = do
107-
when (isFile entry.dirent && entry.name == ".gitignore") (entryGitignore entry)
108-
ignorePat <- Ref.read ignoreMatcherRef
109-
let
110-
ignoreMatcher = testGlob ignorePat
111-
path = withForwardSlashes $ Path.relative cwd entry.path
130+
when (isFile entry.dirent && entry.name == ".gitignore") do
131+
updateIgnoreMatcherWithGitignore entry
132+
ignoreMatcher <- Ref.read ignoreMatcherRef
133+
let path = withForwardSlashes $ Path.relative cwd entry.path
112134
pure $ includeMatcher path && not (ignoreMatcher path)
113135

114136
options = { entryFilter, deepFilter }

test/Spago/Glob.purs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,14 @@ spec = Spec.around globTmpDir do
9292

9393
Spec.it "is stacksafe" \p -> do
9494
let
95-
chars = [ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k" ]
96-
-- 10,000-line gitignore
95+
chars = [ "a", "b", "c", "d", "e", "f", "g", "h" ]
96+
-- 4000-line gitignore
9797
words = [ \a b c d -> a <> b <> c <> d ] <*> chars <*> chars <*> chars <*> chars
9898
hugeGitignore = intercalate "\n" words
99+
-- Write it in a few places
99100
FS.writeTextFile (Path.concat [ p, ".gitignore" ]) hugeGitignore
101+
FS.writeTextFile (Path.concat [ p, "fruits", ".gitignore" ]) hugeGitignore
102+
FS.writeTextFile (Path.concat [ p, "fruits", "left", ".gitignore" ]) hugeGitignore
103+
FS.writeTextFile (Path.concat [ p, "fruits", "right", ".gitignore" ]) hugeGitignore
100104
a <- Glob.gitignoringGlob p [ "fruits/**/apple" ]
101105
Array.sort a `Assert.shouldEqual` [ "fruits/left/apple", "fruits/right/apple" ]

0 commit comments

Comments
 (0)