Skip to content

Commit 72b2e61

Browse files
authored
Fix slow lockfile generation (purescript#1261)
* Memoize and combine queries to speed up lockfile generation * Add comments and remove pointless parTraverse * Re-add swallowed error and remove accidental formatting in foreign mod * Lockfile gen speed review feedback - naming and comments * Replace singular metadata query
1 parent 5efaad1 commit 72b2e61

File tree

4 files changed

+169
-48
lines changed

4 files changed

+169
-48
lines changed

src/Spago/Command/Fetch.purs

Lines changed: 99 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ import Data.Array.NonEmpty as NEA
2222
import Data.Codec.JSON as CJ
2323
import Data.Codec.JSON.Common as CJ.Common
2424
import Data.Either as Either
25+
import Data.Filterable (filterMap)
2526
import Data.HTTP.Method as Method
2627
import Data.Int as Int
28+
import Data.List as List
2729
import Data.Map as Map
2830
import Data.Newtype (wrap)
2931
import Data.Set as Set
@@ -279,16 +281,40 @@ type LockfileBuilderResult =
279281
, packages :: Map PackageName Lock.LockEntry
280282
}
281283

284+
lookupInCache :: forall a k v. Ord k => k -> Ref.Ref (Map k v) -> Spago a (Maybe v)
285+
lookupInCache key cacheRef = liftEffect $ Ref.read cacheRef >>= Map.lookup key >>> pure
286+
287+
updateCache :: forall a k v. Ord k => k -> v -> Ref.Ref (Map k v) -> Spago a Unit
288+
updateCache key value cacheRef = liftEffect $ Ref.modify_ (Map.insert key value) cacheRef
289+
282290
writeNewLockfile :: forall a. String -> PackageTransitiveDeps -> Spago (FetchEnv a) PackageTransitiveDeps
283291
writeNewLockfile reason allTransitiveDeps = do
284292
logInfo $ reason <> ", generating it..."
285293
{ workspace } <- ask
294+
295+
-- All these Refs are needed to memoise Db and file reads
296+
packageDependenciesCache <- liftEffect $ Ref.new Map.empty
297+
gitRefCache <- liftEffect $ Ref.new Map.empty
298+
metadataRefCache <- liftEffect $ Ref.new Map.empty
286299
let
287-
processPackage :: LockfileBuilderResult -> Tuple PackageName (Tuple PackageName Package) -> Spago (FetchEnv a) LockfileBuilderResult
288-
processPackage result (Tuple workspacePackageName (Tuple dependencyName dependencyPackage)) = do
289-
(packageDependencies :: Array PackageName) <- (Array.fromFoldable <<< Map.keys <<< fromMaybe Map.empty)
290-
<$> getPackageDependencies dependencyName dependencyPackage
300+
memoisedGetPackageDependencies :: PackageName -> Package -> Spago (FetchEnv a) (Maybe (Map PackageName Range))
301+
memoisedGetPackageDependencies packageName package = do
302+
lookupInCache packageName packageDependenciesCache >>=
303+
case _ of
304+
Just cached -> do
305+
pure cached
306+
Nothing -> do
307+
-- Not cached. Compute it, write to ref, return it
308+
res <- getPackageDependencies packageName package
309+
updateCache packageName res packageDependenciesCache
310+
pure res
311+
312+
processPackage :: Map PackageName _ -> LockfileBuilderResult -> Tuple PackageName (Tuple PackageName Package) -> Spago (FetchEnv a) LockfileBuilderResult
313+
processPackage registryIntegrityMap result (Tuple workspacePackageName (Tuple dependencyName dependencyPackage)) = do
291314
let
315+
getDeps = (Array.fromFoldable <<< Map.keys <<< fromMaybe Map.empty)
316+
<$> memoisedGetPackageDependencies dependencyName dependencyPackage
317+
292318
updatePackage r package = (updateWorkspacePackage r)
293319
{ packages = Map.insert dependencyName package r.packages }
294320
updateWorkspacePackage r = r
@@ -303,28 +329,84 @@ writeNewLockfile reason allTransitiveDeps = do
303329

304330
case dependencyPackage of
305331
WorkspacePackage _pkg -> pure $ updateWorkspacePackage result
332+
306333
GitPackage gitPackage -> do
307334
let packageLocation = Config.getPackageLocation dependencyName dependencyPackage
308-
Git.getRef (Just packageLocation) >>= case _ of
309-
Left err -> die err -- TODO maybe not die here?
310-
Right rev -> pure $ updatePackage result $ FromGit { rev, dependencies: packageDependencies, url: gitPackage.git, subdir: gitPackage.subdir }
335+
lookupInCache packageLocation gitRefCache >>= case _ of
336+
Nothing ->
337+
-- Get the ref and update the cache
338+
Git.getRef (Just packageLocation) >>= case _ of
339+
Left err -> die err -- TODO maybe not die here?
340+
Right rev -> do
341+
dependencies <- getDeps
342+
let
343+
lockEntry =
344+
FromGit { rev, dependencies, url: gitPackage.git, subdir: gitPackage.subdir }
345+
updateCache packageLocation lockEntry gitRefCache
346+
pure $ updatePackage result lockEntry
347+
Just entry -> pure $ updatePackage result entry
348+
311349
RegistryVersion version -> do
312-
metadata <- Registry.getMetadata dependencyName
313-
registryVersion <- case (metadata >>= (\(Metadata meta) -> Either.note "Didn't find version in the metadata file" $ Map.lookup version meta.published)) of
314-
Left err -> die $ "Couldn't read metadata, reason:\n " <> err
315-
Right { hash: integrity } ->
316-
pure { version, integrity, dependencies: packageDependencies }
317-
pure $ updatePackage result $ FromRegistry registryVersion
350+
lookupInCache dependencyName metadataRefCache >>= case _ of
351+
Nothing -> do
352+
registryVersion <- FromRegistry <$> case Map.lookup dependencyName registryIntegrityMap of
353+
-- This shouldn't be Nothing because it's already handled when building the integrity map below
354+
Nothing -> die $ "Couldn't read metadata"
355+
Just integrity -> do
356+
dependencies <- getDeps
357+
pure { version, integrity, dependencies }
358+
updateCache dependencyName registryVersion metadataRefCache
359+
pure $ updatePackage result registryVersion
360+
Just entry -> do
361+
pure $ updatePackage result entry
362+
318363
LocalPackage { path } -> do
319-
pure $ updatePackage result $ FromPath { path, dependencies: packageDependencies }
364+
dependencies <- getDeps
365+
pure $ updatePackage result $ FromPath { path, dependencies }
320366

321367
let
322368
toArray :: forall k v. Map k v -> Array (Tuple k v)
323369
toArray = Map.toUnfoldable
370+
allDependencies = foldMap sequence $ toArray $ map toArray allTransitiveDeps
371+
372+
-- Fetch the Registry metadata in one go for all required packages
373+
let
374+
uniqueRegistryPackageNames = Array.nub $ filterMap
375+
( \(Tuple _ (Tuple dependencyName dependencyPackage)) -> case dependencyPackage of
376+
RegistryVersion _ -> Just dependencyName
377+
_ -> Nothing
378+
)
379+
allDependencies
380+
metadataMap <- Registry.getMetadataForPackages uniqueRegistryPackageNames >>= case _ of
381+
Left err -> die $ "Couldn't read metadata, reason:\n " <> err
382+
Right ms -> pure ms
383+
384+
(registryVersions :: Map PackageName Sha256) <- Map.fromFoldable <<< Array.catMaybes <$>
385+
( traverse
386+
( \(Tuple _ (Tuple dependencyName dependencyPackage)) -> case dependencyPackage of
387+
RegistryVersion version -> do
388+
let metadata = Map.lookup dependencyName metadataMap
389+
case (metadata >>= (\(Metadata meta) -> Map.lookup version meta.published)) of
390+
Nothing | isNothing metadata ->
391+
die $ "Couldn't read metadata for " <> PackageName.print dependencyName
392+
Nothing ->
393+
die $ "Couldn't read metadata for " <> PackageName.print dependencyName
394+
<> ": didn't find version in the metadata file"
395+
Just { hash: integrity } ->
396+
pure $ Just $ dependencyName /\ integrity
397+
_ -> pure Nothing
398+
)
399+
$ allDependencies
400+
)
401+
324402
({ packages, workspacePackages } :: LockfileBuilderResult) <-
325-
Array.foldM processPackage
326-
{ workspacePackages: Map.fromFoldable $ map Config.workspacePackageToLockfilePackage (Config.getWorkspacePackages workspace.packageSet), packages: Map.empty }
327-
(foldMap sequence $ toArray $ map toArray allTransitiveDeps)
403+
-- NOTE! We used to have `Array.foldM` here, but it was significantly slower
404+
-- (~10ms vs 6s on a very large project)
405+
List.foldM (processPackage registryVersions)
406+
{ workspacePackages: Map.fromFoldable $ map Config.workspacePackageToLockfilePackage (Config.getWorkspacePackages workspace.packageSet)
407+
, packages: Map.empty
408+
}
409+
$ List.fromFoldable allDependencies
328410

329411
let
330412
lockfile =

src/Spago/Db.js

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,12 @@ export const removeManifestImpl = (db, name, version) => {
107107
db.prepare("DELETE FROM package_manifests WHERE name = ? AND version = ?").run(name, version);
108108
}
109109

110-
export const getMetadataImpl = (db, name) => {
111-
const row = db
112-
.prepare("SELECT * FROM package_metadata WHERE name = ? LIMIT 1")
113-
.get(name);
114-
return row;
115-
}
116-
117110
export const insertMetadataImpl = (db, name, metadata, last_fetched) => {
118111
db.prepare("INSERT OR REPLACE INTO package_metadata (name, metadata, last_fetched) VALUES (@name, @metadata, @last_fetched)").run({ name, metadata, last_fetched });
119112
}
113+
114+
export const getMetadataForPackagesImpl = (db, names) => {
115+
// There can be a lot of package names here, potentially hitting the max number of sqlite parameters, so we use json to bypass this
116+
const query = db.prepare("SELECT * FROM package_metadata WHERE name IN (SELECT value FROM json_each(?));");
117+
return query.all(JSON.stringify(names));
118+
};

src/Spago/Db.purs

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ module Spago.Db
88
, getLastPull
99
, getManifest
1010
, getMetadata
11+
, getMetadataForPackages
1112
, insertManifest
1213
, insertMetadata
1314
, insertPackageSet
@@ -26,6 +27,7 @@ import Data.Codec.JSON.Record as CJ.Record
2627
import Data.DateTime (Date, DateTime(..))
2728
import Data.DateTime as DateTime
2829
import Data.Either as Either
30+
import Data.Filterable (filterMap)
2931
import Data.Formatter.DateTime as DateTime.Format
3032
import Data.Map as Map
3133
import Data.Nullable (Nullable)
@@ -100,18 +102,28 @@ insertManifest :: Db -> PackageName -> Version -> Manifest -> Effect Unit
100102
insertManifest db packageName version manifest = Uncurried.runEffectFn4 insertManifestImpl db (PackageName.print packageName) (Version.print version) (printJson Manifest.codec manifest)
101103

102104
getMetadata :: Db -> PackageName -> Effect (Maybe Metadata)
103-
getMetadata db packageName = do
104-
maybeMetadataEntry <- Nullable.toMaybe <$> Uncurried.runEffectFn2 getMetadataImpl db (PackageName.print packageName)
105+
getMetadata db packageName =
106+
getMetadataForPackages db [ packageName ]
107+
<#> Map.lookup packageName
108+
109+
getMetadataForPackages :: Db -> Array PackageName -> Effect (Map PackageName Metadata)
110+
getMetadataForPackages db packageNames = do
111+
metadataEntries <- Uncurried.runEffectFn2 getMetadataForPackagesImpl db (PackageName.print <$> packageNames)
105112
now <- Now.nowDateTime
106-
pure $ do
107-
metadataEntry <- maybeMetadataEntry
108-
lastFetched <- Either.hush $ DateTime.Format.unformat Internal.Format.iso8601DateTime metadataEntry.last_fetched
109-
-- if the metadata is older than 15 minutes, we consider it stale
110-
case DateTime.diff now lastFetched of
111-
Minutes n | n <= 15.0 -> do
112-
metadata <- Either.hush $ parseJson Metadata.codec metadataEntry.metadata
113-
pure metadata
114-
_ -> Nothing
113+
pure
114+
$ metadataEntries
115+
#
116+
( filterMap \metadataEntry -> do
117+
packageName <- hush $ PackageName.parse metadataEntry.name
118+
lastFetched <- Either.hush $ DateTime.Format.unformat Internal.Format.iso8601DateTime metadataEntry.last_fetched
119+
-- if the metadata is older than 15 minutes, we consider it stale
120+
case DateTime.diff now lastFetched of
121+
Minutes n | n <= 15.0 -> do
122+
metadata <- Either.hush $ parseJson Metadata.codec metadataEntry.metadata
123+
pure $ packageName /\ metadata
124+
_ -> Nothing
125+
)
126+
# Map.fromFoldable
115127

116128
insertMetadata :: Db -> PackageName -> Metadata -> Effect Unit
117129
insertMetadata db packageName metadata@(Metadata { unpublished }) = do
@@ -246,4 +258,6 @@ foreign import removeManifestImpl :: EffectFn3 Db String String Unit
246258

247259
foreign import getMetadataImpl :: EffectFn2 Db String (Nullable MetadataEntryJs)
248260

261+
foreign import getMetadataForPackagesImpl :: EffectFn2 Db (Array String) (Array MetadataEntryJs)
262+
249263
foreign import insertMetadataImpl :: EffectFn4 Db String String String Unit

src/Spago/Registry.purs

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ module Spago.Registry
77
, findPackageSet
88
, getManifestFromIndex
99
, getMetadata
10+
, getMetadataForPackages
1011
, getRegistryFns
1112
, listMetadataFiles
1213
, listPackageSets
@@ -62,6 +63,7 @@ type RegistryEnv a = Record (RegistryEnvRow a)
6263
type RegistryFunctions =
6364
{ getManifestFromIndex :: PackageName -> Version -> Spago (LogEnv ()) (Maybe Manifest)
6465
, getMetadata :: PackageName -> Spago (LogEnv ()) (Either String Metadata)
66+
, getMetadataForPackages :: Array PackageName -> Spago (LogEnv ()) (Either String (Map PackageName Metadata))
6567
, findPackageSet :: Maybe Version -> Spago (PreRegistryEnv ()) Version
6668
, listPackageSets :: Spago (PreRegistryEnv ()) (Array Db.PackageSet)
6769
, listMetadataFiles :: Spago (LogEnv ()) (Array String)
@@ -74,6 +76,12 @@ getMetadata packageName = do
7476
{ getMetadata: fn } <- runSpago { logOptions, db, git, purs, offline } getRegistry
7577
runSpago { logOptions } (fn packageName)
7678

79+
getMetadataForPackages :: Array PackageName -> Spago (RegistryEnv _) _
80+
getMetadataForPackages packageNames = do
81+
{ getRegistry, logOptions, db, git, purs, offline } <- ask
82+
{ getMetadataForPackages: fn } <- runSpago { logOptions, db, git, purs, offline } getRegistry
83+
runSpago { logOptions } (fn packageNames)
84+
7785
getManifestFromIndex :: PackageName -> Version -> Spago (RegistryEnv _) _
7886
getManifestFromIndex packageName version = do
7987
{ getRegistry, logOptions, db, git, purs, offline } <- ask
@@ -123,6 +131,7 @@ getRegistryFns registryBox registryLock = do
123131
registryFns =
124132
{ getManifestFromIndex: getManifestFromIndexImpl db
125133
, getMetadata: getMetadataImpl db
134+
, getMetadataForPackages: getMetadataForPackagesImpl db
126135
, listMetadataFiles: FS.ls (Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory ])
127136
, listPackageSets: listPackageSetsImpl
128137
, findPackageSet: findPackageSetImpl
@@ -200,20 +209,37 @@ getRegistryFns registryBox registryLock = do
200209
-- Metadata can change over time (unpublished packages, and new packages), so we need
201210
-- to read it from file every time we have a fresh Registry
202211
getMetadataImpl :: Db -> PackageName -> Spago (LogEnv ()) (Either String Metadata)
203-
getMetadataImpl db name = do
212+
getMetadataImpl db name =
213+
getMetadataForPackagesImpl db [ name ]
214+
<#> case _ of
215+
Left err -> Left err
216+
Right metadataMap -> case Map.lookup name metadataMap of
217+
Nothing -> Left $ "Failed to get metadata for package: " <> PackageName.print name
218+
Just metadata -> Right metadata
219+
220+
-- Parallelised version of `getMetadataImpl`
221+
getMetadataForPackagesImpl :: Db -> Array PackageName -> Spago (LogEnv ()) (Either String (Map PackageName Metadata))
222+
getMetadataForPackagesImpl db names = do
204223
-- we first try reading it from the DB
205-
liftEffect (Db.getMetadata db name) >>= case _ of
206-
Just metadata -> do
207-
logDebug $ "Got metadata from DB: " <> PackageName.print name
208-
pure (Right metadata)
209-
_ -> do
210-
-- if we don't have it we try reading it from file
211-
metadataFromFile name >>= case _ of
212-
Left e -> pure (Left e)
213-
Right m -> do
214-
-- and memoize it
215-
liftEffect (Db.insertMetadata db name m)
216-
pure (Right m)
224+
liftEffect (Db.getMetadataForPackages db names) >>= \metadatas -> do
225+
{ fail, success } <- partitionEithers <$> parTraverseSpago
226+
( \name -> do
227+
case Map.lookup name metadatas of
228+
Nothing ->
229+
-- if we don't have it we try reading it from file
230+
metadataFromFile name >>= case _ of
231+
Left e -> pure (Left e)
232+
Right m -> do
233+
-- and memoize it
234+
liftEffect (Db.insertMetadata db name m)
235+
pure (Right $ name /\ m)
236+
Just m -> pure $ Right $ name /\ m
237+
)
238+
names
239+
case Array.head fail of
240+
Nothing -> pure $ Right $ Map.fromFoldable success
241+
Just f -> pure $ Left $ f
242+
217243
where
218244
metadataFromFile pkgName = do
219245
let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print pkgName <> ".json" ]

0 commit comments

Comments
 (0)