From 72b2e614bf3988e10135f47e96d70b907386e427 Mon Sep 17 00:00:00 2001 From: Finn Hodgkin Date: Fri, 23 Aug 2024 14:45:07 +0100 Subject: [PATCH] Fix slow lockfile generation (#1261) * Memoize and combine queries to speed up lockfile generation * Add comments and remove pointless parTraverse * Re-add swallowed error and remove accidental formatting in foreign mod * Lockfile gen speed review feedback - naming and comments * Replace singular metadata query --- src/Spago/Command/Fetch.purs | 116 ++++++++++++++++++++++++++++++----- src/Spago/Db.js | 13 ++-- src/Spago/Db.purs | 36 +++++++---- src/Spago/Registry.purs | 52 ++++++++++++---- 4 files changed, 169 insertions(+), 48 deletions(-) diff --git a/src/Spago/Command/Fetch.purs b/src/Spago/Command/Fetch.purs index c64837dd8..4a1febfcd 100644 --- a/src/Spago/Command/Fetch.purs +++ b/src/Spago/Command/Fetch.purs @@ -22,8 +22,10 @@ import Data.Array.NonEmpty as NEA import Data.Codec.JSON as CJ import Data.Codec.JSON.Common as CJ.Common import Data.Either as Either +import Data.Filterable (filterMap) import Data.HTTP.Method as Method import Data.Int as Int +import Data.List as List import Data.Map as Map import Data.Newtype (wrap) import Data.Set as Set @@ -279,16 +281,40 @@ type LockfileBuilderResult = , packages :: Map PackageName Lock.LockEntry } +lookupInCache :: forall a k v. Ord k => k -> Ref.Ref (Map k v) -> Spago a (Maybe v) +lookupInCache key cacheRef = liftEffect $ Ref.read cacheRef >>= Map.lookup key >>> pure + +updateCache :: forall a k v. Ord k => k -> v -> Ref.Ref (Map k v) -> Spago a Unit +updateCache key value cacheRef = liftEffect $ Ref.modify_ (Map.insert key value) cacheRef + writeNewLockfile :: forall a. String -> PackageTransitiveDeps -> Spago (FetchEnv a) PackageTransitiveDeps writeNewLockfile reason allTransitiveDeps = do logInfo $ reason <> ", generating it..." { workspace } <- ask + + -- All these Refs are needed to memoise Db and file reads + packageDependenciesCache <- liftEffect $ Ref.new Map.empty + gitRefCache <- liftEffect $ Ref.new Map.empty + metadataRefCache <- liftEffect $ Ref.new Map.empty let - processPackage :: LockfileBuilderResult -> Tuple PackageName (Tuple PackageName Package) -> Spago (FetchEnv a) LockfileBuilderResult - processPackage result (Tuple workspacePackageName (Tuple dependencyName dependencyPackage)) = do - (packageDependencies :: Array PackageName) <- (Array.fromFoldable <<< Map.keys <<< fromMaybe Map.empty) - <$> getPackageDependencies dependencyName dependencyPackage + memoisedGetPackageDependencies :: PackageName -> Package -> Spago (FetchEnv a) (Maybe (Map PackageName Range)) + memoisedGetPackageDependencies packageName package = do + lookupInCache packageName packageDependenciesCache >>= + case _ of + Just cached -> do + pure cached + Nothing -> do + -- Not cached. Compute it, write to ref, return it + res <- getPackageDependencies packageName package + updateCache packageName res packageDependenciesCache + pure res + + processPackage :: Map PackageName _ -> LockfileBuilderResult -> Tuple PackageName (Tuple PackageName Package) -> Spago (FetchEnv a) LockfileBuilderResult + processPackage registryIntegrityMap result (Tuple workspacePackageName (Tuple dependencyName dependencyPackage)) = do let + getDeps = (Array.fromFoldable <<< Map.keys <<< fromMaybe Map.empty) + <$> memoisedGetPackageDependencies dependencyName dependencyPackage + updatePackage r package = (updateWorkspacePackage r) { packages = Map.insert dependencyName package r.packages } updateWorkspacePackage r = r @@ -303,28 +329,84 @@ writeNewLockfile reason allTransitiveDeps = do case dependencyPackage of WorkspacePackage _pkg -> pure $ updateWorkspacePackage result + GitPackage gitPackage -> do let packageLocation = Config.getPackageLocation dependencyName dependencyPackage - Git.getRef (Just packageLocation) >>= case _ of - Left err -> die err -- TODO maybe not die here? - Right rev -> pure $ updatePackage result $ FromGit { rev, dependencies: packageDependencies, url: gitPackage.git, subdir: gitPackage.subdir } + lookupInCache packageLocation gitRefCache >>= case _ of + Nothing -> + -- Get the ref and update the cache + Git.getRef (Just packageLocation) >>= case _ of + Left err -> die err -- TODO maybe not die here? + Right rev -> do + dependencies <- getDeps + let + lockEntry = + FromGit { rev, dependencies, url: gitPackage.git, subdir: gitPackage.subdir } + updateCache packageLocation lockEntry gitRefCache + pure $ updatePackage result lockEntry + Just entry -> pure $ updatePackage result entry + RegistryVersion version -> do - metadata <- Registry.getMetadata dependencyName - registryVersion <- case (metadata >>= (\(Metadata meta) -> Either.note "Didn't find version in the metadata file" $ Map.lookup version meta.published)) of - Left err -> die $ "Couldn't read metadata, reason:\n " <> err - Right { hash: integrity } -> - pure { version, integrity, dependencies: packageDependencies } - pure $ updatePackage result $ FromRegistry registryVersion + lookupInCache dependencyName metadataRefCache >>= case _ of + Nothing -> do + registryVersion <- FromRegistry <$> case Map.lookup dependencyName registryIntegrityMap of + -- This shouldn't be Nothing because it's already handled when building the integrity map below + Nothing -> die $ "Couldn't read metadata" + Just integrity -> do + dependencies <- getDeps + pure { version, integrity, dependencies } + updateCache dependencyName registryVersion metadataRefCache + pure $ updatePackage result registryVersion + Just entry -> do + pure $ updatePackage result entry + LocalPackage { path } -> do - pure $ updatePackage result $ FromPath { path, dependencies: packageDependencies } + dependencies <- getDeps + pure $ updatePackage result $ FromPath { path, dependencies } let toArray :: forall k v. Map k v -> Array (Tuple k v) toArray = Map.toUnfoldable + allDependencies = foldMap sequence $ toArray $ map toArray allTransitiveDeps + + -- Fetch the Registry metadata in one go for all required packages + let + uniqueRegistryPackageNames = Array.nub $ filterMap + ( \(Tuple _ (Tuple dependencyName dependencyPackage)) -> case dependencyPackage of + RegistryVersion _ -> Just dependencyName + _ -> Nothing + ) + allDependencies + metadataMap <- Registry.getMetadataForPackages uniqueRegistryPackageNames >>= case _ of + Left err -> die $ "Couldn't read metadata, reason:\n " <> err + Right ms -> pure ms + + (registryVersions :: Map PackageName Sha256) <- Map.fromFoldable <<< Array.catMaybes <$> + ( traverse + ( \(Tuple _ (Tuple dependencyName dependencyPackage)) -> case dependencyPackage of + RegistryVersion version -> do + let metadata = Map.lookup dependencyName metadataMap + case (metadata >>= (\(Metadata meta) -> Map.lookup version meta.published)) of + Nothing | isNothing metadata -> + die $ "Couldn't read metadata for " <> PackageName.print dependencyName + Nothing -> + die $ "Couldn't read metadata for " <> PackageName.print dependencyName + <> ": didn't find version in the metadata file" + Just { hash: integrity } -> + pure $ Just $ dependencyName /\ integrity + _ -> pure Nothing + ) + $ allDependencies + ) + ({ packages, workspacePackages } :: LockfileBuilderResult) <- - Array.foldM processPackage - { workspacePackages: Map.fromFoldable $ map Config.workspacePackageToLockfilePackage (Config.getWorkspacePackages workspace.packageSet), packages: Map.empty } - (foldMap sequence $ toArray $ map toArray allTransitiveDeps) + -- NOTE! We used to have `Array.foldM` here, but it was significantly slower + -- (~10ms vs 6s on a very large project) + List.foldM (processPackage registryVersions) + { workspacePackages: Map.fromFoldable $ map Config.workspacePackageToLockfilePackage (Config.getWorkspacePackages workspace.packageSet) + , packages: Map.empty + } + $ List.fromFoldable allDependencies let lockfile = diff --git a/src/Spago/Db.js b/src/Spago/Db.js index 0ee61f4d9..793ce65e5 100644 --- a/src/Spago/Db.js +++ b/src/Spago/Db.js @@ -107,13 +107,12 @@ export const removeManifestImpl = (db, name, version) => { db.prepare("DELETE FROM package_manifests WHERE name = ? AND version = ?").run(name, version); } -export const getMetadataImpl = (db, name) => { - const row = db - .prepare("SELECT * FROM package_metadata WHERE name = ? LIMIT 1") - .get(name); - return row; -} - export const insertMetadataImpl = (db, name, metadata, last_fetched) => { db.prepare("INSERT OR REPLACE INTO package_metadata (name, metadata, last_fetched) VALUES (@name, @metadata, @last_fetched)").run({ name, metadata, last_fetched }); } + +export const getMetadataForPackagesImpl = (db, names) => { + // There can be a lot of package names here, potentially hitting the max number of sqlite parameters, so we use json to bypass this + const query = db.prepare("SELECT * FROM package_metadata WHERE name IN (SELECT value FROM json_each(?));"); + return query.all(JSON.stringify(names)); +}; \ No newline at end of file diff --git a/src/Spago/Db.purs b/src/Spago/Db.purs index 76e909296..874df03ce 100644 --- a/src/Spago/Db.purs +++ b/src/Spago/Db.purs @@ -8,6 +8,7 @@ module Spago.Db , getLastPull , getManifest , getMetadata + , getMetadataForPackages , insertManifest , insertMetadata , insertPackageSet @@ -26,6 +27,7 @@ import Data.Codec.JSON.Record as CJ.Record import Data.DateTime (Date, DateTime(..)) import Data.DateTime as DateTime import Data.Either as Either +import Data.Filterable (filterMap) import Data.Formatter.DateTime as DateTime.Format import Data.Map as Map import Data.Nullable (Nullable) @@ -100,18 +102,28 @@ insertManifest :: Db -> PackageName -> Version -> Manifest -> Effect Unit insertManifest db packageName version manifest = Uncurried.runEffectFn4 insertManifestImpl db (PackageName.print packageName) (Version.print version) (printJson Manifest.codec manifest) getMetadata :: Db -> PackageName -> Effect (Maybe Metadata) -getMetadata db packageName = do - maybeMetadataEntry <- Nullable.toMaybe <$> Uncurried.runEffectFn2 getMetadataImpl db (PackageName.print packageName) +getMetadata db packageName = + getMetadataForPackages db [ packageName ] + <#> Map.lookup packageName + +getMetadataForPackages :: Db -> Array PackageName -> Effect (Map PackageName Metadata) +getMetadataForPackages db packageNames = do + metadataEntries <- Uncurried.runEffectFn2 getMetadataForPackagesImpl db (PackageName.print <$> packageNames) now <- Now.nowDateTime - pure $ do - metadataEntry <- maybeMetadataEntry - lastFetched <- Either.hush $ DateTime.Format.unformat Internal.Format.iso8601DateTime metadataEntry.last_fetched - -- if the metadata is older than 15 minutes, we consider it stale - case DateTime.diff now lastFetched of - Minutes n | n <= 15.0 -> do - metadata <- Either.hush $ parseJson Metadata.codec metadataEntry.metadata - pure metadata - _ -> Nothing + pure + $ metadataEntries + # + ( filterMap \metadataEntry -> do + packageName <- hush $ PackageName.parse metadataEntry.name + lastFetched <- Either.hush $ DateTime.Format.unformat Internal.Format.iso8601DateTime metadataEntry.last_fetched + -- if the metadata is older than 15 minutes, we consider it stale + case DateTime.diff now lastFetched of + Minutes n | n <= 15.0 -> do + metadata <- Either.hush $ parseJson Metadata.codec metadataEntry.metadata + pure $ packageName /\ metadata + _ -> Nothing + ) + # Map.fromFoldable insertMetadata :: Db -> PackageName -> Metadata -> Effect Unit insertMetadata db packageName metadata@(Metadata { unpublished }) = do @@ -246,4 +258,6 @@ foreign import removeManifestImpl :: EffectFn3 Db String String Unit foreign import getMetadataImpl :: EffectFn2 Db String (Nullable MetadataEntryJs) +foreign import getMetadataForPackagesImpl :: EffectFn2 Db (Array String) (Array MetadataEntryJs) + foreign import insertMetadataImpl :: EffectFn4 Db String String String Unit diff --git a/src/Spago/Registry.purs b/src/Spago/Registry.purs index 8f840a9da..2c2341c39 100644 --- a/src/Spago/Registry.purs +++ b/src/Spago/Registry.purs @@ -7,6 +7,7 @@ module Spago.Registry , findPackageSet , getManifestFromIndex , getMetadata + , getMetadataForPackages , getRegistryFns , listMetadataFiles , listPackageSets @@ -62,6 +63,7 @@ type RegistryEnv a = Record (RegistryEnvRow a) type RegistryFunctions = { getManifestFromIndex :: PackageName -> Version -> Spago (LogEnv ()) (Maybe Manifest) , getMetadata :: PackageName -> Spago (LogEnv ()) (Either String Metadata) + , getMetadataForPackages :: Array PackageName -> Spago (LogEnv ()) (Either String (Map PackageName Metadata)) , findPackageSet :: Maybe Version -> Spago (PreRegistryEnv ()) Version , listPackageSets :: Spago (PreRegistryEnv ()) (Array Db.PackageSet) , listMetadataFiles :: Spago (LogEnv ()) (Array String) @@ -74,6 +76,12 @@ getMetadata packageName = do { getMetadata: fn } <- runSpago { logOptions, db, git, purs, offline } getRegistry runSpago { logOptions } (fn packageName) +getMetadataForPackages :: Array PackageName -> Spago (RegistryEnv _) _ +getMetadataForPackages packageNames = do + { getRegistry, logOptions, db, git, purs, offline } <- ask + { getMetadataForPackages: fn } <- runSpago { logOptions, db, git, purs, offline } getRegistry + runSpago { logOptions } (fn packageNames) + getManifestFromIndex :: PackageName -> Version -> Spago (RegistryEnv _) _ getManifestFromIndex packageName version = do { getRegistry, logOptions, db, git, purs, offline } <- ask @@ -123,6 +131,7 @@ getRegistryFns registryBox registryLock = do registryFns = { getManifestFromIndex: getManifestFromIndexImpl db , getMetadata: getMetadataImpl db + , getMetadataForPackages: getMetadataForPackagesImpl db , listMetadataFiles: FS.ls (Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory ]) , listPackageSets: listPackageSetsImpl , findPackageSet: findPackageSetImpl @@ -200,20 +209,37 @@ getRegistryFns registryBox registryLock = do -- Metadata can change over time (unpublished packages, and new packages), so we need -- to read it from file every time we have a fresh Registry getMetadataImpl :: Db -> PackageName -> Spago (LogEnv ()) (Either String Metadata) -getMetadataImpl db name = do +getMetadataImpl db name = + getMetadataForPackagesImpl db [ name ] + <#> case _ of + Left err -> Left err + Right metadataMap -> case Map.lookup name metadataMap of + Nothing -> Left $ "Failed to get metadata for package: " <> PackageName.print name + Just metadata -> Right metadata + +-- Parallelised version of `getMetadataImpl` +getMetadataForPackagesImpl :: Db -> Array PackageName -> Spago (LogEnv ()) (Either String (Map PackageName Metadata)) +getMetadataForPackagesImpl db names = do -- we first try reading it from the DB - liftEffect (Db.getMetadata db name) >>= case _ of - Just metadata -> do - logDebug $ "Got metadata from DB: " <> PackageName.print name - pure (Right metadata) - _ -> do - -- if we don't have it we try reading it from file - metadataFromFile name >>= case _ of - Left e -> pure (Left e) - Right m -> do - -- and memoize it - liftEffect (Db.insertMetadata db name m) - pure (Right m) + liftEffect (Db.getMetadataForPackages db names) >>= \metadatas -> do + { fail, success } <- partitionEithers <$> parTraverseSpago + ( \name -> do + case Map.lookup name metadatas of + Nothing -> + -- if we don't have it we try reading it from file + metadataFromFile name >>= case _ of + Left e -> pure (Left e) + Right m -> do + -- and memoize it + liftEffect (Db.insertMetadata db name m) + pure (Right $ name /\ m) + Just m -> pure $ Right $ name /\ m + ) + names + case Array.head fail of + Nothing -> pure $ Right $ Map.fromFoldable success + Just f -> pure $ Left $ f + where metadataFromFile pkgName = do let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print pkgName <> ".json" ]