Skip to content

Commit c9fd17b

Browse files
authored
Use the database to cache the index and metadata (#1107)
1 parent 796d80e commit c9fd17b

File tree

4 files changed

+191
-142
lines changed

4 files changed

+191
-142
lines changed

bin/src/Main.purs

Lines changed: 53 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,12 @@ import Data.Array.NonEmpty as NEA
88
import Data.Array.NonEmpty as NonEmptyArray
99
import Data.Codec.Argonaut.Common as CA.Common
1010
import Data.Foldable as Foldable
11-
import Data.JSDate as JSDate
1211
import Data.List as List
1312
import Data.Map as Map
1413
import Data.Maybe as Maybe
1514
import Data.String as String
1615
import Effect.Aff as Aff
1716
import Effect.Now as Now
18-
import Effect.Ref as Ref
19-
import Node.FS.Stats (Stats(..))
2017
import Node.Path as Path
2118
import Node.Process as Process
2219
import Options.Applicative (CommandFields, Mod, Parser, ParserPrefs(..))
@@ -27,6 +24,7 @@ import Registry.Constants as Registry.Constants
2724
import Registry.ManifestIndex as ManifestIndex
2825
import Registry.Metadata as Metadata
2926
import Registry.PackageName as PackageName
27+
import Registry.Version as Version
3028
import Spago.Bin.Flags as Flags
3129
import Spago.Command.Build as Build
3230
import Spago.Command.Bundle as Bundle
@@ -912,18 +910,44 @@ mkRegistryEnv offline = do
912910
-- Make sure we have git and purs
913911
git <- Git.getGit
914912
purs <- Purs.getPurs
913+
{ logOptions } <- ask
914+
915+
-- Connect to the database - we need it to keep track of when to pull the Registry,
916+
-- so we don't do it too often
917+
db <- liftEffect $ Db.connect
918+
{ database: Paths.databasePath
919+
, logger: \str -> Reader.runReaderT (logDebug $ "DB: " <> str) { logOptions }
920+
}
921+
922+
-- we keep track of how old the latest pull was - if the last pull was recent enough
923+
-- we just move on, otherwise run the fibers
924+
fetchingFreshRegistry <- Registry.shouldFetchRegistryRepos db
925+
when fetchingFreshRegistry do
926+
-- clone the registry and index repo, or update them
927+
logInfo "Refreshing the Registry Index..."
928+
runSpago { logOptions, git, offline } $ parallelise
929+
[ Git.fetchRepo { git: "https://github.com/purescript/registry-index.git", ref: "main" } Paths.registryIndexPath >>= case _ of
930+
Right _ -> pure unit
931+
Left _err -> logWarn "Couldn't refresh the registry-index, will proceed anyways"
932+
, Git.fetchRepo { git: "https://github.com/purescript/registry.git", ref: "main" } Paths.registryPath >>= case _ of
933+
Right _ -> pure unit
934+
Left _err -> logWarn "Couldn't refresh the registry, will proceed anyways"
935+
]
936+
937+
-- Now that we are up to date with the Registry we init/refresh the database
938+
Registry.updatePackageSetsDb db
915939

916-
-- we make a Ref for the Index so that we can memoize the lookup of packages
917-
-- and we don't have to read it all together
918-
indexRef <- liftEffect $ Ref.new (Map.empty :: Map PackageName (Map Version Manifest))
940+
-- Prepare the functions to read the manifests and metadata - here we memoize as much
941+
-- as we can in the DB, so we don't have to read the files every time
919942
let
943+
-- Manifests are immutable so we can just look them up in the DB, or read them from file if not there
920944
getManifestFromIndex :: PackageName -> Version -> Spago (LogEnv ()) (Maybe Manifest)
921945
getManifestFromIndex name version = do
922-
indexMap <- liftEffect (Ref.read indexRef)
923-
case Map.lookup name indexMap of
924-
Just meta -> pure (Map.lookup version meta)
946+
liftEffect (Db.getManifest db name version) >>= case _ of
947+
Just manifest -> pure (Just manifest)
925948
Nothing -> do
926-
-- if we don't have it we try reading it from file
949+
-- if we don't have it we need to read it from file
950+
-- (note that we have all the versions of a package in the same file)
927951
logDebug $ "Reading package from Index: " <> PackageName.print name
928952
maybeManifests <- liftAff $ ManifestIndex.readEntryFile Paths.registryIndexPath name
929953
manifests <- map (map (\m@(Manifest m') -> Tuple m'.version m)) case maybeManifests of
@@ -932,50 +956,36 @@ mkRegistryEnv offline = do
932956
logWarn $ "Could not read package manifests from index, proceeding anyways. Error: " <> err
933957
pure []
934958
let versions = Map.fromFoldable manifests
935-
liftEffect (Ref.write (Map.insert name versions indexMap) indexRef)
959+
-- and memoize it
960+
for_ manifests \(Tuple _ manifest@(Manifest m)) -> do
961+
logDebug $ "Inserting manifest in DB: " <> PackageName.print name <> " v" <> Version.print m.version
962+
liftEffect $ Db.insertManifest db name m.version manifest
936963
pure (Map.lookup version versions)
937964

938-
-- same deal for the metadata files
939-
metadataRef <- liftEffect $ Ref.new (Map.empty :: Map PackageName Metadata)
965+
-- Metadata can change over time (unpublished packages, and new packages), so we need
966+
-- to read it from file every time we have a fresh Registry
940967
let
968+
metadataFromFile name = do
969+
let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print name <> ".json" ]
970+
logDebug $ "Reading metadata from file: " <> metadataFilePath
971+
liftAff (FS.readJsonFile Metadata.codec metadataFilePath)
972+
941973
getMetadata :: PackageName -> Spago (LogEnv ()) (Either String Metadata)
942974
getMetadata name = do
943-
metadataMap <- liftEffect (Ref.read metadataRef)
944-
case Map.lookup name metadataMap of
945-
Just meta -> pure (Right meta)
946-
Nothing -> do
975+
-- we first try reading it from the DB
976+
liftEffect (Db.getMetadata db name) >>= case _ of
977+
Just metadata | not fetchingFreshRegistry -> do
978+
logDebug $ "Got metadata from DB: " <> PackageName.print name
979+
pure (Right metadata)
980+
_ -> do
947981
-- if we don't have it (or we just fetched a fresh Registry) we try reading it from file
948-
let metadataFilePath = Path.concat [ Paths.registryPath, Registry.Constants.metadataDirectory, PackageName.print name <> ".json" ]
949-
logDebug $ "Reading metadata from file: " <> metadataFilePath
950-
liftAff (FS.readJsonFile Metadata.codec metadataFilePath) >>= case _ of
982+
metadataFromFile name >>= case _ of
951983
Left e -> pure (Left e)
952984
Right m -> do
953985
-- and memoize it
954-
liftEffect (Ref.write (Map.insert name m metadataMap) metadataRef)
986+
liftEffect (Db.insertMetadata db name m)
955987
pure (Right m)
956988

957-
{ logOptions } <- ask
958-
-- we keep track of how old the latest pull was - if the last pull was recent enough
959-
-- we just move on, otherwise run the fibers
960-
whenM shouldFetchRegistryRepos do
961-
-- clone the registry and index repo, or update them
962-
logInfo "Refreshing the Registry Index..."
963-
runSpago { logOptions, git, offline } $ parallelise
964-
[ Git.fetchRepo { git: "https://github.com/purescript/registry-index.git", ref: "main" } Paths.registryIndexPath >>= case _ of
965-
Right _ -> pure unit
966-
Left _err -> logWarn "Couldn't refresh the registry-index, will proceed anyways"
967-
, Git.fetchRepo { git: "https://github.com/purescript/registry.git", ref: "main" } Paths.registryPath >>= case _ of
968-
Right _ -> pure unit
969-
Left _err -> logWarn "Couldn't refresh the registry, will proceed anyways"
970-
]
971-
972-
-- Now that we are up to date with the Registry we init/refresh the database
973-
db <- liftEffect $ Db.connect
974-
{ database: Paths.databasePath
975-
, logger: \str -> Reader.runReaderT (logDebug $ "DB: " <> str) { logOptions }
976-
}
977-
Registry.updatePackageSetsDb db
978-
979989
pure
980990
{ getManifestFromIndex
981991
, getMetadata
@@ -1020,32 +1030,4 @@ mkDocsEnv args dependencies = do
10201030
, open: args.open
10211031
}
10221032

1023-
shouldFetchRegistryRepos :: forall a. Spago (LogEnv a) Boolean
1024-
shouldFetchRegistryRepos = do
1025-
let freshRegistryCanary = Path.concat [ Paths.globalCachePath, "fresh-registry-canary.txt" ]
1026-
FS.stat freshRegistryCanary >>= case _ of
1027-
Left err -> do
1028-
-- If the stat fails the file probably does not exist
1029-
logDebug [ "Could not stat " <> freshRegistryCanary, show err ]
1030-
-- in which case we touch it and fetch
1031-
touch freshRegistryCanary
1032-
pure true
1033-
Right (Stats { mtime }) -> do
1034-
-- it does exist here, see if it's old enough, and fetch if it is
1035-
now <- liftEffect $ JSDate.now
1036-
let minutes = 15.0
1037-
let staleAfter = 1000.0 * 60.0 * minutes -- need this in millis
1038-
let isOldEnough = (JSDate.getTime now) > (JSDate.getTime mtime + staleAfter)
1039-
if isOldEnough then do
1040-
logDebug "Registry index is old, refreshing canary"
1041-
touch freshRegistryCanary
1042-
pure true
1043-
else do
1044-
logDebug "Registry index is fresh enough, moving on..."
1045-
pure false
1046-
where
1047-
touch path = do
1048-
FS.ensureFileSync path
1049-
FS.writeTextFile path ""
1050-
10511033
foreign import supportsColor :: Effect Boolean

src/Spago/Db.js

Lines changed: 56 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export const connectImpl = (path, logger) => {
44
logger("Connecting to database at " + path);
55
let db = new Database(path, {
66
fileMustExist: false,
7-
verbose: logger,
7+
// verbose: logger,
88
});
99
db.pragma("journal_mode = WAL");
1010
db.pragma("foreign_keys = ON");
@@ -19,16 +19,24 @@ export const connectImpl = (path, logger) => {
1919
, packageName TEXT NOT NULL
2020
, packageVersion TEXT NOT NULL
2121
, PRIMARY KEY (packageSetVersion, packageName, packageVersion)
22-
, FOREIGN KEY (packageSetVersion) REFERENCES package_sets(version))`).run();
23-
// TODO: this is here as a placeholder, but not settled yet
24-
// db.prepare(`CREATE TABLE IF NOT EXISTS package_versions
25-
// ( name TEXT NOT NULL
26-
// , version TEXT NOT NULL
27-
// , published INTEGER NOT NULL
28-
// , date TEXT NOT NULL
29-
// , manifest TEXT NOT NULL
30-
// , location TEXT NOT NULL
31-
// , PRIMARY KEY (name, version))`).run();
22+
, FOREIGN KEY (packageSetVersion) REFERENCES package_sets(version)
23+
)`).run();
24+
db.prepare(`CREATE TABLE IF NOT EXISTS last_git_pull
25+
( key TEXT PRIMARY KEY NOT NULL
26+
, date TEXT NOT NULL
27+
)`).run();
28+
db.prepare(`CREATE TABLE IF NOT EXISTS package_metadata
29+
( name TEXT PRIMARY KEY NOT NULL
30+
, metadata TEXT NOT NULL
31+
)`).run();
32+
// it would be lovely if we had a foreign key on package_metadata, but that would
33+
// require reading metadata before manifests, which we can't always guarantee
34+
db.prepare(`CREATE TABLE IF NOT EXISTS package_manifests
35+
( name TEXT NOT NULL
36+
, version TEXT NOT NULL
37+
, manifest TEXT NOT NULL
38+
, PRIMARY KEY (name, version)
39+
)`).run();
3240
return db;
3341
};
3442

@@ -38,12 +46,6 @@ export const insertPackageSetImpl = (db, packageSet) => {
3846
).run(packageSet);
3947
};
4048

41-
export const insertPackageVersionImpl = (db, packageVersion) => {
42-
db.prepare(
43-
"INSERT INTO package_versions (name, version, published, date, manifest, location) VALUES (@name, @version, @published, @date, @manifest, @location)"
44-
).run(packageVersion);
45-
}
46-
4749
export const insertPackageSetEntryImpl = (db, packageSetEntry) => {
4850
db.prepare(
4951
"INSERT INTO package_set_entries (packageSetVersion, packageName, packageVersion) VALUES (@packageSetVersion, @packageName, @packageVersion)"
@@ -64,17 +66,6 @@ export const selectPackageSetsImpl = (db) => {
6466
return row;
6567
}
6668

67-
export const selectPackageVersionImpl = (db, name, version) => {
68-
const row = db
69-
.prepare("SELECT * FROM package_versions WHERE name = ? AND version = ? LIMIT 1")
70-
.get(name, version);
71-
return row;
72-
}
73-
74-
export const unpublishPackageVersionImpl = (db, name, version) => {
75-
db.prepare("UPDATE package_versions SET published = 0 WHERE name = ? AND version = ?").run(name, version);
76-
}
77-
7869
export const selectPackageSetEntriesBySetImpl = (db, packageSetVersion) => {
7970
const row = db
8071
.prepare("SELECT * FROM package_set_entries WHERE packageSetVersion = ?")
@@ -88,3 +79,40 @@ export const selectPackageSetEntriesByPackageImpl = (db, packageName, packageVer
8879
.all(packageName, packageVersion);
8980
return row;
9081
}
82+
83+
export const getLastPullImpl = (db, key) => {
84+
const row = db
85+
.prepare("SELECT * FROM last_git_pull WHERE key = ? LIMIT 1")
86+
.get(key);
87+
return row?.date;
88+
}
89+
90+
export const updateLastPullImpl = (db, key, date) => {
91+
db.prepare("INSERT OR REPLACE INTO last_git_pull (key, date) VALUES (@key, @date)").run({ key, date });
92+
}
93+
94+
export const getManifestImpl = (db, name, version) => {
95+
const row = db
96+
.prepare("SELECT * FROM package_manifests WHERE name = ? AND version = ? LIMIT 1")
97+
.get(name, version);
98+
return row?.manifest;
99+
}
100+
101+
export const insertManifestImpl = (db, name, version, manifest) => {
102+
db.prepare("INSERT OR IGNORE INTO package_manifests (name, version, manifest) VALUES (@name, @version, @manifest)").run({ name, version, manifest });
103+
}
104+
105+
export const removeManifestImpl = (db, name, version) => {
106+
db.prepare("DELETE FROM package_manifests WHERE name = ? AND version = ?").run(name, version);
107+
}
108+
109+
export const getMetadataImpl = (db, name) => {
110+
const row = db
111+
.prepare("SELECT * FROM package_metadata WHERE name = ? LIMIT 1")
112+
.get(name);
113+
return row?.metadata;
114+
}
115+
116+
export const insertMetadataImpl = (db, name, metadata) => {
117+
db.prepare("INSERT OR REPLACE INTO package_metadata (name, metadata) VALUES (@name, @metadata)").run({ name, metadata });
118+
}

0 commit comments

Comments
 (0)