-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild.py
94 lines (83 loc) · 3.18 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
## @file
# Copyright (c) 2024, Cory Bennett. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
##
from cloudflare import Cloudflare
from datetime import datetime, timedelta
import json
from os import environ as env
# Since we aren't bundling this script, we can dynamically import src/ modules.
import sys; sys.path.append('src')
from asi import fetch_archetypes, compute_archetype_bigrams
from asi.postgres import start_pool, hash
FORMATS = [
'standard',
'modern',
'pioneer',
'vintage',
'legacy',
'pauper',
]
MIN_DATE = (TIMESTAMP := datetime.now()) - timedelta(days=90)
# Start a PostgreSQL connection pool for the MTGO database.
start_pool()
# Setup a connection to the Cloudflare D1 API.
client = Cloudflare(
api_key=env["CLOUDFLARE_API_KEY"],
api_email=env["CLOUDFLARE_EMAIL"]
)
db = lambda query, **kwargs: client.d1.database.raw(
database_id=env["CLOUDFLARE_DATABASE_ID"],
account_id=env["CLOUDFLARE_ACCOUNT_ID"],
sql=query,
**kwargs
)
for format in FORMATS:
# Create the table if it does not exist.
# This doesn't consume any read/write operations if the table already exists.
db(f"""
CREATE TABLE IF NOT EXISTS {format} (
card TEXT PRIMARY KEY,
entry TEXT,
hash TEXT,
updated_at DEFAULT CURRENT_TIMESTAMP
);
""")
# Flatten bigrams into a single dictionary entry per card for better database
# I/O efficiency. This reduces the n(n-1)/2 space complexity to just n.
flattened_bigram: dict = {}
bigrams = compute_archetype_bigrams(fetch_archetypes(format, MIN_DATE))
for (card1, card2), value in bigrams.items():
# Convert card names to lowercase for case-insensitive search.
card1, card2 = card1.lower(), card2.lower()
# Create nested dictionary entries for each card.
if card1 not in flattened_bigram:
flattened_bigram[card1] = {}
flattened_bigram[card1][card2] = { k: round(v, 8) for k,v in value.items() }
# Batch insert/update bigram entries in the database.
# Allows for inserting 6,000 rows/minute (w/ 1200 requests every 5 minutes).
batch_size = 25
keys = list(sorted(flattened_bigram.keys()))
for i in range(0, len(keys), batch_size):
batch_keys = keys[i:i + batch_size]
batch: dict[str, dict] = { k: flattened_bigram[k] for k in batch_keys }
res = db(f"""
INSERT INTO {format} (card, entry, hash, updated_at)
VALUES
{','.join(['(?, ?, ?, CURRENT_TIMESTAMP)'] * len(batch_keys))}
ON CONFLICT(card) DO UPDATE SET
entry = excluded.entry,
hash = excluded.hash,
updated_at = CURRENT_TIMESTAMP
WHERE excluded.hash != {format}.hash;
""",
params=[item for sublist in [[k, e, hash(e)]
for k,e in map(lambda kv: (kv[0], json.dumps(kv[1])),
batch.items())]
for item in sublist]
)
# Create an index on the hash and updated_at columns for faster lookups.
db(f"CREATE INDEX IF NOT EXISTS {format}_hash ON {format} (hash)")
db(f"CREATE INDEX IF NOT EXISTS {format}_updated_at ON {format} (updated_at)")
# Delete old entries from the database (older than a month).
db(f"DELETE FROM {format} WHERE updated_at < datetime('now', '-1 month')")