This repository was archived by the owner on Dec 20, 2024. It is now read-only.

Commit 612bf72

Merge branch 'main' into reward_from_scores
2 parents: a5d2c99 + b778238

11 files changed: +85 -257 lines

.env.sample

Lines changed: 3 additions & 2 deletions
@@ -1,6 +1,4 @@
 VOLUME_PATH=data
-APP_DATA_MAX_RETRIES=3
-APP_DATA_GIVE_UP_THRESHOLD=100
 
 # Dune credentials
 DUNE_API_KEY=
@@ -19,5 +17,8 @@ AWS_BUCKET=
 BARN_DB_URL={user}:{password}@{host}:{port}/{database}
 PROD_DB_URL={user}:{password}@{host}:{port}/{database}
 
+#Target table for app data sync
+APP_DATA_TARGET_TABLE=app_data_mainnet
+
 # IPFS Gateway
 IPFS_ACCESS_KEY=

requirements/prod.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-dune-client==0.3.0
+dune-client==1.7.4
 psycopg2-binary>=2.9.3
 python-dotenv>=0.20.0
 requests>=2.28.1
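The major-version bump of dune-client is what drives the Python changes below: newer releases of the library expose the query class as QueryBase rather than Query, and provide the upload_csv endpoint that the reworked app-data sync calls.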

src/dune_queries.py

Lines changed: 4 additions & 4 deletions
@@ -6,7 +6,7 @@
 from copy import copy
 from dataclasses import dataclass
 
-from dune_client.query import Query
+from dune_client.query import QueryBase
 from dune_client.types import QueryParameter
 
 
@@ -15,14 +15,14 @@ class QueryData:
     """Stores name and a version of the query for each query."""
 
     name: str
-    query: Query
+    query: QueryBase
 
     def __init__(self, name: str, query_id: int, filename: str) -> None:
         self.name = name
         self.filepath = filename
-        self.query = Query(query_id, name)
+        self.query = QueryBase(query_id, name)
 
-    def with_params(self, params: list[QueryParameter]) -> Query:
+    def with_params(self, params: list[QueryParameter]) -> QueryBase:
         """
         Copies the query and adds parameters to it, returning the copy.
         """

src/fetch/dune.py

Lines changed: 4 additions & 5 deletions
@@ -7,7 +7,7 @@
 import sys
 
 from dune_client.client import DuneClient
-from dune_client.query import Query
+from dune_client.query import QueryBase
 from dune_client.types import DuneRecord
 from requests import HTTPError
 
@@ -25,15 +25,14 @@ class DuneFetcher:
 
     def __init__(
         self,
-        api_key: str,
+        dune: DuneClient,
     ) -> None:
         """
         Class constructor.
-        Builds DuneClient from `api_key` along with a logger and FileIO object.
         """
-        self.dune = DuneClient(api_key)
+        self.dune = dune
 
-    async def fetch(self, query: Query) -> list[DuneRecord]:
+    async def fetch(self, query: QueryBase) -> list[DuneRecord]:
         """Async Dune Fetcher with some exception handling."""
         log.debug(f"Executing {query}")
 
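A minimal sketch of the new construction pattern, assuming the DUNE_API_KEY variable from .env.sample: the caller builds the client once and injects it, instead of handing DuneFetcher a raw API key.

import os

from dune_client.client import DuneClient

from src.fetch.dune import DuneFetcher

# A single client instance can now be shared by every consumer, rather than
# each class constructing its own from the API key.
fetcher = DuneFetcher(dune=DuneClient(os.environ["DUNE_API_KEY"]))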

src/fetch/orderbook.py

Lines changed: 11 additions & 0 deletions
@@ -151,3 +151,14 @@ def get_batch_rewards(cls, block_range: BlockRange) -> DataFrame:
         if not barn.empty:
             return barn.copy()
         return pd.DataFrame()
+
+    @classmethod
+    def get_app_hashes(cls) -> DataFrame:
+        """
+        Fetches all appData hashes and preimages from Prod and Staging DB
+        """
+        app_data_query = open_query("app_hashes.sql")
+        barn, prod = cls._query_both_dbs(app_data_query, app_data_query)
+
+        # We are only interested in unique app data
+        return pd.concat([prod, barn]).drop_duplicates().reset_index(drop=True)
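A usage sketch (assumed, not from this commit): unlike get_batch_rewards, the new method takes no block range, since it re-reads the full app_data table on each run and de-duplicates across the barn (staging) and prod databases.

from src.fetch.orderbook import OrderbookFetcher

# Runs app_hashes.sql against both backend databases, concatenates the
# results, and drops duplicate (hash, preimage) rows.
hashes = OrderbookFetcher.get_app_hashes()
print(hashes.shape)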

src/main.py

Lines changed: 10 additions & 14 deletions
@@ -6,8 +6,8 @@
 from pathlib import Path
 
 from dotenv import load_dotenv
+from dune_client.client import DuneClient
 
-from src.fetch.dune import DuneFetcher
 from src.fetch.orderbook import OrderbookFetcher
 from src.logger import set_log
 from src.models.tables import SyncTable
@@ -50,36 +50,32 @@ def __init__(self) -> None:
     volume_path = Path(os.environ["VOLUME_PATH"])
     args = ScriptArgs()
    aws = AWSClient.new_from_environment()
+    dune = DuneClient(os.environ["DUNE_API_KEY"])
+    orderbook = OrderbookFetcher()
 
     if args.sync_table == SyncTable.APP_DATA:
+        table = os.environ["APP_DATA_TARGET_TABLE"]
+        assert table, "APP_DATA sync needs a APP_DATA_TARGET_TABLE env"
         asyncio.run(
             sync_app_data(
-                aws,
-                dune=DuneFetcher(os.environ["DUNE_API_KEY"]),
-                config=AppDataSyncConfig(
-                    volume_path=volume_path,
-                    missing_files_name="missing_app_hashes.json",
-                    max_retries=int(os.environ.get("APP_DATA_MAX_RETRIES", 3)),
-                    give_up_threshold=int(
-                        os.environ.get("APP_DATA_GIVE_UP_THRESHOLD", 100)
-                    ),
-                ),
-                ipfs_access_key=os.environ["IPFS_ACCESS_KEY"],
+                orderbook,
+                dune=dune,
+                config=AppDataSyncConfig(table),
                 dry_run=args.dry_run,
             )
         )
     elif args.sync_table == SyncTable.ORDER_REWARDS:
         sync_order_rewards(
             aws,
             config=SyncConfig(volume_path),
-            fetcher=OrderbookFetcher(),
+            fetcher=orderbook,
             dry_run=args.dry_run,
         )
     elif args.sync_table == SyncTable.BATCH_REWARDS:
         sync_batch_rewards(
             aws,
             config=SyncConfig(volume_path),
-            fetcher=OrderbookFetcher(),
+            fetcher=orderbook,
             dry_run=args.dry_run,
         )
     else:
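Note that os.environ["APP_DATA_TARGET_TABLE"] already raises KeyError when the variable is missing entirely, so the added assert specifically guards against it being set to an empty string.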

src/sql/app_hash_latest_block.sql

Lines changed: 0 additions & 5 deletions
This file was deleted.

src/sql/app_hashes.sql

Lines changed: 5 additions & 20 deletions
@@ -1,21 +1,6 @@
--- App Hashes: https://dune.com/queries/1610025
--- MIN(first_block_seen) = 12153263
--- Nov 16, 2022: Query takes 4 seconds to run for on full block range
-with
-app_hashes as (
-    select
-        min(call_block_number) first_seen_block,
-        get_json_object(trade, '$.appData') as app_hash
-    from gnosis_protocol_v2_ethereum.GPv2Settlement_call_settle
-        lateral view explode(trades) as trade
-    group by app_hash
-)
-select
-    app_hash,
-    first_seen_block
-from app_hashes
-where first_seen_block > '{{BlockFrom}}'
-and first_seen_block <= '{{BlockTo}}'
+-- Selects all known appData hashes and preimages (as string) from the backend database
 
--- For some additional stats,
--- on this data see https://dune.com/queries/1608286
+SELECT
+    concat('0x',encode(contract_app_data, 'hex')) contract_app_data,
+    encode(full_app_data, 'escape')
+FROM app_data

src/sync/app_data.py

Lines changed: 13 additions & 140 deletions
@@ -1,154 +1,27 @@
 """Main Entry point for app_hash sync"""
 
-from dune_client.file.interface import FileIO
-from dune_client.types import DuneRecord
+from dune_client.client import DuneClient
 
-from src.fetch.dune import DuneFetcher
-from src.fetch.ipfs import Cid
+from src.fetch.orderbook import OrderbookFetcher
 from src.logger import set_log
-from src.models.app_data_content import FoundContent, NotFoundContent
-from src.models.block_range import BlockRange
-from src.models.tables import SyncTable
-from src.post.aws import AWSClient
-from src.sync.common import last_sync_block
-from src.sync.config import SyncConfig, AppDataSyncConfig
-from src.sync.record_handler import RecordHandler
-from src.sync.upload_handler import UploadHandler
+from src.sync.config import AppDataSyncConfig
 
 log = set_log(__name__)
 
 
-SYNC_TABLE = SyncTable.APP_DATA
-
-
-class AppDataHandler(RecordHandler):  # pylint:disable=too-many-instance-attributes
-    """
-    This class is responsible for consuming new dune records and missing values from previous runs
-    it attempts to fetch content for them and filters them into "found" and "not found" as necessary
-    """
-
-    def __init__(  # pylint:disable=too-many-arguments
-        self,
-        file_manager: FileIO,
-        new_rows: list[DuneRecord],
-        block_range: BlockRange,
-        config: SyncConfig,
-        ipfs_access_key: str,
-        missing_file_name: str,
-    ):
-        super().__init__(block_range, SYNC_TABLE, config)
-        self.file_manager = file_manager
-        self.ipfs_access_key = ipfs_access_key
-
-        self._found: list[FoundContent] = []
-        self._not_found: list[NotFoundContent] = []
-
-        self.new_rows = new_rows
-        self.missing_file_name = missing_file_name
-        try:
-            self.missing_values = self.file_manager.load_ndjson(missing_file_name)
-        except FileNotFoundError:
-            self.missing_values = []
-
-    def num_records(self) -> int:
-        assert len(self.new_rows) == 0, (
-            "this function call is not allowed until self.new_rows have been processed! "
-            "call fetch_content_and_filter first"
-        )
-        return len(self._found)
-
-    async def _handle_new_records(self, max_retries: int) -> None:
-        # Drain the dune_results into "found" and "not found" categories
-        self._found, self._not_found = await Cid.fetch_many(
-            self.new_rows, self.ipfs_access_key, max_retries
-        )
-
-    async def _handle_missing_records(
-        self, max_retries: int, give_up_threshold: int
-    ) -> None:
-        found, not_found = await Cid.fetch_many(
-            self.missing_values, self.ipfs_access_key, max_retries
-        )
-        while found:
-            self._found.append(found.pop())
-        while not_found:
-            row = not_found.pop()
-            app_hash, attempts = row.app_hash, row.attempts
-            if attempts > give_up_threshold:
-                log.debug(
-                    f"No content found after {attempts} attempts for {app_hash} assuming NULL."
-                )
-                self._found.append(
-                    FoundContent(
-                        app_hash=app_hash,
-                        first_seen_block=row.first_seen_block,
-                        content={},
-                    )
-                )
-            else:
-                self._not_found.append(row)
-
-    def write_found_content(self) -> None:
-        assert len(self.new_rows) == 0, "Must call _handle_new_records first!"
-        self.file_manager.write_ndjson(
-            data=[x.as_dune_record() for x in self._found], name=self.content_filename
-        )
-        # When not_found is empty, we want to overwrite the file (hence skip_empty=False)
-        # This happens when number of attempts exceeds GIVE_UP_THRESHOLD
-        self.file_manager.write_ndjson(
-            data=[x.as_dune_record() for x in self._not_found],
-            name=self.missing_file_name,
-            skip_empty=False,
-        )
-
-    def write_sync_data(self) -> None:
-        # Only write these if upload was successful.
-        self.file_manager.write_csv(
-            data=[{self.config.sync_column: str(self.block_range.block_to)}],
-            name=self.config.sync_file,
-        )
-
-    async def fetch_content_and_filter(
-        self, max_retries: int, give_up_threshold: int
-    ) -> None:
-        """
-        Run loop fetching app_data for hashes,
-        separates into (found and not found), returning the pair.
-        """
-        await self._handle_new_records(max_retries)
-        log.info(
-            f"Attempting to recover missing {len(self.missing_values)} records from previous run"
-        )
-        await self._handle_missing_records(max_retries, give_up_threshold)
-
-
 async def sync_app_data(
-    aws: AWSClient,
-    dune: DuneFetcher,
+    orderbook: OrderbookFetcher,
+    dune: DuneClient,
     config: AppDataSyncConfig,
-    ipfs_access_key: str,
     dry_run: bool,
 ) -> None:
     """App Data Sync Logic"""
-    block_range = BlockRange(
-        block_from=last_sync_block(
-            aws,
-            table=SYNC_TABLE,
-            genesis_block=12153262,  # First App Hash Block
-        ),
-        block_to=await dune.latest_app_hash_block(),
-    )
-
-    data_handler = AppDataHandler(
-        file_manager=FileIO(config.volume_path / str(SYNC_TABLE)),
-        new_rows=await dune.get_app_hashes(block_range),
-        block_range=block_range,
-        config=config,
-        ipfs_access_key=ipfs_access_key,
-        missing_file_name=config.missing_files_name,
-    )
-    await data_handler.fetch_content_and_filter(
-        max_retries=config.max_retries, give_up_threshold=config.give_up_threshold
-    )
-    UploadHandler(aws, data_handler, table=SYNC_TABLE).write_and_upload_content(dry_run)
+    hashes = orderbook.get_app_hashes()
+    if not dry_run:
+        dune.upload_csv(
+            data=hashes.to_csv(index=False),
+            table_name=config.table,
+            description=config.description,
+            is_private=False,
+        )
     log.info("app_data sync run completed successfully")
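Putting the pieces together, a minimal end-to-end sketch of the new flow; it is not part of this commit, and the environment variable names are taken from .env.sample:

import asyncio
import os

from dune_client.client import DuneClient

from src.fetch.orderbook import OrderbookFetcher
from src.sync.app_data import sync_app_data
from src.sync.config import AppDataSyncConfig

asyncio.run(
    sync_app_data(
        OrderbookFetcher(),
        dune=DuneClient(os.environ["DUNE_API_KEY"]),
        config=AppDataSyncConfig(table=os.environ["APP_DATA_TARGET_TABLE"]),
        # With dry_run=True the hashes are fetched but the Dune upload is skipped.
        dry_run=True,
    )
)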

src/sync/config.py

Lines changed: 8 additions & 8 deletions
@@ -19,12 +19,12 @@ class SyncConfig:
 
 
 @dataclass
-class AppDataSyncConfig(SyncConfig):
-    """Additional data field for app data sync."""
+class AppDataSyncConfig:
+    """Configuration for app data sync."""
 
-    # Maximum number of retries on a single run
-    max_retries: int = 3
-    # Total number of accumulated attempts before we assume no content
-    give_up_threshold: int = 100
-    # Persisted file where we store the missing results and number of attempts.
-    missing_files_name: str = "missing_app_hashes.json"
+    # The name of the table to upload to
+    table: str = "app_data_test"
+    # Description of the table (for creation)
+    description: str = (
+        "Table containing known CoW Protocol appData hashes and their pre-images"
+    )
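A short sketch of the slimmed-down config: the class no longer inherits from SyncConfig, consistent with the new sync no longer touching the file volume, and main.py overrides the default test table via APP_DATA_TARGET_TABLE.

from src.sync.config import AppDataSyncConfig

config = AppDataSyncConfig()  # defaults to table="app_data_test"
prod_config = AppDataSyncConfig(table="app_data_mainnet")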
