Skip to content

Commit

Permalink
Optimize ERC20 addresses loading
Browse files Browse the repository at this point in the history
- Improve RAM utilization using a database iterator
  • Loading branch information
Uxio0 committed Nov 14, 2024
1 parent 3635d19 commit b1f32b0
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 6 deletions.
3 changes: 3 additions & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,9 @@
ETH_REORG_BLOCKS = env.int(
"ETH_REORG_BLOCKS", default=200 if ETH_L2_NETWORK else 10
) # Number of blocks from the current block number needed to consider a block valid/stable
ETH_ERC20_LOAD_ADDRESSES_CHUNK_SIZE = env.int(
"ETH_ERC20_LOAD_ADDRESSES_CHUNK_SIZE", default=500_000
) # `chunk_size` given to the database iterator that loads Safe addresses for the ERC20 indexer (trades memory for speed)

# Events processing
# ------------------------------------------------------------------------------
Expand Down
39 changes: 33 additions & 6 deletions safe_transaction_service/history/indexers/erc20_events_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ def __new__(cls):
def get_new_instance(cls) -> "Erc20EventsIndexer":
from django.conf import settings

return Erc20EventsIndexer(EthereumClient(settings.ETHEREUM_NODE_URL))
return Erc20EventsIndexer(
EthereumClient(settings.ETHEREUM_NODE_URL),
eth_erc20_load_addresses_chunk_size=settings.ETH_ERC20_LOAD_ADDRESSES_CHUNK_SIZE,
)

@classmethod
def del_singleton(cls):
Expand All @@ -65,6 +68,9 @@ def __init__(self, *args, **kwargs):

self._processed_element_cache = FixedSizeDict(maxlen=40_000) # Around 3MiB
self.addresses_cache: Optional[AddressesCache] = None
self.eth_erc20_load_addresses_chunk_size = kwargs.get(
"eth_erc20_load_addresses_chunk_size", 500_000
)

@property
def contract_events(self) -> List[ContractEvent]:
Expand Down Expand Up @@ -252,15 +258,36 @@ def get_almost_updated_addresses(
addresses = set()
last_checked = None

for created, address in query.values_list("created", "address").order_by(
"created"
"""
Chunk size optimization
-----------------------
Testing with 3M Safes
2k - 90 seconds
100k - 73 seconds
500k - 60 seconds
1M - 60 seconds
3M - 60 seconds
Testing with 15M Safes
50k - 854 seconds
500k - 460 seconds
750k - 415 seconds
1M - 430 seconds
2M - 398 seconds
3M - 407 seconds
500k sounds like a good compromise memory/speed wise
"""
created: Optional[datetime.datetime] = None
for created, address in (
query.values_list("created", "address")
.order_by("created")
.iterator(chunk_size=self.eth_erc20_load_addresses_chunk_size)
):
addresses.add(address)

try:
if created:
last_checked = created
except NameError: # database query empty, `created` not defined
pass

if last_checked:
# Don't use caching if list is empty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def __init__(
updated_blocks_behind: int = 20,
query_chunk_size: Optional[int] = 1_000,
block_auto_process_limit: bool = True,
**kwargs,
):
"""
:param ethereum_client:
Expand Down

0 comments on commit b1f32b0

Please sign in to comment.