diff --git a/quarkchain/cluster/master.py b/quarkchain/cluster/master.py
index 53c64c50b..58d5eb2d8 100644
--- a/quarkchain/cluster/master.py
+++ b/quarkchain/cluster/master.py
@@ -1529,6 +1529,51 @@ def parse_args():
     return env
 
 
+async def tracemalloc_snapshot(env):
+    """Periodically dump tracemalloc snapshots and log heap growth (debug aid).
+
+    Starts tracemalloc with 25-frame tracebacks and vmprof memory sampling,
+    then every 60 seconds writes a snapshot to ./mem/<unix-ts> and logs the
+    first ten per-line differences against the previous snapshot.  The loop
+    never returns; ``env`` is not used.
+    """
+    import os
+    import pathlib
+    import time
+    import tracemalloc
+
+    import vmprof
+
+    tracemalloc.start(25)
+    snapshot_old = None
+
+    # vmprof ***
+    # NOTE(review): the fd is never closed and vmprof is never disabled, so
+    # profiling lasts for the life of the process -- confirm this is intended.
+    f = os.open("./vmprofout", os.O_RDWR | os.O_CREAT)
+    vmprof.enable(f, period=0.5, memory=True)  # 2 Hz, see https://github.com/blue-yonder/vmprof-viewer-client
+
+    # Snapshot.dump() does not create missing directories; without this the
+    # first dump raises and the task dies.
+    pathlib.Path("./mem").mkdir(parents=True, exist_ok=True)
+
+    while True:
+        # tracemalloc ***
+        ts = int(time.time())
+        snapshot_new = tracemalloc.take_snapshot()
+        fn = "./mem/{}".format(ts)
+        snapshot_new.dump(fn)
+        Logger.warning("dumped mem snapshot to {}".format(fn))
+
+        if snapshot_old is not None:
+            diff = snapshot_new.compare_to(snapshot_old, "lineno")
+            Logger.warning("[ Top 10 differences ]")
+            for stat in diff[:10]:
+                Logger.warning(stat)
+        snapshot_old = snapshot_new
+        await asyncio.sleep(60)
+
+
 def main():
     from quarkchain.cluster.jsonrpc import JSONRPCServer
 
@@ -1555,6 +1600,8 @@ def main():
     public_json_rpc_server = JSONRPCServer.start_public_server(env, master)
     private_json_rpc_server = JSONRPCServer.start_private_server(env, master)
 
+    asyncio.ensure_future(tracemalloc_snapshot(env))
+
     try:
         loop.run_until_complete(master.shutdown_future)
     except KeyboardInterrupt:
diff --git a/quarkchain/cluster/slave.py b/quarkchain/cluster/slave.py
index e6a6e3191..ca76be7d4 100644
--- a/quarkchain/cluster/slave.py
+++ b/quarkchain/cluster/slave.py
@@ -788,6 +788,68 @@ async def connect_to_slave(self, slave_info: SlaveInfo) -> str:
         return ""
 
 
+async def tracemalloc_snapshot(slave):
+    """Periodically dump tracemalloc snapshots and log heap growth (debug aid).
+
+    Starts tracemalloc with 25-frame tracebacks and vmprof memory sampling,
+    then every 60 seconds writes a snapshot to ./mem<slave-id>/<unix-ts> and
+    logs the first ten per-line differences against the previous snapshot,
+    objgraph type statistics, and the lengths of several collections held by
+    the slave and by each of its shards.  The loop never returns.
+    """
+    import os
+    import pathlib
+    import time
+    import tracemalloc
+
+    import objgraph
+    import vmprof
+
+    slave_id = slave.id.decode("ascii")
+    tracemalloc.start(25)
+    snapshot_old = None
+
+    # vmprof ***
+    # NOTE(review): the fd is never closed and vmprof is never disabled, so
+    # profiling lasts for the life of the process -- confirm this is intended.
+    f = os.open("./vmprofout{}".format(slave_id), os.O_RDWR | os.O_CREAT)
+    vmprof.enable(f, period=0.5, memory=True)  # 2 Hz, see https://github.com/blue-yonder/vmprof-viewer-client
+
+    # Snapshot.dump() does not create missing directories.
+    pathlib.Path("./mem{}".format(slave_id)).mkdir(parents=True, exist_ok=True)
+
+    while True:
+        # tracemalloc ***
+        ts = int(time.time())
+        snapshot_new = tracemalloc.take_snapshot()
+        fn = "./mem{}/{}".format(slave_id, ts)
+        snapshot_new.dump(fn)
+        Logger.warning("dumped mem snapshot to {}".format(fn))
+
+        if snapshot_old is not None:
+            diff = snapshot_new.compare_to(snapshot_old, "lineno")
+            Logger.warning("[ Top 10 differences ]")
+            for stat in diff[:10]:
+                Logger.warning(stat)
+        snapshot_old = snapshot_new
+
+        Logger.warning(objgraph.growth())
+        Logger.warning(objgraph.most_common_types(limit=20))
+        Logger.warning("slave.add_block_futures={}".format(len(slave.add_block_futures)))
+        for shard in slave.shards.values():
+            Logger.warning("shard.state.new_block_pool={}".format(len(shard.state.new_block_pool)))
+            Logger.warning("shard.add_block_futures={}".format(len(shard.add_block_futures)))
+            db = shard.state.db
+            Logger.warning("shard.state.db.m_header_pool={}".format(len(db.m_header_pool)))
+            Logger.warning("shard.state.db.m_meta_pool={}".format(len(db.m_meta_pool)))
+            Logger.warning("shard.state.db.x_shard_set={}".format(len(db.x_shard_set)))
+            Logger.warning("shard.state.db.r_header_pool={}".format(len(db.r_header_pool)))
+            Logger.warning("shard.state.db.r_minor_header_pool={}".format(len(db.r_minor_header_pool)))
+            Logger.warning("shard.state.db.height_to_minor_block_hashes={}".format(len(db.height_to_minor_block_hashes)))
+
+        await asyncio.sleep(60)
+
+
 class SlaveServer:
     """ Slave node in a cluster """
 
@@ -886,6 +948,7 @@ async def __start_server(self):
         )
 
     def start(self):
+        self.loop.create_task(tracemalloc_snapshot(self))
         self.loop.create_task(self.__start_server())
 
     def start_and_loop(self):
diff --git a/requirements.txt b/requirements.txt
index 01aaba5a5..f896c91e2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,6 +18,8 @@ eth-bloom==1.0.0
 pyethash>=0.1.27,<1.0.0
 py_ecc==1.4.3
 eth-hash[pycryptodome]==0.1.4
+vmprof==0.4.12
+objgraph==3.4.0
 
 # p2p
 pytest>=3.6,<3.7