From 3ec4143a308c0e4458c03e49a8667dc93e52b04a Mon Sep 17 00:00:00 2001 From: qcdll Date: Thu, 6 Dec 2018 15:12:30 -0800 Subject: [PATCH 1/3] use tracemalloc and vmprof to track mem usage --- quarkchain/cluster/master.py | 29 +++++++++++++++++++++++++++++ requirements.txt | 1 + 2 files changed, 30 insertions(+) diff --git a/quarkchain/cluster/master.py b/quarkchain/cluster/master.py index 53c64c50b..58d5eb2d8 100644 --- a/quarkchain/cluster/master.py +++ b/quarkchain/cluster/master.py @@ -1529,6 +1529,33 @@ def parse_args(): return env +async def tracemalloc_snapshot(env): + import tracemalloc + tracemalloc.start(25) + snapshot_old = None + + # vmprof *** + import vmprof + import os + f=os.open("./vmprofout", os.O_RDWR|os.O_CREAT) + vmprof.enable(f, period=0.5, memory=True) # 2 Hz, see https://github.com/blue-yonder/vmprof-viewer-client + + while True: + # tracemalloc *** + ts = int(time.time()) + snapshot_new = tracemalloc.take_snapshot() + fn = "./mem/{}".format(ts) + snapshot_new.dump(fn) + Logger.warning("dumped mem snapshot to {}".format(fn)) + + if snapshot_old: + diff = snapshot_new.compare_to(snapshot_old, 'lineno') + Logger.warning("[ Top 10 differences ]") + for stat in diff[:10]: + Logger.warning(stat) + snapshot_old = snapshot_new + await asyncio.sleep(60) + def main(): from quarkchain.cluster.jsonrpc import JSONRPCServer @@ -1555,6 +1582,8 @@ def main(): public_json_rpc_server = JSONRPCServer.start_public_server(env, master) private_json_rpc_server = JSONRPCServer.start_private_server(env, master) + asyncio.ensure_future(tracemalloc_snapshot(env)) + try: loop.run_until_complete(master.shutdown_future) except KeyboardInterrupt: diff --git a/requirements.txt b/requirements.txt index 01aaba5a5..17e80447f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,6 +18,7 @@ eth-bloom==1.0.0 pyethash>=0.1.27,<1.0.0 py_ecc==1.4.3 eth-hash[pycryptodome]==0.1.4 +vmprof==0.4.12 # p2p pytest>=3.6,<3.7 From c60509fddc79447e2e5c6e11ce9f887f86103cc1 Mon Sep 17 00:00:00 2001 From: qcdll Date: Mon, 10 Dec 2018 13:01:21 -0800 Subject: [PATCH 2/3] add memory profiling for slave --- quarkchain/cluster/slave.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/quarkchain/cluster/slave.py b/quarkchain/cluster/slave.py index e6a6e3191..b38e0eb2e 100644 --- a/quarkchain/cluster/slave.py +++ b/quarkchain/cluster/slave.py @@ -788,6 +788,38 @@ async def connect_to_slave(self, slave_info: SlaveInfo) -> str: return "" +async def tracemalloc_snapshot(slave_id): + import tracemalloc + import time + tracemalloc.start(25) + snapshot_old = None + + # vmprof *** + import vmprof + import os + f=os.open("./vmprofout{}".format(slave_id), os.O_RDWR|os.O_CREAT) + vmprof.enable(f, period=0.5, memory=True) # 2 Hz, see https://github.com/blue-yonder/vmprof-viewer-client + + import pathlib + pathlib.Path("./mem{}".format(slave_id)).mkdir(parents=True, exist_ok=True) + + while True: + # tracemalloc *** + ts = int(time.time()) + snapshot_new = tracemalloc.take_snapshot() + fn = "./mem{}/{}".format(slave_id, ts) + snapshot_new.dump(fn) + Logger.warning("dumped mem snapshot to {}".format(fn)) + + if snapshot_old: + diff = snapshot_new.compare_to(snapshot_old, 'lineno') + Logger.warning("[ Top 10 differences ]") + for stat in diff[:10]: + Logger.warning(stat) + snapshot_old = snapshot_new + await asyncio.sleep(60) + + class SlaveServer: """ Slave node in a cluster """ @@ -886,6 +918,7 @@ async def __start_server(self): ) def start(self): + self.loop.create_task(tracemalloc_snapshot(self.id.decode('ascii'))) self.loop.create_task(self.__start_server()) def start_and_loop(self): From 07283f065eda49915b7224f7869427d439fad960 Mon Sep 17 00:00:00 2001 From: qcdll Date: Wed, 12 Dec 2018 18:42:50 -0800 Subject: [PATCH 3/3] add objgraph ref count; also dumps a few dict size to view growth --- quarkchain/cluster/slave.py | 22 ++++++++++++++++++++-- requirements.txt | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/quarkchain/cluster/slave.py b/quarkchain/cluster/slave.py index b38e0eb2e..ca76be7d4 100644 --- a/quarkchain/cluster/slave.py +++ b/quarkchain/cluster/slave.py @@ -788,7 +788,8 @@ async def connect_to_slave(self, slave_info: SlaveInfo) -> str: return "" -async def tracemalloc_snapshot(slave_id): +async def tracemalloc_snapshot(slave): + slave_id = slave.id.decode('ascii') import tracemalloc import time tracemalloc.start(25) @@ -803,6 +804,8 @@ async def tracemalloc_snapshot(slave_id): import pathlib pathlib.Path("./mem{}".format(slave_id)).mkdir(parents=True, exist_ok=True) + import objgraph + while True: # tracemalloc *** ts = int(time.time()) @@ -817,6 +820,21 @@ async def tracemalloc_snapshot(slave_id): for stat in diff[:10]: Logger.warning(stat) snapshot_old = snapshot_new + Logger.warning(objgraph.growth()) + Logger.warning(objgraph.most_common_types(limit=20)) + Logger.warning("slave.add_block_futures={}".format(len(slave.add_block_futures))) + for branch, shard in slave.shards.items(): + Logger.warning("shard.state.new_block_pool={}".format(len(shard.state.new_block_pool))) + + Logger.warning("shard.add_block_futures={}".format(len(shard.add_block_futures))) + + Logger.warning("shard.state.db.m_header_pool={}".format(len(shard.state.db.m_header_pool))) + Logger.warning("shard.state.db.m_meta_pool={}".format(len(shard.state.db.m_meta_pool))) + Logger.warning("shard.state.db.x_shard_set={}".format(len(shard.state.db.x_shard_set))) + Logger.warning("shard.state.db.r_header_pool={}".format(len(shard.state.db.r_header_pool))) + Logger.warning("shard.state.db.r_minor_header_pool={}".format(len(shard.state.db.r_minor_header_pool))) + Logger.warning("shard.state.db.height_to_minor_block_hashes={}".format(len(shard.state.db.height_to_minor_block_hashes))) + await asyncio.sleep(60) @@ -918,7 +936,7 @@ async def __start_server(self): ) def start(self): - self.loop.create_task(tracemalloc_snapshot(self.id.decode('ascii'))) + self.loop.create_task(tracemalloc_snapshot(self)) self.loop.create_task(self.__start_server()) def start_and_loop(self): diff --git a/requirements.txt b/requirements.txt index 17e80447f..f896c91e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,7 @@ pyethash>=0.1.27,<1.0.0 py_ecc==1.4.3 eth-hash[pycryptodome]==0.1.4 vmprof==0.4.12 +objgraph==3.4.0 # p2p pytest>=3.6,<3.7