From 4a016e425502f10fbc1c2569054065084017e649 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Thu, 17 Aug 2023 12:45:18 +0200
Subject: [PATCH] Avoid garbage collections on preloaded objects

For details see https://github.com/benoitc/gunicorn/issues/1640 and
https://instagram-engineering.com/copy-on-write-friendly-python-garbage-collection-ad6ed5233ddf

I think this is the most subtle change to test. I believe this is working.
I started a gunicorn instance with 4 workers:

```
GALAXY_CONFIG_FILE="config/galaxy.yml" gunicorn 'galaxy.webapps.galaxy.fast_factory:factory()' -k galaxy.webapps.galaxy.workers.Worker --pythonpath lib --bind=localhost:8080 --config lib/galaxy/web_stack/gunicorn_config.py --preload -w 4
```

Then I use the following script against that instance:

```
import threading
import requests

def req():
    for i in range(10000):
        requests.get('http://localhost:8080/history/current_history_json')

for i in range(10):
    threading.Thread(target=req).start()
```

I see that the memory consumption increases much more *during* requests
without this commit. It eventually decreases again, but I think not to the
same baseline level (hard to tell without more elaborate testing).
I attribute the higher memory load during requests to the garbage collection
having to inspect more objects, taking more time to run and therefore not
running as fast? I'm really not sure; I think we should just roll this out
and see, it should be fairly obvious from the grafana dashboards.
---
 lib/galaxy/web_stack/gunicorn_config.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/web_stack/gunicorn_config.py b/lib/galaxy/web_stack/gunicorn_config.py
index ec63f9e48569..86e81ccc9a15 100644
--- a/lib/galaxy/web_stack/gunicorn_config.py
+++ b/lib/galaxy/web_stack/gunicorn_config.py
@@ -1,10 +1,21 @@
 """
 Gunicorn config file based on https://gist.github.com/hynek/ba655c8756924a5febc5285c712a7946
 """
+import gc
 import os
 import sys
 
 
+def is_preload_app():
+    """
+    Return True if gunicorn was started with the ``--preload`` flag.
+
+    The flag is searched for in the GUNICORN_CMD_ARGS environment variable
+    and in the command line arguments (sys.argv).
+    """
+    return "--preload" in os.environ.get("GUNICORN_CMD_ARGS", "") or "--preload" in sys.argv
+
+
 def on_starting(server):
     """
     Attach a set of IDs that can be temporarily re-used.
@@ -45,6 +56,18 @@ def on_reload(server):
     server._worker_id_overload = set(range(1, server.cfg.workers + 1))
 
 
+def when_ready(server):
+    """
+    Freeze the objects created while preloading the app.
+
+    Frozen objects are moved to the permanent generation and ignored by
+    all future garbage collections (see gc.freeze).
+    """
+    if is_preload_app():
+        gc.freeze()
+        server.log.info("Objects frozen in perm gen: %s", gc.get_freeze_count())
+
+
 def pre_fork(server, worker):
     """
     Attach the next free worker_id before forking off.
@@ -58,7 +81,8 @@ def post_fork(server, worker):
     """
     os.environ["GUNICORN_WORKER_ID"] = str(worker._worker_id)
     os.environ["GUNICORN_LISTENERS"] = ",".join(str(bind) for bind in server.LISTENERS)
-    if "--preload" in os.environ.get("GUNICORN_CMD_ARGS", "") or "--preload" in sys.argv:
+    if is_preload_app():
+        gc.enable()
         from galaxy.web_stack import GunicornApplicationStack
 
         GunicornApplicationStack.late_postfork_event.set()