From 4a016e425502f10fbc1c2569054065084017e649 Mon Sep 17 00:00:00 2001
From: mvdbeek <m.vandenbeek@gmail.com>
Date: Thu, 17 Aug 2023 12:45:18 +0200
Subject: [PATCH] Avoid garbage collections on preloaded objects

For details see https://github.com/benoitc/gunicorn/issues/1640 and
https://instagram-engineering.com/copy-on-write-friendly-python-garbage-collection-ad6ed5233ddf

I think this is the most subtle change to test, but I believe it is
working.

I started a gunicorn instance with 4 workers:

```
GALAXY_CONFIG_FILE="config/galaxy.yml" gunicorn 'galaxy.webapps.galaxy.fast_factory:factory()' -k galaxy.webapps.galaxy.workers.Worker --pythonpath lib --bind=localhost:8080 --config lib/galaxy/web_stack/gunicorn_config.py --preload -w 4
```

Then I ran the following script against that instance:

```
import threading
import requests

def req():
    for i in range(10000):
        requests.get('http://localhost:8080/history/current_history_json')

for i in range(10):
    threading.Thread(target=req).start()
```

I see that memory consumption increases much more *during* requests
without this commit. It eventually decreases again, but I think not to
the same baseline level (hard to tell without more elaborate testing). I
attribute the higher memory load during requests to the garbage
collector having to inspect more objects, so each collection takes
longer to run and garbage is therefore not reclaimed as quickly. I'm
really not sure, though; I think we should just roll this out and see.
It should be fairly obvious from the Grafana dashboards.
---
 lib/galaxy/web_stack/gunicorn_config.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/web_stack/gunicorn_config.py b/lib/galaxy/web_stack/gunicorn_config.py
index ec63f9e48569..86e81ccc9a15 100644
--- a/lib/galaxy/web_stack/gunicorn_config.py
+++ b/lib/galaxy/web_stack/gunicorn_config.py
@@ -1,10 +1,15 @@
 """
 Gunicorn config file based on https://gist.github.com/hynek/ba655c8756924a5febc5285c712a7946
 """
+import gc
 import os
 import sys
 
 
+def is_preload_app():
+    return "--preload" in os.environ.get("GUNICORN_CMD_ARGS", "") or "--preload" in sys.argv
+
+
 def on_starting(server):
     """
     Attach a set of IDs that can be temporarily re-used.
@@ -45,6 +50,13 @@ def on_reload(server):
     server._worker_id_overload = set(range(1, server.cfg.workers + 1))
 
 
+def when_ready(server):
+    # freeze objects after preloading app
+    if is_preload_app():
+        gc.freeze()
+        print("Objects frozen in perm gen: ", gc.get_freeze_count())
+
+
 def pre_fork(server, worker):
     """
     Attach the next free worker_id before forking off.
@@ -58,7 +70,8 @@ def post_fork(server, worker):
     """
     os.environ["GUNICORN_WORKER_ID"] = str(worker._worker_id)
     os.environ["GUNICORN_LISTENERS"] = ",".join(str(bind) for bind in server.LISTENERS)
-    if "--preload" in os.environ.get("GUNICORN_CMD_ARGS", "") or "--preload" in sys.argv:
+    if is_preload_app():
+        gc.enable()
         from galaxy.web_stack import GunicornApplicationStack
 
         GunicornApplicationStack.late_postfork_event.set()