Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[metrics] Fix lost connection when metrics query database #3852

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions apps/useradmin/src/useradmin/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@
# limitations under the License.

import logging

from datetime import datetime, timedelta

from django.db import connection
from django.db.utils import OperationalError
from prometheus_client import Gauge

from desktop.lib.metrics import global_registry
from desktop.lib.security_util import get_localhost_name

LOG = logging.getLogger()


def active_users():
  """Return the number of users active in the last hour.

  Counts ``UserProfile`` rows whose ``last_activity`` falls within the past
  hour, whose ``first_login`` is False and whose ``hostname`` is not null.

  If the query raises ``OperationalError``, the database connection is closed
  and reopened and the query is retried once. Any other failure, including a
  failure during the reconnect or the retry, is logged and reported as 0 so
  that this metrics callback never raises.

  Returns:
    int: the number of matching profiles, or 0 when it cannot be determined.
  """
  from useradmin.models import UserProfile

  def _count():
    # Built on every call so the one-hour window is relative to "now" for
    # both the first attempt and the retry.
    return UserProfile.objects.filter(
      last_activity__gt=datetime.now() - timedelta(hours=1),
      first_login=False,
      hostname__isnull=False
    ).count()

  try:
    try:
      return _count()
    except OperationalError as oe:
      # The retry is nested inside the outer try on purpose: an exception
      # raised inside an except handler is not seen by sibling except clauses,
      # so a failed reconnect or retry would otherwise escape the callback.
      LOG.debug('active_users recovering from %s', oe)
      connection.close()
      connection.connect()
      return _count()
  except Exception:
    LOG.exception('Could not get active_users')
    return 0


global_registry().gauge_callback(
name='users.active.total',
callback=active_users,
Expand All @@ -48,15 +61,24 @@ def active_users():
# Expose the same count as a Prometheus gauge: instead of storing a value, the
# gauge calls active_users() whenever its value is read.
prometheus_active_users = Gauge('hue_active_users', 'Hue Active Users in All Instances')
prometheus_active_users.set_function(active_users)


def active_users_per_instance():
  """Return the number of users active on this host in the last hour.

  Counts ``UserProfile`` rows whose ``last_activity`` falls within the past
  hour and whose ``hostname`` equals ``get_localhost_name()``.

  If the query raises ``OperationalError``, the database connection is closed
  and reopened and the query is retried once. Any other failure, including a
  failure during the reconnect or the retry, is logged and reported as 0 so
  that this metrics callback never raises.

  Returns:
    int: the number of matching profiles, or 0 when it cannot be determined.
  """
  from useradmin.models import UserProfile

  def _count():
    # Built on every call so the one-hour window is relative to "now" for
    # both the first attempt and the retry.
    return UserProfile.objects.filter(
      last_activity__gt=datetime.now() - timedelta(hours=1),
      hostname=get_localhost_name()
    ).count()

  try:
    try:
      return _count()
    except OperationalError as oe:
      # The retry is nested inside the outer try on purpose: an exception
      # raised inside an except handler is not seen by sibling except clauses,
      # so a failed reconnect or retry would otherwise escape the callback.
      LOG.debug('active_users_per_instance recovering from %s', oe)
      connection.close()
      connection.connect()
      return _count()
  except Exception:
    LOG.exception('Could not get active_users per instance')
    return 0


global_registry().gauge_callback(
name='users.active',
callback=active_users_per_instance,
Expand Down
43 changes: 32 additions & 11 deletions desktop/core/src/desktop/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,23 @@

from __future__ import absolute_import

from future import standard_library
standard_library.install_aliases()
from builtins import range
import gc
import logging
import multiprocessing
import threading

import multiprocessing
from builtins import range
from datetime import datetime, timedelta
from prometheus_client import Gauge, REGISTRY

from useradmin.models import User
from django.db import connection
from django.db.utils import OperationalError
from future import standard_library
from prometheus_client import REGISTRY, Gauge

from desktop.conf import ENABLE_PROMETHEUS
from desktop.lib.metrics import global_registry
from useradmin.models import User

standard_library.install_aliases()

LOG = logging.getLogger()

Expand All @@ -49,7 +50,9 @@
django_collectors = set()
django_metrics_names = [
name
for name in REGISTRY._names_to_collectors.keys() if name.startswith('django_') and not name.startswith(ALLOWED_DJANGO_PROMETHEUS_METRICS)
for name in REGISTRY._names_to_collectors.keys()
if name.startswith('django_')
and not name.startswith(ALLOWED_DJANGO_PROMETHEUS_METRICS)
]

for metric_name in django_metrics_names:
Expand Down Expand Up @@ -141,14 +144,21 @@

# ------------------------------------------------------------------------------


def user_count():
  """Return the total number of user accounts.

  If the count raises ``OperationalError``, the database connection is closed
  and reopened and the count is retried once. Any other failure, including a
  failure during the reconnect or the retry, is logged and reported as 0 so
  that this metrics callback never raises.

  Returns:
    int: ``User.objects.count()``, or 0 when it cannot be determined.
  """
  try:
    try:
      return User.objects.count()
    except OperationalError as oe:
      # The retry is nested inside the outer try on purpose: an exception
      # raised inside an except handler is not seen by sibling except clauses,
      # so a failed reconnect or retry would otherwise escape the callback.
      LOG.debug('user_count recovering from %s', oe)
      connection.close()
      connection.connect()
      return User.objects.count()
  except Exception:
    LOG.exception('Metrics: Failed to get number of user accounts')
    return 0


user_count = global_registry().gauge_callback(
name='users',
callback=user_count,
Expand Down Expand Up @@ -188,19 +198,30 @@ def user_count():

# ------------------------------------------------------------------------------


def num_of_queries():
  """Return the number of history queries modified in the last 10 minutes.

  Counts ``Document2`` rows whose ``type`` starts with ``query-``
  (case-insensitive), whose ``is_history`` is True and whose
  ``last_modified`` falls within the past 10 minutes.

  If the query raises ``OperationalError``, the database connection is closed
  and reopened and the query is retried once. Any other failure, including a
  failure during the reconnect or the retry, is logged and reported as 0 so
  that this metrics callback never raises.

  Returns:
    int: the number of matching documents, or 0 when it cannot be determined.
  """
  from desktop.models import Document2  # Avoid circular dependency

  def _count():
    # Built on every call so the 10-minute window is relative to "now" for
    # both the first attempt and the retry.
    return Document2.objects.filter(
      type__istartswith='query-',
      is_history=True,
      last_modified__gt=datetime.now() - timedelta(minutes=10)
    ).count()

  try:
    try:
      return _count()
    except OperationalError as oe:
      # The retry is nested inside the outer try on purpose: an exception
      # raised inside an except handler is not seen by sibling except clauses,
      # so a failed reconnect or retry would otherwise escape the callback.
      LOG.debug('num_of_queries recovering from %s', oe)
      connection.close()
      connection.connect()
      return _count()
  except Exception:
    LOG.exception('Could not get num_of_queries')
    return 0


global_registry().gauge_callback(
name='queries.number',
callback=num_of_queries,
Expand Down