Skip to content

Commit

Permalink
Merge pull request MolSSI#831 from MolSSI/rm_access
Browse files Browse the repository at this point in the history
Add internal job to delete old access log entries
  • Loading branch information
bennybp authored May 7, 2024
2 parents 56b9189 + 5d5bdb5 commit 7260a5a
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 2 deletions.
1 change: 1 addition & 0 deletions qcarchivetesting/qcarchivetesting/testing_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def __init__(

qcf_config["database"] = {"pool_size": 0}
qcf_config["log_access"] = log_access
qcf_config["access_log_keep"] = 1

if ip_tests_enabled:
qcf_config["geoip2_dir"] = geoip_path
Expand Down
40 changes: 40 additions & 0 deletions qcfractal/qcfractal/components/serverinfo/socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def __init__(self, root_socket: SQLAlchemySocket):

# Set up access logging
self._access_log_enabled = root_socket.qcf_config.log_access
self._delete_access_log_frequency = 60 * 60 * 24 # one day
self._access_log_keep = root_socket.qcf_config.access_log_keep
self._geoip2_enabled = geoip2_found and self._access_log_enabled

# MOTD contents
Expand All @@ -78,6 +80,31 @@ def __init__(self, root_socket: SQLAlchemySocket):
# Updating the access log with geolocation info. Don't do it right at startup
self.add_internal_job_geolocate_accesses(self._geolocate_accesses_frequency)

# Deleting old access logs. Wait a bit after startup
self.add_internal_job_delete_accesses(2.0)

def add_internal_job_delete_accesses(self, delay: float, *, session: Optional[Session] = None):
    """
    Schedules the internal job that purges old access log entries.

    The job re-schedules itself (via ``after_function``) so that deletion
    runs periodically at ``self._delete_access_log_frequency`` intervals.

    Parameters
    ----------
    delay
        Number of seconds to wait before the job first runs
    session
        An existing SQLAlchemy session to use. If None, one will be created
    """

    # Skip scheduling entirely when access logging is off or retention is unlimited
    if not (self._access_log_enabled and self._access_log_keep > 0):
        return

    run_at = now_at_utc() + timedelta(seconds=delay)

    with self.root_socket.optional_session(session) as session:
        self.root_socket.internal_jobs.add(
            "delete_access_log",
            run_at,
            "serverinfo.delete_old_access_logs",
            {},
            user_id=None,
            unique_name=True,
            # Re-adds itself so the deletion repeats (once per day)
            after_function="serverinfo.add_internal_job_delete_accesses",
            after_function_kwargs={"delay": self._delete_access_log_frequency},
            session=session,
        )

def add_internal_job_update_geoip2(self, delay: float, *, session: Optional[Session] = None):
"""
Adds an internal job to update the geoip database
Expand Down Expand Up @@ -275,6 +302,19 @@ def _lookup_ip(ip_address: str) -> Dict[str, Any]:

session.commit()

def delete_old_access_logs(self, session: Session, job_progress: JobProgress) -> None:
    """
    Deletes access log entries older than the configured retention window.

    The retention window (in days) comes from the ``access_log_keep``
    configuration option.
    """

    # The scheduling code already checks this, but the configuration may have
    # changed since the job was added - so verify again before deleting
    if not (self._access_log_enabled and self._access_log_keep > 0):
        return

    before = now_at_utc() - timedelta(days=self._access_log_keep)
    num_deleted = self.delete_access_logs(before, session=session)
    self._logger.info(f"Deleted {num_deleted} access logs before {before}")

def _load_motd(self, *, session: Optional[Session] = None):
stmt = select(MessageOfTheDayORM).order_by(MessageOfTheDayORM.id)
with self.root_socket.optional_session(session, True) as session:
Expand Down
5 changes: 3 additions & 2 deletions qcfractal/qcfractal/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ class FractalConfig(ConfigBase):

# Access logging
log_access: bool = Field(False, description="Store API access in the database")
access_log_keep: int = Field(0, description="Number of days of access logs to keep. 0 means keep all")

# maxmind_account_id: Optional[int] = Field(None, description="Account ID for MaxMind GeoIP2 service")
maxmind_license_key: Optional[str] = Field(
Expand Down Expand Up @@ -416,8 +417,8 @@ def _root_validator(cls, values):
values.pop("statistics_frequency")
logger.warning("The 'statistics_frequency' setting is no longer and is now ignored")

if "get_server_stats" in values['api_limits']:
values['api_limits'].pop("get_server_stats")
if "get_server_stats" in values["api_limits"]:
values["api_limits"].pop("get_server_stats")
logger.warning("The 'get_server_stats' setting in 'api_limits' is no longer and is now ignored")

return values
Expand Down
49 changes: 49 additions & 0 deletions qcfractal/qcfractal/test_periodics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import time
from datetime import timedelta
from typing import TYPE_CHECKING

import pytest
Expand All @@ -9,6 +10,8 @@
from qcfractal.components.torsiondrive.testing_helpers import submit_test_data as submit_td_test_data
from qcportal.managers import ManagerName, ManagerStatusEnum
from qcportal.record_models import RecordStatusEnum
from qcportal.serverinfo.models import AccessLogQueryFilters
from qcportal.utils import now_at_utc

if TYPE_CHECKING:
from qcarchivetesting.testing_classes import QCATestingSnowflake
Expand Down Expand Up @@ -70,3 +73,49 @@ def test_periodics_service_iteration(snowflake: QCATestingSnowflake):
rec = storage_socket.records.get([id_1, id_2])
assert rec[0]["status"] == RecordStatusEnum.running
assert rec[1]["status"] == RecordStatusEnum.running


def test_periodics_delete_old_access_logs(secure_snowflake: QCATestingSnowflake):
    # Verifies that the periodic internal job removes only access log entries
    # older than the configured retention period (1 day in the test config)
    storage_socket = secure_snowflake.get_storage_socket()

    read_id = storage_socket.users.get("read_user")["id"]

    # Fields shared by both manufactured access log entries
    common_fields = {
        "module": "api",
        "user_agent": "Fake user agent",
        "user_id": read_id,
    }

    # An "old" entry (2 days ago) that should be deleted by the job
    stale_access = dict(
        common_fields,
        method="GET",
        full_uri="/api/v1/datasets",
        ip_address="127.0.0.1",
        request_duration=0.24,
        request_bytes=123,
        response_bytes=18273,
        timestamp=now_at_utc() - timedelta(days=2),
    )

    # A current entry that should survive the purge
    fresh_access = dict(
        common_fields,
        method="POST",
        full_uri="/api/v1/records",
        ip_address="127.0.0.2",
        request_duration=0.45,
        request_bytes=456,
        response_bytes=12671,
        timestamp=now_at_utc(),
    )

    storage_socket.serverinfo.save_access(stale_access)
    storage_socket.serverinfo.save_access(fresh_access)

    n_access = len(storage_socket.serverinfo.query_access_log(AccessLogQueryFilters()))

    secure_snowflake.start_job_runner()
    # There's a set delay at startup before we delete old logs
    time.sleep(5.0)

    # Only the stale (manually backdated) entry should have been removed
    remaining = storage_socket.serverinfo.query_access_log(AccessLogQueryFilters())
    assert len(remaining) == n_access - 1

0 comments on commit 7260a5a

Please sign in to comment.