Skip to content

Commit

Permalink
[DOP-22268] - move spark logs to json format (#172)
Browse files Browse the repository at this point in the history
* [DOP-22268] - move spark logs to json format

* [DOP-22268] - remove CORRELATION_CELERY_HEADER_ID setting
  • Loading branch information
maxim-lixakov authored Dec 17, 2024
1 parent 7e4d1bc commit d8bc63c
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 31 deletions.
2 changes: 1 addition & 1 deletion docs/worker/log_url.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ The configuration parameter is:
SYNCMASTER__SERVER__LOG_URL_TEMPLATE=https://grafana.example.com?correlation_id={{ correlation_id }}&run_id={{ run.id }}
You can search for each run by either its correlation id ``CORRELATION_CELERY_HEADER_ID`` in http headers or the ``Run.Id``.
You can search for each run by either its correlation ID (passed in the ``x-request-id`` HTTP header) or its ``Run.Id``.

9 changes: 1 addition & 8 deletions syncmaster/worker/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,9 @@ class WorkerSettings(BaseSettings):
.. code-block:: bash
SYNCMASTER__WORKER__CORRELATION_CELERY_HEADER_ID=CORRELATION_ID_CELERY
SYNCMASTER__WORKER__CREATE_SPARK_SESSION_FUNCTION=custom_syncmaster.spark.get_worker_spark_session
"""

CORRELATION_CELERY_HEADER_ID: str = Field(
"CORRELATION_CELERY_HEADER_ID",
description="Header ID for correlation in Celery",
)
CREATE_SPARK_SESSION_FUNCTION: ImportString = Field(
"syncmaster.worker.spark.get_worker_spark_session",
description="Function to create Spark session for worker",
Expand All @@ -51,9 +47,6 @@ class WorkerAppSettings(BaseSettings):
.. code-block:: bash
# Example of setting a CORRELATION_CELERY_HEADER_ID via environment variable
SYNCMASTER__WORKER__CORRELATION_CELERY_HEADER_ID=CORRELATION_ID_CELERY
# Example of setting a database URL via environment variable
SYNCMASTER__DATABASE__URL=postgresql+asyncpg://user:password@localhost:5432/dbname
Expand Down
26 changes: 4 additions & 22 deletions syncmaster/worker/transfer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# SPDX-FileCopyrightText: 2023-2024 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
import logging
from datetime import datetime, timezone

import onetl
from asgi_correlation_id import correlation_id
from asgi_correlation_id.extensions.celery import load_correlation_ids
from celery import Celery
from celery.signals import after_setup_logger, before_task_publish, task_prerun
from celery.signals import after_setup_task_logger
from celery.utils.log import get_task_logger
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload
Expand All @@ -20,14 +18,13 @@
from syncmaster.worker.settings import WorkerAppSettings

logger = get_task_logger(__name__)
load_correlation_ids()

WORKER_SETTINGS = WorkerAppSettings()
CORRELATION_CELERY_HEADER_ID = WORKER_SETTINGS.worker.CORRELATION_CELERY_HEADER_ID


@celery.task(name="run_transfer_task", bind=True, track_started=True)
def run_transfer_task(self: Celery, run_id: int) -> None:
onetl.log.setup_logging(level=logging.INFO)
with Session(self.engine) as session:
run_transfer(
session=session,
Expand Down Expand Up @@ -83,21 +80,6 @@ def run_transfer(session: Session, run_id: int, settings: WorkerAppSettings):
session.commit()


@after_setup_logger.connect
@after_setup_task_logger.connect
def setup_loggers(*args, **kwargs):
setup_logging(WorkerAppSettings().logging.get_log_config_path())


@before_task_publish.connect()
def transfer_correlation_id(headers, *args, **kwargs) -> None:
# This is called before task.delay() finishes
# Here we're able to transfer the correlation ID via the headers kept in our backend
headers[CORRELATION_CELERY_HEADER_ID] = correlation_id.get()


@task_prerun.connect()
def load_correlation_id(task, *args, **kwargs) -> None:
# This is called when the worker picks up the task
# Here we're able to load the correlation ID from the headers
id_value = task.request.get(CORRELATION_CELERY_HEADER_ID)
correlation_id.set(id_value)

0 comments on commit d8bc63c

Please sign in to comment.