Skip to content

Commit

Permalink
Merge pull request #1106 from skalenetwork/docker-group
Browse files Browse the repository at this point in the history
Launch telegraf using docker group id
  • Loading branch information
dmytrotkk authored Sep 12, 2024
2 parents 0f75f0e + 7bd9d9e commit 1216034
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 44 deletions.
30 changes: 12 additions & 18 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ jobs:

- name: Install python dependencies
run: |
source ~/ga/bin/activate
python --version
bash ./scripts/install_python_dependencies.sh
source ../.venv/bin/activate
uv pip install -r requirements.txt --prerelease=allow
uv pip install -r requirements-dev.txt
- name: Lint with flake8
run: flake8 .
run: uv run flake8 .

- name: Launch anvil node
run: |
Expand All @@ -39,32 +39,26 @@ jobs:
run: |
bash ./helper-scripts/deploy_test_ima.sh
- name: Cleanup skale-manager image
run: |
docker rmi -f skalenetwork/skale-manager:${{ env.MANAGER_TAG }}
- name: Show stats before tests
if: always()
run: |
sudo lsblk -f
sudo free -h
lsblk -f
free -h
- name: Run core tests
run: |
source ~/ga/bin/activate
python --version
source ../.venv/bin/activate
bash ./scripts/run_core_tests.sh
- name: Cleanup docker artifacts
run: |
docker rm -f $(docker ps -aq)
docker rmi -f $(docker images -q)
- name: Show stats after core tests
if: always()
run: |
sudo lsblk -f
sudo free -h
lsblk -f
free -h
- name: Run firewall tests
run: |
Expand All @@ -73,15 +67,15 @@ jobs:
- name: Show stats after firewall tests
if: always()
run: |
sudo lsblk -f
sudo free -h
lsblk -f
free -h
- name: Cleanup docker artifacts
if: always()
run: |
docker rm -f $(docker ps -aq)
docker rmi -f $(docker images -q)
- name: Run codecov
run: |
source ../.venv/bin/activate
codecov -t $CODECOV_TOKEN
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.7.1
2.7.2
5 changes: 3 additions & 2 deletions core/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from typing import Optional

from tools.helper import process_template
from tools.docker_utils import DockerUtils
from tools.docker_utils import DockerUtils, get_docker_group_id

from tools.configs import SKALE_DIR_HOST
from tools.configs.monitoring import (
Expand Down Expand Up @@ -68,11 +68,12 @@ def ensure_telegraf_running(dutils: Optional[DockerUtils] = None) -> None:
if dutils.is_container_exists(TELEGRAF_CONTAINER_NAME):
dutils.restart(TELEGRAF_CONTAINER_NAME)
else:
group_id = get_docker_group_id()
dutils.run_container(
image_name=TELEGRAF_IMAGE,
name=TELEGRAF_CONTAINER_NAME,
network_mode='host',
user='telegraf:998',
user=f'telegraf:{group_id}',
restart_policy={'name': 'on-failure'},
environment={'HOST_PROC': '/host/proc'},
volumes={
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_core_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ export_test_env
run_sgx_simulator $SGX_WALLET_TAG
bash scripts/run_redis.sh

py.test --cov-config=.coveragerc --cov=. tests/ --ignore=tests/firewall $@
python -m py.test --cov-config=.coveragerc --cov=. tests --ignore=tests/firewall $@
tests_cleanup
2 changes: 1 addition & 1 deletion scripts/run_redis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ set -e

docker rm -f redis || true
export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
docker run -v $DIR/../tests/redis-conf:/config -p 6381:6381 --name=redis -d redis:6.0-alpine
docker run -v $DIR/../tests/redis-conf:/config --network=host --name=redis -d redis:6.0-alpine
38 changes: 22 additions & 16 deletions tests/monitoring_test.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import json
import os

import pytest

from core.monitoring import TelegrafNotConfiguredError, update_telegraf_service
from tools.configs.monitoring import (
TELEGRAF_TEMPLATE_PATH,
TELEGRAF_CONFIG_PATH
)
from tools.configs import DOCKER_NODE_CONFIG_FILEPATH
from tools.configs.monitoring import TELEGRAF_TEMPLATE_PATH, TELEGRAF_CONFIG_PATH


CONFIG_TEMPLATE = """
Expand All @@ -24,6 +23,8 @@
"""

DOCKER_GROUP_ID = 1023


@pytest.fixture
def cleanup_container(dutils):
Expand All @@ -44,24 +45,29 @@ def telegraf_template():
os.remove(TELEGRAF_CONFIG_PATH)


def test_update_telegraf_service(telegraf_template, cleanup_container, dutils):
@pytest.fixture
def docker_node_config():
    """Create the node docker config file with a fixed docker group id.

    Yields the config path; the file is removed on teardown so tests
    do not leak state between runs.
    """
    try:
        with open(DOCKER_NODE_CONFIG_FILEPATH, 'w') as config_file:
            json.dump({'docker_group_id': DOCKER_GROUP_ID}, config_file)
        yield DOCKER_NODE_CONFIG_FILEPATH
    finally:
        os.remove(DOCKER_NODE_CONFIG_FILEPATH)


def test_update_telegraf_service(docker_node_config, telegraf_template, cleanup_container, dutils):
    """Telegraf config is rendered from the template and the container runs
    with the docker group id taken from the node docker config."""
    node_id = 1
    node_ip = '1.1.1.1'
    # An empty node ip is invalid and must be rejected up front.
    with pytest.raises(TelegrafNotConfiguredError):
        update_telegraf_service(
            node_id=node_id,
            node_ip='',
            url='http://127.0.0.1:1231',
            dutils=dutils,
        )

    update_telegraf_service(
        node_ip,
        node_id,
        url='http://127.0.0.1:1231',
        dutils=dutils,
    )
    with open(TELEGRAF_CONFIG_PATH) as config_file:
        rendered = config_file.read()
    expected = '\n[agent]\n interval = "60s"\n hostname = "1.1.1.1"\n omit_hostname = false\n\n[global_tags]\n node_id = "1"\n\n[[outputs.db]]\n alias = "db"\n urls = ["http://127.0.0.1:1231"]\n'  # noqa
    assert rendered == expected
    assert dutils.is_container_running('skale_telegraf')
    user_info = dutils.get_info('skale_telegraf')['stats']['Config']['User']
    assert user_info == f'telegraf:{DOCKER_GROUP_ID}'
6 changes: 5 additions & 1 deletion tests/schains/monitor/containers_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import mock
import time

from unittest import mock

from core.schains.monitor.containers import monitor_schain_container
from core.schains.runner import is_container_exists
Expand Down Expand Up @@ -75,6 +77,8 @@ def test_monitor_schain_container_ec(
schain_name = schain_db

run_custom_schain_container(dutils, schain_name, entrypoint=['sh', 'exit', '1'])
# To make sure the container is initialized
time.sleep(2)
with mock.patch('core.schains.monitor.containers.is_volume_exists', return_value=True):
schain_record.set_failed_rpc_count(100)
schain_record.set_restart_count(0)
Expand Down
12 changes: 8 additions & 4 deletions tests/schains/monitor/rpc_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import time
import json
import mock
from time import sleep
from unittest import mock

import freezegun
import requests
Expand Down Expand Up @@ -37,7 +37,7 @@ def test_handle_failed_schain_rpc_exit_time_reached(
image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db)

dutils.run_container(image_name=image_name, name=container_name, entrypoint='bash -c "exit 0"')
sleep(7)
time.sleep(7)
schain_record.set_failed_rpc_count(100)

container_info = dutils.get_info(container_name)
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_monitor_schain_downloading_snapshot(
dutils.run_container(
image_name=image_name, name=container_name, entrypoint='bash -c "sleep 100"'
)
sleep(7)
time.sleep(7)
schain_record.set_failed_rpc_count(100)

container_info = dutils.get_info(container_name)
Expand Down Expand Up @@ -112,6 +112,8 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers,
dutils.run_container(
image_name=image_name, name=container_name, entrypoint='bash -c "exit 100;"'
)
# Wait for container initialization
time.sleep(2)

schain_record.set_failed_rpc_count(100)
schain_record.set_restart_count(0)
Expand All @@ -126,6 +128,8 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers,
skaled_status=skaled_status,
dutils=dutils,
)
# Wait for container initialization
time.sleep(2)
assert schain_record.restart_count == 0
container_info = dutils.get_info(container_name)
assert container_info['stats']['State']['FinishedAt'] == finished_at
Expand Down
2 changes: 2 additions & 0 deletions tools/configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,5 @@
STATSD_HOST = '127.0.0.1'
STATSD_PORT = 8125
SYNC_NODE = os.getenv('SYNC_NODE') == 'True'

DOCKER_NODE_CONFIG_FILEPATH = os.path.join(NODE_DATA_PATH, 'docker.json')
6 changes: 6 additions & 0 deletions tools/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from docker.models.containers import Container
from docker.models.volumes import Volume

from tools.configs import DOCKER_NODE_CONFIG_FILEPATH
from tools.configs.containers import (
CONTAINER_NOT_FOUND,
CREATED_STATUS,
Expand All @@ -46,6 +47,7 @@
CONTAINER_LOGS_SEPARATOR
)
from tools.configs.logs import REMOVED_CONTAINERS_FOLDER_PATH
from tools.helper import read_json


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -81,6 +83,10 @@ def inner(*args, **kwargs) -> list:
return inner


def get_docker_group_id() -> int:
    """Return the docker group id stored in the node docker config file."""
    docker_config = read_json(DOCKER_NODE_CONFIG_FILEPATH)
    return docker_config['docker_group_id']


class DockerUtils:
docker_lock = multiprocessing.Lock()

Expand Down

0 comments on commit 1216034

Please sign in to comment.