Skip to content

Commit

Permalink
Merge pull request #1094 from skalenetwork/fix-repair
Browse files Browse the repository at this point in the history
Fix repair
  • Loading branch information
badrogger authored Sep 23, 2024
2 parents f760de9 + d690b55 commit 8be5567
Show file tree
Hide file tree
Showing 22 changed files with 395 additions and 466 deletions.
20 changes: 12 additions & 8 deletions core/schains/cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,16 @@ def cleanup_schain(
dutils=dutils,
sync_node=SYNC_NODE,
)
status = checks.get_all()
if status['skaled_container'] or is_exited(
schain_name, container_type=ContainerType.schain, dutils=dutils
check_status = checks.get_all()
if check_status['skaled_container'] or is_exited(
schain_name,
container_type=ContainerType.schain,
dutils=dutils
):
remove_schain_container(schain_name, dutils=dutils)
if status['volume']:
if check_status['volume']:
remove_schain_volume(schain_name, dutils=dutils)
if status['firewall_rules']:
if check_status['firewall_rules']:
conf = ConfigFileManager(schain_name).skaled_config
base_port = get_base_port_from_config(conf)
own_ip = get_own_ip_from_config(conf)
Expand All @@ -256,11 +258,13 @@ def cleanup_schain(
rc.configure(base_port=base_port, own_ip=own_ip, node_ips=node_ips, sync_ip_ranges=ranges)
rc.cleanup()
if estate is not None and estate.ima_linked:
if status.get('ima_container', False) or is_exited(
schain_name, container_type=ContainerType.ima, dutils=dutils
if check_status.get('ima_container', False) or is_exited(
schain_name,
container_type=ContainerType.ima,
dutils=dutils
):
remove_ima_container(schain_name, dutils=dutils)
if status['config_dir']:
if check_status['config_dir']:
remove_config_dir(schain_name)
mark_schain_deleted(schain_name)

Expand Down
4 changes: 3 additions & 1 deletion core/schains/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from typing import Optional

from core.schains.config.file_manager import ConfigFileManager
from core.schains.config.helper import get_schain_ports_from_config
from core.schains.config.main import get_skaled_container_config_path
Expand All @@ -34,7 +36,7 @@ def get_schain_container_cmd(
download_snapshot: bool = False,
enable_ssl: bool = True,
sync_node: bool = False,
snapshot_from: str = ''
snapshot_from: Optional[str] = None
) -> str:
"""Returns parameters that will be passed to skaled binary in the sChain container"""
opts = get_schain_container_base_opts(schain_name, enable_ssl=enable_ssl, sync_node=sync_node)
Expand Down
7 changes: 6 additions & 1 deletion core/schains/config/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@

from tools.configs.schains import (
BASE_SCHAIN_CONFIG_FILEPATH,
NODE_CLI_STATUS_FILENAME,
SCHAINS_DIR_PATH,
SCHAINS_DIR_PATH_HOST,
SCHAIN_SCHECKS_FILENAME,
SKALED_STATUS_FILENAME
SKALED_STATUS_FILENAME,
)


Expand Down Expand Up @@ -58,6 +59,10 @@ def skaled_status_filepath(name: str) -> str:
return os.path.join(schain_config_dir(name), SKALED_STATUS_FILENAME)


def node_cli_status_filepath(name: str) -> str:
    """Build the path to the node-cli status file of a chain.

    The result is NODE_CLI_STATUS_FILENAME joined onto the directory
    returned by ``schain_config_dir(name)``.
    """
    config_dir = schain_config_dir(name)
    return os.path.join(config_dir, NODE_CLI_STATUS_FILENAME)


def get_schain_check_filepath(schain_name):
    """Build the path to the checks file of a chain.

    The result is SCHAIN_SCHECKS_FILENAME joined onto the directory
    returned by ``schain_config_dir(schain_name)``.
    """
    return os.path.join(schain_config_dir(schain_name), SCHAIN_SCHECKS_FILENAME)
Expand Down
6 changes: 4 additions & 2 deletions core/schains/config/static_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
from core.schains.config.helper import get_static_params
from tools.configs import ENV_TYPE

from typing import Optional


def get_static_schain_cmd(env_type: str = ENV_TYPE) -> list:
    """Return the ``'schain_cmd'`` entry of the static params for ``env_type``."""
    return get_static_params(env_type)['schain_cmd']


def get_static_schain_info(schain_name: str, env_type: str = ENV_TYPE) -> dict | None:
def get_static_schain_info(schain_name: str, env_type: str = ENV_TYPE) -> Optional[dict]:
static_params = get_static_params(env_type)
static_params_schain = static_params['schain']
processed_params = {}
Expand All @@ -36,7 +38,7 @@ def get_static_schain_info(schain_name: str, env_type: str = ENV_TYPE) -> dict |
return processed_params


def get_schain_static_param(static_param_schain: dict | int, schain_name: str) -> int:
def get_schain_static_param(static_param_schain: dict, schain_name: str) -> int:
if isinstance(static_param_schain, int):
return static_param_schain
elif isinstance(static_param_schain, dict) and schain_name in static_param_schain:
Expand Down
22 changes: 15 additions & 7 deletions core/schains/monitor/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,18 @@
run_dkg,
save_dkg_results
)
from core.schains.ima import get_migration_ts as get_ima_migration_ts

from core.schains.cleaner import (
remove_ima_container,
remove_schain_container,
remove_schain_volume
)
from core.schains.ima import get_migration_ts as get_ima_migration_ts, ImaData
from core.schains.status import NodeCliStatus
from core.schains.firewall.types import IRuleController

from core.schains.volume import init_data_volume
from core.schains.exit_scheduler import ExitScheduleFileManager

from core.schains.limits import get_schain_type

from core.schains.monitor.containers import monitor_schain_container, monitor_ima_container
from core.schains.monitor.rpc import handle_failed_schain_rpc
from core.schains.runner import (
Expand All @@ -70,9 +68,8 @@
get_node_ips_from_config,
get_own_ip_from_config
)
from core.schains.ima import ImaData
from core.schains.external_config import ExternalConfig, ExternalState
from core.schains.skaled_status import init_skaled_status
from core.schains.status import init_skaled_status
from core.schains.ssl import update_ssl_change_date

from tools.configs import SYNC_NODE
Expand Down Expand Up @@ -299,6 +296,7 @@ def __init__(
rule_controller: IRuleController,
checks: SkaledChecks,
node_config: NodeConfig,
ncli_status: NodeCliStatus,
econfig: Optional[ExternalConfig] = None,
dutils: DockerUtils = None,
node_options: NodeOptions = None
Expand All @@ -321,6 +319,7 @@ def __init__(
self.statsd_client = get_statsd_client()

self.node_options = node_options or NodeOptions()
self.ncli_status = ncli_status

super().__init__(name=schain.name)

Expand Down Expand Up @@ -375,11 +374,13 @@ def skaled_container(
download_snapshot,
start_ts
)
snapshot_from = self.ncli_status.snapshot_from if self.ncli_status else None
monitor_schain_container(
self.schain,
schain_record=self.schain_record,
skaled_status=self.skaled_status,
download_snapshot=download_snapshot,
snapshot_from=snapshot_from,
start_ts=start_ts,
abort_on_exit=abort_on_exit,
dutils=self.dutils,
Expand Down Expand Up @@ -556,4 +557,11 @@ def notify_repair_mode(self) -> None:
@BaseActionManager.monitor_block
def disable_repair_mode(self) -> None:
logger.info('Switching off repair mode')
self.schain_record.set_repair_mode(False)
if self.schain_record.repair_mode:
self.schain_record.set_repair_mode(False)

@BaseActionManager.monitor_block
def update_repair_ts(self, new_ts: int) -> None:
    """Store ``new_ts`` as the repair date of the schain record.

    Args:
        new_ts: POSIX timestamp (seconds); it is converted to a naive
            datetime expressed in UTC before being passed to
            ``schain_record.set_repair_date``.
    """
    # Local import keeps this block self-contained; the surrounding module
    # is only known to expose the ``datetime`` class.
    from datetime import timezone

    logger.info('Setting repair_ts to %d', new_ts)
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12. Building
    # an aware UTC datetime and dropping tzinfo gives the same naive value.
    new_dt = datetime.fromtimestamp(new_ts, tz=timezone.utc).replace(tzinfo=None)
    self.schain_record.set_repair_date(new_dt)
3 changes: 2 additions & 1 deletion core/schains/monitor/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def monitor_schain_container(
skaled_status,
download_snapshot=False,
start_ts=None,
snapshot_from: Optional[str] = None,
abort_on_exit: bool = True,
dutils: Optional[DockerUtils] = None,
sync_node: bool = False,
Expand Down Expand Up @@ -83,7 +84,7 @@ def monitor_schain_container(
download_snapshot=download_snapshot,
start_ts=start_ts,
dutils=dutils,
snapshot_from=schain_record.snapshot_from,
snapshot_from=snapshot_from,
sync_node=sync_node,
historic_state=historic_state,
)
Expand Down
15 changes: 9 additions & 6 deletions core/schains/monitor/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from core.schains.external_config import ExternalConfig, ExternalState
from core.schains.task import keep_tasks_running, Task
from core.schains.config.static_params import get_automatic_repair_option
from core.schains.skaled_status import get_skaled_status
from core.schains.status import get_node_cli_status, get_skaled_status
from core.node import get_current_nodes

from tools.docker_utils import DockerUtils
Expand Down Expand Up @@ -141,30 +141,33 @@ def run_skaled_pipeline(
)

skaled_status = get_skaled_status(name)
ncli_status = get_node_cli_status(name)

skaled_am = SkaledActionManager(
schain=schain,
rule_controller=rc,
checks=skaled_checks,
node_config=node_config,
ncli_status=ncli_status,
econfig=ExternalConfig(name),
dutils=dutils,
)
status = skaled_checks.get_all(log=False, expose=True)
check_status = skaled_checks.get_all(log=False, expose=True)
automatic_repair = get_automatic_repair_option()
api_status = get_api_checks_status(status=status, allowed=TG_ALLOWED_CHECKS)
api_status = get_api_checks_status(status=check_status, allowed=TG_ALLOWED_CHECKS)
notify_checks(name, node_config.all(), api_status)

logger.info('Skaled status: %s', status)
logger.info('Skaled check status: %s', check_status)

logger.info('Upstream config %s', skaled_am.upstream_config_path)

mon = get_skaled_monitor(
action_manager=skaled_am,
status=status,
check_status=check_status,
schain_record=schain_record,
skaled_status=skaled_status,
automatic_repair=automatic_repair,
ncli_status=ncli_status,
automatic_repair=automatic_repair
)

statsd_client = get_statsd_client()
Expand Down
Loading

0 comments on commit 8be5567

Please sign in to comment.