Skip to content

Commit f97487a

Browse files
committed
T7101: Add hardware watchdog support via systemd
1 parent b50808a commit f97487a

File tree

3 files changed

+290
-0
lines changed

3 files changed

+290
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
### Autogenerated by system_watchdog.py ###
2+
[Manager]
3+
RuntimeWatchdogSec={{ timeout }}
4+
ShutdownWatchdogSec={{ shutdown_timeout }}
5+
RebootWatchdogSec={{ reboot_timeout }}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
<?xml version="1.0"?>
2+
<interfaceDefinition>
3+
<node name="system">
4+
<children>
5+
<node name="watchdog" owner="${vyos_conf_scripts_dir}/system_watchdog.py">
6+
<properties>
7+
<help>Hardware watchdog configuration</help>
8+
<priority>9999</priority>
9+
</properties>
10+
<children>
11+
<leafNode name="enable">
12+
<properties>
13+
<help>Enable hardware watchdog</help>
14+
<valueless/>
15+
</properties>
16+
</leafNode>
17+
<leafNode name="module">
18+
<properties>
19+
<help>Kernel module to load for watchdog device (optional)</help>
20+
<valueHelp>
21+
<format>txt</format>
22+
<description>Module name (e.g. 'softdog', 'iTCO_wdt', 'sp5100_tco')</description>
23+
</valueHelp>
24+
<constraint>
25+
<regex>[a-zA-Z0-9_\-]+</regex>
26+
</constraint>
27+
<constraintErrorMessage>Module name must be alphanumeric/underscore/hyphen</constraintErrorMessage>
28+
</properties>
29+
</leafNode>
30+
<leafNode name="timeout">
31+
<properties>
32+
<help>Watchdog timeout for runtime (default 10 seconds)</help>
33+
<valueHelp>
34+
<format>u32:1-2147483647</format>
35+
<description>Timeout in seconds</description>
36+
</valueHelp>
37+
<valueHelp>
38+
<format>&lt;number&gt;s</format>
39+
<description>Timeout in seconds (e.g. 30s)</description>
40+
</valueHelp>
41+
<valueHelp>
42+
<format>&lt;number&gt;m</format>
43+
<description>Timeout in minutes (e.g. 5m)</description>
44+
</valueHelp>
45+
<valueHelp>
46+
<format>&lt;number&gt;min</format>
47+
<description>Timeout in minutes (e.g. 5min)</description>
48+
</valueHelp>
49+
<valueHelp>
50+
<format>&lt;number&gt;h</format>
51+
<description>Timeout in hours (e.g. 1h)</description>
52+
</valueHelp>
53+
<constraint>
54+
<regex>([1-9][0-9]*|[1-9][0-9]*(s|m|min|h))</regex>
55+
</constraint>
56+
<constraintErrorMessage>Timeout must be a positive number optionally followed by s, m, min, or h</constraintErrorMessage>
57+
</properties>
58+
<defaultValue>10</defaultValue>
59+
</leafNode>
60+
<leafNode name="shutdown-timeout">
61+
<properties>
62+
<help>Watchdog timeout during shutdown (default 2 minutes)</help>
63+
<valueHelp>
64+
<format>u32:1-2147483647</format>
65+
<description>Timeout in seconds</description>
66+
</valueHelp>
67+
<valueHelp>
68+
<format>&lt;number&gt;s</format>
69+
<description>Timeout in seconds (e.g. 30s)</description>
70+
</valueHelp>
71+
<valueHelp>
72+
<format>&lt;number&gt;m</format>
73+
<description>Timeout in minutes (e.g. 5m)</description>
74+
</valueHelp>
75+
<valueHelp>
76+
<format>&lt;number&gt;min</format>
77+
<description>Timeout in minutes (e.g. 5min)</description>
78+
</valueHelp>
79+
<valueHelp>
80+
<format>&lt;number&gt;h</format>
81+
<description>Timeout in hours (e.g. 1h)</description>
82+
</valueHelp>
83+
<constraint>
84+
<regex>([1-9][0-9]*|[1-9][0-9]*(s|m|min|h))</regex>
85+
</constraint>
86+
<constraintErrorMessage>Timeout must be a positive number optionally followed by s, m, min, or h</constraintErrorMessage>
87+
</properties>
88+
<defaultValue>2min</defaultValue>
89+
</leafNode>
90+
<leafNode name="reboot-timeout">
91+
<properties>
92+
<help>Watchdog timeout during reboot (default 2 minutes)</help>
93+
<valueHelp>
94+
<format>u32:1-2147483647</format>
95+
<description>Timeout in seconds</description>
96+
</valueHelp>
97+
<valueHelp>
98+
<format>&lt;number&gt;s</format>
99+
<description>Timeout in seconds (e.g. 30s)</description>
100+
</valueHelp>
101+
<valueHelp>
102+
<format>&lt;number&gt;m</format>
103+
<description>Timeout in minutes (e.g. 5m)</description>
104+
</valueHelp>
105+
<valueHelp>
106+
<format>&lt;number&gt;min</format>
107+
<description>Timeout in minutes (e.g. 5min)</description>
108+
</valueHelp>
109+
<valueHelp>
110+
<format>&lt;number&gt;h</format>
111+
<description>Timeout in hours (e.g. 1h)</description>
112+
</valueHelp>
113+
<constraint>
114+
<regex>([1-9][0-9]*|[1-9][0-9]*(s|m|min|h))</regex>
115+
</constraint>
116+
<constraintErrorMessage>Timeout must be a positive number optionally followed by s, m, min, or h</constraintErrorMessage>
117+
</properties>
118+
<defaultValue>2min</defaultValue>
119+
</leafNode>
120+
</children>
121+
</node>
122+
</children>
123+
</node>
124+
</interfaceDefinition>

src/conf_mode/system_watchdog.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Copyright VyOS maintainers and contributors <maintainers@vyos.io>
4+
#
5+
# This program is free software; you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License version 2 or later as
7+
# published by the Free Software Foundation.
8+
#
9+
# This program is distributed in the hope that it will be useful,
10+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
# GNU General Public License for more details.
13+
#
14+
# You should have received a copy of the GNU General Public License
15+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
import os
18+
19+
from sys import exit
20+
21+
from vyos.config import Config
22+
from vyos.template import render
23+
from vyos.utils.file import write_file
24+
from vyos.utils.process import cmd
25+
from vyos import ConfigError
26+
from vyos import airbag
27+
28+
airbag.enable()
29+
30+
# Drop-in directory for systemd manager settings
watchdog_config_dir = '/etc/systemd/system.conf.d'
# Watchdog drop-in rendered from system/watchdog.conf.j2; derived from the
# directory constant so the two paths cannot drift apart
watchdog_config_file = os.path.join(watchdog_config_dir, 'watchdog.conf')
# Persists the watchdog kernel module so it is autoloaded on boot
modules_load_file = '/etc/modules-load.d/vyos-watchdog.conf'
33+
34+
35+
def normalize_time(time_value):
    """
    Convert a configured timeout into the value written to the systemd config.

    Input can be a bare number ("10") or a number with a unit suffix
    ("10s", "5m", "5min", "1h"). Both forms are passed through unchanged:
    systemd interprets a bare number as seconds, and the suffixed forms are
    already in systemd format.

    Returns None when the value is empty or missing.
    """
    if not time_value:
        return None

    # No conversion is needed for either accepted form, so there is no
    # reason to distinguish bare numbers from suffixed values here.
    return time_value
51+
52+
53+
def get_config(config=None):
    """
    Read the 'system watchdog' subtree from the VyOS configuration.

    Args:
        config: An existing Config instance to reuse; a new one is created
            when omitted.

    Returns:
        None when the node does not exist. Otherwise a dict of the subtree
        with '-' in key names replaced by '_'. Default values are merged in
        only when 'enable' is set.
    """
    conf = config if config else Config()
    base = ['system', 'watchdog']

    if not conf.exists(base):
        return None

    # Merge defaults only for an enabled watchdog. All three timeout leaves
    # define a defaultValue, so merging them unconditionally would put the
    # timeout keys into every dict and make "only 'module' is set"
    # indistinguishable from "timeouts configured without enable".
    enabled = conf.exists(base + ['enable'])

    return conf.get_config_dict(base, key_mangling=('-', '_'),
                                get_first_key=True,
                                with_recursive_defaults=enabled)
68+
69+
70+
def verify(watchdog):
    """
    Validate the watchdog configuration dict.

    An empty/missing config and an enabled watchdog are always accepted.
    When 'enable' is absent, 'module' is the only key that may be present.

    Raises:
        ConfigError: if any key other than 'module' is present while the
            watchdog is not enabled.
    """
    if not watchdog or 'enable' in watchdog:
        return None

    # Not enabled: only the kernel module may be configured on its own
    if any(key != 'module' for key in watchdog):
        raise ConfigError('Watchdog must be enabled to configure timeout values!\n'
                          'Use "set system watchdog enable" to enable the watchdog.')

    return None
84+
85+
86+
def _remove_file(path):
    """Delete path, treating an already-missing file as success."""
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass


def generate(watchdog):
    """
    Write or remove the files that configure the watchdog.

    - modules_load_file is written whenever 'module' is set (even if the
      watchdog is not enabled) and removed otherwise.
    - watchdog_config_file is rendered only when 'enable' is set and
      /dev/watchdog0 exists; in every other case it is removed.

    Args:
        watchdog: The watchdog config dict, or None/empty when the node was
            removed entirely. The dict is not modified.

    Returns:
        None
    """
    # If watchdog node removed entirely, clean up everything
    if not watchdog:
        _remove_file(watchdog_config_file)
        _remove_file(modules_load_file)
        return None

    # Persist kernel module autoload on boot if specified (even if not enabled)
    module = watchdog.get('module')
    if module:
        # Best effort: warn and carry on if the file cannot be written
        try:
            write_file(modules_load_file, f"{module}\n")
        except Exception as e:
            print(f"Warning: Failed writing modules-load configuration: {e}")
    else:
        # If module option removed, drop persisted autoload file
        _remove_file(modules_load_file)

    # If not enabled, ensure systemd watchdog config is absent and return
    if 'enable' not in watchdog:
        _remove_file(watchdog_config_file)
        return None

    if not os.path.exists('/dev/watchdog0'):
        if module:
            # The device is missing; try loading the configured module now
            import subprocess
            try:
                subprocess.run(['modprobe', module], check=True)
            except (subprocess.SubprocessError, OSError) as e:
                print(f"Warning: Could not load watchdog module '{module}': {e}")

        # Re-check: without a device the watchdog config must not be left behind
        if not os.path.exists('/dev/watchdog0'):
            print("Warning: /dev/watchdog0 not found. Systemd watchdog will not be enabled.")
            _remove_file(watchdog_config_file)
            return None

    # Ensure the directory exists
    os.makedirs(watchdog_config_dir, exist_ok=True)

    # Normalize time values on a copy so the caller's dict stays untouched
    render_data = dict(watchdog)
    for key in ('timeout', 'shutdown_timeout', 'reboot_timeout'):
        if key in render_data:
            render_data[key] = normalize_time(render_data[key])

    render(watchdog_config_file, 'system/watchdog.conf.j2', render_data)

    return None
143+
144+
145+
def apply(watchdog):
    """
    Reload the systemd daemon so the watchdog configuration takes effect.

    Runs 'systemctl daemon-reload' unconditionally; the argument is not used.
    """
    reload_command = 'systemctl daemon-reload'
    cmd(reload_command)
151+
152+
153+
if __name__ == '__main__':
    # Run the stages in order: read config, validate, write files, apply.
    # A ConfigError from any stage is printed and becomes exit status 1.
    try:
        config = get_config()
        verify(config)
        generate(config)
        apply(config)
    except ConfigError as error:
        print(error)
        exit(1)

0 commit comments

Comments
 (0)