From 96c65a307313f251c22c050fedc9df0d0ca6c6c5 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sat, 15 Nov 2025 14:26:28 -0500 Subject: [PATCH 01/42] Add script to monitor connections to prefer Ethernet over PPP --- roles/conduit/files/monitor_modem.initd | 61 ++ roles/conduit/files/monitor_modem.py | 595 ++++++++++++++++++ roles/conduit/handlers/main.yml | 6 + roles/conduit/tasks/monitor_modem.yml | 56 ++ roles/conduit/tasks/restart_monitor_modem.yml | 13 + 5 files changed, 731 insertions(+) create mode 100644 roles/conduit/files/monitor_modem.initd create mode 100755 roles/conduit/files/monitor_modem.py create mode 100644 roles/conduit/tasks/monitor_modem.yml create mode 100644 roles/conduit/tasks/restart_monitor_modem.yml diff --git a/roles/conduit/files/monitor_modem.initd b/roles/conduit/files/monitor_modem.initd new file mode 100644 index 0000000..3fe6f29 --- /dev/null +++ b/roles/conduit/files/monitor_modem.initd @@ -0,0 +1,61 @@ +#!/bin/sh +# +#Start monitor_modem as a service + +### BEGIN INIT INFO +# Provides: monitor_modem +# Required-Start: $local_fs $network $syslog $dbus +# Required-Stop: $local_fs $network $syslog $dbus +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Description: Display connection status in Conduit LEDs +### END INIT INFO + +PATH="/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin" +NAME=monitor_mondem +DAEMON=/usr/local/sbin/monitor_modem +PIDFILE=/var/run/${NAME}.pid +DAEMON_ARGS= + +# source function library +. /etc/init.d/functions + +if [ -r /etc/default/${NAME} ]; then + . 
/etc/default/${NAME} +fi + +[ -x ${DAEMON} ] || exit 0 + +is_running() { + pgrep -F ${PIDFILE} > /dev/null +} + +start() { + start-stop-daemon --start --quiet -p ${PIDFILE} --exec ${DAEMON} -- ${DAEMON_ARGS} +} + +stop() { + start-stop-daemon --stop --quiet --p ${PIDFILE} +} + +case "$1" in + start) + is_running || start + ;; + stop) + stop + ;; + restart|reload) + nohup ${0} do_restart + ;; + do_restart) + stop + start + ;; + status) + is_running + ;; + *) + echo "Usage: $0 {start|stop|status|restart}" +esac +#========================================= diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py new file mode 100755 index 0000000..e046291 --- /dev/null +++ b/roles/conduit/files/monitor_modem.py @@ -0,0 +1,595 @@ +#!/usr/bin/env python + +""" +MIT License + +Copyright (c) 2025 Jeffrey C Honig + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+""" + +from __future__ import print_function + +import array +import argparse +import binascii +from contextlib import contextmanager +import errno +import fcntl +import ipaddress +import logging +from logging.handlers import SysLogHandler +import os +import select +import socket +import stat +import struct +import subprocess +import sys +import time + +if not hasattr(socket, 'SO_BINDTODEVICE'): + socket.SO_BINDTODEVICE = 25 + +try: + FileNotFoundError +except NameError: + FileNotFoundError = IOError + +class LockFileTimeout(Exception): + def __init__(self, error): + self.value = error + def __str__(self): + return repr(self.value) + +@contextmanager +def pidfilelock(name): + """ Context to lock a pid file """ + + time_left = 30 + pidfile_path = os.path.join("/var/run", name + ".pid") + lock_file = open(pidfile_path, 'w+') + while True: + try: + logging.debug("Attempting to lock %s", pidfile_path) + fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB) + lock_file.write(str(os.getpid()) + '\n') + lock_file.flush() + logging.debug("Wrote %d to %s", os.getpid(), pidfile_path) + break + except IOError as err: + if err.errno != errno.EAGAIN: + raise err + else: + logging.debug("Timeout trying to lock", pidfile_path) + time.sleep(1) + time_left -= 1 + if time_left == 0: + raise LockFileTimeout("Unable to lock %s" % pidfile_path) + + try: + yield lock_file + finally: + logging.debug("Unlocking %s", pidfile_path) + fcntl.flock(lock_file, fcntl.LOCK_UN) + os.unlink(pidfile_path) + lock_file.close() + +def daemonize(): + """ Run as a daemon """ + + try: + pid = os.fork() + if pid > 0: + # exit first parent + sys.exit(0) + except OSError: + logging.exception("First fork failed") + return False + + # decouple from parent environment + os.chdir('/') + os.setsid() + os.umask(0) + # do second fork + try: + pid = os.fork() + if pid > 0: + # exit from second parent + sys.exit(0) + except OSError: + logging.exception("Second fork failed") + return False + + # redirect standard 
file descriptors + sys.stdout.flush() + sys.stderr.flush() + si = open(os.devnull, 'r') + so = open(os.devnull, 'w') + se = open(os.devnull, 'w') + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + + return True + +def parse_args(): + """ What do we need to do """ + + parser = argparse.ArgumentParser(description="Check for errors in Basic Station log") + + # Debugging + group = parser.add_argument_group("Debugging options") + group.add_argument("-d", "--debug", + dest="debug", default=False, + action='store_true', + help="print debugging messages") + group.add_argument("--nodebug", + dest="debug", + action='store_false', + help="print debugging messages") + group.add_argument("-v", "--verbose", + dest="verbose", default=False, + action='store_true', + help="print verbose messages") + group.add_argument("-n", "--noop", + dest="noop", default=False, + action='store_true', + help="Don't send notifications, just list what we are going to do") + + group = parser.add_argument_group("Options") + group.add_argument("--pidfile", + dest="pidfile", default="/var/run/conduit_leds.pid", + help="Location of the PID file") + group.add_argument("--interval", + default=60.0, type=float, + help="Seconds to wait between checks") + group.add_argument("--hostname", + default="ec2-54-221-216-139.compute-1.amazonaws.com", + help="Hostname to check") + group.add_argument("--pings", + type=int, default=10, + help="Number of pings to receive") + group.add_argument("--foreground", "-f", + dest="foreground", default=False, + action='store_true', + help="Do not fork; run in foreground") + group.add_argument("--modem", + dest="modem", default="/dev/modem_at1", + help="Modem device for Cell service") + group.add_argument("--real-ppp-on-boot", + default="/var/config/ppp/ppp_on_boot", + help="Where to link /etc/ppp_on_boot to when enabling ppp") + group.add_argument("--ppp-on-boot", + default="/etc/ppp/ppp_on_boot", + 
help="Where system looks for ppp startup script") + + # Parse args + options = parser.parse_args() + + # --test implies --verbose + if options.noop: + options.debug = True + + # Init Logging + init_logging(options) + + return options + +def init_logging(options): + """ Set up logging """ + + logger = logging.getLogger() + logger.handlers = [] + syslog_format = '%s[%%(process)s]: %%(message)s' % (os.path.basename(sys.argv[0])) + syslog_handler = SysLogHandler(address="/dev/log", + facility=SysLogHandler.LOG_DAEMON) + syslog_handler.setFormatter(logging.Formatter(syslog_format)) + if not sys.stdout.isatty(): + logger.addHandler(syslog_handler) + else: + logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + if options.debug: + logger.setLevel('DEBUG') + elif options.verbose: + logger.setLevel('INFO') + else: + logger.setLevel('WARNING') + +if struct.pack("H",1) == "\x00\x01": # big endian + def checksum(pkt): + if len(pkt) % 2 == 1: + pkt += "\0" + s = sum(array.array("H", pkt)) + s = (s >> 16) + (s & 0xffff) + s += s >> 16 + s = ~s + return s & 0xffff +else: + def checksum(pkt): + if len(pkt) % 2 == 1: + pkt += "\0" + s = sum(array.array("H", pkt)) + s = (s >> 16) + (s & 0xffff) + s += s >> 16 + s = ~s + return (((s>>8)&0xff)|s<<8) & 0xffff + +def icmp_echo(dst_ip, interface = None, payload = b'hello', id_ = None, seq = 1): + if id_ is None: + id_ = os.getpid() & 0xFFFF + + # raw ICMP socket (IPv4) + sock = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP) + + if interface is not None: + # Bind to specific interface (Linux only). Requires root. 
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, interface.encode() + b'\0') + + # Build ICMP echo request header: type(8)=echo request, code=0, checksum, id, seq + icmp_type = 8 + icmp_code = 0 + header = struct.pack('!BBHHH', icmp_type, icmp_code, 0, id_, seq) + packet = header + payload + chksum = checksum(packet) + header = struct.pack('!BBHHH', icmp_type, icmp_code, chksum, id_, seq) + packet = header + payload + + t0 = time.time() + sock.sendto(packet, (dst_ip, 0)) + + deadline = t0 + 10.0 + while True: + remaining = deadline - time.time() + if remaining <= 0: + return False + + # Ignore errno 4 + try: + ready, _, _ = select.select([sock], [], [], remaining) + if not ready: + logging.debug("TIMEOUT") + return False + except (IOError, OSError): + continue + + recv_packet, addr = sock.recvfrom(65535) + + iph_len = (struct.unpack("!B", recv_packet[:1])[0] & 0xf) * 4 + icmp_packet = recv_packet[iph_len:] + if len(icmp_packet) < 8: + continue + r_type, r_code, r_chksum, r_id, r_seq = struct.unpack("!BBHHH", icmp_packet[:8]) + logging.debug("RECV type %d code %d id %d seq %d", r_type, r_code, r_id, r_seq) + + if r_type == 0 and r_id == id_ and r_seq == seq: + return True + + return False + +def set_rpfilter(options, value, interfaces): + """ Set values of rp_filter on the specified interface(s) """ + + for interface in interfaces: + try: + with open("/proc/sys/net/ipv4/conf/%s/rp_filter" % interface, "w") as fp: + fp.write(str(value)) + except FileNotFoundError: + pass + +class Route(object): + """ A routing table entry """ + + def __init__(self, header, parts): + + self._parts = {} + + for key, value in zip(header, parts): + if key == 'Iface': + self.__setattr__(key, value) + elif key in ['Destination', 'Gateway', 'Mask']: + self.__setattr__(key, ipaddress.ip_address(binascii.unhexlify(value)[::-1])) + elif key == 'Flags': + self.__setattr__(key, int(value, 16)) + else: + self.__setattr__(key, int(value)) + + def __repr__(self): + return self.__str__() 
+ + def __str__(self): + return "Iface: %s Destination %s Gateway %s Flags %x RefCnt %d Use %d Metric %d Mask %s MTU %d Window %d IRTT %d" % ( + self.Iface, + self.Destination, + self.Gateway, + self.Flags, + self.RefCnt, + self.Use, + self.Metric, + self.Mask, + self.MTU, + self.Window, + self.IRTT) + +def read_routes(options): + """ Read the routing table """ + + _rt = [] + + with open("/proc/net/route", "r") as fp: + header = [] + for line in fp: + if len(line.strip()) == 0: + continue + parts = line.split() + if not header: + header = parts + continue + route = Route(header, parts) + _rt.append(route) + + return _rt + +def ppp_on_boot(options, enable): + """ Link or unlink system ppp startup script """ + + try: + link_target = os.readlink(options.ppp_on_boot) + except (OSError, IOError): + link_target = None + + logging.debug("ppp_on_boot(%s): %s -> %s", enable, options.ppp_on_boot, link_target) + + if enable: + if link_target and link_target != options.real_ppp_on_boot: + try: + os.unlink(options.ppp_on_boot) + os.symlink(options.real_ppp_on_boot, options.ppp_on_boot) + logging.debug("ppp_on_boot: %s linked", options.ppp_on_boot) + except OSError as error: + logging.error("Error linking %s -> %s", + options.ppp_on_boot, + options.real_ppp_on_boot, + error) + return + + if link_target: + try: + os.unlink(options.ppp_on_boot) + logging.debug("ppp_on_boot: %s un-linked", options.ppp_on_boot) + except OSError as error: + logging.error("Error un-linking %s", + options.ppp_on_boot, + error) + +def check_modem(options): + """ Run a set of checks """ + + have_modem = False + try: + modem_stat = os.stat(options.modem) + if stat.S_ISCHR(modem_stat.st_mode): + have_modem = True + except OSError: + pass + + have_sim = False + if have_modem: + cmd = ["radio-cmd", "AT+CPIN?"] + try: + output = subprocess.check_output(cmd) + logging.info("check_modem: %s returned: %s", " ".join(cmd), output) + except subprocess.CalledProcessError as error: + logging.debug("check_modem: %s 
returned: %s", " ".join(cmd), error) + if "+CPIN: READY" in output: + have_sim = True + + logging.debug("have_modem: %s, have_sim: %s", have_modem, have_sim) + return have_modem, have_sim + +def pppd(options, enable): + """ Start or stop pppd """ + + logging.debug("pppd(%s)", enable) + + ppp_on_boot(options, enable) + + try: + subprocess.check_call(["pidof", "pppd"]) + logging.debug("pppd is running") + ppp_is_running = True + except subprocess.CalledProcessError: + logging.debug("pppd is not running") + ppp_is_running = False + + if enable: + if not ppp_is_running: + try: + logging.debug("Starting %s", options.ppp_on_boot) + subprocess.check_call([options.ppp_on_boot]) + except subprocess.CalledProcessError as error: + logging.error("Starting %s: %s", options.ppp_on_boot, error) + return + + for service in [ 'ppp0', 'pppd']: + try: + logging.debug("Monitoring %s", service) + subprocess.check_call(["monit", "monitor", service]) + except subprocess.CalledProcessError as error: + logging.error("Monitoring %s: %s", service, error) + + return + + if ppp_is_running: + cmd = ["/etc/init.d/ppp", "stop"] + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError as error: + logging.error("%s: %s", " ".join(cmd), error) + pass + + for service in [ 'ppp0', 'pppd']: + cmd = ["monit", "unmonitor", service] + try: + logging.debug("Unonitoring %s", service) + subprocess.check_call(cmd) + except subprocess.CalledProcessError as error: + logging.error("%s: %s", " ".join(cmd), error) + pass + +def main(): + """It all happens here""" + + options = parse_args() + + if not options.foreground: + if not daemonize(): + return 1 + + # Read the routing table and figure out if we have a non-ppp + # interface with a default route. This will tell us which is the + # primary interface. 
+ rt = read_routes(options) + for route in rt: + if route.Flags & 0x3 != 0x3: + continue + if str(route.Destination) != "0.0.0.0": + continue + if route.Iface == "ppp0": + continue + default_interface = route.Iface + logging.info("Using a default interface of %s", default_interface) + break + else: + logging.fatal("Unable to find a non-ppp interface with a default route") + return 1 + + # Set rp_filter to allow RFC3704 Losse Reverse Path Each so we can + # receive pings that are not from the expected interface + set_rpfilter(options, 2, ["all", "default", default_interface, "ppp0"]) + + seq = -1 + while time.sleep(options.interval) is None: + + logging.debug("check_modem") + have_modem, have_sim = check_modem(options) + if not have_modem or not have_sim: + logging.warning("NO Modem or SIM, stopping pppd") + pppd(options, False) + continue + + # Test ping response of default_interface + # Seq is a unsigned 16 bit integer + responses = 0 + for ping in range(options.pings): + seq = seq + 1 if seq < 65535 else 0 + logging.debug("send_icmp (seq %d) via %s", seq, default_interface) + if icmp_echo(options.hostname, interface=default_interface, seq=seq): + responses += 1 + time.sleep(.1) + # Call it good if we get 80% of our pings back + if responses >= float(options.pings) * 0.80: + logging.warning("Received response on %s, stopping pppd", default_interface) + pppd(options, False) + continue + + logging.warning("No response received on %s, starting pppd", default_interface) + pppd(options, True) + continue + + return 0 + +if __name__ == "__main__": + rc = 1 + try: + rc = main() + except KeyboardInterrupt: + print("") + except Exception as exc: + logging.exception(exc) + + sys.exit(rc) + + + +# # # # XXX Adapt this and keep track of connection duration (by address and port) + +def parse_ip_port(hex_ip, hex_port): + ip = socket.inet_ntoa(struct.pack(" (pid, process_name).""" + inode_map = {} + for pid in filter(str.isdigit, os.listdir("/proc")): + fd_dir = 
os.path.join("/proc", pid, "fd") + comm_file = os.path.join("/proc", pid, "comm") + try: + with open(comm_file, "r") as f: + pname = f.read().strip() + except IOError: + pname = "unknown" + try: + for fd in os.listdir(fd_dir): + path = os.path.join(fd_dir, fd) + try: + target = os.readlink(path) + if target.startswith("socket:["): + inode = target[8:-1] + inode_map[inode] = (int(pid), pname) + except OSError: + continue + except OSError: + continue + return inode_map + +def get_established_tcp_connections(): + results = [] + inode_map = get_inode_to_process() + with open("/proc/net/tcp", "r") as f: + next(f) # skip header + for line in f: + parts = line.split() + local_ip, local_port = parts[1].split(":") + remote_ip, remote_port = parts[2].split(":") + state = parts[3] + inode = parts[9] + if state != "01": # only ESTABLISHED + continue + lip, lport = parse_ip_port(local_ip, local_port) + rip, rport = parse_ip_port(remote_ip, remote_port) + proc = inode_map.get(inode, (None, None)) + results.append({ + "local": (str(lip), lport), + "remote": (str(rip), rport), + "pid": proc[0], + "program": proc[1] + }) + return results + +# Example usage +if __name__ == "__main__": + conns = get_established_tcp_connections() + for c in conns: + print("%s:%d -> %s:%d (pid=%s, program=%s)" % + (c["local"][0], c["local"][1], + c["remote"][0], c["remote"][1], + c["pid"], c["program"])) diff --git a/roles/conduit/handlers/main.yml b/roles/conduit/handlers/main.yml index c65fac6..6895a74 100644 --- a/roles/conduit/handlers/main.yml +++ b/roles/conduit/handlers/main.yml @@ -58,6 +58,12 @@ - name: restart conduit_leds include_tasks: restart_conduit_leds.yml +# +# Restart monitor_modem +# +- name: restart monitor_modem + include_tasks: restart_monitor_modem.yml + # # Restart sshd after config changes # diff --git a/roles/conduit/tasks/monitor_modem.yml b/roles/conduit/tasks/monitor_modem.yml new file mode 100644 index 0000000..2f3079e --- /dev/null +++ 
b/roles/conduit/tasks/monitor_modem.yml @@ -0,0 +1,56 @@ +--- + +- name: monitor_modem /var/config/local/sbin/monitor_modem + ansible.builtin.copy: + dest: /var/config/local/sbin/monitor_modem + src: monitor_modem.py + mode: "0755" + owner: root + group: root + notify: + - restart monitor_modem + - Update system checksum file + when: "'modem_at0' in ansible_local.dev" + +- name: monitor_modem /var/config/init.d/monitor_modem + ansible.builtin.copy: + dest: /var/config/init.d/monitor_modem + src: monitor_modem.initd + mode: "0755" + owner: root + group: root + notify: + - restart monitor_modem + - Update system checksum file + when: "'modem_at0' in ansible_local.dev" + +- name: monitor_modem Link /etc/init.d/monitor_modem to /var/config/init.d/monitor_modem + ansible.builtin.file: + path: /etc/init.d/monitor_modem + state: link + src: /var/config/init.d/monitor_modem + force: yes + notify: + - restart monitor_modem + - Update system checksum file + when: "'modem_at0' in ansible_local.dev" + +- name: monitor_modem Remove if a modem is not present + ansible.builtin.file: + path: "{{ item }}" + state: absent + force: yes + loop: + - /var/config/local/sbin/monitor_modem + - /var/config/init.d/monitor_modem + notify: + - Update system checksum file + when: "'modem_at0' not in ansible_local.dev" + +- name: monitor_modem Delete service if modem not present + ansible.builtin.shell: "update-rc.d -f monitor_modem remove" + notify: + - Update system checksum file + when: "'modem_at0' not in ansible_local.dev" + +... 
diff --git a/roles/conduit/tasks/restart_monitor_modem.yml b/roles/conduit/tasks/restart_monitor_modem.yml new file mode 100644 index 0000000..0ca880f --- /dev/null +++ b/roles/conduit/tasks/restart_monitor_modem.yml @@ -0,0 +1,13 @@ +--- + +# +# Configure monitor_modem service and restart it +# + +- name: restart_monitor_modem Update rc.d to start monitor_modem + ansible.builtin.shell: "update-rc.d -f monitor_modem remove; update-rc.d monitor_modem defaults 10 50" + +- name: restart_monitor_modem Restart monitor_modem + ansible.builtin.command: /etc/init.d/monitor_modem restart + +... From 8684b4eb2260b7c938aebdb5ea3f90d951c77eb3 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sat, 15 Nov 2025 14:32:52 -0500 Subject: [PATCH 02/42] Linting of conduit_leds --- roles/conduit/files/conduit_leds.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index 3c91598..7fa555d 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -35,12 +35,10 @@ import logging from logging.handlers import SysLogHandler import os -import pprint import psutil import re import socket import stat -import struct import subprocess import sys import time @@ -83,7 +81,7 @@ def pidfilelock(name): fcntl.flock(lock_file, fcntl.LOCK_UN) os.unlink(pidfile_path) lock_file.close() - + class Defaults(object): """ Read a /etc/defaults file """ @@ -145,7 +143,7 @@ def write(self, name, value): with open(os.path.join(self.ROOT, name), "w") as fp: fp.write("%s\n" % value) - + class LEDs(object): """ Control LEDs """ @@ -211,7 +209,7 @@ def daemonize(): # exit first parent sys.exit(0) except OSError as err: - logging.exception("First fork failed") + logging.exception("First fork failed: %s", err) return False # decouple from parent environment @@ -225,7 +223,7 @@ def daemonize(): # exit from second parent sys.exit(0) except OSError as err: - 
logging.exception("Second fork failed") + logging.exception("Second fork failed: %s", err) return False # redirect standard file descriptors @@ -401,7 +399,7 @@ def check_ppp(options): logging.debug("check_ppp: No valid peer address found") return False - return True + return True def process(options, leds, device_path): """ Check all the services """ @@ -482,7 +480,7 @@ def main(): logging.warning("No device found for %s", lora_hwversion) try: - with pidfilelock(progname) as pid_file: + with pidfilelock(progname): leds = LEDs(mtsio) # XXX - Spread the tests out over 1/4 of the interval? From 8192b1b658005df00316d8197b08074c637682e9 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sat, 15 Nov 2025 14:33:46 -0500 Subject: [PATCH 03/42] Adjust monit alerts as scripts use more memory and CPU --- roles/conduit/defaults/main.yml | 7 ++++++- roles/conduit/templates/monitrc.j2 | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/roles/conduit/defaults/main.yml b/roles/conduit/defaults/main.yml index 141383c..3555408 100644 --- a/roles/conduit/defaults/main.yml +++ b/roles/conduit/defaults/main.yml @@ -176,11 +176,16 @@ monit_pktfwd_stop: '"/etc/init.d/{{ monit_pktfwd_initscript }} stop"' monit_pktfwd_restart: '"/etc/init.d/{{ monit_pktfwd_initscript }} restart"' monit_pktfwd_reboot: 'exec "/sbin/reboot"' +# Monit loadavg +monit_loadavg_1m: 2 +monit_loadavg_5m: 4 +monit_memory_usage: 30% + # Monitor space on / monit_root_test: "usage > 50%" # Monitor space on /var/config -monit_config_test: "usage > 15%" +monit_config_test: "usage > 20%" # Monitor space on /var/volatile monit_volatile_test: "usage > 75%" diff --git a/roles/conduit/templates/monitrc.j2 b/roles/conduit/templates/monitrc.j2 index c826c03..e79b514 100644 --- a/roles/conduit/templates/monitrc.j2 +++ b/roles/conduit/templates/monitrc.j2 @@ -15,9 +15,9 @@ set httpd # Monitor the system check system $HOST - if loadavg (1min) > 2 then alert - if loadavg (5min) > 4 then alert - if memory 
usage > 25% then alert + if loadavg (1min) > {{ monit_loadavg_1m }} then alert + if loadavg (5min) > {{ monit_loadavg_5m }} then alert + if memory usage > {{ monit_memory_usage }} then alert # Event Queue set eventqueue basedir {{ monit_eventqueue }} From dc386f5e6cdfb489bf4bae2f72ecd4b424a55df2 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sat, 15 Nov 2025 14:35:00 -0500 Subject: [PATCH 04/42] Fix name of ntp.defaults file and clean up turds --- roles/conduit/files/monitor_modem.py | 8 ++++---- roles/conduit/tasks/conduit_leds.yml | 12 +++++++++--- roles/conduit/tasks/time.yml | 15 +++++++++++---- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index e046291..a278629 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -50,7 +50,7 @@ try: FileNotFoundError except NameError: - FileNotFoundError = IOError + FileNotFoundError = IOError class LockFileTimeout(Exception): def __init__(self, error): @@ -332,7 +332,7 @@ def read_routes(options): """ Read the routing table """ _rt = [] - + with open("/proc/net/route", "r") as fp: header = [] for line in fp: @@ -345,7 +345,7 @@ def read_routes(options): route = Route(header, parts) _rt.append(route) - return _rt + return _rt def ppp_on_boot(options, enable): """ Link or unlink system ppp startup script """ @@ -378,7 +378,7 @@ def ppp_on_boot(options, enable): logging.error("Error un-linking %s", options.ppp_on_boot, error) - + def check_modem(options): """ Run a set of checks """ diff --git a/roles/conduit/tasks/conduit_leds.yml b/roles/conduit/tasks/conduit_leds.yml index 1cb652c..c1d7269 100644 --- a/roles/conduit/tasks/conduit_leds.yml +++ b/roles/conduit/tasks/conduit_leds.yml @@ -7,7 +7,9 @@ mode: "0755" owner: root group: root - notify: restart conduit_leds + notify: + - restart conduit_leds + - Update system checksum file - name: conduit_leds /var/config/init.d/conduit_leds 
copy: @@ -16,7 +18,9 @@ mode: "0755" owner: root group: root - notify: restart conduit_leds + notify: + - restart conduit_leds + - Update system checksum file - name: conduit_leds Link /etc/init.d/conduit_leds to /var/config/init.d/conduit_leds file: @@ -24,5 +28,7 @@ state: link src: /var/config/init.d/conduit_leds force: yes - notify: restart conduit_leds + notify: + - restart conduit_leds + - Update system checksum file ... diff --git a/roles/conduit/tasks/time.yml b/roles/conduit/tasks/time.yml index 9963bac..ed51c50 100644 --- a/roles/conduit/tasks/time.yml +++ b/roles/conduit/tasks/time.yml @@ -53,8 +53,8 @@ - name: time Ensure /var/config/default/ntpd.default exists copy: - src: /etc/default/ntpd.default - dest: /var/config/default/ntpd.default + src: /etc/default/ntpd + dest: /var/config/default/ntpd remote_src: yes force: no notify: @@ -64,7 +64,7 @@ file: dest: /etc/default/ntpd state: link - src: /var/config/default/ntpd.default + src: /var/config/default/ntpd force: yes notify: - restart ntpd @@ -76,7 +76,7 @@ - name: time Prevent ntpd from hanging on boot when net is down lineinfile: - dest: /var/config/default/ntpd.default + dest: /var/config/default/ntpd regexp: "^SET_SYSTEM_CLOCK=" line: "SET_SYSTEM_CLOCK={{ set_system_clock }}" state: present @@ -84,6 +84,13 @@ - restart ntpd - Update system checksum file +- name: time Clean up turds + ansible.builtin.file: + path: /var/config/default/ntpd.default + state: absent + notify: + - Update system checksum file + # # Monit # From 7e4326156c649c02093df26ae2be99902ed569b1 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sat, 15 Nov 2025 18:51:21 -0500 Subject: [PATCH 05/42] Fix error when there is no network --- roles/conduit/files/conduit_leds.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index 7fa555d..f1b92d2 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -43,6 +43,8 @@ 
import sys import time +cached_ip = None + class LockFileTimeout(Exception): def __init__(self, error): self.value = error From 9ded4812a26f20bba21fd1131c1e808bf8dd388f Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 16 Nov 2025 09:20:34 -0500 Subject: [PATCH 06/42] fixup! Add script to monitor connections to prefer Ethernet over PPP --- roles/conduit/files/monitor_modem.initd | 17 ++- roles/conduit/files/monitor_modem.py | 195 +++++++++--------------- roles/conduit/tasks/monitor_modem.yml | 2 +- roles/conduit/templates/monitrc.j2 | 8 + 4 files changed, 97 insertions(+), 125 deletions(-) diff --git a/roles/conduit/files/monitor_modem.initd b/roles/conduit/files/monitor_modem.initd index 3fe6f29..bfa8443 100644 --- a/roles/conduit/files/monitor_modem.initd +++ b/roles/conduit/files/monitor_modem.initd @@ -12,7 +12,7 @@ ### END INIT INFO PATH="/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin" -NAME=monitor_mondem +NAME=monitor_modem DAEMON=/usr/local/sbin/monitor_modem PIDFILE=/var/run/${NAME}.pid DAEMON_ARGS= @@ -27,7 +27,7 @@ fi [ -x ${DAEMON} ] || exit 0 is_running() { - pgrep -F ${PIDFILE} > /dev/null + pgrep -F ${PIDFILE} > /dev/null 2>&1 } start() { @@ -53,9 +53,18 @@ case "$1" in start ;; status) - is_running + if is_running; then + echo "${NAME} is running with PID $(cat ${PIDFILE})" >&2 + else + echo "${NAME} is not running" >&2 + exit 1 + fi ;; *) - echo "Usage: $0 {start|stop|status|restart}" + echo "Usage: $0 {start|stop|status|restart}" >&2 + exit 2 + ;; esac + +exit 0 #========================================= diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index a278629..dcf324c 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -288,6 +288,8 @@ def set_rpfilter(options, value, interfaces): """ Set values of rp_filter on the specified interface(s) """ for interface in interfaces: + if not interface: + continue try: with 
open("/proc/sys/net/ipv4/conf/%s/rp_filter" % interface, "w") as fp: fp.write(str(value)) @@ -445,27 +447,20 @@ def pppd(options, enable): logging.error("%s: %s", " ".join(cmd), error) pass - for service in [ 'ppp0', 'pppd']: - cmd = ["monit", "unmonitor", service] - try: - logging.debug("Unonitoring %s", service) - subprocess.check_call(cmd) - except subprocess.CalledProcessError as error: - logging.error("%s: %s", " ".join(cmd), error) - pass - -def main(): - """It all happens here""" - - options = parse_args() + for service in [ 'ppp0', 'pppd']: + cmd = ["monit", "unmonitor", service] + try: + logging.debug("Unonitoring %s", service) + subprocess.check_call(cmd) + except subprocess.CalledProcessError as error: + logging.error("%s: %s", " ".join(cmd), error) + pass - if not options.foreground: - if not daemonize(): - return 1 +def find_default_interface(options): + """ Read the routing table and figure out if we have a non-ppp + interface with a default route. This will tell us which is the + primary interface. """ - # Read the routing table and figure out if we have a non-ppp - # interface with a default route. This will tell us which is the - # primary interface. 
rt = read_routes(options) for route in rt: if route.Flags & 0x3 != 0x3: @@ -474,45 +469,72 @@ def main(): continue if route.Iface == "ppp0": continue - default_interface = route.Iface - logging.info("Using a default interface of %s", default_interface) - break - else: - logging.fatal("Unable to find a non-ppp interface with a default route") - return 1 + logging.info("Using a default interface of %s", route.Iface) + return route.Iface - # Set rp_filter to allow RFC3704 Losse Reverse Path Each so we can - # receive pings that are not from the expected interface - set_rpfilter(options, 2, ["all", "default", default_interface, "ppp0"]) + logging.info("Unable to find a non-ppp interface with a default route") + return None - seq = -1 - while time.sleep(options.interval) is None: +def main(): + """It all happens here""" - logging.debug("check_modem") - have_modem, have_sim = check_modem(options) - if not have_modem or not have_sim: - logging.warning("NO Modem or SIM, stopping pppd") - pppd(options, False) - continue + progname = os.path.basename(sys.argv[0]) - # Test ping response of default_interface - # Seq is a unsigned 16 bit integer - responses = 0 - for ping in range(options.pings): - seq = seq + 1 if seq < 65535 else 0 - logging.debug("send_icmp (seq %d) via %s", seq, default_interface) - if icmp_echo(options.hostname, interface=default_interface, seq=seq): - responses += 1 - time.sleep(.1) - # Call it good if we get 80% of our pings back - if responses >= float(options.pings) * 0.80: - logging.warning("Received response on %s, stopping pppd", default_interface) - pppd(options, False) - continue + options = parse_args() - logging.warning("No response received on %s, starting pppd", default_interface) - pppd(options, True) - continue + if not options.foreground: + if not daemonize(): + return 1 + + try: + with pidfilelock(progname): + default_interface = find_default_interface(options) + + # Set rp_filter to allow RFC3704 Losse Reverse Path Each so we can + # 
receive pings that are not from the expected interface + set_rpfilter(options, 2, ["all", "default", default_interface, "ppp0"]) + + seq = -1 + while time.sleep(options.interval) is None: + logging.debug("check_modem") + + new_default_interface = find_default_interface(options) + if default_interface and new_default_interface != default_interface: + set_rpfilter(options, 2, [new_default_interface]) + + default_interface = new_default_interface + if not default_interface: + logging.warning("No default interface, starting pppd") + pppd(options, True) + continue + + have_modem, have_sim = check_modem(options) + if not have_modem or not have_sim: + logging.warning("No Modem or SIM, stopping pppd") + pppd(options, False) + continue + + # Test ping response of default_interface + # Seq is a unsigned 16 bit integer + responses = 0 + for ping in range(options.pings): + seq = seq + 1 if seq < 65535 else 0 + logging.debug("send_icmp (seq %d) via %s", seq, default_interface) + if icmp_echo(options.hostname, interface=default_interface, seq=seq): + responses += 1 + time.sleep(.1) + # Call it good if we get 80% of our pings back + if responses >= float(options.pings) * 0.80: + logging.warning("Received response on %s, stopping pppd", default_interface) + pppd(options, False) + continue + + logging.warning("No response received on %s, starting pppd", default_interface) + pppd(options, True) + continue + except LockFileTimeout: + logging.critical("Another instance of %s is running", progname) + return 1 return 0 @@ -526,70 +548,3 @@ def main(): logging.exception(exc) sys.exit(rc) - - - -# # # # XXX Adapt this and keep track of connection duration (by address and port) - -def parse_ip_port(hex_ip, hex_port): - ip = socket.inet_ntoa(struct.pack(" (pid, process_name).""" - inode_map = {} - for pid in filter(str.isdigit, os.listdir("/proc")): - fd_dir = os.path.join("/proc", pid, "fd") - comm_file = os.path.join("/proc", pid, "comm") - try: - with open(comm_file, "r") as f: - pname = 
f.read().strip() - except IOError: - pname = "unknown" - try: - for fd in os.listdir(fd_dir): - path = os.path.join(fd_dir, fd) - try: - target = os.readlink(path) - if target.startswith("socket:["): - inode = target[8:-1] - inode_map[inode] = (int(pid), pname) - except OSError: - continue - except OSError: - continue - return inode_map - -def get_established_tcp_connections(): - results = [] - inode_map = get_inode_to_process() - with open("/proc/net/tcp", "r") as f: - next(f) # skip header - for line in f: - parts = line.split() - local_ip, local_port = parts[1].split(":") - remote_ip, remote_port = parts[2].split(":") - state = parts[3] - inode = parts[9] - if state != "01": # only ESTABLISHED - continue - lip, lport = parse_ip_port(local_ip, local_port) - rip, rport = parse_ip_port(remote_ip, remote_port) - proc = inode_map.get(inode, (None, None)) - results.append({ - "local": (str(lip), lport), - "remote": (str(rip), rport), - "pid": proc[0], - "program": proc[1] - }) - return results - -# Example usage -if __name__ == "__main__": - conns = get_established_tcp_connections() - for c in conns: - print("%s:%d -> %s:%d (pid=%s, program=%s)" % - (c["local"][0], c["local"][1], - c["remote"][0], c["remote"][1], - c["pid"], c["program"])) diff --git a/roles/conduit/tasks/monitor_modem.yml b/roles/conduit/tasks/monitor_modem.yml index 2f3079e..865df62 100644 --- a/roles/conduit/tasks/monitor_modem.yml +++ b/roles/conduit/tasks/monitor_modem.yml @@ -52,5 +52,5 @@ notify: - Update system checksum file when: "'modem_at0' not in ansible_local.dev" - + ... 
diff --git a/roles/conduit/templates/monitrc.j2 b/roles/conduit/templates/monitrc.j2 index e79b514..eb419a2 100644 --- a/roles/conduit/templates/monitrc.j2 +++ b/roles/conduit/templates/monitrc.j2 @@ -97,6 +97,14 @@ check process conduit_leds PIDFILE /var/run/conduit_leds.pid stop program = "/etc/init.d/conduit_leds stop" {% endif -%} +{% if 'modem_at0' in ansible_facts['ansible_local'].dev -%} +# Monitor monitor_modem +check process monitor_modem PIDFILE /var/run/monitor_modem.pid + if does not exist for {{ monit_process_period }} then restart + start program = "/etc/init.d/monitor_modem start" with timeout 15 seconds + stop program = "/etc/init.d/monitor_modem stop" +{% endif -%} + # Monitor system directory checksums check program check_system_md5 with path /usr/local/lib/check_system_md5 every "{{ 60 | random(seed=inventory_hostname) }} * * * *" From d9ee533d89eea261312a82c5b6e3f27920b43985 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 16 Nov 2025 09:21:19 -0500 Subject: [PATCH 07/42] check_leds: Fix error with duplicate processes And make initd script status informative --- roles/conduit/files/conduit_leds.initd | 15 ++++++++++++--- roles/conduit/files/conduit_leds.py | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/roles/conduit/files/conduit_leds.initd b/roles/conduit/files/conduit_leds.initd index e790271..a9f793c 100644 --- a/roles/conduit/files/conduit_leds.initd +++ b/roles/conduit/files/conduit_leds.initd @@ -27,7 +27,7 @@ fi [ -x ${DAEMON} ] || exit 0 is_running() { - pgrep -F ${PIDFILE} > /dev/null + pgrep -F ${PIDFILE} > /dev/null 2>&1 } start() { @@ -53,9 +53,18 @@ case "$1" in start ;; status) - is_running + if is_running; then + echo "${NAME} is running with PID $(cat ${PIDFILE})" >&2 + else + echo "${NAME} is not running" >&2 + exit 1 + fi ;; *) - echo "Usage: $0 {start|stop|status|restart}" + echo "Usage: $0 {start|stop|status|restart}" >&2 + exit 2 + ;; esac + +exit 0 #========================================= 
diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index f1b92d2..1ef017d 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -70,7 +70,7 @@ def pidfilelock(name): if err.errno != errno.EAGAIN: raise err else: - logging.debug("Timeout trying to lock", pidfile_path) + logging.debug("Timeout trying to lock: %s", pidfile_path) time.sleep(1) time_left -= 1 if time_left == 0: From 2fccd7933b3905b928c5165c7f542e7ed00151f8 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 16 Nov 2025 09:22:45 -0500 Subject: [PATCH 08/42] Increase basic station tc_timeout on cellular --- bin/mt_station_conf | 11 +++++++++++ roles/conduit/tasks/ttn_basic_station.yml | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/bin/mt_station_conf b/bin/mt_station_conf index 0c8bc8b..71a3322 100755 --- a/bin/mt_station_conf +++ b/bin/mt_station_conf @@ -109,6 +109,10 @@ def parse_args(): group.add_argument("--log-file", dest="log_file", help="Log file name") + group.add_argument("--tc-timeout", + dest="tc_timeout", + type=int, + help="Set tc_timeout value in seconds") options = parser.parse_args() if options.debug: @@ -164,6 +168,13 @@ def main(): if options.gps: data['station_conf']['gps'] = options.gps data['station_conf']['pps'] = "fuzzy" if options.fuzzy_pps else "gps" + if options.tc_timeout: + for key in data['station_conf'].keys(): + if key.lower() == "tc_timeout": + data['station_conf'][key] = "%ds" % options.tc_timeout + break + else: + data['station.conf']["tc_timeout"] = "%ds" % options.tc_timeout print(json.dumps(data, indent=4)) diff --git a/roles/conduit/tasks/ttn_basic_station.yml b/roles/conduit/tasks/ttn_basic_station.yml index c9f03c0..83f9acb 100644 --- a/roles/conduit/tasks/ttn_basic_station.yml +++ b/roles/conduit/tasks/ttn_basic_station.yml @@ -24,7 +24,7 @@ - forwader_version is defined - ansible_local.opkg.lora_basic_station is not defined or 
ansible_local.opkg.lora_basic_station != forwarder_version -- name: ttn_basic_station Install the desired version of lora_basic_station +- name: ttn_basic_station Install the desired version of lora_basic_station opkg: name: "lora_basic_station=={{ forwarder_version }}" state: present @@ -52,6 +52,7 @@ --log-level {{ basic_log_level }} --log-size {{ forwarder_logrotate_size }} --log-rotate {{ forwarder_logrotate_count }} + {{ '--tc-timeout 5' if use_cellular else '' }} {{ gps_arg }}" register: station_conf_raw - set_fact: From 67d798a5bb7dc526a4689aaf64395afc9e0c3f34 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 16 Nov 2025 09:23:06 -0500 Subject: [PATCH 09/42] Modify the real ppp_on_boot, not the link --- roles/conduit/tasks/ppp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/conduit/tasks/ppp.yml b/roles/conduit/tasks/ppp.yml index f650136..2633497 100644 --- a/roles/conduit/tasks/ppp.yml +++ b/roles/conduit/tasks/ppp.yml @@ -59,7 +59,7 @@ - name: ppp Set provider lineinfile: - path: /etc/ppp/ppp_on_boot + path: /var/config/ppp/ppp_on_boot regexp: '^\$PPPD call ' line: '$PPPD call {{ cellular_provider }}' notify: Start ppp From 0e14e9cbd74b62249a392575040e98316ac2a24b Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 16 Nov 2025 13:26:06 -0500 Subject: [PATCH 10/42] Fix configuration of ppp to align w/mlinux 1. Don't edit provider in /etc/ppp/ppp_on_boot, make /etc/ppp/peers/provider a symlink. 2. Fix the shebang in /etc/ppp/ppp_on_boot 3. 
Change perms on /etc/ppp/ppp_on_boot to enable/disable --- roles/conduit/tasks/ppp.yml | 40 +++++++++++++++------------- roles/conduit/templates/ppp.monit.j2 | 2 +- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/roles/conduit/tasks/ppp.yml b/roles/conduit/tasks/ppp.yml index 2633497..2fe29ce 100644 --- a/roles/conduit/tasks/ppp.yml +++ b/roles/conduit/tasks/ppp.yml @@ -24,23 +24,14 @@ state: link src: /var/config/ppp/ppp_on_boot force: true - when: ppp_on_boot.stat.islnk is defined and not ppp_on_boot.stat.islnk + when: + - ppp_on_boot.stat.lnk_target is not defined or ppp_on_boot.stat.lnk_target != '/var/config/ppp/ppp_on_boot' - name: ppp Make /var/config/ppp/ppp_on_boot executable when we are using cellular file: dest: /var/config/ppp/ppp_on_boot - mode: "755" + mode: "{{ '0755' if use_cellular and cellular_provider is defined else '0644' }}" notify: Start ppp - when: - - use_cellular - - cellular_provider is defined - -- name: ppp Make /var/config/ppp/ppp_on_boot not executable when we are not using cellular - file: - dest: /var/config/ppp/ppp_on_boot - mode: "644" - notify: Stop ppp - when: use_cellular == False or cellular_provider is not defined # # Set or reset APN @@ -57,15 +48,28 @@ - use_cellular - cellular_apn is defined -- name: ppp Set provider +# +# Setup PPP scripts +# + +- name: ppp Set /var/config/ppp/peers/provider + ansible.builtin.file: + path: /var/config/ppp/peers/provider + state: "{{ 'link' if use_cellular and cellular_provider is defined else 'absent' }}" + src: "/var/config/ppp/peers/{{ cellular_provider }}" + notify: Start ppp + +- name: ppp Restore default provider in /var/config/ppp/ppp_on_boot lineinfile: path: /var/config/ppp/ppp_on_boot regexp: '^\$PPPD call ' - line: '$PPPD call {{ cellular_provider }}' - notify: Start ppp - when: - - use_cellular - - cellular_provider is defined + line: '$PPPD call provider' + +- name: ppp make /var/config/ppp/ppp_on_boot a script + lineinfile: + path: /var/config/ppp/ppp_on_boot 
+ regexp: '/bin/sh$' + line: '#!/bin/sh' # # Set PPP configuration options diff --git a/roles/conduit/templates/ppp.monit.j2 b/roles/conduit/templates/ppp.monit.j2 index 9e29936..05eec0f 100644 --- a/roles/conduit/templates/ppp.monit.j2 +++ b/roles/conduit/templates/ppp.monit.j2 @@ -2,7 +2,7 @@ check network "{{ monit_ppp_if }}" with interface "{{ monit_ppp_if }}" # if failed link then restart if changed link capacity then alert -check process pppd MATCHING "^/usr/sbin/pppd call {{ cellular_provider }}$" +check process pppd MATCHING "^/usr/sbin/pppd call provider$" if does not exist for 1 cycles then restart start program = {{ monit_ppp_start }} stop program = {{ monit_ppp_stop }} From 5794a842ef2f162b9bb99425c06e727f5d72bdfd Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 19 Nov 2025 11:03:34 -0500 Subject: [PATCH 11/42] Fix lock context manager to not overwrite pid file --- roles/conduit/files/conduit_leds.py | 50 ++++++++++++++++------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index 1ef017d..b25b0a1 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -55,26 +55,29 @@ def __str__(self): def pidfilelock(name): """ Context to lock a pid file """ - time_left = 30 + time_end = time.clock() + 30 pidfile_path = os.path.join("/var/run", name + ".pid") - lock_file = open(pidfile_path, 'w+') + fd = os.open(pidfile_path, os.O_RDWR | os.O_CREAT, 0o644) + lock_file = os.fdopen(fd, "r+") while True: try: logging.debug("Attempting to lock %s", pidfile_path) fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB) - lock_file.write(str(os.getpid()) + '\n') - lock_file.flush() - logging.debug("Wrote %d to %s", os.getpid(), pidfile_path) - break except IOError as err: if err.errno != errno.EAGAIN: raise err - else: - logging.debug("Timeout trying to lock: %s", pidfile_path) - time.sleep(1) - time_left -= 1 - if time_left == 0: - 
raise LockFileTimeout("Unable to lock %s" % pidfile_path) + logging.debug("Timeout trying to lock: %s", pidfile_path) + time.sleep(1) + if time.clock() >= time_end: + raise LockFileTimeout("Unable to lock %s" % pidfile_path) + continue + else: + lock_file.seek(0) + lock_file.truncate() + lock_file.write("%d\n" % os.getpid()) + lock_file.flush() + os.fsync(fd) + logging.debug("Wrote %d to %s", os.getpid(), pidfile_path) try: yield lock_file @@ -319,17 +322,18 @@ def check_tunnel(options): if not cached_ip: logging.info("check_tunnel: Unable to resolve %s", remote_host) return False - remote_host = cached_ip - logging.info("check_tunnel: Using cached IP %s", remote_host) - - for conn in psutil.net_connections(): - if conn.type == socket.SOCK_STREAM and conn.status == psutil.CONN_ESTABLISHED and conn.raddr == (remote_ip, local_port): - logging.info("check_tunnel: Found connection to %s(%s):%s with PID %d", - remote_host, - remote_ip, - local_port, - conn.pid) - return True + remote_ip = cached_ip + logging.info("check_tunnel: Using cached IP %s", remote_ip) + + if remote_ip: + for conn in psutil.net_connections(): + if conn.type == socket.SOCK_STREAM and conn.status == psutil.CONN_ESTABLISHED and conn.raddr == (remote_ip, local_port): + logging.info("check_tunnel: Found connection to %s(%s):%s with PID %d", + remote_host, + remote_ip, + local_port, + conn.pid) + return True logging.info("check_tunnel: No connection found to %s(%s):%s", remote_host, remote_ip, local_port) return False From 3b70f8426ad744e972f361ab07542ce84f0ee616 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 19 Nov 2025 11:04:38 -0500 Subject: [PATCH 12/42] Lint --- roles/conduit/handlers/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/conduit/handlers/main.yml b/roles/conduit/handlers/main.yml index 6895a74..208eb76 100644 --- a/roles/conduit/handlers/main.yml +++ b/roles/conduit/handlers/main.yml @@ -6,7 +6,7 @@ # - name: update rc shell: "for init 
in ttn-pkt-forwarder lora-basic-station; do update-rc.d -f ${init} remove; test -f /etc/init.d/${init} && update-rc.d ${init} defaults 95 30 || true; done" - + # # Restart the packet forwarder # @@ -37,7 +37,7 @@ debug: msg: "Please go to https://console.thethingsnetwork.org/gateways and update the antenna Altitude and Placement your gateways. The - API does not allow these parameters to be set" + API does not allow these parameters to be set" run_once: true # @@ -73,7 +73,7 @@ ignore_errors: true # -# Remind +# Remind # - name: interface reboot debug: From 96c5e1aa878a02dfa0bcc8dc52f868595eea4773 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 19 Nov 2025 11:05:50 -0500 Subject: [PATCH 13/42] Have autossh check status more often --- roles/conduit/defaults/main.yml | 2 ++ roles/conduit/files/conduit_leds.py | 5 +++-- roles/conduit/templates/ssh_tunnel.j2 | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/roles/conduit/defaults/main.yml b/roles/conduit/defaults/main.yml index 3555408..a27b589 100644 --- a/roles/conduit/defaults/main.yml +++ b/roles/conduit/defaults/main.yml @@ -90,6 +90,8 @@ ssh_tunnel_ssh_key: /etc/ssh/ssh_host_rsa_key ssh_tunnel_daemon: /usr/bin/autossh ssh_tunnel_ssh_port: 22 ssh_tunnel_base_port: 0 +ssh_tunnel_first_poll: 120 +ssh_tunnel_poll: 60 # Static Nameserver defaults resolv_conf_static: /var/config/network/resolv.conf-static diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index b25b0a1..f2f5b39 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -55,7 +55,7 @@ def __str__(self): def pidfilelock(name): """ Context to lock a pid file """ - time_end = time.clock() + 30 + time_end = time.time() + 30 pidfile_path = os.path.join("/var/run", name + ".pid") fd = os.open(pidfile_path, os.O_RDWR | os.O_CREAT, 0o644) lock_file = os.fdopen(fd, "r+") @@ -68,7 +68,7 @@ def pidfilelock(name): raise err logging.debug("Timeout trying to lock: %s", 
pidfile_path) time.sleep(1) - if time.clock() >= time_end: + if time.time() >= time_end: raise LockFileTimeout("Unable to lock %s" % pidfile_path) continue else: @@ -78,6 +78,7 @@ def pidfilelock(name): lock_file.flush() os.fsync(fd) logging.debug("Wrote %d to %s", os.getpid(), pidfile_path) + break try: yield lock_file diff --git a/roles/conduit/templates/ssh_tunnel.j2 b/roles/conduit/templates/ssh_tunnel.j2 index 0b8bb33..0d53b54 100644 --- a/roles/conduit/templates/ssh_tunnel.j2 +++ b/roles/conduit/templates/ssh_tunnel.j2 @@ -22,3 +22,5 @@ SSH_KEY={{ ssh_tunnel_ssh_key }} SSH_PORT={{ ssh_tunnel_ssh_port }} {% endif %} DAEMON_ARGS="{{ ssh_tunnel_daemon_args }}" +AUTOSSH_POLL={{ ssh_tunnel_poll }} +AUTOSSH_FIRST_POLL={{ ssh_tunnel_first_poll }} From 941700dd859b9085948cd545d50dc0b5e25392c6 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 10:21:41 -0500 Subject: [PATCH 14/42] Pass AUTOSSH_ environment settings to autossh --- roles/conduit/files/ssh_tunnel.initd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/roles/conduit/files/ssh_tunnel.initd b/roles/conduit/files/ssh_tunnel.initd index d8b6f4f..b28d4ad 100644 --- a/roles/conduit/files/ssh_tunnel.initd +++ b/roles/conduit/files/ssh_tunnel.initd @@ -47,6 +47,9 @@ is_running() { } start() { + for env in $(set | grep -E '^AUTOSSH_'); do + eval export ${env} + done start-stop-daemon --start --quiet --exec ${DAEMON} -- ${DAEMON_ARGS} } From f08eff45f2913f9ab57fd0860e199a77e9bd2060 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 10:22:14 -0500 Subject: [PATCH 15/42] Do not need do_restart hack from ssh_tunnel --- roles/conduit/files/monitor_modem.initd | 3 --- 1 file changed, 3 deletions(-) diff --git a/roles/conduit/files/monitor_modem.initd b/roles/conduit/files/monitor_modem.initd index bfa8443..6f83a33 100644 --- a/roles/conduit/files/monitor_modem.initd +++ b/roles/conduit/files/monitor_modem.initd @@ -46,9 +46,6 @@ case "$1" in stop ;; restart|reload) - nohup ${0} 
do_restart - ;; - do_restart) stop start ;; From 5ffb9dbac52d8acbee15744e9aeb7087bedf1521 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 10:22:47 -0500 Subject: [PATCH 16/42] Do not error on calls w/o env variables --- roles/conduit/files/ifup_restart | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/conduit/files/ifup_restart b/roles/conduit/files/ifup_restart index c6e2031..bc2c9b1 100644 --- a/roles/conduit/files/ifup_restart +++ b/roles/conduit/files/ifup_restart @@ -2,9 +2,9 @@ # Restart services when we get an address -[ $METHOD = "dhcp" ] && exit 0 +[ "${METHOD}" = "dhcp" ] && exit 0 -logger -s -t $(basename ${0}) -p daemon.info "$METHOD interface $IFACE is up, restarting services" +logger -s -t $(basename ${0}) -p daemon.info "${METHOD} interface ${IFACE} is up, restarting services" test -x /etc/init.d/ttn-pkt-forwarder && /etc/init.d/ttn-pkt-forwarder restart test -x /etc/init.d/lora-basic-station && /etc/init.d/lora-basic-station restart test -x /etc/init.d/ssh_tunnel && /etc/init.d/ssh_tunnel restart From 326dec7342083a642bb234f49fccba1aa219ff15 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 15:27:03 -0500 Subject: [PATCH 17/42] Remove extension from progname Which will prevent two copies from running if testing --- roles/conduit/files/conduit_leds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index f2f5b39..d471847 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -455,7 +455,7 @@ def init_logging(options): def main(): """It all happens here""" - progname = os.path.basename(sys.argv[0]) + progname = os.path.splitext(os.path.basename(sys.argv[0]))[0] options = parse_args() From a26cbdc6b2084cdcf77a95701731a3ef6abe44d0 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 15:28:41 -0500 Subject: [PATCH 18/42] Rewrite At most every 
3 hours, check broadcast interfaces to see if they have connectivity. This is done by brining ppp0 down if any broadcast interfaces have link up and addresses assigned. Restart services when broadcast interface link transitions to up. Restart just ssh_tunnel if connection is not from an active interface. Should work if we have both WiFi and Ethernet. --- roles/conduit/files/monitor_modem.py | 622 +++++++++++++++++++-------- 1 file changed, 438 insertions(+), 184 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index dcf324c..c59d319 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -28,15 +28,15 @@ import array import argparse -import binascii from contextlib import contextmanager import errno import fcntl -import ipaddress import logging from logging.handlers import SysLogHandler import os +import psutil import select +import signal import socket import stat import struct @@ -58,30 +58,48 @@ def __init__(self, error): def __str__(self): return repr(self.value) +class DNSTimeout(Exception): + def __init__(self, error): + self.value = error + def __str__(self): + return repr(self.value) + +# Global flag to indicate shutdown +shutdown_requested = False + +def catch_interrupt(signum, frame): + global shutdown_requested + logging.warning("Received signal %s, initiating shutdown.", signum) + shutdown_requested = True + @contextmanager def pidfilelock(name): """ Context to lock a pid file """ - time_left = 30 + time_end = time.time() + 30 pidfile_path = os.path.join("/var/run", name + ".pid") - lock_file = open(pidfile_path, 'w+') + fd = os.open(pidfile_path, os.O_RDWR | os.O_CREAT, 0o644) + lock_file = os.fdopen(fd, "r+") while True: try: logging.debug("Attempting to lock %s", pidfile_path) fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB) - lock_file.write(str(os.getpid()) + '\n') - lock_file.flush() - logging.debug("Wrote %d to %s", os.getpid(), pidfile_path) - break 
except IOError as err: if err.errno != errno.EAGAIN: raise err - else: - logging.debug("Timeout trying to lock", pidfile_path) - time.sleep(1) - time_left -= 1 - if time_left == 0: - raise LockFileTimeout("Unable to lock %s" % pidfile_path) + logging.debug("Timeout trying to lock: %s", pidfile_path) + time.sleep(1) + if shutdown_requested or time.time() >= time_end: + raise LockFileTimeout("Unable to lock %s" % pidfile_path) + continue + else: + lock_file.seek(0) + lock_file.truncate() + lock_file.write("%d\n" % os.getpid()) + lock_file.flush() + os.fsync(fd) + logging.debug("Wrote %d to %s", os.getpid(), pidfile_path) + break try: yield lock_file @@ -179,6 +197,13 @@ def parse_args(): group.add_argument("--ppp-on-boot", default="/etc/ppp/ppp_on_boot", help="Where system looks for ppp startup script") + group.add_argument("--change-script", + default="/var/config/ifup_restart", + help="Script to run when status changes") + group.add_argument("--ignore-link-time", + default=60*60*3, + type=int, + help="How often to retry broadcase interfaces if they did not work when we tried them") # Parse args options = parser.parse_args() @@ -213,6 +238,9 @@ def init_logging(options): else: logger.setLevel('WARNING') +# Example usage: +# bytes_sent = sendmsg_with_pktinfo(sock, packet, "8.8.8.8", interface="eth0") +# If you prefer to supply interface index directly, you can call _ifname_to_index("eth0") yourself. 
if struct.pack("H",1) == "\x00\x01": # big endian def checksum(pkt): if len(pkt) % 2 == 1: @@ -232,7 +260,32 @@ def checksum(pkt): s = ~s return (((s>>8)&0xff)|s<<8) & 0xffff -def icmp_echo(dst_ip, interface = None, payload = b'hello', id_ = None, seq = 1): +def resolve_with_timeout(hostname, timeout=5): + def handler(signum, frame): + raise DNSTimeout("Timeout during name resolution") + + old_handler = signal.signal(signal.SIGALRM, handler) + signal.alarm(timeout) # seconds + + ip_addr = None + try: + ip_addr = socket.getaddrinfo(hostname, None)[0][4][0] + except DNSTimeout as error: + logging.error("resolve_with_timeout: Timeout resolving: %s: %s", hostname, error) + except socket.gaierror as error: + logging.error("resolve_with_timeout: error resolving %s: %s", hostname, error) + else: + logging.debug("resolve_with_timeout: %s -> %s", hostname, ip_addr) + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, old_handler) + + return ip_addr + +def icmp_echo(dst_name, interface=None, payload=b'hello', id_=None, seq=1): + + logging.debug("icmp_echo(%s, interface=%s, id=%s, seq=%d)", dst_name, interface, id_, seq) + if id_ is None: id_ = os.getpid() & 0xFFFF @@ -244,16 +297,28 @@ def icmp_echo(dst_ip, interface = None, payload = b'hello', id_ = None, seq = 1) sock.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, interface.encode() + b'\0') # Build ICMP echo request header: type(8)=echo request, code=0, checksum, id, seq - icmp_type = 8 - icmp_code = 0 - header = struct.pack('!BBHHH', icmp_type, icmp_code, 0, id_, seq) + ICMP_TYPE = 8 + ICMP_CODE = 0 + header = struct.pack('!BBHHH', ICMP_TYPE, ICMP_CODE, 0, id_, seq) packet = header + payload chksum = checksum(packet) - header = struct.pack('!BBHHH', icmp_type, icmp_code, chksum, id_, seq) + header = struct.pack('!BBHHH', ICMP_TYPE, ICMP_CODE, chksum, id_, seq) packet = header + payload + try: + dst_ip = resolve_with_timeout(dst_name, timeout=1) + except DNSTimeout: + return False + else: + if dst_ip is 
None: + return False + t0 = time.time() - sock.sendto(packet, (dst_ip, 0)) + try: + sock.sendto(packet, (dst_ip, 0)) + except socket.gaierror as error: + logging.error("sendto error: %s", error) + return False deadline = t0 + 10.0 while True: @@ -261,125 +326,170 @@ def icmp_echo(dst_ip, interface = None, payload = b'hello', id_ = None, seq = 1) if remaining <= 0: return False - # Ignore errno 4 + # Wait for the socket to be ready try: ready, _, _ = select.select([sock], [], [], remaining) if not ready: - logging.debug("TIMEOUT") + logging.debug("icmp_echo: timeout") return False except (IOError, OSError): continue - recv_packet, addr = sock.recvfrom(65535) + # Read pending packets + while True: + try: + recv_packet, addr = sock.recvfrom(65535, socket.MSG_DONTWAIT) + except (OSError, IOError): + return False - iph_len = (struct.unpack("!B", recv_packet[:1])[0] & 0xf) * 4 - icmp_packet = recv_packet[iph_len:] - if len(icmp_packet) < 8: - continue - r_type, r_code, r_chksum, r_id, r_seq = struct.unpack("!BBHHH", icmp_packet[:8]) - logging.debug("RECV type %d code %d id %d seq %d", r_type, r_code, r_id, r_seq) + iph_len = (struct.unpack("!B", recv_packet[:1])[0] & 0xf) * 4 + icmp_packet = recv_packet[iph_len:] + if len(icmp_packet) < 8: + continue + r_type, r_code, r_chksum, r_id, r_seq = struct.unpack("!BBHHH", icmp_packet[:8]) + logging.debug("RECV type %d code %d id %d seq %d", r_type, r_code, r_id, r_seq) - if r_type == 0 and r_id == id_ and r_seq == seq: - return True + if r_type == 0 and r_id == id_ and r_seq == seq: + return True return False -def set_rpfilter(options, value, interfaces): - """ Set values of rp_filter on the specified interface(s) """ +def get_ppp_addresses(): + """ + Returns IP addresses of ppp interfaces + """ + + result = set() - for interface in interfaces: - if not interface: + # Get interface addresses and stats + addrs = psutil.net_if_addrs() + stats = psutil.net_if_stats() + + for iface, iface_addrs in addrs.items(): + if iface != 
'ppp0': + continue + iface_stat = stats.get(iface) + if not iface_stat: continue - try: - with open("/proc/sys/net/ipv4/conf/%s/rp_filter" % interface, "w") as fp: - fp.write(str(value)) - except FileNotFoundError: - pass -class Route(object): - """ A routing table entry """ + # Skip interfaces that are down + if not iface_stat.isup: + continue - def __init__(self, header, parts): + # Check for IPv4 with broadcast + for addr in iface_addrs: + if addr.family == 2: # AF_INET (IPv4) + if addr.address and addr.ptp: + result.add(addr.address) + + return result + +def get_broadcast_interfaces(): + """ + Returns a list of interface names that: + - Are up (`isup` flag) + - Have an IPv4 address assigned + - Have a broadcast address assigned + - Have carrier detected (physical link up for Ethernet) + """ + result = [] + + # Get interface addresses and stats + addrs = psutil.net_if_addrs() + stats = psutil.net_if_stats() + + for iface, iface_addrs in addrs.items(): + iface_stat = stats.get(iface) + if not iface_stat: + continue - self._parts = {} + # Skip interfaces that are down + if not iface_stat.isup: + continue - for key, value in zip(header, parts): - if key == 'Iface': - self.__setattr__(key, value) - elif key in ['Destination', 'Gateway', 'Mask']: - self.__setattr__(key, ipaddress.ip_address(binascii.unhexlify(value)[::-1])) - elif key == 'Flags': - self.__setattr__(key, int(value, 16)) - else: - self.__setattr__(key, int(value)) + # Check for IPv4 with broadcast + iface_address = None + for addr in iface_addrs: + if addr.family == 2: # AF_INET (IPv4) + if addr.address and addr.broadcast: + iface_address = addr.address + break + if not iface_address: + continue - def __repr__(self): - return self.__str__() + # Check carrier + carrier_file = "/sys/class/net/{}/carrier".format(iface) + try: + with open(carrier_file, 'r') as f: + carrier = f.read().strip() + if carrier != '1': + continue + except IOError: + # If the file doesn't exist, assume link is up (virtual 
interface) + pass - def __str__(self): - return "Iface: %s Destination %s Gateway %s Flags %x RefCnt %d Use %d Metric %d Mask %s MTU %d Window %d IRTT %d" % ( - self.Iface, - self.Destination, - self.Gateway, - self.Flags, - self.RefCnt, - self.Use, - self.Metric, - self.Mask, - self.MTU, - self.Window, - self.IRTT) - -def read_routes(options): - """ Read the routing table """ - - _rt = [] - - with open("/proc/net/route", "r") as fp: - header = [] - for line in fp: - if len(line.strip()) == 0: - continue - parts = line.split() - if not header: - header = parts - continue - route = Route(header, parts) - _rt.append(route) + # Passed all checks + result.append((iface, iface_address)) - return _rt + return result def ppp_on_boot(options, enable): """ Link or unlink system ppp startup script """ try: - link_target = os.readlink(options.ppp_on_boot) + ppp_on_boot_stat = os.stat(options.ppp_on_boot) except (OSError, IOError): - link_target = None + logging.error("Unable to get stat info about %s", options.ppp_on_boot) + return - logging.debug("ppp_on_boot(%s): %s -> %s", enable, options.ppp_on_boot, link_target) + logging.debug("ppp_on_boot(%s): %s -> %o", enable, options.ppp_on_boot, ppp_on_boot_stat.st_mode) if enable: - if link_target and link_target != options.real_ppp_on_boot: + if ppp_on_boot_stat.st_mode & 0o111 != 0o111: try: - os.unlink(options.ppp_on_boot) - os.symlink(options.real_ppp_on_boot, options.ppp_on_boot) - logging.debug("ppp_on_boot: %s linked", options.ppp_on_boot) + os.chmod(options.ppp_on_boot, 0o755) + logging.info("ppp_on_boot: %s set to executable", options.ppp_on_boot) except OSError as error: - logging.error("Error linking %s -> %s", + logging.error("Error making %s executable: %s", options.ppp_on_boot, - options.real_ppp_on_boot, error) - return + return - if link_target: + if ppp_on_boot_stat.st_mode & 0o111 != 0: try: - os.unlink(options.ppp_on_boot) - logging.debug("ppp_on_boot: %s un-linked", options.ppp_on_boot) + 
os.chmod(options.ppp_on_boot, 0o644) + logging.info("ppp_on_boot: %s set to non-executable", options.ppp_on_boot) except OSError as error: - logging.error("Error un-linking %s", - options.ppp_on_boot, - error) + logging.error("Error making %s non-executable: %s", + options.ppp_on_boot, + error) + +def tunnel_addresses(options): + """ Return local addresses of all established tunnel connections """ + + addresses = set() + + try: + dst_ip = resolve_with_timeout(options.hostname, timeout=1) + except DNSTimeout: + return addresses + else: + if dst_ip is None: + return addresses + + for conn in psutil.net_connections('inet4'): + if conn.type != socket.SOCK_STREAM: + continue + if not conn.raddr: + continue + if conn.raddr.port != 22 or conn.raddr.ip != dst_ip: + continue + if conn.status != 'ESTABLISHED': + continue + + addresses.add(conn.laddr.ip) + + return addresses def check_modem(options): """ Run a set of checks """ @@ -389,15 +499,15 @@ def check_modem(options): modem_stat = os.stat(options.modem) if stat.S_ISCHR(modem_stat.st_mode): have_modem = True - except OSError: - pass + except OSError as error: + logging.error("Unable to stat %s: %s", options.modem, error) have_sim = False if have_modem: cmd = ["radio-cmd", "AT+CPIN?"] try: output = subprocess.check_output(cmd) - logging.info("check_modem: %s returned: %s", " ".join(cmd), output) + logging.debug("check_modem: %s returned: %s", " ".join(cmd), output) except subprocess.CalledProcessError as error: logging.debug("check_modem: %s returned: %s", " ".join(cmd), error) if "+CPIN: READY" in output: @@ -411,10 +521,8 @@ def pppd(options, enable): logging.debug("pppd(%s)", enable) - ppp_on_boot(options, enable) - try: - subprocess.check_call(["pidof", "pppd"]) + subprocess.check_output(["pidof", "pppd"], stderr=subprocess.STDOUT) logging.debug("pppd is running") ppp_is_running = True except subprocess.CalledProcessError: @@ -423,62 +531,247 @@ def pppd(options, enable): if enable: if not ppp_is_running: + cmd = 
["/etc/init.d/ppp", "start"] try: - logging.debug("Starting %s", options.ppp_on_boot) - subprocess.check_call([options.ppp_on_boot]) + logging.info("Running: %s", " ".join(cmd)) + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as error: - logging.error("Starting %s: %s", options.ppp_on_boot, error) - return + logging.error("%s: %s", " ".join(cmd), error) + return False + else: + logging.debug("%s: %s", " ".join(cmd), result.strip()) + + for service in [ 'ppp0', 'pppd']: + cmd = ["/usr/bin/monit", "monitor", service] + try: + logging.info("Running: %s", " ".join(cmd)) + result = subprocess.check_output(["monit", "monitor", service], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as error: + logging.error("%s: %s", " ".join(cmd), error) + else: + logging.debug("%s: %s", " ".join(cmd), result.strip()) - for service in [ 'ppp0', 'pppd']: - try: - logging.debug("Monitoring %s", service) - subprocess.check_call(["monit", "monitor", service]) - except subprocess.CalledProcessError as error: - logging.error("Monitoring %s: %s", service, error) + return True - return + return False if ppp_is_running: cmd = ["/etc/init.d/ppp", "stop"] try: - subprocess.check_call(cmd) + logging.info("Running: %s", " ".join(cmd)) + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as error: logging.error("%s: %s", " ".join(cmd), error) - pass + else: + logging.debug("%s: %s", " ".join(cmd), result.strip()) for service in [ 'ppp0', 'pppd']: cmd = ["monit", "unmonitor", service] try: - logging.debug("Unonitoring %s", service) - subprocess.check_call(cmd) + logging.info("Running: %s", " ".join(cmd)) + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as error: logging.error("%s: %s", " ".join(cmd), error) - pass + else: + logging.debug("%s: %s", " ".join(cmd), result.strip()) -def find_default_interface(options): - """ Read 
the routing table and figure out if we have a non-ppp - interface with a default route. This will tell us which is the - primary interface. """ + return True - rt = read_routes(options) - for route in rt: - if route.Flags & 0x3 != 0x3: - continue - if str(route.Destination) != "0.0.0.0": - continue - if route.Iface == "ppp0": - continue - logging.info("Using a default interface of %s", route.Iface) - return route.Iface + return False + +class IfState(object): + """ Store IF State """ + def __init__(self, name, address): + self.name = name + self.address = address + self.ignore_time = time.time() + self.link_state = True + self.seen = False + self.seq = -1 + self.responding = False + + def __str__(self): + return "%s: %s ignore: %f state: %s, seen: %s, seq: %d, responding: %s" % ( + self.name, + self.address, + self.ignore_time - time.time(), + self.link_state, + self.seen, + self.seq, + self.responding) + +def get_interface_for_dest(dest_ip): + """ + Return the outgoing interface name that the kernel would use to + reach the given destination IP. Uses 'ip route get'. 
+ """ + + try: + output = subprocess.check_output( + ["ip", "-4", "route", "get", dest_ip], + stderr=subprocess.STDOUT + ).strip() + except subprocess.CalledProcessError: + return None + + # Example outputs: + # "8.8.8.8 via 192.168.1.1 dev eth0 src 192.168.1.10" + # "192.168.1.50 dev eth0 src 192.168.1.10" + # "local 192.168.1.100 dev lo src 192.168.1.100" + + parts = output.split() + + # The interface always follows "dev" + if "dev" in parts: + idx = parts.index("dev") + if idx + 1 < len(parts): + return parts[idx + 1] - logging.info("Unable to find a non-ppp interface with a default route") return None +def process_interface(options, if_state): + + logging.debug("process_interface(%s)", if_state) + + if_state.responding = False + + # Test ping response of default_interface + # Seq is a unsigned 16 bit integer + responses = 0 + for ping in range(options.pings): + if_state.seq = if_state.seq + 1 if if_state.seq < 65535 else 0 + logging.debug("send_icmp (seq %d) via %s", if_state.seq, if_state.name) + if icmp_echo(options.hostname, interface=if_state.name, seq=if_state.seq): + responses += 1 + time.sleep(.1) + if shutdown_requested: + if_state.responding = False + return + + # Call it good if we get 80% of our pings back + if responses >= float(options.pings) * 0.80: + logging.info("Received response on %s, pppd not needed", if_state.name) + if_state.responding = True + return + + return + +def process(options, progname): + """ runs tests in a loop """ + + if_states = {} + while time.sleep(options.interval) is None: + global shutdown_requested + if shutdown_requested: + return + + logging.debug("check_modem") + + have_modem, have_sim = check_modem(options) + if not have_modem or not have_sim: + logging.info("No Modem or SIM, stopping pppd") + ppp_on_boot(options, False) + pppd(options, False) + continue + + # If we have a modem and sim, ensure ppp_on_boot is enabled + ppp_on_boot(options, True) + + # Mark interfaces as not seen + for if_name, if_state in 
if_states.items(): + if_state.seen = False + + ppp_new_state = True + do_restart = False + for if_name, if_address in get_broadcast_interfaces(): + if_state = if_states.setdefault(if_name, IfState(if_name, if_address)) + + logging.debug("looking at %s", if_state) + + # Mark as seen + if_state.seen = True + + # Check if we are supposed to be ignoring this link + if if_state.ignore_time > time.time(): + logging.info("%s: ignoring", if_name) + continue + + # Does the default route point here? + default_if_name = get_interface_for_dest("1.1.1.1") + if default_if_name == "ppp0": + # No, tell ppp to stop + ppp_new_state = False + logging.info("%s: up, telling ppp to stop", if_name) + continue + if default_if_name != if_name: + # Not at us, continue + logging.info("%s: up, not default", if_name) + continue + + # It's us, try pinging + was_responding = if_state.responding + process_interface(options, if_state) + if shutdown_requested: + return + + if if_state.responding: + ppp_new_state = False + logging.info("%s: responding", if_name) + + # Restarte if it's now responding + if not was_responding: + do_restart = True + continue + + # Not responding, ignore it for a while + logging.info("%s: not responding, ignoring", if_name) + if_state.ignore_time = time.time() + options.ignore_link_time + + # Mark current link state + active_addresses = set() + active_ifs = set() + for if_name, if_state in if_states.items(): + if_state.link_state = if_state.seen + if if_state.responding: + active_addresses.add(if_state.address) + active_ifs.add(if_name) + + # Ensure pppd is in the correct state + pppd(options, ppp_new_state) + + if do_restart: + cmd = [options.change_script] + env = os.environ.copy() + env["METHOD"] = "monitor_modem" + if active_ifs: + env["IFACE"] = ", ".join(list(active_ifs)) + try: + logging.warning("Running %s", " ".join(cmd)) + subprocess.check_call(cmd, env=env) + except subprocess.CalledProcessError as error: + logging.error("%s: %s", " ".join(cmd), error) + else: 
+ if ppp_new_state: + active_addresses = get_ppp_addresses() + + if active_addresses: + tunnel_addrs = tunnel_addresses(options) + logging.debug("Checkting that tunnel sources %s is in %s", + " ".join(list(tunnel_addrs)), + " ".join(list(active_addresses))) + if not active_addresses.intersection(tunnel_addrs): + # No tunnel connections from an active interface + cmd = ["/etc/init.d/ssh_tunnel", "restart"] + try: + logging.warning("Running %s", " ".join(cmd)) + subprocess.check_call(cmd) + except subprocess.CalledProcessError as error: + logging.error("%s: %s", " ".join(cmd), error) + def main(): """It all happens here""" - progname = os.path.basename(sys.argv[0]) + progname = os.path.splitext(os.path.basename(sys.argv[0]))[0] options = parse_args() @@ -486,52 +779,13 @@ def main(): if not daemonize(): return 1 + # Register signal handlers once (in your main code) + signal.signal(signal.SIGTERM, catch_interrupt) + signal.signal(signal.SIGINT, catch_interrupt) + try: with pidfilelock(progname): - default_interface = find_default_interface(options) - - # Set rp_filter to allow RFC3704 Losse Reverse Path Each so we can - # receive pings that are not from the expected interface - set_rpfilter(options, 2, ["all", "default", default_interface, "ppp0"]) - - seq = -1 - while time.sleep(options.interval) is None: - logging.debug("check_modem") - - new_default_interface = find_default_interface(options) - if default_interface and new_default_interface != default_interface: - set_rpfilter(options, 2, [new_default_interface]) - - default_interface = new_default_interface - if not default_interface: - logging.warning("No default interface, starting pppd") - pppd(options, True) - continue - - have_modem, have_sim = check_modem(options) - if not have_modem or not have_sim: - logging.warning("No Modem or SIM, stopping pppd") - pppd(options, False) - continue - - # Test ping response of default_interface - # Seq is a unsigned 16 bit integer - responses = 0 - for ping in 
range(options.pings): - seq = seq + 1 if seq < 65535 else 0 - logging.debug("send_icmp (seq %d) via %s", seq, default_interface) - if icmp_echo(options.hostname, interface=default_interface, seq=seq): - responses += 1 - time.sleep(.1) - # Call it good if we get 80% of our pings back - if responses >= float(options.pings) * 0.80: - logging.warning("Received response on %s, stopping pppd", default_interface) - pppd(options, False) - continue - - logging.warning("No response received on %s, starting pppd", default_interface) - pppd(options, True) - continue + process(options, progname) except LockFileTimeout: logging.critical("Another instance of %s is running", progname) return 1 From 032175041f848021f8fd2b781495062992e5fca5 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 15:30:49 -0500 Subject: [PATCH 19/42] Improve check for ppp running --- roles/conduit/templates/ppp.monit.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/conduit/templates/ppp.monit.j2 b/roles/conduit/templates/ppp.monit.j2 index 05eec0f..bb8eb28 100644 --- a/roles/conduit/templates/ppp.monit.j2 +++ b/roles/conduit/templates/ppp.monit.j2 @@ -1,8 +1,8 @@ check network "{{ monit_ppp_if }}" with interface "{{ monit_ppp_if }}" -# if failed link then restart + if link up then exec "/usr/bin/env METHOD=monit IFACE=ppp0 /var/config/ifup_restart" if changed link capacity then alert -check process pppd MATCHING "^/usr/sbin/pppd call provider$" +check process pppd MATCHING "^(/usr/sbin/)?pppd call provider$" if does not exist for 1 cycles then restart start program = {{ monit_ppp_start }} stop program = {{ monit_ppp_stop }} From fb69bda02fa97e43723096a6a99ba5ce04ffcaa8 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 15:31:23 -0500 Subject: [PATCH 20/42] Summarize failed sections at end Not all sections are supported, yet --- roles/conduit/handlers/main.yml | 7 ++++ roles/conduit/tasks/main.yml | 57 ++++++++++++++++++++++++++++++--- 2 
files changed, 60 insertions(+), 4 deletions(-) diff --git a/roles/conduit/handlers/main.yml b/roles/conduit/handlers/main.yml index 208eb76..8567f85 100644 --- a/roles/conduit/handlers/main.yml +++ b/roles/conduit/handlers/main.yml @@ -124,3 +124,10 @@ - name: Update system checksum file include_tasks: update_checksums.yml + +# +# Summary +# +- name: Rescued sections + ansible.builtin.debug: + msg: "Failed sections: {{ rescued | join(', ') }}" diff --git a/roles/conduit/tasks/main.yml b/roles/conduit/tasks/main.yml index 2abdfd2..ea29384 100644 --- a/roles/conduit/tasks/main.yml +++ b/roles/conduit/tasks/main.yml @@ -1,5 +1,14 @@ --- +# +# +# +- name: main Create list of failed sections + ansible.builtin.set_fact: + rescued: [] + tags: + - always + # # Build the list of authorized keys # @@ -177,6 +186,10 @@ rescue: - debug: msg: "main: TTN setup failed, continuing" + - ansible.builtin.set_fact: + rescued: "{{ rescued + ['ttn'] }}" + changed_when: true + notify: Rescued sections tags: - ttn @@ -189,26 +202,62 @@ rescue: - debug: msg: "main: SSH tunnel setup failed, continuing..." + - ansible.builtin.set_fact: + rescued: "{{ rescued + ['ssh_tunnel'] }}" + notify: Rescued sections tags: - ssh_tunnel - monit - setup # -# Secure ssh +# Enable status leds # - name: main Set up conduit_leds - import_tasks: conduit_leds.yml - when: ansible_local.conduit.hw_version.startswith("MTCDT-") + block: + - name: import conduit_leds.yml + import_tasks: conduit_leds.yml + when: ansible_local.conduit.hw_version.startswith("MTCDT-") + rescue: + - debug: + msg: "main: conduit_leds setup failed, continuing..." + - ansible.builtin.set_fact: + rescued: "{{ rescued + ['conduit_leds'] }}" + notify: Rescued sections tags: - conduit_leds - setup +# +# Enable modem monitoring +# +- name: main Set up monitor_modem + block: + - name: import monitor_modem.yml + import_tasks: monitor_modem.yml + rescue: + - debug: + msg: "main: monitor_modem setup failed, continuing..." 
+ - ansible.builtin.set_fact: + rescued: "{{ rescued + ['monitor_modem'] }}" + notify: Rescued sections + tags: + - monitor_modem + - setup + # # Set up monit # - name: main Set up moniit - import_tasks: monit.yml + block: + - name: import monit.yml + import_tasks: monit.yml + rescue: + - debug: + msg: "main: monit setup failed, continuing..." + - ansible.builtin.set_fact: + rescued: "{{ rescued + ['monit'] }}" + notify: Rescued sections tags: - monit - setup From b6e1711ec5c3eda271b79a71b03e8b06f2642040 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 21:57:25 -0500 Subject: [PATCH 21/42] Initialize logging after we daemonize. Otherwise we will fail if syslogd is not running --- roles/conduit/files/conduit_leds.py | 25 +++++++++++++++++++------ roles/conduit/files/monitor_modem.py | 22 ++++++++++++++++------ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index d471847..7e44009 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -45,6 +45,11 @@ cached_ip = None +try: + FileNotFoundError +except NameError: + FileNotFoundError = IOError + class LockFileTimeout(Exception): def __init__(self, error): self.value = error @@ -292,9 +297,6 @@ def parse_args(): if options.noop: options.debug = True - # Init Logging - init_logging(options) - return options def check_tunnel(options): @@ -437,10 +439,18 @@ def init_logging(options): logger = logging.getLogger() logger.handlers = [] syslog_format = '%s[%%(process)s]: %%(message)s' % (os.path.basename(sys.argv[0])) - syslog_handler = SysLogHandler(address="/dev/log", - facility=SysLogHandler.LOG_DAEMON) - syslog_handler.setFormatter(logging.Formatter(syslog_format)) if not sys.stdout.isatty(): + # Repeat until syslog is available + while True: + try: + syslog_handler = SysLogHandler(address="/dev/log", + facility=SysLogHandler.LOG_DAEMON) + except FileNotFoundError as error: + 
print("%s" % error) + time.sleep(1) + else: + break + syslog_handler.setFormatter(logging.Formatter(syslog_format)) logger.addHandler(syslog_handler) else: logger.addHandler(logging.StreamHandler(stream=sys.stdout)) @@ -463,6 +473,9 @@ def main(): if not daemonize(): return 1 + # Do this after daemonize or we'll hang the system startup. + init_logging(options) + mtsio = MTSIO() hwversion = mtsio.read('hw-version') diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index c59d319..1e6a65e 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -212,8 +212,8 @@ def parse_args(): if options.noop: options.debug = True - # Init Logging - init_logging(options) + if options.debug: + options.verbose = True return options @@ -223,11 +223,18 @@ def init_logging(options): logger = logging.getLogger() logger.handlers = [] syslog_format = '%s[%%(process)s]: %%(message)s' % (os.path.basename(sys.argv[0])) - syslog_handler = SysLogHandler(address="/dev/log", - facility=SysLogHandler.LOG_DAEMON) - syslog_handler.setFormatter(logging.Formatter(syslog_format)) if not sys.stdout.isatty(): - logger.addHandler(syslog_handler) + # Repeat until syslog is available + while True: + try: + syslog_handler = SysLogHandler(address="/dev/log", + facility=SysLogHandler.LOG_DAEMON) + except FileNotFoundError: + time.sleep(1) + else: + break + syslog_handler.setFormatter(logging.Formatter(syslog_format)) + logger.addHandler(syslog_handler) else: logger.addHandler(logging.StreamHandler(stream=sys.stdout)) @@ -779,6 +786,9 @@ def main(): if not daemonize(): return 1 + # Do this after daemonize or we'll hang the system startup. 
+ init_logging(options) + # Register signal handlers once (in your main code) signal.signal(signal.SIGTERM, catch_interrupt) signal.signal(signal.SIGINT, catch_interrupt) From fda8cb93cfed64aec37983a6576227ec14560155 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Thu, 20 Nov 2025 21:58:14 -0500 Subject: [PATCH 22/42] Do not restart services when link comes up the first time Just because we just started, doesn't mean it wasn't up. --- roles/conduit/files/monitor_modem.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index 1e6a65e..f5a7efe 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -595,7 +595,7 @@ def __init__(self, name, address): self.link_state = True self.seen = False self.seq = -1 - self.responding = False + self.responding = None def __str__(self): return "%s: %s ignore: %f state: %s, seen: %s, seq: %d, responding: %s" % ( @@ -652,7 +652,6 @@ def process_interface(options, if_state): responses += 1 time.sleep(.1) if shutdown_requested: - if_state.responding = False return # Call it good if we get 80% of our pings back @@ -726,7 +725,7 @@ def process(options, progname): logging.info("%s: responding", if_name) # Restarte if it's now responding - if not was_responding: + if was_responding is False: do_restart = True continue From a5739e849f5675114930f573235bb58e8088b8f4 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Fri, 21 Nov 2025 17:33:03 -0500 Subject: [PATCH 23/42] Include mlinux-version info in semtech version report --- roles/conduit/tasks/ppp.yml | 2 +- roles/conduit/templates/lora-basic-station.j2 | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/roles/conduit/tasks/ppp.yml b/roles/conduit/tasks/ppp.yml index 2fe29ce..e53117e 100644 --- a/roles/conduit/tasks/ppp.yml +++ b/roles/conduit/tasks/ppp.yml @@ -56,7 +56,7 @@ ansible.builtin.file: path: 
/var/config/ppp/peers/provider state: "{{ 'link' if use_cellular and cellular_provider is defined else 'absent' }}" - src: "/var/config/ppp/peers/{{ cellular_provider }}" + src: "{{ '/var/config/ppp/peers/' + cellular_provider if use_cellular and cellular_provider is defined else '' }}" notify: Start ppp - name: ppp Restore default provider in /var/config/ppp/ppp_on_boot diff --git a/roles/conduit/templates/lora-basic-station.j2 b/roles/conduit/templates/lora-basic-station.j2 index 2c5108b..3023b36 100644 --- a/roles/conduit/templates/lora-basic-station.j2 +++ b/roles/conduit/templates/lora-basic-station.j2 @@ -124,6 +124,11 @@ do_start() { test -f "${conf_dir}/${file}" && cp "${conf_dir}/${file}" "${run_dir}/1/" done + # + # copy mlinux-version info + # + echo "$(head -1 /etc/mlinux-version) - $(opkg status lora-basic-station | sed -n '/Version:/s/Version: //p')" > ${run_dir}/1/version.txt + # # reset concentrator # From 4dfc40d71828f4ce8dfbc270fefac97f049ac91d Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Mon, 24 Nov 2025 09:47:05 -0500 Subject: [PATCH 24/42] Make check_leds and monitor_modem init.d scripts more informative --- roles/conduit/files/conduit_leds.initd | 39 ++++++++++++++++++++----- roles/conduit/files/monitor_modem.initd | 36 ++++++++++++++++++++--- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/roles/conduit/files/conduit_leds.initd b/roles/conduit/files/conduit_leds.initd index a9f793c..6c1379a 100644 --- a/roles/conduit/files/conduit_leds.initd +++ b/roles/conduit/files/conduit_leds.initd @@ -40,21 +40,46 @@ stop() { case "$1" in start) - is_running || start + if is_running; then + echo "${NAME} is already running: $(pgrep -F ${PIDFILE} -a)" >&2 + exit 1 + fi + echo "Starting ${NAME}" >&2 + start ;; stop) - stop + if is_running; then + echo "Stopping ${NAME}" >&2 + stop + else + echo "${NAME} is not running" >&2 + fi ;; restart|reload) - nohup ${0} do_restart - ;; - do_restart) - stop + if is_running; then + echo 
"Stopping ${NAME}" >&2 + stop + else + echo "${NAME} is not running" >&2 + fi + + # Wait for it to stop + tries=10 + while is_running; do + if [ "${tries}" -eq 0 ]; then + echo "${NAME} failed to stop" >&2 + exit 1 + fi + tries=$((tries - 1)) + sleep 1 + done + + echo "Starting ${NAME}" >&2 start ;; status) if is_running; then - echo "${NAME} is running with PID $(cat ${PIDFILE})" >&2 + echo "${NAME} is running with PID $(pgrep -F ${PIDFILE} -a)" >&2 else echo "${NAME} is not running" >&2 exit 1 diff --git a/roles/conduit/files/monitor_modem.initd b/roles/conduit/files/monitor_modem.initd index 6f83a33..2d60ff9 100644 --- a/roles/conduit/files/monitor_modem.initd +++ b/roles/conduit/files/monitor_modem.initd @@ -40,18 +40,46 @@ stop() { case "$1" in start) - is_running || start + if is_running; then + echo "${NAME} is already running: $(pgrep -F ${PIDFILE} -a)" >&2 + exit 1 + fi + echo "Starting ${NAME}" >&2 + start ;; stop) - stop + if is_running; then + echo "Stopping ${NAME}" >&2 + stop + else + echo "${NAME} is not running" >&2 + fi ;; restart|reload) - stop + if is_running; then + echo "Stopping ${NAME}" >&2 + stop + else + echo "${NAME} is not running" >&2 + fi + + # Wait for it to stop + tries=10 + while is_running; do + if [ "${tries}" -eq 0 ]; then + echo "${NAME} failed to stop" >&2 + exit 1 + fi + tries=$((tries - 1)) + sleep 1 + done + + echo "Starting ${NAME}" >&2 start ;; status) if is_running; then - echo "${NAME} is running with PID $(cat ${PIDFILE})" >&2 + echo "${NAME} is running with PID $(pgrep -F ${PIDFILE} -a)" >&2 else echo "${NAME} is not running" >&2 exit 1 From def0727c6d5bf0c14f02f8586366ddff2a79dc89 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Mon, 24 Nov 2025 09:47:58 -0500 Subject: [PATCH 25/42] Log more status all the time --- roles/conduit/files/monitor_modem.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py 
b/roles/conduit/files/monitor_modem.py index f5a7efe..a9df6de 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -455,7 +455,7 @@ def ppp_on_boot(options, enable): if ppp_on_boot_stat.st_mode & 0o111 != 0o111: try: os.chmod(options.ppp_on_boot, 0o755) - logging.info("ppp_on_boot: %s set to executable", options.ppp_on_boot) + logging.warning("ppp_on_boot: %s set to executable", options.ppp_on_boot) except OSError as error: logging.error("Error making %s executable: %s", options.ppp_on_boot, @@ -465,7 +465,7 @@ def ppp_on_boot(options, enable): if ppp_on_boot_stat.st_mode & 0o111 != 0: try: os.chmod(options.ppp_on_boot, 0o644) - logging.info("ppp_on_boot: %s set to non-executable", options.ppp_on_boot) + logging.warning("ppp_on_boot: %s set to non-executable", options.ppp_on_boot) except OSError as error: logging.error("Error making %s non-executable: %s", options.ppp_on_boot, @@ -654,11 +654,13 @@ def process_interface(options, if_state): if shutdown_requested: return - # Call it good if we get 80% of our pings back - if responses >= float(options.pings) * 0.80: - logging.info("Received response on %s, pppd not needed", if_state.name) - if_state.responding = True - return + # Call it good if we get 80% of our pings back + if responses >= float(options.pings) * 0.80: + logging.warning("Received %d/%d responses on %s, pppd not needed", + responses, + options.pings, + if_state.name) + if_state.responding = True return @@ -675,7 +677,7 @@ def process(options, progname): have_modem, have_sim = check_modem(options) if not have_modem or not have_sim: - logging.info("No Modem or SIM, stopping pppd") + logging.warning("No Modem or SIM, stopping pppd") ppp_on_boot(options, False) pppd(options, False) continue @@ -707,7 +709,7 @@ def process(options, progname): if default_if_name == "ppp0": # No, tell ppp to stop ppp_new_state = False - logging.info("%s: up, telling ppp to stop", if_name) + logging.warning("%s: up, telling ppp to 
stop", if_name) continue if default_if_name != if_name: # Not at us, continue @@ -746,13 +748,14 @@ def process(options, progname): pppd(options, ppp_new_state) if do_restart: + logging.warning("Restarting services") cmd = [options.change_script] env = os.environ.copy() env["METHOD"] = "monitor_modem" if active_ifs: env["IFACE"] = ", ".join(list(active_ifs)) try: - logging.warning("Running %s", " ".join(cmd)) + logging.info("Running %s", " ".join(cmd)) subprocess.check_call(cmd, env=env) except subprocess.CalledProcessError as error: logging.error("%s: %s", " ".join(cmd), error) @@ -766,10 +769,11 @@ def process(options, progname): " ".join(list(tunnel_addrs)), " ".join(list(active_addresses))) if not active_addresses.intersection(tunnel_addrs): + logging.warning("No tunnel sources from active addresses, restarting") # No tunnel connections from an active interface cmd = ["/etc/init.d/ssh_tunnel", "restart"] try: - logging.warning("Running %s", " ".join(cmd)) + logging.info("Running %s", " ".join(cmd)) subprocess.check_call(cmd) except subprocess.CalledProcessError as error: logging.error("%s: %s", " ".join(cmd), error) From 0804b518b410be4007e4e475b297d5a9c0bcd1e6 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 26 Nov 2025 11:07:16 -0500 Subject: [PATCH 26/42] Collect facts about mts-io f/s --- roles/conduit/files/facts.d/mts_io.fact | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100755 roles/conduit/files/facts.d/mts_io.fact diff --git a/roles/conduit/files/facts.d/mts_io.fact b/roles/conduit/files/facts.d/mts_io.fact new file mode 100755 index 0000000..ecfee2e --- /dev/null +++ b/roles/conduit/files/facts.d/mts_io.fact @@ -0,0 +1,15 @@ +#!/bin/bash + +cd /sys/devices/platform/mts-io || exit 1 + +devs=$(find * -type f | ( + comma= + while read dev; do + echo -n "${comma}\"${dev//-/_}\": \"$(cat ${dev} 2>/dev/null)\"" + comma=", " + done +)) + +echo '{' +echo " ${devs}" +echo '}' From 2dfe12a26a27d519b100124af17568b41478a4f7 Mon Sep 17 
00:00:00 2001 From: Jeffrey C Honig Date: Wed, 26 Nov 2025 11:08:03 -0500 Subject: [PATCH 27/42] Use mts-io/has_radio to check for the presence of cellular --- roles/conduit/tasks/monitor_modem.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/roles/conduit/tasks/monitor_modem.yml b/roles/conduit/tasks/monitor_modem.yml index 865df62..d6c7d71 100644 --- a/roles/conduit/tasks/monitor_modem.yml +++ b/roles/conduit/tasks/monitor_modem.yml @@ -10,7 +10,7 @@ notify: - restart monitor_modem - Update system checksum file - when: "'modem_at0' in ansible_local.dev" + when: ansible_local.mts_io.has_radio == "1" - name: monitor_modem /var/config/init.d/monitor_modem ansible.builtin.copy: @@ -22,7 +22,7 @@ notify: - restart monitor_modem - Update system checksum file - when: "'modem_at0' in ansible_local.dev" + when: ansible_local.mts_io.has_radio == "1" - name: monitor_modem Link /etc/init.d/monitor_modem to /var/config/init.d/monitor_modem ansible.builtin.file: @@ -33,7 +33,7 @@ notify: - restart monitor_modem - Update system checksum file - when: "'modem_at0' in ansible_local.dev" + when: ansible_local.mts_io.has_radio == "1" - name: monitor_modem Remove if a modem is not present ansible.builtin.file: @@ -45,12 +45,12 @@ - /var/config/init.d/monitor_modem notify: - Update system checksum file - when: "'modem_at0' not in ansible_local.dev" + when: ansible_local.mts_io.has_radio == "0" - name: monitor_modem Delete service if modem not present ansible.builtin.shell: "update-rc.d -f monitor_modem remove" notify: - Update system checksum file - when: "'modem_at0' not in ansible_local.dev" + when: ansible_local.mts_io.has_radio == "0" ... 
From 11927b0bd53b816bd04b6c76d9a7f6e1cd6064c6 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 26 Nov 2025 11:08:37 -0500 Subject: [PATCH 28/42] Lint firmware.fact --- roles/conduit/files/facts.d/firmware.fact | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/roles/conduit/files/facts.d/firmware.fact b/roles/conduit/files/facts.d/firmware.fact index 15779b5..d94618b 100755 --- a/roles/conduit/files/facts.d/firmware.fact +++ b/roles/conduit/files/facts.d/firmware.fact @@ -2,12 +2,11 @@ import json import os -import sys if os.path.isdir("/lib/firmware"): all_files = [] for realroot, dirs, files in os.walk("/lib/firmware", topdown=True): - if realroot is "/lib/firmware": + if realroot == "/lib/firmware": root = "" else: root = realroot.replace("/lib/firmware/", "") @@ -16,7 +15,3 @@ if os.path.isdir("/lib/firmware"): if len(all_files): print(json.dumps(all_files, indent=4)) - - - - From 5617c9a95e4e5f769cee1d33f8a99b27c0ca0807 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 26 Nov 2025 11:09:08 -0500 Subject: [PATCH 29/42] Fix logging setup (indentation) --- roles/conduit/files/monitor_modem.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index a9df6de..1081852 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -221,7 +221,6 @@ def init_logging(options): """ Set up logging """ logger = logging.getLogger() - logger.handlers = [] syslog_format = '%s[%%(process)s]: %%(message)s' % (os.path.basename(sys.argv[0])) if not sys.stdout.isatty(): # Repeat until syslog is available @@ -229,13 +228,16 @@ def init_logging(options): try: syslog_handler = SysLogHandler(address="/dev/log", facility=SysLogHandler.LOG_DAEMON) - except FileNotFoundError: + except FileNotFoundError as err: + logging.warning("Unable to open /dev/log: %s, waiting", err) time.sleep(1) else: break - 
syslog_handler.setFormatter(logging.Formatter(syslog_format)) - logger.addHandler(syslog_handler) + syslog_handler.setFormatter(logging.Formatter(syslog_format)) + logger.handlers = [] + logger.addHandler(syslog_handler) else: + logger.handlers = [] logger.addHandler(logging.StreamHandler(stream=sys.stdout)) if options.debug: From 267481f48f09b6a754d2b8f95c8fe0ea9dd18ec8 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 26 Nov 2025 11:09:51 -0500 Subject: [PATCH 30/42] Properly catch socket errors on Python 2.7 --- roles/conduit/files/monitor_modem.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index 1081852..9359f30 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -341,14 +341,24 @@ def icmp_echo(dst_name, interface=None, payload=b'hello', id_=None, seq=1): if not ready: logging.debug("icmp_echo: timeout") return False - except (IOError, OSError): + except select.error: + if shutdown_requested: + return continue # Read pending packets while True: + if shutdown_requested: + return + try: recv_packet, addr = sock.recvfrom(65535, socket.MSG_DONTWAIT) - except (OSError, IOError): + except socket.error as err: + if err[0] == errno.EINTR: + continue + if err[0] in (errno.EAGAIN, errno.EWOULDBLOCK): + return False + logging.warning("recvfrom returns: %s", err) return False iph_len = (struct.unpack("!B", recv_packet[:1])[0] & 0xf) * 4 From d43b691ebf2453d60733a875d99cac036cfbb50c Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Wed, 26 Nov 2025 11:10:35 -0500 Subject: [PATCH 31/42] Ensure we don't create empty logging handlers Probably not necessary --- roles/conduit/files/conduit_leds.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index 7e44009..c2391a2 100755 --- 
a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -437,7 +437,6 @@ def init_logging(options): """ Set up logging """ logger = logging.getLogger() - logger.handlers = [] syslog_format = '%s[%%(process)s]: %%(message)s' % (os.path.basename(sys.argv[0])) if not sys.stdout.isatty(): # Repeat until syslog is available @@ -445,14 +444,16 @@ def init_logging(options): try: syslog_handler = SysLogHandler(address="/dev/log", facility=SysLogHandler.LOG_DAEMON) - except FileNotFoundError as error: - print("%s" % error) + except FileNotFoundError as err: + logging.warning("Unable to open /dev/log: %s, waiting", err) time.sleep(1) else: break syslog_handler.setFormatter(logging.Formatter(syslog_format)) + logger.handlers = [] logger.addHandler(syslog_handler) else: + logger.handlers = [] logger.addHandler(logging.StreamHandler(stream=sys.stdout)) if options.debug: From 15c21af3881f880aefa6c42938e0da091a708b6e Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 30 Nov 2025 16:00:37 -0500 Subject: [PATCH 32/42] Lint --- roles/conduit/files/monitor_modem.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index 9359f30..0c60bd5 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -38,7 +38,6 @@ import select import signal import socket -import stat import struct import subprocess import sys @@ -439,8 +438,8 @@ def get_broadcast_interfaces(): # Check carrier carrier_file = "/sys/class/net/{}/carrier".format(iface) try: - with open(carrier_file, 'r') as f: - carrier = f.read().strip() + with open(carrier_file, 'r') as fp: + carrier = fp.read().strip() if carrier != '1': continue except IOError: From 8851866c0abf149961aa24278615b2f37a1c0b15 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 30 Nov 2025 16:02:33 -0500 Subject: [PATCH 33/42] Use has-radio instead of checking for presence of a device
--- roles/conduit/files/monitor_modem.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index 0c60bd5..abe54db 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -187,9 +187,9 @@ def parse_args(): dest="foreground", default=False, action='store_true', help="Do not fork; run in foreground") - group.add_argument("--modem", - dest="modem", default="/dev/modem_at1", - help="Modem device for Cell service") + group.add_argument("--has-radio", + dest="has_radio", default="/sys/devices/platform/mts-io/has-radio", + help="Device file that indicates presence of modem") group.add_argument("--real-ppp-on-boot", default="/var/config/ppp/ppp_on_boot", help="Where to link /etc/ppp_on_boot to when enabling ppp") @@ -514,11 +514,10 @@ def check_modem(options): have_modem = False try: - modem_stat = os.stat(options.modem) - if stat.S_ISCHR(modem_stat.st_mode): - have_modem = True - except OSError as error: - logging.error("Unable to stat %s: %s", options.modem, error) + with open(options.has_radio, "r") as fp: + have_modem = fp.read().strip() == '1' + except IOError as error: + logging.error("Reading %s: %s", options.has_radio, error) have_sim = False if have_modem: From d36194fae7a804f00a50a3ec323cde8d34685ff0 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 30 Nov 2025 16:02:52 -0500 Subject: [PATCH 34/42] Fix use before set when radio-cmd fails --- roles/conduit/files/monitor_modem.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index abe54db..f76d0a3 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -525,12 +525,13 @@ def check_modem(options): try: output = subprocess.check_output(cmd) logging.debug("check_modem: %s returned: %s", " ".join(cmd), output) + if "+CPIN: 
READY" in output: + have_sim = True except subprocess.CalledProcessError as error: - logging.debug("check_modem: %s returned: %s", " ".join(cmd), error) - if "+CPIN: READY" in output: - have_sim = True + logging.warning("check_modem: %s returned: %s", " ".join(cmd), error) logging.debug("have_modem: %s, have_sim: %s", have_modem, have_sim) + return have_modem, have_sim def pppd(options, enable): From 2e62dac3399c6c86a065d2db1fdffe0f8da7d55d Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 30 Nov 2025 16:10:41 -0500 Subject: [PATCH 35/42] Only mark broadcast intf as failed if it fails for 5 cycles --- roles/conduit/files/monitor_modem.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index f76d0a3..230431f 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -607,6 +607,7 @@ def __init__(self, name, address): self.seen = False self.seq = -1 self.responding = None + self.missed_cycles = None def __str__(self): return "%s: %s ignore: %f state: %s, seen: %s, seq: %d, responding: %s" % ( @@ -672,6 +673,7 @@ def process_interface(options, if_state): options.pings, if_state.name) if_state.responding = True + if_state.missed_cycles = 0 return @@ -735,16 +737,23 @@ def process(options, progname): if if_state.responding: ppp_new_state = False - logging.info("%s: responding", if_name) # Restarte if it's now responding if was_responding is False: + logging.info("%s: is now responding", if_name) do_restart = True continue - # Not responding, ignore it for a while - logging.info("%s: not responding, ignoring", if_name) - if_state.ignore_time = time.time() + options.ignore_link_time + # Fail a few times before we mark it down? 
+ + if_state.missed_cycles += 1 + if if_state.missed_cycles > 4: + # Not responding, ignore it for a while + if_state.ignore_time = time.time() + options.ignore_link_time + logging.warning("%s: not responding, ignoring until: %s", + if_name, + time.strftime("%Y-%m-%d %H:%M:%S", + time.localtime(if_state.ignore_time))) # Mark current link state active_addresses = set() From 7b19cd065a91e8edf7d498695495ec554d4632fa Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Sun, 30 Nov 2025 16:11:07 -0500 Subject: [PATCH 36/42] Improve logging --- roles/conduit/files/monitor_modem.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index 230431f..b2db0b7 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -714,7 +714,10 @@ def process(options, progname): # Check if we are supposed to be ignoring this link if if_state.ignore_time > time.time(): - logging.info("%s: ignoring", if_name) + logging.warning("%s: ignoring until %s", + if_name, + time.strftime("%Y-%m-%d %H:%M:%S", + time.localtime(if_state.ignore_time))) continue # Does the default route point here? @@ -785,11 +788,11 @@ def process(options, progname): if active_addresses: tunnel_addrs = tunnel_addresses(options) - logging.debug("Checkting that tunnel sources %s is in %s", + logging.debug("Checking that tunnel sources %s is in %s", " ".join(list(tunnel_addrs)), " ".join(list(active_addresses))) if not active_addresses.intersection(tunnel_addrs): - logging.warning("No tunnel sources from active addresses, restarting") + logging.warning("No ssh_tunnel sources from active addresses, restarting") # No tunnel connections from an active interface cmd = ["/etc/init.d/ssh_tunnel", "restart"] try: @@ -812,7 +815,8 @@ def main(): # Do this after daemonize or we'll hang the system startup. 
init_logging(options) - # Register signal handlers once (in your main code) + logging.warning("%s: Started", progname) + signal.signal(signal.SIGTERM, catch_interrupt) signal.signal(signal.SIGINT, catch_interrupt) From c116caef4a7685a054b88cb38560d024db7dde2c Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Fri, 5 Dec 2025 12:10:14 -0500 Subject: [PATCH 37/42] Document status LEDs --- roles/conduit/README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/roles/conduit/README.md b/roles/conduit/README.md index 9b4cbdb..723572e 100644 --- a/roles/conduit/README.md +++ b/roles/conduit/README.md @@ -114,6 +114,26 @@ The following tags can be used to run a subset of the playbook.
Sets up an ssh tunnel back to a control host
+LED Status +---------- + +On Conduits (not Conduit APs), the LED indications are: + +| LED | Meaning | +|--------|------------------------------------------------------------------------------------------------------| +| Status | Blinks twice a second when the OS is running | +| LS | Blicks every couple seconds when Cellular modem is connected | +| A | PPP has an IP address asssigned | +| B | An SSH tunnel connection to the jump host is in Established state | +| C | A packet forwarder is running and has the LoRa device open (does not work on I2C cards running mp) N | +| D | DNS resolution of google.com works | + +The A-D LEDs are the ones on the right and have different labels on +older Conduits (CD and one to 3 bars). + +The A-D LEDs reverse periodically to indicate that the check program +is running. + License ------- @@ -123,4 +143,3 @@ Author Information ------------------ Jeffrey Honig - From e186ca75bb316816f4c850e684346d975f8704d3 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Mon, 8 Dec 2025 16:48:14 -0500 Subject: [PATCH 38/42] Linting --- roles/conduit/files/conduit_leds.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index c2391a2..39084f8 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -31,7 +31,6 @@ from contextlib import contextmanager import errno import fcntl -import ipaddress import logging from logging.handlers import SysLogHandler import os @@ -274,9 +273,6 @@ def parse_args(): help="Don't send notifications, just list what we are going to do") group = parser.add_argument_group("Options") - group.add_argument("--pidfile", - dest="pidfile", default="/var/run/conduit_leds.pid", - help="Location of the PID file") group.add_argument("--interval", default=60.0, type=float, help="Seconds to wait between checks") From 9bf58d783ca5922298d113b9dfc48a04d2022f76 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Mon, 8 Dec
2025 16:49:39 -0500 Subject: [PATCH 39/42] LED A now indicates that we are using PPP Monitor_modem writes /run/using_ppp when pppd is requested and conduit_leds sets LED A based on the existence of that file. --- roles/conduit/files/conduit_leds.py | 54 ++++++---------------------- roles/conduit/files/monitor_modem.py | 34 ++++++++++++++---- 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index 39084f8..b794a4d 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -283,9 +283,9 @@ def parse_args(): dest="foreground", default=False, action='store_true', help="Do not fork; run in foreground") - group.add_argument("--modem", - dest="modem", default="/dev/modem_at0", - help="Modem device for Cell service") + group.add_argument("--want-ppp-file", + default="/var/run/using_ppp", + help="File to exist if we want to be using PPP") # Parse args options = parser.parse_args() @@ -365,48 +365,16 @@ def check_lora(options, device_path): return True -# PPPd assigns one of the following addresses until we receive one (add ppp interface index) -HISADDR_STATIC = ipaddress.ip_address(u"10.64.64.64") -HISADDR_DYNAMIC = ipaddress.ip_address(u"10.112.112.112") -PPP_RE = re.compile(r'ppp(?P<index>\d+)$') - -def check_ppp(options): - """ Check status of ppp connection """ +def check_ppp(options, mtsio): + """ Check if monitor_modem wants PPP to be running """ try: - modem_stat = os.stat(options.modem) - if not stat.S_ISCHR(modem_stat.st_mode): - logging.debug("check_ppp: %s not a character device", options.modem) - return False - except OSError as error: - logging.debug("check_ppp: %s: %s", options.modem, error) + return stat.S_ISREG(os.stat(options.want_ppp_file).st_mode) + except OSError: + # Not using PPP return False - peer_addr = None - for ifname, ifaddrs in psutil.net_if_addrs().items(): - match = PPP_RE.match(ifname) - if not match: - continue - ppp_ifnum =
int(match.group('index')) - for ifaddr in ifaddrs: - if ifaddr.family != socket.AF_INET: - continue - if ifaddr.ptp is None: - continue - if ifaddr.ptp in [str(HISADDR_STATIC + ppp_ifnum), str(HISADDR_DYNAMIC + ppp_ifnum)]: - # Remote has not given us an address yet - logging.debug("check_ppp: Remote has not provided an address for %s: %s", ifname, ifaddr.ptp) - continue - peer_addr = ifaddr.ptp - break - - if not peer_addr: - logging.debug("check_ppp: No valid peer address found") - return False - - return True - -def process(options, leds, device_path): +def process(options, mtsio, leds, device_path): """ Check all the services """ if check_dns(options): @@ -424,7 +392,7 @@ def process(options, leds, device_path): else: leds.clear(LEDs.LED_B) - if check_ppp(options): + if check_ppp(options, mtsio): leds.set(LEDs.LED_A) else: leds.clear(LEDs.LED_A) @@ -509,7 +477,7 @@ def main(): while time.time() > next_time: next_time += options.interval logging.debug("Checking status") - process(options, leds, device_path) + process(options, mtsio, leds, device_path) else: logging.debug("Flashing LEDs") leds.flashall() diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index b2db0b7..b42cc5c 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -171,9 +171,6 @@ def parse_args(): help="Don't send notifications, just list what we are going to do") group = parser.add_argument_group("Options") - group.add_argument("--pidfile", - dest="pidfile", default="/var/run/conduit_leds.pid", - help="Location of the PID file") group.add_argument("--interval", default=60.0, type=float, help="Seconds to wait between checks") @@ -199,10 +196,13 @@ def parse_args(): group.add_argument("--change-script", default="/var/config/ifup_restart", help="Script to run when status changes") + group.add_argument("--want-ppp-file", + default="/var/run/using_ppp", + help="File to exist if we want to be using PPP") 
group.add_argument("--ignore-link-time", default=60*60*3, type=int, - help="How often to retry broadcase interfaces if they did not work when we tried them") + help="How often (in seconds) to retry broadcast interfaces if they did not work when we tried them") # Parse args options = parser.parse_args() @@ -535,7 +535,10 @@ def check_modem(options): return have_modem, have_sim def pppd(options, enable): - """ Start or stop pppd """ + """ Start or stop pppd + + Returns True if state was changed. + """ logging.debug("pppd(%s)", enable) @@ -548,6 +551,15 @@ def pppd(options, enable): ppp_is_running = False if enable: + # Tell conduit_leds that we want PPP + try: + fd = os.open(options.want_ppp_file, os.O_CREAT|os.O_EXCL|os.O_WRONLY, 0o644) + with os.fdopen(fd, 'w') as fp: + fp.write("1\n") + except OSError as error: + if error.errno != errno.EEXIST: + logging.error("Writing %s: %s", options.want_ppp_file, error) + if not ppp_is_running: cmd = ["/etc/init.d/ppp", "start"] try: @@ -573,6 +585,15 @@ def pppd(options, enable): return False + # We do not want PPP + + # Inform conduit_leds + try: + os.unlink(options.want_ppp_file) + except OSError as error: + if error.errno != errno.ENOENT: + logging.error("Deleting %s: %s", options.want_ppp_file, error) + if ppp_is_running: cmd = ["/etc/init.d/ppp", "stop"] try: @@ -747,8 +768,7 @@ def process(options, progname): do_restart = True continue - # Fail a few times before we mark it down? 
- + # Fail a few times before we mark it down if_state.missed_cycles += 1 if if_state.missed_cycles > 4: # Not responding, ignore it for a while From 7fcb22199cf4e613effa124d397b48543ddd32f9 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Mon, 8 Dec 2025 16:52:47 -0500 Subject: [PATCH 40/42] Add a bit more time for DNS timeout --- roles/conduit/files/monitor_modem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index b42cc5c..7768a77 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -488,7 +488,7 @@ def tunnel_addresses(options): addresses = set() try: - dst_ip = resolve_with_timeout(options.hostname, timeout=1) + dst_ip = resolve_with_timeout(options.hostname, timeout=5) except DNSTimeout: return addresses else: From f7a2965f2d42bcc5ea25cfef7fdb4b4e7c5d5448 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Mon, 8 Dec 2025 16:53:06 -0500 Subject: [PATCH 41/42] Improve logging from monitor_modem --- roles/conduit/files/monitor_modem.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py index 7768a77..a58b625 100755 --- a/roles/conduit/files/monitor_modem.py +++ b/roles/conduit/files/monitor_modem.py @@ -746,7 +746,7 @@ def process(options, progname): if default_if_name == "ppp0": # No, tell ppp to stop ppp_new_state = False - logging.warning("%s: up, telling ppp to stop", if_name) + logging.warning("%s: up, ppp is not needed", if_name) continue if default_if_name != if_name: # Not at us, continue @@ -788,7 +788,7 @@ def process(options, progname): active_ifs.add(if_name) # Ensure pppd is in the correct state - pppd(options, ppp_new_state) + ppp_state_changed = pppd(options, ppp_new_state) if do_restart: logging.warning("Restarting services") @@ -803,16 +803,18 @@ def process(options, progname): except 
subprocess.CalledProcessError as error: logging.error("%s: %s", " ".join(cmd), error) else: - if ppp_new_state: - active_addresses = get_ppp_addresses() + if ppp_new_state and not ppp_state_changed: + active_addresses.update(get_ppp_addresses()) if active_addresses: tunnel_addrs = tunnel_addresses(options) - logging.debug("Checking that tunnel sources %s is in %s", + logging.debug("Checking that tunnel sources (%s) is in (%s)", " ".join(list(tunnel_addrs)), " ".join(list(active_addresses))) if not active_addresses.intersection(tunnel_addrs): - logging.warning("No ssh_tunnel sources from active addresses, restarting") + logging.warning("No ssh_tunnel sources (%s) from active addresses (%s), restarting", + " ".join(list(tunnel_addrs)), + " ".join(list(active_addresses))) # No tunnel connections from an active interface cmd = ["/etc/init.d/ssh_tunnel", "restart"] try: From 99d1f138d80155c122174d1920f58841ac311876 Mon Sep 17 00:00:00 2001 From: Jeffrey C Honig Date: Mon, 8 Dec 2025 16:56:37 -0500 Subject: [PATCH 42/42] Prevent ^C and exceptions from resetting LEDS when not locked --- roles/conduit/README.md | 2 +- roles/conduit/files/conduit_leds.py | 39 ++++++++++++++++------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/roles/conduit/README.md b/roles/conduit/README.md index 723572e..bf2caf7 100644 --- a/roles/conduit/README.md +++ b/roles/conduit/README.md @@ -123,7 +123,7 @@ On Conduits (not Conduit APs), the LED indications are: |--------|------------------------------------------------------------------------------------------------------| | Status | Blinks twice a second when the OS is running | | LS | Blicks every couple seconds when Cellular modem is connected | -| A | PPP has an IP address asssigned | +| A | We are trying to use PPP | | B | An SSH tunnel connection to the jump host is in Established state | | C | A packet forwarder is running and has the LoRa device open (does not work on I2C cards running mp) N | | D | DNS resolution 
of google.com works | diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py index b794a4d..09e84e6 100755 --- a/roles/conduit/files/conduit_leds.py +++ b/roles/conduit/files/conduit_leds.py @@ -471,20 +471,27 @@ def main(): # XXX - Spread the tests out over 1/4 of the interval? # XXX - Ping the remote side of the PPP connection? Requires exec - next_time = time.time() - while True: - if time.time() > next_time: - while time.time() > next_time: - next_time += options.interval - logging.debug("Checking status") - process(options, mtsio, leds, device_path) - else: - logging.debug("Flashing LEDs") - leds.flashall() - duration = min(5.0, next_time - time.time()) - if duration > 0: - logging.debug("Sleeping for %f seconds", duration) - time.sleep(duration) + try: + next_time = time.time() + while True: + if time.time() > next_time: + while time.time() > next_time: + next_time += options.interval + logging.debug("Checking status") + process(options, mtsio, leds, device_path) + else: + logging.debug("Flashing LEDs") + leds.flashall() + duration = min(5.0, next_time - time.time()) + if duration > 0: + logging.debug("Sleeping for %f seconds", duration) + time.sleep(duration) + except KeyboardInterrupt: + print("") + LEDs(MTSIO()) + except Exception as exc: + logging.exception(exc) + LEDs(MTSIO()) except LockFileTimeout: logging.critical("Another instance of %s is running", progname) return 1 @@ -496,9 +503,5 @@ def main(): rc = main() except KeyboardInterrupt: print("") - LEDs(MTSIO()) - except Exception as exc: - logging.exception(exc) - LEDs(MTSIO()) sys.exit(rc)