diff --git a/bin/mt_station_conf b/bin/mt_station_conf
index 0c8bc8b..71a3322 100755
--- a/bin/mt_station_conf
+++ b/bin/mt_station_conf
@@ -109,6 +109,10 @@ def parse_args():
group.add_argument("--log-file",
dest="log_file",
help="Log file name")
+ group.add_argument("--tc-timeout",
+ dest="tc_timeout",
+ type=int,
+ help="Set tc_timeout value in seconds")
options = parser.parse_args()
if options.debug:
@@ -164,6 +168,13 @@ def main():
if options.gps:
data['station_conf']['gps'] = options.gps
data['station_conf']['pps'] = "fuzzy" if options.fuzzy_pps else "gps"
+    if options.tc_timeout:
+        for key in data['station_conf'].keys():
+            if key.lower() == "tc_timeout":  # match an existing key case-insensitively
+                data['station_conf'][key] = "%ds" % options.tc_timeout
+                break
+        else:  # loop found no such key: add it to the same 'station_conf' section
+            data['station_conf']["tc_timeout"] = "%ds" % options.tc_timeout
print(json.dumps(data, indent=4))
diff --git a/roles/conduit/README.md b/roles/conduit/README.md
index 9b4cbdb..bf2caf7 100644
--- a/roles/conduit/README.md
+++ b/roles/conduit/README.md
@@ -114,6 +114,26 @@ The following tags can be used to run a subset of the playbook.
Sets up an ssh tunnel back to a control host
+LED Status
+----------
+
+On Conduits (not Conduit APs), the LED indications are:
+
+| LED | Meaning |
+|--------|------------------------------------------------------------------------------------------------------|
+| Status | Blinks twice a second when the OS is running |
+| LS | Blinks every couple of seconds when the cellular modem is connected |
+| A | We are trying to use PPP |
+| B | An SSH tunnel connection to the jump host is in Established state |
+| C | A packet forwarder is running and has the LoRa device open (does not work on I2C cards running mp) N |
+| D | DNS resolution of google.com works |
+
+The A-D LEDs are the ones on the right and have different labels on
+older Conduits (CD and one to three bars).
+
+The A-D LEDs reverse periodically to indicate that the check program
+is running.
+
License
-------
@@ -123,4 +143,3 @@ Author Information
------------------
Jeffrey Honig
-
diff --git a/roles/conduit/defaults/main.yml b/roles/conduit/defaults/main.yml
index 141383c..a27b589 100644
--- a/roles/conduit/defaults/main.yml
+++ b/roles/conduit/defaults/main.yml
@@ -90,6 +90,8 @@ ssh_tunnel_ssh_key: /etc/ssh/ssh_host_rsa_key
ssh_tunnel_daemon: /usr/bin/autossh
ssh_tunnel_ssh_port: 22
ssh_tunnel_base_port: 0
+ssh_tunnel_first_poll: 120
+ssh_tunnel_poll: 60
# Static Nameserver defaults
resolv_conf_static: /var/config/network/resolv.conf-static
@@ -176,11 +178,16 @@ monit_pktfwd_stop: '"/etc/init.d/{{ monit_pktfwd_initscript }} stop"'
monit_pktfwd_restart: '"/etc/init.d/{{ monit_pktfwd_initscript }} restart"'
monit_pktfwd_reboot: 'exec "/sbin/reboot"'
+# Monit load average and memory usage thresholds
+monit_loadavg_1m: 2
+monit_loadavg_5m: 4
+monit_memory_usage: 30%
+
# Monitor space on /
monit_root_test: "usage > 50%"
# Monitor space on /var/config
-monit_config_test: "usage > 15%"
+monit_config_test: "usage > 20%"
# Monitor space on /var/volatile
monit_volatile_test: "usage > 75%"
diff --git a/roles/conduit/files/conduit_leds.initd b/roles/conduit/files/conduit_leds.initd
index e790271..6c1379a 100644
--- a/roles/conduit/files/conduit_leds.initd
+++ b/roles/conduit/files/conduit_leds.initd
@@ -27,7 +27,7 @@ fi
[ -x ${DAEMON} ] || exit 0
is_running() {
- pgrep -F ${PIDFILE} > /dev/null
+ pgrep -F ${PIDFILE} > /dev/null 2>&1
}
start() {
@@ -40,22 +40,56 @@ stop() {
case "$1" in
start)
- is_running || start
+ if is_running; then
+ echo "${NAME} is already running: $(pgrep -F ${PIDFILE} -a)" >&2
+ exit 1
+ fi
+ echo "Starting ${NAME}" >&2
+ start
;;
stop)
- stop
+ if is_running; then
+ echo "Stopping ${NAME}" >&2
+ stop
+ else
+ echo "${NAME} is not running" >&2
+ fi
;;
restart|reload)
- nohup ${0} do_restart
- ;;
- do_restart)
- stop
+ if is_running; then
+ echo "Stopping ${NAME}" >&2
+ stop
+ else
+ echo "${NAME} is not running" >&2
+ fi
+
+ # Wait for it to stop
+ tries=10
+ while is_running; do
+ if [ "${tries}" -eq 0 ]; then
+ echo "${NAME} failed to stop" >&2
+ exit 1
+ fi
+ tries=$((tries - 1))
+ sleep 1
+ done
+
+ echo "Starting ${NAME}" >&2
start
;;
status)
- is_running
+ if is_running; then
+ echo "${NAME} is running with PID $(pgrep -F ${PIDFILE} -a)" >&2
+ else
+ echo "${NAME} is not running" >&2
+ exit 1
+ fi
;;
*)
- echo "Usage: $0 {start|stop|status|restart}"
+ echo "Usage: $0 {start|stop|status|restart}" >&2
+ exit 2
+ ;;
esac
+
+exit 0
#=========================================
diff --git a/roles/conduit/files/conduit_leds.py b/roles/conduit/files/conduit_leds.py
index 3c91598..09e84e6 100755
--- a/roles/conduit/files/conduit_leds.py
+++ b/roles/conduit/files/conduit_leds.py
@@ -31,20 +31,24 @@
from contextlib import contextmanager
import errno
import fcntl
-import ipaddress
import logging
from logging.handlers import SysLogHandler
import os
-import pprint
import psutil
import re
import socket
import stat
-import struct
import subprocess
import sys
import time
+cached_ip = None
+
+try:
+ FileNotFoundError
+except NameError:
+ FileNotFoundError = IOError
+
class LockFileTimeout(Exception):
def __init__(self, error):
self.value = error
@@ -55,26 +59,30 @@ def __str__(self):
def pidfilelock(name):
""" Context to lock a pid file """
- time_left = 30
+ time_end = time.time() + 30
pidfile_path = os.path.join("/var/run", name + ".pid")
- lock_file = open(pidfile_path, 'w+')
+ fd = os.open(pidfile_path, os.O_RDWR | os.O_CREAT, 0o644)
+ lock_file = os.fdopen(fd, "r+")
while True:
try:
logging.debug("Attempting to lock %s", pidfile_path)
fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
- lock_file.write(str(os.getpid()) + '\n')
- lock_file.flush()
- logging.debug("Wrote %d to %s", os.getpid(), pidfile_path)
- break
except IOError as err:
if err.errno != errno.EAGAIN:
raise err
- else:
- logging.debug("Timeout trying to lock", pidfile_path)
- time.sleep(1)
- time_left -= 1
- if time_left == 0:
- raise LockFileTimeout("Unable to lock %s" % pidfile_path)
+ logging.debug("Timeout trying to lock: %s", pidfile_path)
+ time.sleep(1)
+ if time.time() >= time_end:
+ raise LockFileTimeout("Unable to lock %s" % pidfile_path)
+ continue
+ else:
+ lock_file.seek(0)
+ lock_file.truncate()
+ lock_file.write("%d\n" % os.getpid())
+ lock_file.flush()
+ os.fsync(fd)
+ logging.debug("Wrote %d to %s", os.getpid(), pidfile_path)
+ break
try:
yield lock_file
@@ -83,7 +91,7 @@ def pidfilelock(name):
fcntl.flock(lock_file, fcntl.LOCK_UN)
os.unlink(pidfile_path)
lock_file.close()
-
+
class Defaults(object):
""" Read a /etc/defaults file """
@@ -145,7 +153,7 @@ def write(self, name, value):
with open(os.path.join(self.ROOT, name), "w") as fp:
fp.write("%s\n" % value)
-
+
class LEDs(object):
""" Control LEDs """
@@ -211,7 +219,7 @@ def daemonize():
# exit first parent
sys.exit(0)
except OSError as err:
- logging.exception("First fork failed")
+ logging.exception("First fork failed: %s", err)
return False
# decouple from parent environment
@@ -225,7 +233,7 @@ def daemonize():
# exit from second parent
sys.exit(0)
except OSError as err:
- logging.exception("Second fork failed")
+ logging.exception("Second fork failed: %s", err)
return False
# redirect standard file descriptors
@@ -265,9 +273,6 @@ def parse_args():
help="Don't send notifications, just list what we are going to do")
group = parser.add_argument_group("Options")
- group.add_argument("--pidfile",
- dest="pidfile", default="/var/run/conduit_leds.pid",
- help="Location of the PID file")
group.add_argument("--interval",
default=60.0, type=float,
help="Seconds to wait between checks")
@@ -278,9 +283,9 @@ def parse_args():
dest="foreground", default=False,
action='store_true',
help="Do not fork; run in foreground")
- group.add_argument("--modem",
- dest="modem", default="/dev/modem_at0",
- help="Modem device for Cell service")
+ group.add_argument("--want-ppp-file",
+ default="/var/run/using_ppp",
+ help="File to exist if we want to be using PPP")
# Parse args
options = parser.parse_args()
@@ -288,9 +293,6 @@ def parse_args():
if options.noop:
options.debug = True
- # Init Logging
- init_logging(options)
-
return options
def check_tunnel(options):
@@ -319,17 +321,18 @@ def check_tunnel(options):
if not cached_ip:
logging.info("check_tunnel: Unable to resolve %s", remote_host)
return False
- remote_host = cached_ip
- logging.info("check_tunnel: Using cached IP %s", remote_host)
-
- for conn in psutil.net_connections():
- if conn.type == socket.SOCK_STREAM and conn.status == psutil.CONN_ESTABLISHED and conn.raddr == (remote_ip, local_port):
- logging.info("check_tunnel: Found connection to %s(%s):%s with PID %d",
- remote_host,
- remote_ip,
- local_port,
- conn.pid)
- return True
+ remote_ip = cached_ip
+ logging.info("check_tunnel: Using cached IP %s", remote_ip)
+
+ if remote_ip:
+ for conn in psutil.net_connections():
+ if conn.type == socket.SOCK_STREAM and conn.status == psutil.CONN_ESTABLISHED and conn.raddr == (remote_ip, local_port):
+ logging.info("check_tunnel: Found connection to %s(%s):%s with PID %d",
+ remote_host,
+ remote_ip,
+ local_port,
+ conn.pid)
+ return True
logging.info("check_tunnel: No connection found to %s(%s):%s", remote_host, remote_ip, local_port)
return False
@@ -362,48 +365,16 @@ def check_lora(options, device_path):
return True
-# PPPd assigns one of the following addresses until we receive one (add ppp interface index)
-HISADDR_STATIC = ipaddress.ip_address(u"10.64.64.64")
-HISADDR_DYNAMIC = ipaddress.ip_address(u"10.112.112.112")
-PPP_RE = re.compile(r'ppp(?P\d+)$')
-
-def check_ppp(options):
- """ Check status of ppp connection """
+def check_ppp(options, mtsio):
+ """ Check if monitor_modem wants PPP to be running """
try:
- modem_stat = os.stat(options.modem)
- if not stat.S_ISCHR(modem_stat.st_mode):
- logging.debug("check_ppp: %s not a character device", options.modem)
- return False
- except OSError as error:
- logging.debug("check_ppp: %s: %s", options.modem, error)
+ return stat.S_ISREG(os.stat(options.want_ppp_file).st_mode)
+ except OSError:
+ # Not using PPP
return False
- peer_addr = None
- for ifname, ifaddrs in psutil.net_if_addrs().items():
- match = PPP_RE.match(ifname)
- if not match:
- continue
- ppp_ifnum = int(match.group('index'))
- for ifaddr in ifaddrs:
- if ifaddr.family != socket.AF_INET:
- continue
- if ifaddr.ptp is None:
- continue
- if ifaddr.ptp in [str(HISADDR_STATIC + ppp_ifnum), str(HISADDR_DYNAMIC + ppp_ifnum)]:
- # Remote has not given us an address yet
- logging.debug("check_ppp: Remote has not provided an address for %s: %s", ifname, ifaddr.ptp)
- continue
- peer_addr = ifaddr.ptp
- break
-
- if not peer_addr:
- logging.debug("check_ppp: No valid peer address found")
- return False
-
- return True
-
-def process(options, leds, device_path):
+def process(options, mtsio, leds, device_path):
""" Check all the services """
if check_dns(options):
@@ -421,7 +392,7 @@ def process(options, leds, device_path):
else:
leds.clear(LEDs.LED_B)
- if check_ppp(options):
+ if check_ppp(options, mtsio):
leds.set(LEDs.LED_A)
else:
leds.clear(LEDs.LED_A)
@@ -430,14 +401,23 @@ def init_logging(options):
""" Set up logging """
logger = logging.getLogger()
- logger.handlers = []
syslog_format = '%s[%%(process)s]: %%(message)s' % (os.path.basename(sys.argv[0]))
- syslog_handler = SysLogHandler(address="/dev/log",
- facility=SysLogHandler.LOG_DAEMON)
- syslog_handler.setFormatter(logging.Formatter(syslog_format))
if not sys.stdout.isatty():
+ # Repeat until syslog is available
+ while True:
+ try:
+ syslog_handler = SysLogHandler(address="/dev/log",
+ facility=SysLogHandler.LOG_DAEMON)
+ except FileNotFoundError as err:
+ logging.warning("Unable to open /dev/log: %s, waiting", err)
+ time.sleep(1)
+ else:
+ break
+ syslog_handler.setFormatter(logging.Formatter(syslog_format))
+ logger.handlers = []
logger.addHandler(syslog_handler)
else:
+ logger.handlers = []
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
if options.debug:
@@ -450,7 +430,7 @@ def init_logging(options):
def main():
"""It all happens here"""
- progname = os.path.basename(sys.argv[0])
+ progname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
options = parse_args()
@@ -458,6 +438,9 @@ def main():
if not daemonize():
return 1
+ # Do this after daemonize or we'll hang the system startup.
+ init_logging(options)
+
mtsio = MTSIO()
hwversion = mtsio.read('hw-version')
@@ -482,26 +465,33 @@ def main():
logging.warning("No device found for %s", lora_hwversion)
try:
- with pidfilelock(progname) as pid_file:
+ with pidfilelock(progname):
leds = LEDs(mtsio)
# XXX - Spread the tests out over 1/4 of the interval?
# XXX - Ping the remote side of the PPP connection? Requires exec
- next_time = time.time()
- while True:
- if time.time() > next_time:
- while time.time() > next_time:
- next_time += options.interval
- logging.debug("Checking status")
- process(options, leds, device_path)
- else:
- logging.debug("Flashing LEDs")
- leds.flashall()
- duration = min(5.0, next_time - time.time())
- if duration > 0:
- logging.debug("Sleeping for %f seconds", duration)
- time.sleep(duration)
+ try:
+ next_time = time.time()
+ while True:
+ if time.time() > next_time:
+ while time.time() > next_time:
+ next_time += options.interval
+ logging.debug("Checking status")
+ process(options, mtsio, leds, device_path)
+ else:
+ logging.debug("Flashing LEDs")
+ leds.flashall()
+ duration = min(5.0, next_time - time.time())
+ if duration > 0:
+ logging.debug("Sleeping for %f seconds", duration)
+ time.sleep(duration)
+ except KeyboardInterrupt:
+ print("")
+ LEDs(MTSIO())
+ except Exception as exc:
+ logging.exception(exc)
+ LEDs(MTSIO())
except LockFileTimeout:
logging.critical("Another instance of %s is running", progname)
return 1
@@ -513,9 +503,5 @@ def main():
rc = main()
except KeyboardInterrupt:
print("")
- LEDs(MTSIO())
- except Exception as exc:
- logging.exception(exc)
- LEDs(MTSIO())
sys.exit(rc)
diff --git a/roles/conduit/files/facts.d/firmware.fact b/roles/conduit/files/facts.d/firmware.fact
index 15779b5..d94618b 100755
--- a/roles/conduit/files/facts.d/firmware.fact
+++ b/roles/conduit/files/facts.d/firmware.fact
@@ -2,12 +2,11 @@
import json
import os
-import sys
if os.path.isdir("/lib/firmware"):
all_files = []
for realroot, dirs, files in os.walk("/lib/firmware", topdown=True):
- if realroot is "/lib/firmware":
+ if realroot == "/lib/firmware":
root = ""
else:
root = realroot.replace("/lib/firmware/", "")
@@ -16,7 +15,3 @@ if os.path.isdir("/lib/firmware"):
if len(all_files):
print(json.dumps(all_files, indent=4))
-
-
-
-
diff --git a/roles/conduit/files/facts.d/mts_io.fact b/roles/conduit/files/facts.d/mts_io.fact
new file mode 100755
index 0000000..ecfee2e
--- /dev/null
+++ b/roles/conduit/files/facts.d/mts_io.fact
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Emit every file under the mts-io platform device as a JSON "name": "value" pair.
+cd /sys/devices/platform/mts-io || exit 1
+
+devs=$(find * -type f | (
+    comma=
+    while IFS= read -r dev; do  # -r / IFS=: take each file name literally
+        echo -n "${comma}\"${dev//-/_}\": \"$(cat "${dev}" 2>/dev/null)\""
+        comma=", "
+    done
+))
+
+echo '{'
+echo " ${devs}"
+echo '}'
diff --git a/roles/conduit/files/ifup_restart b/roles/conduit/files/ifup_restart
index c6e2031..bc2c9b1 100644
--- a/roles/conduit/files/ifup_restart
+++ b/roles/conduit/files/ifup_restart
@@ -2,9 +2,9 @@
# Restart services when we get an address
-[ $METHOD = "dhcp" ] && exit 0
+[ "${METHOD}" = "dhcp" ] && exit 0
-logger -s -t $(basename ${0}) -p daemon.info "$METHOD interface $IFACE is up, restarting services"
+logger -s -t $(basename ${0}) -p daemon.info "${METHOD} interface ${IFACE} is up, restarting services"
test -x /etc/init.d/ttn-pkt-forwarder && /etc/init.d/ttn-pkt-forwarder restart
test -x /etc/init.d/lora-basic-station && /etc/init.d/lora-basic-station restart
test -x /etc/init.d/ssh_tunnel && /etc/init.d/ssh_tunnel restart
diff --git a/roles/conduit/files/monitor_modem.initd b/roles/conduit/files/monitor_modem.initd
new file mode 100644
index 0000000..2d60ff9
--- /dev/null
+++ b/roles/conduit/files/monitor_modem.initd
@@ -0,0 +1,95 @@
+#!/bin/sh
+#
+#Start monitor_modem as a service
+
+### BEGIN INIT INFO
+# Provides: monitor_modem
+# Required-Start: $local_fs $network $syslog $dbus
+# Required-Stop: $local_fs $network $syslog $dbus
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Description: Monitor the cellular modem and manage the PPP connection
+### END INIT INFO
+
+PATH="/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin"
+NAME=monitor_modem
+DAEMON=/usr/local/sbin/monitor_modem
+PIDFILE=/var/run/${NAME}.pid
+DAEMON_ARGS=
+
+# source function library
+. /etc/init.d/functions
+
+if [ -r /etc/default/${NAME} ]; then
+ . /etc/default/${NAME}
+fi
+
+[ -x ${DAEMON} ] || exit 0
+
+is_running() {
+ pgrep -F ${PIDFILE} > /dev/null 2>&1
+}
+
+start() {
+ start-stop-daemon --start --quiet -p ${PIDFILE} --exec ${DAEMON} -- ${DAEMON_ARGS}
+}
+
+stop() {
+    start-stop-daemon --stop --quiet -p ${PIDFILE}  # -p: same PID file option start() uses
+}
+
+case "$1" in
+ start)
+ if is_running; then
+ echo "${NAME} is already running: $(pgrep -F ${PIDFILE} -a)" >&2
+ exit 1
+ fi
+ echo "Starting ${NAME}" >&2
+ start
+ ;;
+ stop)
+ if is_running; then
+ echo "Stopping ${NAME}" >&2
+ stop
+ else
+ echo "${NAME} is not running" >&2
+ fi
+ ;;
+ restart|reload)
+ if is_running; then
+ echo "Stopping ${NAME}" >&2
+ stop
+ else
+ echo "${NAME} is not running" >&2
+ fi
+
+ # Wait for it to stop
+ tries=10
+ while is_running; do
+ if [ "${tries}" -eq 0 ]; then
+ echo "${NAME} failed to stop" >&2
+ exit 1
+ fi
+ tries=$((tries - 1))
+ sleep 1
+ done
+
+ echo "Starting ${NAME}" >&2
+ start
+ ;;
+ status)
+ if is_running; then
+ echo "${NAME} is running with PID $(pgrep -F ${PIDFILE} -a)" >&2
+ else
+ echo "${NAME} is not running" >&2
+ exit 1
+ fi
+ ;;
+ *)
+ echo "Usage: $0 {start|stop|status|restart}" >&2
+ exit 2
+ ;;
+esac
+
+exit 0
+#=========================================
diff --git a/roles/conduit/files/monitor_modem.py b/roles/conduit/files/monitor_modem.py
new file mode 100755
index 0000000..a58b625
--- /dev/null
+++ b/roles/conduit/files/monitor_modem.py
@@ -0,0 +1,863 @@
+#!/usr/bin/env python
+
+"""
+MIT License
+
+Copyright (c) 2025 Jeffrey C Honig
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+from __future__ import print_function
+
+import array
+import argparse
+from contextlib import contextmanager
+import errno
+import fcntl
+import logging
+from logging.handlers import SysLogHandler
+import os
+import psutil
+import select
+import signal
+import socket
+import struct
+import subprocess
+import sys
+import time
+
+if not hasattr(socket, 'SO_BINDTODEVICE'):
+ socket.SO_BINDTODEVICE = 25
+
+try:
+ FileNotFoundError
+except NameError:
+ FileNotFoundError = IOError
+
+class LockFileTimeout(Exception):
+ def __init__(self, error):
+ self.value = error
+ def __str__(self):
+ return repr(self.value)
+
+class DNSTimeout(Exception):
+ def __init__(self, error):
+ self.value = error
+ def __str__(self):
+ return repr(self.value)
+
+# Global flag to indicate shutdown
+shutdown_requested = False
+
+def catch_interrupt(signum, frame):
+ global shutdown_requested
+ logging.warning("Received signal %s, initiating shutdown.", signum)
+ shutdown_requested = True
+
+@contextmanager
+def pidfilelock(name):
+ """ Context to lock a pid file """
+
+ time_end = time.time() + 30
+ pidfile_path = os.path.join("/var/run", name + ".pid")
+ fd = os.open(pidfile_path, os.O_RDWR | os.O_CREAT, 0o644)
+ lock_file = os.fdopen(fd, "r+")
+ while True:
+ try:
+ logging.debug("Attempting to lock %s", pidfile_path)
+ fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError as err:
+ if err.errno != errno.EAGAIN:
+ raise err
+ logging.debug("Timeout trying to lock: %s", pidfile_path)
+ time.sleep(1)
+ if shutdown_requested or time.time() >= time_end:
+ raise LockFileTimeout("Unable to lock %s" % pidfile_path)
+ continue
+ else:
+ lock_file.seek(0)
+ lock_file.truncate()
+ lock_file.write("%d\n" % os.getpid())
+ lock_file.flush()
+ os.fsync(fd)
+ logging.debug("Wrote %d to %s", os.getpid(), pidfile_path)
+ break
+
+ try:
+ yield lock_file
+ finally:
+ logging.debug("Unlocking %s", pidfile_path)
+ fcntl.flock(lock_file, fcntl.LOCK_UN)
+ os.unlink(pidfile_path)
+ lock_file.close()
+
+def daemonize():
+ """ Run as a daemon """
+
+ try:
+ pid = os.fork()
+ if pid > 0:
+ # exit first parent
+ sys.exit(0)
+ except OSError:
+ logging.exception("First fork failed")
+ return False
+
+ # decouple from parent environment
+ os.chdir('/')
+ os.setsid()
+ os.umask(0)
+ # do second fork
+ try:
+ pid = os.fork()
+ if pid > 0:
+ # exit from second parent
+ sys.exit(0)
+ except OSError:
+ logging.exception("Second fork failed")
+ return False
+
+ # redirect standard file descriptors
+ sys.stdout.flush()
+ sys.stderr.flush()
+ si = open(os.devnull, 'r')
+ so = open(os.devnull, 'w')
+ se = open(os.devnull, 'w')
+ os.dup2(si.fileno(), sys.stdin.fileno())
+ os.dup2(so.fileno(), sys.stdout.fileno())
+ os.dup2(se.fileno(), sys.stderr.fileno())
+
+ return True
+
+def parse_args():
+    """ Parse the command line and return the resulting options namespace """
+
+    parser = argparse.ArgumentParser(description="Monitor the cellular modem and manage the PPP connection")
+
+    # Debugging
+    group = parser.add_argument_group("Debugging options")
+    group.add_argument("-d", "--debug",
+                       dest="debug", default=False,
+                       action='store_true',
+                       help="print debugging messages")
+    group.add_argument("--nodebug",
+                       dest="debug",
+                       action='store_false',
+                       help="do not print debugging messages")
+    group.add_argument("-v", "--verbose",
+                       dest="verbose", default=False,
+                       action='store_true',
+                       help="print verbose messages")
+    group.add_argument("-n", "--noop",
+                       dest="noop", default=False,
+                       action='store_true',
+                       help="Don't send notifications, just list what we are going to do")
+
+    group = parser.add_argument_group("Options")
+    group.add_argument("--interval",
+                       default=60.0, type=float,
+                       help="Seconds to wait between checks")
+    group.add_argument("--hostname",
+                       default="ec2-54-221-216-139.compute-1.amazonaws.com",
+                       help="Hostname to check")
+    group.add_argument("--pings",
+                       type=int, default=10,
+                       help="Number of pings to receive")
+    group.add_argument("--foreground", "-f",
+                       dest="foreground", default=False,
+                       action='store_true',
+                       help="Do not fork; run in foreground")
+    group.add_argument("--has-radio",
+                       dest="has_radio", default="/sys/devices/platform/mts-io/has-radio",
+                       help="Device file that indicates presence of modem")
+    group.add_argument("--real-ppp-on-boot",
+                       default="/var/config/ppp/ppp_on_boot",
+                       help="Where to link /etc/ppp_on_boot to when enabling ppp")
+    group.add_argument("--ppp-on-boot",
+                       default="/etc/ppp/ppp_on_boot",
+                       help="Where system looks for ppp startup script")
+    group.add_argument("--change-script",
+                       default="/var/config/ifup_restart",
+                       help="Script to run when status changes")
+    group.add_argument("--want-ppp-file",
+                       default="/var/run/using_ppp",
+                       help="File to exist if we want to be using PPP")
+    group.add_argument("--ignore-link-time",
+                       default=60*60*3,
+                       type=int,
+                       help="How often (in seconds) to retry broadcast interfaces if they did not work when we tried them")
+
+    # Parse args
+    options = parser.parse_args()
+
+    # --noop implies --debug
+    if options.noop:
+        options.debug = True
+
+    if options.debug:  # --debug in turn implies --verbose
+        options.verbose = True
+
+    return options
+
+def init_logging(options):
+ """ Set up logging """
+
+ logger = logging.getLogger()
+ syslog_format = '%s[%%(process)s]: %%(message)s' % (os.path.basename(sys.argv[0]))
+ if not sys.stdout.isatty():
+ # Repeat until syslog is available
+ while True:
+ try:
+ syslog_handler = SysLogHandler(address="/dev/log",
+ facility=SysLogHandler.LOG_DAEMON)
+ except FileNotFoundError as err:
+ logging.warning("Unable to open /dev/log: %s, waiting", err)
+ time.sleep(1)
+ else:
+ break
+ syslog_handler.setFormatter(logging.Formatter(syslog_format))
+ logger.handlers = []
+ logger.addHandler(syslog_handler)
+ else:
+ logger.handlers = []
+ logger.addHandler(logging.StreamHandler(stream=sys.stdout))
+
+ if options.debug:
+ logger.setLevel('DEBUG')
+ elif options.verbose:
+ logger.setLevel('INFO')
+ else:
+ logger.setLevel('WARNING')
+
+# Example usage:
+# bytes_sent = sendmsg_with_pktinfo(sock, packet, "8.8.8.8", interface="eth0")
+# If you prefer to supply interface index directly, you can call _ifname_to_index("eth0") yourself.
+def checksum(pkt):
+    """ Return the 16-bit Internet checksum of pkt (a byte string)
+
+    The result is meant to be packed in network byte order ('!H').
+    """
+    if len(pkt) % 2 == 1:
+        # Pad to whole 16-bit words; a bytes literal works on Python 2 and 3
+        pkt += b"\0"
+    s = sum(array.array("H", pkt))  # native-endian 16-bit words
+    s = (s >> 16) + (s & 0xffff)  # fold the carries into the low 16 bits
+    s += s >> 16
+    s = ~s & 0xffff
+    if sys.byteorder == "little":
+        # The words were read byte-swapped relative to network order, so
+        # swap the bytes of the result back.
+        s = ((s >> 8) | (s << 8)) & 0xffff
+    return s
+
+
+def resolve_with_timeout(hostname, timeout=5):
+    """ Return an IPv4 address string for hostname, or None on error or timeout """
+    def handler(signum, frame):
+        raise DNSTimeout("Timeout during name resolution")
+    # SIGALRM bounds the blocking resolver call; timeout is in whole seconds
+    old_handler = signal.signal(signal.SIGALRM, handler)
+    signal.alarm(timeout)
+    ip_addr = None
+    try:
+        ip_addr = socket.getaddrinfo(hostname, None, socket.AF_INET)[0][4][0]  # callers are IPv4-only
+    except DNSTimeout as error:
+        logging.error("resolve_with_timeout: Timeout resolving: %s: %s", hostname, error)
+    except socket.gaierror as error:
+        logging.error("resolve_with_timeout: error resolving %s: %s", hostname, error)
+    else:
+        logging.debug("resolve_with_timeout: %s -> %s", hostname, ip_addr)
+    finally:
+        signal.alarm(0)
+        signal.signal(signal.SIGALRM, old_handler)
+
+    return ip_addr
+
+def icmp_echo(dst_name, interface=None, payload=b'hello', id_=None, seq=1):
+    """ Send one ICMP echo request and wait up to 10 seconds for its reply
+
+    Returns True when a reply matching id_ and seq is received, False on
+    resolution failure, send error or timeout, and None when a shutdown
+    was requested while waiting.
+    """
+    logging.debug("icmp_echo(%s, interface=%s, id=%s, seq=%d)", dst_name, interface, id_, seq)
+    if id_ is None:
+        id_ = os.getpid() & 0xFFFF
+
+    # Build ICMP echo request header: type(8)=echo request, code=0, checksum, id, seq
+    ICMP_TYPE = 8
+    ICMP_CODE = 0
+    header = struct.pack('!BBHHH', ICMP_TYPE, ICMP_CODE, 0, id_, seq)
+    chksum = checksum(header + payload)
+    header = struct.pack('!BBHHH', ICMP_TYPE, ICMP_CODE, chksum, id_, seq)
+    packet = header + payload
+
+    try:
+        dst_ip = resolve_with_timeout(dst_name, timeout=1)
+    except DNSTimeout:
+        return False
+    if dst_ip is None:
+        return False
+
+    # raw ICMP socket (IPv4); closed in the finally clause on every exit path
+    sock = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP)
+    try:
+        if interface is not None:
+            # Bind to specific interface (Linux only). Requires root.
+            sock.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, interface.encode() + b'\0')
+
+        deadline = time.time() + 10.0
+        try:
+            sock.sendto(packet, (dst_ip, 0))
+        except socket.error as error:
+            # Any send failure (gaierror is a subclass of socket.error)
+            logging.error("sendto error: %s", error)
+            return False
+
+        while True:
+            remaining = deadline - time.time()
+            if remaining <= 0:
+                return False
+            # Wait for the socket to be ready
+            try:
+                ready, _, _ = select.select([sock], [], [], remaining)
+            except select.error:
+                if shutdown_requested:
+                    return
+                continue
+            if not ready:
+                logging.debug("icmp_echo: timeout")
+                return False
+
+            # Read pending packets
+            while True:
+                if shutdown_requested:
+                    return
+                try:
+                    recv_packet, addr = sock.recvfrom(65535, socket.MSG_DONTWAIT)
+                except socket.error as err:
+                    if err.errno == errno.EINTR:
+                        continue
+                    if err.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
+                        # Queue drained without a match: go back to waiting
+                        break
+                    logging.warning("recvfrom returns: %s", err)
+                    return False
+
+                iph_len = (struct.unpack("!B", recv_packet[:1])[0] & 0xf) * 4
+                icmp_packet = recv_packet[iph_len:]
+                if len(icmp_packet) < 8:
+                    continue
+                r_type, r_code, r_chksum, r_id, r_seq = struct.unpack("!BBHHH", icmp_packet[:8])
+                logging.debug("RECV type %d code %d id %d seq %d", r_type, r_code, r_id, r_seq)
+                if r_type == 0 and r_id == id_ and r_seq == seq:
+                    return True
+    finally:
+        sock.close()
+
+def get_ppp_addresses():
+ """
+ Returns the set of local IPv4 addresses on ppp0 that have a peer (ptp) address
+ """
+
+ result = set()
+
+ # Get interface addresses and stats
+ addrs = psutil.net_if_addrs()
+ stats = psutil.net_if_stats()
+
+ for iface, iface_addrs in addrs.items():
+ if iface != 'ppp0':
+ continue
+ iface_stat = stats.get(iface)
+ if not iface_stat:
+ continue
+
+ # Skip interfaces that are down
+ if not iface_stat.isup:
+ continue
+
+ # Keep IPv4 addresses that have a point-to-point peer address
+ for addr in iface_addrs:
+ if addr.family == 2: # AF_INET (IPv4)
+ if addr.address and addr.ptp:
+ result.add(addr.address)
+
+ return result
+
+def get_broadcast_interfaces():
+ """
+ Returns a list of (interface name, IPv4 address) tuples for interfaces that:
+ - Are up (`isup` flag)
+ - Have an IPv4 address assigned
+ - Have a broadcast address assigned
+ - Have carrier detected (physical link up for Ethernet)
+ """
+ result = []
+
+ # Get interface addresses and stats
+ addrs = psutil.net_if_addrs()
+ stats = psutil.net_if_stats()
+
+ for iface, iface_addrs in addrs.items():
+ iface_stat = stats.get(iface)
+ if not iface_stat:
+ continue
+
+ # Skip interfaces that are down
+ if not iface_stat.isup:
+ continue
+
+ # Use the first IPv4 address that has a broadcast address
+ iface_address = None
+ for addr in iface_addrs:
+ if addr.family == 2: # AF_INET (IPv4)
+ if addr.address and addr.broadcast:
+ iface_address = addr.address
+ break
+ if not iface_address:
+ continue
+
+ # Check carrier
+ carrier_file = "/sys/class/net/{}/carrier".format(iface)
+ try:
+ with open(carrier_file, 'r') as fp:
+ carrier = fp.read().strip()
+ if carrier != '1':
+ continue
+ except IOError:
+ # If the file cannot be read, assume link is up (virtual interface)
+ pass
+
+ # Passed all checks
+ result.append((iface, iface_address))
+
+ return result
+
+def ppp_on_boot(options, enable):
+ """ Set (enable) or clear (disable) the execute bits on the system ppp startup script """
+
+ try:
+ ppp_on_boot_stat = os.stat(options.ppp_on_boot)
+ except (OSError, IOError):
+ logging.error("Unable to get stat info about %s", options.ppp_on_boot)
+ return
+
+ logging.debug("ppp_on_boot(%s): %s -> %o", enable, options.ppp_on_boot, ppp_on_boot_stat.st_mode)
+
+ if enable:
+ if ppp_on_boot_stat.st_mode & 0o111 != 0o111:
+ try:
+ os.chmod(options.ppp_on_boot, 0o755)
+ logging.warning("ppp_on_boot: %s set to executable", options.ppp_on_boot)
+ except OSError as error:
+ logging.error("Error making %s executable: %s",
+ options.ppp_on_boot,
+ error)
+ return
+
+ if ppp_on_boot_stat.st_mode & 0o111 != 0:
+ try:
+ os.chmod(options.ppp_on_boot, 0o644)
+ logging.warning("ppp_on_boot: %s set to non-executable", options.ppp_on_boot)
+ except OSError as error:
+ logging.error("Error making %s non-executable: %s",
+ options.ppp_on_boot,
+ error)
+
+def tunnel_addresses(options):
+ """ Return local addresses of all established tunnel connections """
+
+ addresses = set()
+
+ try:
+ dst_ip = resolve_with_timeout(options.hostname, timeout=5)
+ except DNSTimeout:
+ return addresses
+ else:
+ if dst_ip is None:
+ return addresses
+
+ for conn in psutil.net_connections('inet4'):
+ if conn.type != socket.SOCK_STREAM:
+ continue
+ if not conn.raddr:
+ continue
+ if conn.raddr.port != 22 or conn.raddr.ip != dst_ip:
+ continue
+ if conn.status != 'ESTABLISHED':
+ continue
+
+ addresses.add(conn.laddr.ip)
+
+ return addresses
+
+def check_modem(options):
+    """ Return (have_modem, have_sim) from the has-radio file and an AT+CPIN? query """
+
+    have_modem = False
+    try:
+        with open(options.has_radio, "r") as fp:
+            have_modem = fp.read().strip() == '1'
+    except IOError as error:
+        logging.error("Reading %s: %s", options.has_radio, error)
+
+    have_sim = False
+    if have_modem:
+        cmd = ["radio-cmd", "AT+CPIN?"]
+        try:
+            output = subprocess.check_output(cmd)
+            logging.debug("check_modem: %s returned: %s", " ".join(cmd), output)
+            if b"+CPIN: READY" in output:  # check_output returns bytes on Python 3
+                have_sim = True
+        except (subprocess.CalledProcessError, OSError) as error:  # OSError: radio-cmd could not be run
+            logging.warning("check_modem: %s returned: %s", " ".join(cmd), error)
+
+    logging.debug("have_modem: %s, have_sim: %s", have_modem, have_sim)
+
+    return have_modem, have_sim
+
+def pppd(options, enable):
+    """ Start or stop pppd and tell monit to (un)monitor the ppp services
+
+    Returns True if pppd was started, or was running and a stop was issued.
+    """
+
+    logging.debug("pppd(%s)", enable)
+
+    try:
+        subprocess.check_output(["pidof", "pppd"], stderr=subprocess.STDOUT)
+        logging.debug("pppd is running")
+        ppp_is_running = True
+    except subprocess.CalledProcessError:
+        logging.debug("pppd is not running")
+        ppp_is_running = False
+
+    if enable:
+        # Tell conduit_leds that we want PPP
+        try:
+            fd = os.open(options.want_ppp_file, os.O_CREAT|os.O_EXCL|os.O_WRONLY, 0o644)
+            with os.fdopen(fd, 'w') as fp:
+                fp.write("1\n")
+        except OSError as error:
+            if error.errno != errno.EEXIST:
+                logging.error("Writing %s: %s", options.want_ppp_file, error)
+
+        if not ppp_is_running:
+            cmd = ["/etc/init.d/ppp", "start"]
+            try:
+                logging.info("Running: %s", " ".join(cmd))
+                result = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError as error:
+                logging.error("%s: %s", " ".join(cmd), error)
+                return False
+            else:
+                logging.debug("%s: %s", " ".join(cmd), result.strip())
+            # Execute exactly the cmd that is logged (monit found via PATH)
+            for service in [ 'ppp0', 'pppd']:
+                cmd = ["monit", "monitor", service]
+                try:
+                    logging.info("Running: %s", " ".join(cmd))
+                    result = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+                except subprocess.CalledProcessError as error:
+                    logging.error("%s: %s", " ".join(cmd), error)
+                else:
+                    logging.debug("%s: %s", " ".join(cmd), result.strip())
+
+            return True
+
+        return False
+
+    # We do not want PPP
+
+    # Inform conduit_leds
+    try:
+        os.unlink(options.want_ppp_file)
+    except OSError as error:
+        if error.errno != errno.ENOENT:
+            logging.error("Deleting %s: %s", options.want_ppp_file, error)
+
+    if ppp_is_running:
+        cmd = ["/etc/init.d/ppp", "stop"]
+        try:
+            logging.info("Running: %s", " ".join(cmd))
+            result = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as error:
+            logging.error("%s: %s", " ".join(cmd), error)
+        else:
+            logging.debug("%s: %s", " ".join(cmd), result.strip())
+
+        for service in [ 'ppp0', 'pppd']:
+            cmd = ["monit", "unmonitor", service]
+            try:
+                logging.info("Running: %s", " ".join(cmd))
+                result = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError as error:
+                logging.error("%s: %s", " ".join(cmd), error)
+            else:
+                logging.debug("%s: %s", " ".join(cmd), result.strip())
+
+        return True
+
+    return False
+
+class IfState(object):
+    """ Per-interface state that process() keeps between passes of its loop """
+    def __init__(self, name, address):
+        self.name = name
+        self.address = address
+        self.ignore_time = time.time()  # process() skips the interface until then
+        self.link_state = True          # copied from `seen` at the end of a pass
+        self.seen = False               # True once found in the current pass
+        self.seq = -1                   # last ICMP sequence number sent
+        self.responding = None          # None until the first ping test
+        self.missed_cycles = 0          # int, not None: process() does `+= 1` on it
+
+    def __str__(self):
+        return "%s: %s ignore: %f state: %s, seen: %s, seq: %d, responding: %s" % (
+            self.name,
+            self.address,
+            self.ignore_time - time.time(),
+            self.link_state,
+            self.seen,
+            self.seq,
+            self.responding)
+
+def get_interface_for_dest(dest_ip):
+    """
+    Return the name of the interface the kernel would use to reach dest_ip
+    (from 'ip -4 route get'), or None if the lookup fails or names no device.
+    """
+
+    try:
+        output = subprocess.check_output(
+            ["ip", "-4", "route", "get", dest_ip],
+            stderr=subprocess.STDOUT,
+            universal_newlines=True  # text rather than bytes on Python 3
+        ).strip()
+    except (subprocess.CalledProcessError, OSError):
+        return None
+
+    # Example outputs:
+    # "8.8.8.8 via 192.168.1.1 dev eth0 src 192.168.1.10"
+    # "192.168.1.50 dev eth0 src 192.168.1.10"
+    # "local 192.168.1.100 dev lo src 192.168.1.100"
+    parts = output.split()
+
+    # The interface always follows "dev"
+    if "dev" in parts:
+        idx = parts.index("dev")
+        if idx + 1 < len(parts):
+            return parts[idx + 1]
+
+    return None
+
+def process_interface(options, if_state):
+    """ Ping options.hostname through if_state.name and record whether it responds """
+    logging.debug("process_interface(%s)", if_state)
+
+    # Assume failure until enough echo replies have been counted
+    if_state.responding = False
+    answered = 0
+    for _ in range(options.pings):
+        # The ICMP sequence number is an unsigned 16 bit value, so wrap to 0
+        if if_state.seq < 65535:
+            if_state.seq += 1
+        else:
+            if_state.seq = 0
+        logging.debug("send_icmp (seq %d) via %s", if_state.seq, if_state.name)
+        if icmp_echo(options.hostname, interface=if_state.name, seq=if_state.seq):
+            answered += 1
+        time.sleep(.1)
+        if shutdown_requested:
+            return
+
+    # Fewer than 80% of the pings answered: leave the interface marked as failing
+    if answered < float(options.pings) * 0.80:
+        return
+
+    logging.warning("Received %d/%d responses on %s, pppd not needed",
+                    answered, options.pings, if_state.name)
+    if_state.responding = True
+    if_state.missed_cycles = 0
+
+def process(options, progname):
+ """ Loop until shutdown: check modem/SIM, ping-test the routed interface, then start/stop pppd and restart services or the ssh tunnel as needed """
+
+ if_states = {}
+ while time.sleep(options.interval) is None:
+ global shutdown_requested
+ if shutdown_requested:
+ return
+
+ logging.debug("check_modem")
+
+ have_modem, have_sim = check_modem(options)
+ if not have_modem or not have_sim:
+ logging.warning("No Modem or SIM, stopping pppd")
+ ppp_on_boot(options, False)
+ pppd(options, False)
+ continue
+
+ # If we have a modem and sim, ensure ppp_on_boot is enabled
+ ppp_on_boot(options, True)
+
+ # Mark interfaces as not seen
+ for if_name, if_state in if_states.items():
+ if_state.seen = False
+
+ ppp_new_state = True
+ do_restart = False
+ for if_name, if_address in get_broadcast_interfaces():
+ if_state = if_states.setdefault(if_name, IfState(if_name, if_address))
+
+ logging.debug("looking at %s", if_state)
+
+ # Mark as seen
+ if_state.seen = True
+
+ # Check if we are supposed to be ignoring this link
+ if if_state.ignore_time > time.time():
+ logging.warning("%s: ignoring until %s",
+ if_name,
+ time.strftime("%Y-%m-%d %H:%M:%S",
+ time.localtime(if_state.ignore_time)))
+ continue
+
+ # Which interface does the kernel route external traffic (1.1.1.1) through?
+ default_if_name = get_interface_for_dest("1.1.1.1")
+ if default_if_name == "ppp0":
+ # Routed via ppp0 while this interface is up: ask for ppp to be stopped
+ ppp_new_state = False
+ logging.warning("%s: up, ppp is not needed", if_name)
+ continue
+ if default_if_name != if_name:
+ # Routed via some other interface, skip this one
+ logging.info("%s: up, not default", if_name)
+ continue
+
+ # It's us, try pinging
+ was_responding = if_state.responding
+ process_interface(options, if_state)
+ if shutdown_requested:
+ return
+
+ if if_state.responding:
+ ppp_new_state = False
+
+ # Restart services if it was failing before and is now responding
+ if was_responding is False:
+ logging.info("%s: is now responding", if_name)
+ do_restart = True
+ continue
+
+ # Fail a few times before we mark it down
+ if_state.missed_cycles += 1
+ if if_state.missed_cycles > 4:
+ # Not responding, ignore it for a while
+ if_state.ignore_time = time.time() + options.ignore_link_time
+ logging.warning("%s: not responding, ignoring until: %s",
+ if_name,
+ time.strftime("%Y-%m-%d %H:%M:%S",
+ time.localtime(if_state.ignore_time)))
+
+ # Mark current link state
+ active_addresses = set()
+ active_ifs = set()
+ for if_name, if_state in if_states.items():
+ if_state.link_state = if_state.seen
+ if if_state.responding:
+ active_addresses.add(if_state.address)
+ active_ifs.add(if_name)
+
+ # Ensure pppd is in the correct state
+ ppp_state_changed = pppd(options, ppp_new_state)
+
+ if do_restart:
+ logging.warning("Restarting services")
+ cmd = [options.change_script]
+ env = os.environ.copy()
+ env["METHOD"] = "monitor_modem"
+ if active_ifs:
+ env["IFACE"] = ", ".join(list(active_ifs))
+ try:
+ logging.info("Running %s", " ".join(cmd))
+ subprocess.check_call(cmd, env=env)
+ except subprocess.CalledProcessError as error:
+ logging.error("%s: %s", " ".join(cmd), error)
+ else:
+ if ppp_new_state and not ppp_state_changed:
+ active_addresses.update(get_ppp_addresses())
+
+ if active_addresses:
+ tunnel_addrs = tunnel_addresses(options)
+ logging.debug("Checking that tunnel sources (%s) is in (%s)",
+ " ".join(list(tunnel_addrs)),
+ " ".join(list(active_addresses)))
+ if not active_addresses.intersection(tunnel_addrs):
+ logging.warning("No ssh_tunnel sources (%s) from active addresses (%s), restarting",
+ " ".join(list(tunnel_addrs)),
+ " ".join(list(active_addresses)))
+ # No tunnel connections from an active interface
+ cmd = ["/etc/init.d/ssh_tunnel", "restart"]
+ try:
+ logging.info("Running %s", " ".join(cmd))
+ subprocess.check_call(cmd)
+ except subprocess.CalledProcessError as error:
+ logging.error("%s: %s", " ".join(cmd), error)
+
+def main():
+    """ Daemonize if requested, then run process() under a pid file lock; return the exit code """
+
+    script_name = os.path.basename(sys.argv[0])
+    progname, _ext = os.path.splitext(script_name)
+    options = parse_args()
+
+    # Detach first unless asked to stay in the foreground
+    if not (options.foreground or daemonize()):
+        return 1
+
+    # Logging is set up only after daemonize or we'll hang the system startup.
+    init_logging(options)
+    logging.warning("%s: Started", progname)
+
+    # Both signals are routed to the same handler
+    for signum in (signal.SIGTERM, signal.SIGINT):
+        signal.signal(signum, catch_interrupt)
+
+    try:
+        with pidfilelock(progname):
+            process(options, progname)
+    except LockFileTimeout:
+        logging.critical("Another instance of %s is running", progname)
+        return 1
+    else:
+        return 0
+
+if __name__ == "__main__":
+    # Exit with main()'s return value, or with 1 when it raised instead
+    try:
+        sys.exit(main())
+    except KeyboardInterrupt:
+        print("")
+    except Exception as exc:
+        logging.exception(exc)
+
+    sys.exit(1)
diff --git a/roles/conduit/files/ssh_tunnel.initd b/roles/conduit/files/ssh_tunnel.initd
index d8b6f4f..b28d4ad 100644
--- a/roles/conduit/files/ssh_tunnel.initd
+++ b/roles/conduit/files/ssh_tunnel.initd
@@ -47,6 +47,9 @@ is_running() {
}
start() {
+ for env in $(set | grep -E '^AUTOSSH_'); do
+ eval export ${env}
+ done
start-stop-daemon --start --quiet --exec ${DAEMON} -- ${DAEMON_ARGS}
}
diff --git a/roles/conduit/handlers/main.yml b/roles/conduit/handlers/main.yml
index c65fac6..8567f85 100644
--- a/roles/conduit/handlers/main.yml
+++ b/roles/conduit/handlers/main.yml
@@ -6,7 +6,7 @@
#
- name: update rc
shell: "for init in ttn-pkt-forwarder lora-basic-station; do update-rc.d -f ${init} remove; test -f /etc/init.d/${init} && update-rc.d ${init} defaults 95 30 || true; done"
-
+
#
# Restart the packet forwarder
#
@@ -37,7 +37,7 @@
debug:
msg: "Please go to https://console.thethingsnetwork.org/gateways
and update the antenna Altitude and Placement your gateways. The
- API does not allow these parameters to be set"
+ API does not allow these parameters to be set"
run_once: true
#
@@ -58,6 +58,12 @@
- name: restart conduit_leds
include_tasks: restart_conduit_leds.yml
+#
+# Restart monitor_modem
+#
+- name: restart monitor_modem
+ include_tasks: restart_monitor_modem.yml
+
#
# Restart sshd after config changes
#
@@ -67,7 +73,7 @@
ignore_errors: true
#
-# Remind
+# Remind
#
- name: interface reboot
debug:
@@ -118,3 +124,10 @@
- name: Update system checksum file
include_tasks: update_checksums.yml
+
+#
+# Summary
+#
+- name: Rescued sections
+ ansible.builtin.debug:
+ msg: "Failed sections: {{ rescued | join(', ') }}"
diff --git a/roles/conduit/tasks/conduit_leds.yml b/roles/conduit/tasks/conduit_leds.yml
index 1cb652c..c1d7269 100644
--- a/roles/conduit/tasks/conduit_leds.yml
+++ b/roles/conduit/tasks/conduit_leds.yml
@@ -7,7 +7,9 @@
mode: "0755"
owner: root
group: root
- notify: restart conduit_leds
+ notify:
+ - restart conduit_leds
+ - Update system checksum file
- name: conduit_leds /var/config/init.d/conduit_leds
copy:
@@ -16,7 +18,9 @@
mode: "0755"
owner: root
group: root
- notify: restart conduit_leds
+ notify:
+ - restart conduit_leds
+ - Update system checksum file
- name: conduit_leds Link /etc/init.d/conduit_leds to /var/config/init.d/conduit_leds
file:
@@ -24,5 +28,7 @@
state: link
src: /var/config/init.d/conduit_leds
force: yes
- notify: restart conduit_leds
+ notify:
+ - restart conduit_leds
+ - Update system checksum file
...
diff --git a/roles/conduit/tasks/main.yml b/roles/conduit/tasks/main.yml
index 2abdfd2..ea29384 100644
--- a/roles/conduit/tasks/main.yml
+++ b/roles/conduit/tasks/main.yml
@@ -1,5 +1,14 @@
---
+#
+#
+#
+- name: main Create list of failed sections
+ ansible.builtin.set_fact:
+ rescued: []
+ tags:
+ - always
+
#
# Build the list of authorized keys
#
@@ -177,6 +186,10 @@
rescue:
- debug:
msg: "main: TTN setup failed, continuing"
+ - ansible.builtin.set_fact:
+ rescued: "{{ rescued + ['ttn'] }}"
+ changed_when: true
+ notify: Rescued sections
tags:
- ttn
@@ -189,26 +202,66 @@
 rescue:
 - debug:
 msg: "main: SSH tunnel setup failed, continuing..."
+ - ansible.builtin.set_fact:
+ rescued: "{{ rescued + ['ssh_tunnel'] }}"
+ changed_when: true
+ notify: Rescued sections
 tags:
 - ssh_tunnel
 - monit
 - setup
 #
-# Secure ssh
+# Enable status leds
 #
 - name: main Set up conduit_leds
- import_tasks: conduit_leds.yml
- when: ansible_local.conduit.hw_version.startswith("MTCDT-")
+ block:
+ - name: import conduit_leds.yml
+ import_tasks: conduit_leds.yml
+ when: ansible_local.conduit.hw_version.startswith("MTCDT-")
+ rescue:
+ - debug:
+ msg: "main: conduit_leds setup failed, continuing..."
+ - ansible.builtin.set_fact:
+ rescued: "{{ rescued + ['conduit_leds'] }}"
+ changed_when: true
+ notify: Rescued sections
 tags:
 - conduit_leds
 - setup
+#
+# Enable modem monitoring
+#
+- name: main Set up monitor_modem
+ block:
+ - name: import monitor_modem.yml
+ import_tasks: monitor_modem.yml
+ rescue:
+ - debug:
+ msg: "main: monitor_modem setup failed, continuing..."
+ - ansible.builtin.set_fact:
+ rescued: "{{ rescued + ['monitor_modem'] }}"
+ changed_when: true
+ notify: Rescued sections
+ tags:
+ - monitor_modem
+ - setup
+
 #
 # Set up monit
 #
 - name: main Set up moniit
- import_tasks: monit.yml
+ block:
+ - name: import monit.yml
+ import_tasks: monit.yml
+ rescue:
+ - debug:
+ msg: "main: monit setup failed, continuing..."
+ - ansible.builtin.set_fact:
+ rescued: "{{ rescued + ['monit'] }}"
+ changed_when: true
+ notify: Rescued sections
 tags:
 - monit
 - setup
diff --git a/roles/conduit/tasks/monitor_modem.yml b/roles/conduit/tasks/monitor_modem.yml
new file mode 100644
index 0000000..d6c7d71
--- /dev/null
+++ b/roles/conduit/tasks/monitor_modem.yml
@@ -0,0 +1,56 @@
+---
+
+- name: monitor_modem /var/config/local/sbin/monitor_modem
+ ansible.builtin.copy:
+ dest: /var/config/local/sbin/monitor_modem
+ src: monitor_modem.py
+ mode: "0755"
+ owner: root
+ group: root
+ notify:
+ - restart monitor_modem
+ - Update system checksum file
+ when: ansible_local.mts_io.has_radio == "1"
+
+- name: monitor_modem /var/config/init.d/monitor_modem
+ ansible.builtin.copy:
+ dest: /var/config/init.d/monitor_modem
+ src: monitor_modem.initd
+ mode: "0755"
+ owner: root
+ group: root
+ notify:
+ - restart monitor_modem
+ - Update system checksum file
+ when: ansible_local.mts_io.has_radio == "1"
+
+- name: monitor_modem Link /etc/init.d/monitor_modem to /var/config/init.d/monitor_modem
+ ansible.builtin.file:
+ path: /etc/init.d/monitor_modem
+ state: link
+ src: /var/config/init.d/monitor_modem
+ force: yes
+ notify:
+ - restart monitor_modem
+ - Update system checksum file
+ when: ansible_local.mts_io.has_radio == "1"
+
+- name: monitor_modem Remove if a modem is not present
+ ansible.builtin.file:
+ path: "{{ item }}"
+ state: absent
+ force: yes
+ loop:
+ - /var/config/local/sbin/monitor_modem
+ - /var/config/init.d/monitor_modem
+ notify:
+ - Update system checksum file
+ when: ansible_local.mts_io.has_radio == "0"
+
+- name: monitor_modem Delete service if modem not present
+ ansible.builtin.shell: "update-rc.d -f monitor_modem remove"
+ notify:
+ - Update system checksum file
+ when: ansible_local.mts_io.has_radio == "0"
+
+...
diff --git a/roles/conduit/tasks/ppp.yml b/roles/conduit/tasks/ppp.yml
index f650136..e53117e 100644
--- a/roles/conduit/tasks/ppp.yml
+++ b/roles/conduit/tasks/ppp.yml
@@ -24,23 +24,14 @@
state: link
src: /var/config/ppp/ppp_on_boot
force: true
- when: ppp_on_boot.stat.islnk is defined and not ppp_on_boot.stat.islnk
+ when:
+ - ppp_on_boot.stat.lnk_target is not defined or ppp_on_boot.stat.lnk_target != '/var/config/ppp/ppp_on_boot'
- name: ppp Make /var/config/ppp/ppp_on_boot executable when we are using cellular
file:
dest: /var/config/ppp/ppp_on_boot
- mode: "755"
+ mode: "{{ '0755' if use_cellular and cellular_provider is defined else '0644' }}"
notify: Start ppp
- when:
- - use_cellular
- - cellular_provider is defined
-
-- name: ppp Make /var/config/ppp/ppp_on_boot not executable when we are not using cellular
- file:
- dest: /var/config/ppp/ppp_on_boot
- mode: "644"
- notify: Stop ppp
- when: use_cellular == False or cellular_provider is not defined
#
# Set or reset APN
@@ -57,15 +48,29 @@
 - use_cellular
 - cellular_apn is defined
-- name: ppp Set provider
+#
+# Setup PPP scripts
+#
+
+- name: ppp Set /var/config/ppp/peers/provider
+ ansible.builtin.file:
+ path: /var/config/ppp/peers/provider
+ state: "{{ 'link' if use_cellular and cellular_provider is defined else 'absent' }}"
+ src: "{{ '/var/config/ppp/peers/' + cellular_provider if use_cellular and cellular_provider is defined else '' }}"
+ notify: Start ppp
+
+- name: ppp Restore default provider in /var/config/ppp/ppp_on_boot
 lineinfile:
- path: /etc/ppp/ppp_on_boot
+ path: /var/config/ppp/ppp_on_boot
 regexp: '^\$PPPD call '
- line: '$PPPD call {{ cellular_provider }}'
- notify: Start ppp
- when:
- - use_cellular
- - cellular_provider is defined
+ line: '$PPPD call provider'
+
+- name: ppp make /var/config/ppp/ppp_on_boot a script
+ lineinfile:
+ path: /var/config/ppp/ppp_on_boot
+ regexp: '/bin/sh$'
+ line: '#!/bin/sh'
+ insertbefore: BOF
 #
 # Set PPP configuration options
diff --git a/roles/conduit/tasks/restart_monitor_modem.yml b/roles/conduit/tasks/restart_monitor_modem.yml
new file mode 100644
index 0000000..0ca880f
--- /dev/null
+++ b/roles/conduit/tasks/restart_monitor_modem.yml
@@ -0,0 +1,13 @@
+---
+
+#
+# Configure monitor_modem service and restart it
+#
+
+- name: restart_monitor_modem Update rc.d to start monitor_modem
+ ansible.builtin.shell: "update-rc.d -f monitor_modem remove; update-rc.d monitor_modem defaults 10 50"
+
+- name: restart_monitor_modem Restart monitor_modem
+ ansible.builtin.command: /etc/init.d/monitor_modem restart
+
+...
diff --git a/roles/conduit/tasks/time.yml b/roles/conduit/tasks/time.yml
index 9963bac..ed51c50 100644
--- a/roles/conduit/tasks/time.yml
+++ b/roles/conduit/tasks/time.yml
@@ -53,8 +53,8 @@
- name: time Ensure /var/config/default/ntpd.default exists
copy:
- src: /etc/default/ntpd.default
- dest: /var/config/default/ntpd.default
+ src: /etc/default/ntpd
+ dest: /var/config/default/ntpd
remote_src: yes
force: no
notify:
@@ -64,7 +64,7 @@
file:
dest: /etc/default/ntpd
state: link
- src: /var/config/default/ntpd.default
+ src: /var/config/default/ntpd
force: yes
notify:
- restart ntpd
@@ -76,7 +76,7 @@
- name: time Prevent ntpd from hanging on boot when net is down
lineinfile:
- dest: /var/config/default/ntpd.default
+ dest: /var/config/default/ntpd
regexp: "^SET_SYSTEM_CLOCK="
line: "SET_SYSTEM_CLOCK={{ set_system_clock }}"
state: present
@@ -84,6 +84,13 @@
- restart ntpd
- Update system checksum file
+- name: time Clean up turds
+ ansible.builtin.file:
+ path: /var/config/default/ntpd.default
+ state: absent
+ notify:
+ - Update system checksum file
+
#
# Monit
#
diff --git a/roles/conduit/tasks/ttn_basic_station.yml b/roles/conduit/tasks/ttn_basic_station.yml
index c9f03c0..83f9acb 100644
--- a/roles/conduit/tasks/ttn_basic_station.yml
+++ b/roles/conduit/tasks/ttn_basic_station.yml
@@ -24,7 +24,7 @@
- forwader_version is defined
- ansible_local.opkg.lora_basic_station is not defined or ansible_local.opkg.lora_basic_station != forwarder_version
-- name: ttn_basic_station Install the desired version of lora_basic_station
+- name: ttn_basic_station Install the desired version of lora_basic_station
opkg:
name: "lora_basic_station=={{ forwarder_version }}"
state: present
@@ -52,6 +52,7 @@
--log-level {{ basic_log_level }}
--log-size {{ forwarder_logrotate_size }}
--log-rotate {{ forwarder_logrotate_count }}
+ {{ '--tc-timeout 5' if use_cellular else '' }}
{{ gps_arg }}"
register: station_conf_raw
- set_fact:
diff --git a/roles/conduit/templates/lora-basic-station.j2 b/roles/conduit/templates/lora-basic-station.j2
index 2c5108b..3023b36 100644
--- a/roles/conduit/templates/lora-basic-station.j2
+++ b/roles/conduit/templates/lora-basic-station.j2
@@ -124,6 +124,11 @@ do_start() {
test -f "${conf_dir}/${file}" && cp "${conf_dir}/${file}" "${run_dir}/1/"
done
+ #
+ # copy mlinux-version info
+ #
+ echo "$(head -1 /etc/mlinux-version) - $(opkg status lora-basic-station | sed -n '/Version:/s/Version: //p')" > ${run_dir}/1/version.txt
+
#
# reset concentrator
#
diff --git a/roles/conduit/templates/monitrc.j2 b/roles/conduit/templates/monitrc.j2
index c826c03..eb419a2 100644
--- a/roles/conduit/templates/monitrc.j2
+++ b/roles/conduit/templates/monitrc.j2
@@ -15,9 +15,9 @@ set httpd
# Monitor the system
check system $HOST
- if loadavg (1min) > 2 then alert
- if loadavg (5min) > 4 then alert
- if memory usage > 25% then alert
+ if loadavg (1min) > {{ monit_loadavg_1m }} then alert
+ if loadavg (5min) > {{ monit_loadavg_5m }} then alert
+ if memory usage > {{ monit_memory_usage }} then alert
# Event Queue
set eventqueue basedir {{ monit_eventqueue }}
@@ -97,6 +97,14 @@ check process conduit_leds PIDFILE /var/run/conduit_leds.pid
stop program = "/etc/init.d/conduit_leds stop"
{% endif -%}
+{% if 'modem_at0' in ansible_facts['ansible_local'].dev -%}
+# Monitor monitor_modem
+check process monitor_modem PIDFILE /var/run/monitor_modem.pid
+ if does not exist for {{ monit_process_period }} then restart
+ start program = "/etc/init.d/monitor_modem start" with timeout 15 seconds
+ stop program = "/etc/init.d/monitor_modem stop"
+{% endif -%}
+
# Monitor system directory checksums
check program check_system_md5 with path /usr/local/lib/check_system_md5
every "{{ 60 | random(seed=inventory_hostname) }} * * * *"
diff --git a/roles/conduit/templates/ppp.monit.j2 b/roles/conduit/templates/ppp.monit.j2
index 9e29936..bb8eb28 100644
--- a/roles/conduit/templates/ppp.monit.j2
+++ b/roles/conduit/templates/ppp.monit.j2
@@ -1,8 +1,8 @@
check network "{{ monit_ppp_if }}" with interface "{{ monit_ppp_if }}"
-# if failed link then restart
+ if link up then exec "/usr/bin/env METHOD=monit IFACE=ppp0 /var/config/ifup_restart"
if changed link capacity then alert
-check process pppd MATCHING "^/usr/sbin/pppd call {{ cellular_provider }}$"
+check process pppd MATCHING "^(/usr/sbin/)?pppd call provider$"
if does not exist for 1 cycles then restart
start program = {{ monit_ppp_start }}
stop program = {{ monit_ppp_stop }}
diff --git a/roles/conduit/templates/ssh_tunnel.j2 b/roles/conduit/templates/ssh_tunnel.j2
index 0b8bb33..0d53b54 100644
--- a/roles/conduit/templates/ssh_tunnel.j2
+++ b/roles/conduit/templates/ssh_tunnel.j2
@@ -22,3 +22,5 @@ SSH_KEY={{ ssh_tunnel_ssh_key }}
SSH_PORT={{ ssh_tunnel_ssh_port }}
{% endif %}
DAEMON_ARGS="{{ ssh_tunnel_daemon_args }}"
+AUTOSSH_POLL={{ ssh_tunnel_poll }}
+AUTOSSH_FIRST_POLL={{ ssh_tunnel_first_poll }}