From e73c25434ab7a4c677dd071f44604a046f575e1b Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Feb 2026 09:47:59 +0000 Subject: [PATCH 01/93] Implement handshake and rolling buffers in TCP proxy for multi-client support and add configurable node total broadcast channel --- .env.example | 2 + README.md | 38 ++++++++++++++---- src/bot.py | 16 ++++---- src/commands/status.py | 2 +- src/helpers.py | 10 +++++ src/tcp_proxy.py | 89 ++++++++++++++++++++++-------------------- 6 files changed, 98 insertions(+), 59 deletions(-) diff --git a/.env.example b/.env.example index 02bdbfd..ee29a54 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,8 @@ STORAGE_API_TOKEN=... # Features ENABLE_TCP_PROXY=true +ENABLE_FEATURE_NODE_TOTALS=true +CHANNEL_FOR_NODE_TOTAL_BROADCAST=2 # Commands ENABLE_COMMAND_PING=true diff --git a/README.md b/README.md index bf23504..076057d 100644 --- a/README.md +++ b/README.md @@ -127,13 +127,35 @@ The bot listens for messages and responds to commands. You can interact with it ### Supported Commands -| Command | Description | -|-----------|------------------------------------------------| -| `!help` | Displays a list of available commands | -| `!hello` | Displays information about the bot | -| `!ping` | Responds with "Pong!" | -| `!nodes` | Displays a list of connected nodes, stats, etc | -| `!whoami` | Displays information about the sender | +| Command | Description | +|-----------|---------------------------------------------------------------| +| `!help` | Displays a list of available commands | +| `!hello` | Displays information about the bot | +| `!ping` | Responds with "Pong!" | +| `!nodes` | Displays a list of connected nodes, stats, etc | +| `!nodes totals` | Manually triggers a node count report | +| `!whoami` | Displays information about the sender | +| `!tr` | Performs a traceroute to the sender (outbound & inbound) | +| `!status` | Displays bot status and radio connection details | + +## Features + +### Node Count Reporting +The bot monitors mesh visibility and provides automated reporting: +- **Scheduled Reports:** Every 3 hours, a status update is sent to a configurable channel (defaulting to Channel 2) with the current online node count. This can be adjusted using `CHANNEL_FOR_NODE_TOTAL_BROADCAST` in your `.env` file. +- **Immediate Alerts:** If the visible node count drops to zero, the bot sends an immediate warning. +- **Manual Check:** Use `!nodes totals` to get an instant report via DM. + +### Enhanced Connectivity (TCP Proxy) +The bot now includes a built-in TCP proxy to manage the connection to the Meshtastic node. This improves stability and allows for automatic reconnection if the radio connection is lost. + +### Improved Logging +Messages received on named Group Channels (e.g., 'LongRange', 'PrivateChat') are now logged with their specific channel name, making it easier to track conversations across different mesh networks. + +### Advanced Traceroute +The `!tr` command has been upgraded to show the full path: +- **Outbound:** The route from the bot to your node. +- **Inbound:** The route back from your node to the bot (if available). --- @@ -152,4 +174,4 @@ Contributions are welcome! Please fork the repository and submit a pull request. ## License -This project is licensed under the MIT License. \ No newline at end of file +This project is licensed under the MIT License. diff --git a/src/bot.py b/src/bot.py index 7802805..9dfdc4d 100644 --- a/src/bot.py +++ b/src/bot.py @@ -12,7 +12,7 @@ from src.api.StorageAPI import StorageAPIWrapper from src.commands.factory import CommandFactory from src.data_classes import MeshNode -from src.helpers import pretty_print_last_heard, safe_encode_node_name +from src.helpers import pretty_print_last_heard, safe_encode_node_name, get_env_bool, get_env_int from src.persistence.commands_logger import AbstractCommandLogger from src.persistence.node_db import AbstractNodeDB from src.persistence.node_info import AbstractNodeInfoStore @@ -181,7 +181,6 @@ def handle_public_message(self, packet: MeshPacket): if words: command_name = words[0].lower() if command_name in ["!tr", "!ping", "!hello", "!nodes", "!status", "!whoami"]: - from src.helpers import get_env_bool env_var_name = f"ENABLE_COMMAND_{command_name.lstrip('!').upper()}" if get_env_bool(env_var_name, True): logging.info(f"Received public {command_name} from {sender_name}") @@ -339,12 +338,15 @@ def print_nodes(self): logging.info(f"- Plus {len(offline_nodes)} offline nodes") - def report_node_count(self, destination=None, channel_index=2): + def report_node_count(self, destination=None, channel_index=None): """Report the current node count to a specific channel or destination.""" if not self.init_complete or not self.interface: logging.warning("Skipping node count report: interface not ready.") return + if channel_index is None: + channel_index = get_env_int('CHANNEL_FOR_NODE_TOTAL_BROADCAST', 2) + online_nodes = self.node_info.get_online_nodes() count = len(online_nodes) @@ -360,7 +362,6 @@ def report_node_count(self, destination=None, channel_index=2): if destination: self.interface.sendText(message, destinationId=destination, wantAck=True) else: - # Default to Channel 2 (GregPrivate) self.interface.sendText(message, channelIndex=channel_index, wantAck=True) except Exception as e: logging.error(f"Failed to report node count: {e}") @@ -389,8 +390,9 @@ def get_global_context(self): def start_scheduler(self): schedule.every().day.at("00:00").do(self.node_info.reset_packets_today) - schedule.every(3).hours.do(self.report_node_count) - schedule.every(1).minutes.do(self.check_for_zero_nodes) + if get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True): + schedule.every(3).hours.do(self.report_node_count) + schedule.every(1).minutes.do(self.check_for_zero_nodes) while True: schedule.run_pending() try: @@ -402,4 +404,4 @@ def get_node_by_short_name(self, short_name: str) -> MeshNode.User | None: for node in self.node_db.list_nodes(): if node.short_name.lower() == short_name.lower(): return node - return None \ No newline at end of file + return None diff --git a/src/commands/status.py b/src/commands/status.py index bcbeb21..d7e9642 100644 --- a/src/commands/status.py +++ b/src/commands/status.py @@ -22,7 +22,7 @@ def handle_packet(self, packet): status = self.bot.proxy.get_status() if isinstance(status, dict): state = "Online" if status['connected'] else "Reconnecting" - proxy_info = f"{state}, {status['clients']} clients, last radio data {status['silence_secs']}s ago" + proxy_info = f"{state}, {status['clients']} clients, {status['cached_kb']}KB cache, last radio {status['silence_secs']}s ago" else: proxy_info = status diff --git a/src/helpers.py b/src/helpers.py index 0c6fc77..764cd45 100644 --- a/src/helpers.py +++ b/src/helpers.py @@ -11,6 +11,16 @@ def get_env_bool(name: str, default: bool = True) -> bool: return value.lower() in ('true', '1', 't', 'y', 'yes') +def get_env_int(name: str, default: int) -> int: + value = os.getenv(name) + if value is None: + return default + try: + return int(value) + except (ValueError, TypeError): + return default + + def pretty_print_last_heard(last_heard_timestamp: int | datetime) -> str: if not isinstance(last_heard_timestamp, datetime): last_heard = datetime.fromtimestamp(last_heard_timestamp, timezone.utc) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 456d932..d017926 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -3,6 +3,7 @@ import threading import logging import time +from collections import deque class TcpProxy: def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_port=4403): @@ -14,9 +15,15 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.target_socket = None self.clients = [] self.running = False - self.init_buffer = b'' - self.init_buffer_done = False - self.buffer_time = 5.0 # seconds to buffer startup data (increased for safety) + + # Buffer for the initial handshake/config (first 64KB of the session) + self.handshake_buffer = b'' + self.handshake_max = 65536 + + # Rolling buffer for recent data (last 512KB) + self.rolling_buffer = deque(maxlen=524288) + + self.last_target_activity = time.time() def start(self): self.running = True @@ -41,11 +48,12 @@ def get_status(self): if not self.running: return "Proxy: Offline" - silence = time.time() - self.last_target_activity if hasattr(self, 'last_target_activity') else 0 + silence = time.time() - self.last_target_activity return { "connected": self.target_socket is not None and self.target_socket.fileno() != -1, "clients": len(self.clients), - "silence_secs": int(silence) + "silence_secs": int(silence), + "cached_kb": (len(self.handshake_buffer) + len(self.rolling_buffer)) // 1024 } def _run(self): @@ -69,6 +77,7 @@ def _run(self): self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.target_socket.connect((self.target_host, self.target_port)) logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") + self.last_target_activity = time.time() break except Exception as e: logging.error(f"Failed to connect to target ({self.target_host}): {e}. Retrying in {backoff}s...") @@ -78,22 +87,17 @@ def _run(self): if not self.running: return - start_time = time.time() - last_target_activity = time.time() watchdog_timeout = 300.0 # Reconnect if no data from target for 5 minutes last_heartbeat_log = time.time() while self.running: try: - # Filter out closed sockets from inputs - # We rebuild the list of inputs every time to ensure we are using the current target_socket - # (which might have changed after a reconnect) + # Rebuild the list of inputs every time inputs = [self.server_socket, self.target_socket] current_inputs = [s for s in inputs + self.clients if s and s.fileno() != -1] readable, _, _ = select.select(current_inputs, [], [], 1.0) except Exception as e: logging.error(f"Select error: {e}") - # Clean up closed sockets from our list self.clients = [c for c in self.clients if c.fileno() != -1] continue @@ -101,12 +105,12 @@ def _run(self): # Heartbeat Logging & Watchdog Check if current_time - last_heartbeat_log > 60.0: - silence_duration = current_time - last_target_activity + silence_duration = current_time - self.last_target_activity logging.info(f"Proxy Heartbeat: Connected. Last data from radio {silence_duration:.1f}s ago. Clients: {len(self.clients)}") last_heartbeat_log = current_time # Watchdog: Force reconnect if silence is too long - if current_time - last_target_activity > watchdog_timeout: + if current_time - self.last_target_activity > watchdog_timeout: logging.warning(f"Watchdog: No data from radio for {watchdog_timeout}s. Forcing reconnect...") try: self.target_socket.close() @@ -121,45 +125,49 @@ def _run(self): self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.target_socket.connect((self.target_host, self.target_port)) logging.info("Watchdog: Reconnected to target successfully.") - last_target_activity = time.time() # Reset timer + self.last_target_activity = time.time() reconnected = True except Exception as ex: logging.error(f"Watchdog reconnect failed: {ex}. Retrying in {backoff}s...") time.sleep(backoff) backoff = min(backoff * 2, 10) - # Check for init buffer timeout - if not self.init_buffer_done and (current_time - start_time > self.buffer_time): - self.init_buffer_done = True - if self.init_buffer: - logging.info(f"Init buffer capture finished. Size: {len(self.init_buffer)} bytes") - for sock in readable: if sock is self.server_socket: try: client_socket, addr = self.server_socket.accept() logging.info(f"New proxy connection from {addr}") self.clients.append(client_socket) - # Replay init buffer - if self.init_buffer: + + # Replay buffers to the new client + # 1. Replay handshake buffer + if self.handshake_buffer: try: - client_socket.sendall(self.init_buffer) - logging.info(f"Sent {len(self.init_buffer)} bytes of cached init data to {addr}") + client_socket.sendall(self.handshake_buffer) except Exception as e: - logging.error(f"Error sending init buffer to client: {e}") + logging.error(f"Error sending handshake buffer to client: {e}") + + # 2. Replay rolling buffer + if self.rolling_buffer: + try: + # Convert deque to bytes + rolling_data = bytes(self.rolling_buffer) + client_socket.sendall(rolling_data) + logging.info(f"Sent {len(self.handshake_buffer)} bytes handshake and {len(rolling_data)} bytes rolling cache to {addr}") + except Exception as e: + logging.error(f"Error sending rolling buffer to client: {e}") + except Exception as e: logging.error(f"Error accepting connection: {e}") elif sock is self.target_socket: - last_target_activity = time.time() # Update activity timestamp + self.last_target_activity = time.time() try: - data = self.target_socket.recv(4096) + data = self.target_socket.recv(16384) if not data: logging.warning("Target closed connection. Restarting proxy connection...") - # Close the target socket self.target_socket.close() - # Attempt to reconnect loop reconnected = False backoff = 1 while self.running and not reconnected: @@ -168,17 +176,20 @@ def _run(self): self.target_socket.connect((self.target_host, self.target_port)) logging.info("Reconnected to target.") reconnected = True - # We don't reset inputs because target_socket is updated except: time.sleep(backoff) backoff = min(backoff * 2, 30) if not reconnected: - self.running = False # Give up - break # Break the inner loop to refresh select() with new socket + self.running = False + break + + # Update buffers + if len(self.handshake_buffer) < self.handshake_max: + to_add = data[:self.handshake_max - len(self.handshake_buffer)] + self.handshake_buffer += to_add - if not self.init_buffer_done: - self.init_buffer += data + self.rolling_buffer.extend(data) # Broadcast to all clients for client in self.clients[:]: @@ -193,12 +204,7 @@ def _run(self): pass except Exception as e: logging.error(f"Error reading from target: {e}") - # We should probably attempt reconnect here too, but for simplicity let's break - # and let the user restart if it's a hard fail. - # Or better, treating it as a disconnect: self.target_socket.close() - # Simple reconnect attempt (blocking) - ideally this would be async but - # blocking here for a few seconds is better than crashing try: time.sleep(5) self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -210,7 +216,7 @@ def _run(self): else: # Data from a client try: - data = sock.recv(4096) + data = sock.recv(16384) if not data: if sock in self.clients: self.clients.remove(sock) @@ -221,13 +227,11 @@ def _run(self): self.target_socket.sendall(data) except Exception as e: logging.error(f"Error sending to target: {e}. Attempting to reconnect...") - # Force a reconnection attempt try: self.target_socket.close() except: pass - # Reconnect logic reconnected = False backoff = 1 while self.running and not reconnected: @@ -235,7 +239,6 @@ def _run(self): self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.target_socket.connect((self.target_host, self.target_port)) logging.info("Reconnected to target successfully.") - # Resend the data that failed self.target_socket.sendall(data) reconnected = True except Exception as ex: From 0610e25a590e81beb82b219b19ff97a40c1db262 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Feb 2026 12:47:38 +0000 Subject: [PATCH 02/93] Refine TcpProxy buffering: reduce handshake size, add pacing for replays, and reset buffers on target reconnect --- src/tcp_proxy.py | 140 ++++++++++++++++++----------------------------- 1 file changed, 53 insertions(+), 87 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index d017926..d7929d2 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -16,12 +16,12 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.clients = [] self.running = False - # Buffer for the initial handshake/config (first 64KB of the session) + # Buffer for the initial handshake/config (captured at start of radio connection) self.handshake_buffer = b'' - self.handshake_max = 65536 + self.handshake_max = 16384 # 16KB is plenty for the initial protobuf sync - # Rolling buffer for recent data (last 512KB) - self.rolling_buffer = deque(maxlen=524288) + # Rolling buffer for recent data (last 256KB for history) + self.rolling_buffer = deque(maxlen=262144) self.last_target_activity = time.time() @@ -56,6 +56,26 @@ def get_status(self): "cached_kb": (len(self.handshake_buffer) + len(self.rolling_buffer)) // 1024 } + def _connect_to_target(self): + """Internal helper to connect and reset buffers""" + backoff = 1 + while self.running: + try: + # Reset buffers on new connection to ensure we capture fresh handshake + self.handshake_buffer = b'' + self.rolling_buffer.clear() + + self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.target_socket.connect((self.target_host, self.target_port)) + logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") + self.last_target_activity = time.time() + return True + except Exception as e: + logging.error(f"Failed to connect to target ({self.target_host}): {e}. Retrying in {backoff}s...") + time.sleep(backoff) + backoff = min(backoff * 2, 30) + return False + def _run(self): logging.info(f"Starting TCP Proxy on {self.listen_host}:{self.listen_port} -> {self.target_host}:{self.target_port}") @@ -70,29 +90,14 @@ def _run(self): self.server_socket.listen(5) - # Connect to target - backoff = 1 - while self.running: - try: - self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.target_socket.connect((self.target_host, self.target_port)) - logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") - self.last_target_activity = time.time() - break - except Exception as e: - logging.error(f"Failed to connect to target ({self.target_host}): {e}. Retrying in {backoff}s...") - time.sleep(backoff) - backoff = min(backoff * 2, 60) - - if not self.running: + if not self._connect_to_target(): return - watchdog_timeout = 300.0 # Reconnect if no data from target for 5 minutes + watchdog_timeout = 300.0 last_heartbeat_log = time.time() while self.running: try: - # Rebuild the list of inputs every time inputs = [self.server_socket, self.target_socket] current_inputs = [s for s in inputs + self.clients if s and s.fileno() != -1] readable, _, _ = select.select(current_inputs, [], [], 1.0) @@ -103,34 +108,16 @@ def _run(self): current_time = time.time() - # Heartbeat Logging & Watchdog Check if current_time - last_heartbeat_log > 60.0: silence_duration = current_time - self.last_target_activity logging.info(f"Proxy Heartbeat: Connected. Last data from radio {silence_duration:.1f}s ago. Clients: {len(self.clients)}") last_heartbeat_log = current_time - # Watchdog: Force reconnect if silence is too long if current_time - self.last_target_activity > watchdog_timeout: logging.warning(f"Watchdog: No data from radio for {watchdog_timeout}s. Forcing reconnect...") - try: - self.target_socket.close() - except: - pass - - # Reconnect logic - reconnected = False - backoff = 1 - while self.running and not reconnected: - try: - self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.target_socket.connect((self.target_host, self.target_port)) - logging.info("Watchdog: Reconnected to target successfully.") - self.last_target_activity = time.time() - reconnected = True - except Exception as ex: - logging.error(f"Watchdog reconnect failed: {ex}. Retrying in {backoff}s...") - time.sleep(backoff) - backoff = min(backoff * 2, 10) + try: self.target_socket.close() + except: pass + self._connect_to_target() for sock in readable: if sock is self.server_socket: @@ -139,23 +126,26 @@ def _run(self): logging.info(f"New proxy connection from {addr}") self.clients.append(client_socket) - # Replay buffers to the new client - # 1. Replay handshake buffer - if self.handshake_buffer: + # Replay buffers with pacing + def replay(): try: - client_socket.sendall(self.handshake_buffer) - except Exception as e: - logging.error(f"Error sending handshake buffer to client: {e}") + # 1. Handshake (essential config) + if self.handshake_buffer: + client_socket.sendall(self.handshake_buffer) + time.sleep(0.1) # Small pause - # 2. Replay rolling buffer - if self.rolling_buffer: - try: - # Convert deque to bytes - rolling_data = bytes(self.rolling_buffer) - client_socket.sendall(rolling_data) - logging.info(f"Sent {len(self.handshake_buffer)} bytes handshake and {len(rolling_data)} bytes rolling cache to {addr}") + # 2. Rolling history in chunks + if self.rolling_buffer: + rolling_data = bytes(self.rolling_buffer) + chunk_size = 4096 + for i in range(0, len(rolling_data), chunk_size): + client_socket.sendall(rolling_data[i:i+chunk_size]) + time.sleep(0.01) # 10ms pacing between chunks + logging.info(f"Replayed {len(self.handshake_buffer)}b handshake and {len(rolling_data)}b history to {addr}") except Exception as e: - logging.error(f"Error sending rolling buffer to client: {e}") + logging.debug(f"Client {addr} disconnected during replay: {e}") + + threading.Thread(target=replay, daemon=True).start() except Exception as e: logging.error(f"Error accepting connection: {e}") @@ -165,53 +155,29 @@ def _run(self): try: data = self.target_socket.recv(16384) if not data: - logging.warning("Target closed connection. Restarting proxy connection...") + logging.warning("Target closed connection. Reconnecting...") self.target_socket.close() - - reconnected = False - backoff = 1 - while self.running and not reconnected: - try: - self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.target_socket.connect((self.target_host, self.target_port)) - logging.info("Reconnected to target.") - reconnected = True - except: - time.sleep(backoff) - backoff = min(backoff * 2, 30) - - if not reconnected: - self.running = False + self._connect_to_target() break - # Update buffers if len(self.handshake_buffer) < self.handshake_max: to_add = data[:self.handshake_max - len(self.handshake_buffer)] self.handshake_buffer += to_add self.rolling_buffer.extend(data) - # Broadcast to all clients for client in self.clients[:]: try: client.sendall(data) except: - if client in self.clients: - self.clients.remove(client) - try: - client.close() - except: - pass + if client in self.clients: self.clients.remove(client) + try: client.close() + except: pass except Exception as e: logging.error(f"Error reading from target: {e}") self.target_socket.close() - try: - time.sleep(5) - self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.target_socket.connect((self.target_host, self.target_port)) - logging.info("Reconnected to target after error.") - except: - logging.error("Failed to reconnect immediately.") + time.sleep(2) + self._connect_to_target() else: # Data from a client From 5b20701c1c74d46a09b1349a0b0f02765e919c65 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Feb 2026 16:38:47 +0000 Subject: [PATCH 03/93] Fix TCP Proxy: Implement packet framing to ensure stream integrity and resolve 'Firmware Update Required' error --- src/commands/status.py | 2 +- src/tcp_proxy.py | 141 ++++++++++++++++++++++++++++------------- 2 files changed, 97 insertions(+), 46 deletions(-) diff --git a/src/commands/status.py b/src/commands/status.py index d7e9642..644c39a 100644 --- a/src/commands/status.py +++ b/src/commands/status.py @@ -22,7 +22,7 @@ def handle_packet(self, packet): status = self.bot.proxy.get_status() if isinstance(status, dict): state = "Online" if status['connected'] else "Reconnecting" - proxy_info = f"{state}, {status['clients']} clients, {status['cached_kb']}KB cache, last radio {status['silence_secs']}s ago" + proxy_info = f"{state}, {status['clients']} clients, {status['cached_packets']} pkts cached, last radio {status['silence_secs']}s ago" else: proxy_info = status diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index d7929d2..d7e95b4 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -5,6 +5,13 @@ import time from collections import deque +import socket +import select +import threading +import logging +import time +from collections import deque + class TcpProxy: def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_port=4403): self.target_host = target_host @@ -16,12 +23,15 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.clients = [] self.running = False - # Buffer for the initial handshake/config (captured at start of radio connection) - self.handshake_buffer = b'' - self.handshake_max = 16384 # 16KB is plenty for the initial protobuf sync + # We now store full packets instead of raw bytes to ensure stream integrity + self.handshake_packets = [] + self.handshake_max_count = 20 # First 20 packets are usually the config sync - # Rolling buffer for recent data (last 256KB for history) - self.rolling_buffer = deque(maxlen=262144) + # Rolling history of last 100 packets + self.rolling_packets = deque(maxlen=100) + + # Buffer for incoming raw bytes from the radio + self.in_buffer = b'' self.last_target_activity = time.time() @@ -34,15 +44,11 @@ def start(self): def stop(self): self.running = False if self.server_socket: - try: - self.server_socket.close() - except: - pass + try: self.server_socket.close() + except: pass if self.target_socket: - try: - self.target_socket.close() - except: - pass + try: self.target_socket.close() + except: pass def get_status(self): if not self.running: @@ -53,7 +59,7 @@ def get_status(self): "connected": self.target_socket is not None and self.target_socket.fileno() != -1, "clients": len(self.clients), "silence_secs": int(silence), - "cached_kb": (len(self.handshake_buffer) + len(self.rolling_buffer)) // 1024 + "cached_packets": len(self.handshake_packets) + len(self.rolling_packets) } def _connect_to_target(self): @@ -62,8 +68,9 @@ def _connect_to_target(self): while self.running: try: # Reset buffers on new connection to ensure we capture fresh handshake - self.handshake_buffer = b'' - self.rolling_buffer.clear() + self.handshake_packets = [] + self.rolling_packets.clear() + self.in_buffer = b'' self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.target_socket.connect((self.target_host, self.target_port)) @@ -76,6 +83,48 @@ def _connect_to_target(self): backoff = min(backoff * 2, 30) return False + def _process_radio_data(self, data): + """Frames raw bytes into Meshtastic packets and caches them""" + self.in_buffer += data + + while len(self.in_buffer) >= 4: + # Check for magic header + if self.in_buffer[0:2] != b'\x94\xc3': + # Out of sync, find next magic header + idx = self.in_buffer.find(b'\x94\xc3') + if idx == -1: + self.in_buffer = b'' + break + self.in_buffer = self.in_buffer[idx:] + continue + + # Read length (big-endian) + length = (self.in_buffer[2] << 8) | self.in_buffer[3] + total_len = length + 4 + + if len(self.in_buffer) < total_len: + # Need more data for a full packet + break + + # Extract full packet + packet = self.in_buffer[:total_len] + self.in_buffer = self.in_buffer[total_len:] + + # Update cache + if len(self.handshake_packets) < self.handshake_max_count: + self.handshake_packets.append(packet) + else: + self.rolling_packets.append(packet) + + # Broadcast to all clients + for client in self.clients[:]: + try: + client.sendall(packet) + except: + if client in self.clients: self.clients.remove(client) + try: client.close() + except: pass + def _run(self): logging.info(f"Starting TCP Proxy on {self.listen_host}:{self.listen_port} -> {self.target_host}:{self.target_port}") @@ -126,26 +175,20 @@ def _run(self): logging.info(f"New proxy connection from {addr}") self.clients.append(client_socket) - # Replay buffers with pacing - def replay(): + # Replay full packets with pacing + def replay(target_sock, packets_to_send, client_addr): try: - # 1. Handshake (essential config) - if self.handshake_buffer: - client_socket.sendall(self.handshake_buffer) - time.sleep(0.1) # Small pause - - # 2. Rolling history in chunks - if self.rolling_buffer: - rolling_data = bytes(self.rolling_buffer) - chunk_size = 4096 - for i in range(0, len(rolling_data), chunk_size): - client_socket.sendall(rolling_data[i:i+chunk_size]) - time.sleep(0.01) # 10ms pacing between chunks - logging.info(f"Replayed {len(self.handshake_buffer)}b handshake and {len(rolling_data)}b history to {addr}") + for i, p in enumerate(packets_to_send): + target_sock.sendall(p) + # Pacing: 50ms for first few handshake packets, 10ms for history + time.sleep(0.05 if i < 10 else 0.01) + logging.info(f"Replayed {len(packets_to_send)} full packets to {client_addr}") except Exception as e: - logging.debug(f"Client {addr} disconnected during replay: {e}") + logging.debug(f"Client {client_addr} disconnected during replay: {e}") - threading.Thread(target=replay, daemon=True).start() + all_packets = self.handshake_packets + list(self.rolling_packets) + if all_packets: + threading.Thread(target=replay, args=(client_socket, all_packets, addr), daemon=True).start() except Exception as e: logging.error(f"Error accepting connection: {e}") @@ -160,25 +203,33 @@ def replay(): self._connect_to_target() break - if len(self.handshake_buffer) < self.handshake_max: - to_add = data[:self.handshake_max - len(self.handshake_buffer)] - self.handshake_buffer += to_add + self._process_radio_data(data) - self.rolling_buffer.extend(data) - - for client in self.clients[:]: - try: - client.sendall(data) - except: - if client in self.clients: self.clients.remove(client) - try: client.close() - except: pass except Exception as e: logging.error(f"Error reading from target: {e}") self.target_socket.close() time.sleep(2) self._connect_to_target() + else: + # Data from a client forwarded to target + try: + data = sock.recv(16384) + if not data: + if sock in self.clients: self.clients.remove(sock) + sock.close() + else: + try: + self.target_socket.sendall(data) + except Exception as e: + logging.error(f"Error sending to target: {e}") + self.target_socket.close() + self._connect_to_target() + except: + if sock in self.clients: self.clients.remove(sock) + try: sock.close() + except: pass + else: # Data from a client try: From 538e6f066383ac28d987b4badebf6f950f6fc768 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Feb 2026 16:41:26 +0000 Subject: [PATCH 04/93] Fix syntax error in tcp_proxy.py --- src/tcp_proxy.py | 50 ------------------------------------------------ 1 file changed, 50 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index d7e95b4..9aba1d1 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -5,13 +5,6 @@ import time from collections import deque -import socket -import select -import threading -import logging -import time -from collections import deque - class TcpProxy: def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_port=4403): self.target_host = target_host @@ -230,49 +223,6 @@ def replay(target_sock, packets_to_send, client_addr): try: sock.close() except: pass - else: - # Data from a client - try: - data = sock.recv(16384) - if not data: - if sock in self.clients: - self.clients.remove(sock) - sock.close() - else: - # Forward to target - try: - self.target_socket.sendall(data) - except Exception as e: - logging.error(f"Error sending to target: {e}. Attempting to reconnect...") - try: - self.target_socket.close() - except: - pass - - reconnected = False - backoff = 1 - while self.running and not reconnected: - try: - self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.target_socket.connect((self.target_host, self.target_port)) - logging.info("Reconnected to target successfully.") - self.target_socket.sendall(data) - reconnected = True - except Exception as ex: - logging.error(f"Reconnect failed: {ex}. Retrying in {backoff}s...") - time.sleep(backoff) - backoff = min(backoff * 2, 10) - - if not reconnected: - self.running = False - except: - if sock in self.clients: - self.clients.remove(sock) - try: - sock.close() - except: - pass - # Cleanup if self.server_socket: try: self.server_socket.close() From fec6b4bcfa5ef20033d80bbff659a87cd79fab12 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Feb 2026 16:53:12 +0000 Subject: [PATCH 05/93] Fix duplicate messages in TCP Proxy: implement client ready states and thread-safe broadcasting --- src/tcp_proxy.py | 90 +++++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 9aba1d1..4259897 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -13,15 +13,20 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.listen_port = int(listen_port) self.server_socket = None self.target_socket = None + + # List of (socket, is_ready) tuples + # is_ready=False means the client is still receiving history replay self.clients = [] + self.clients_lock = threading.Lock() + self.running = False # We now store full packets instead of raw bytes to ensure stream integrity self.handshake_packets = [] self.handshake_max_count = 20 # First 20 packets are usually the config sync - # Rolling history of last 100 packets - self.rolling_packets = deque(maxlen=100) + # Rolling history of last 20 packets (enough for a brief disconnect) + self.rolling_packets = deque(maxlen=20) # Buffer for incoming raw bytes from the radio self.in_buffer = b'' @@ -48,9 +53,12 @@ def get_status(self): return "Proxy: Offline" silence = time.time() - self.last_target_activity + with self.clients_lock: + client_count = len(self.clients) + return { "connected": self.target_socket is not None and self.target_socket.fileno() != -1, - "clients": len(self.clients), + "clients": client_count, "silence_secs": int(silence), "cached_packets": len(self.handshake_packets) + len(self.rolling_packets) } @@ -109,14 +117,21 @@ def _process_radio_data(self, data): else: self.rolling_packets.append(packet) - # Broadcast to all clients - for client in self.clients[:]: - try: - client.sendall(packet) - except: - if client in self.clients: self.clients.remove(client) - try: client.close() - except: pass + # Broadcast to all READY clients + with self.clients_lock: + for client_sock, is_ready in self.clients[:]: + if not is_ready: + continue # Skip clients still receiving history + try: + client_sock.sendall(packet) + except: + self._remove_client(client_sock) + + def _remove_client(self, sock): + with self.clients_lock: + self.clients = [c for c in self.clients if c[0] is not sock] + try: sock.close() + except: pass def _run(self): logging.info(f"Starting TCP Proxy on {self.listen_host}:{self.listen_port} -> {self.target_host}:{self.target_port}") @@ -140,19 +155,21 @@ def _run(self): while self.running: try: - inputs = [self.server_socket, self.target_socket] - current_inputs = [s for s in inputs + self.clients if s and s.fileno() != -1] - readable, _, _ = select.select(current_inputs, [], [], 1.0) + with self.clients_lock: + client_socks = [c[0] for c in self.clients if c[0].fileno() != -1] + + inputs = [self.server_socket, self.target_socket] + client_socks + readable, _, _ = select.select(inputs, [], [], 1.0) except Exception as e: logging.error(f"Select error: {e}") - self.clients = [c for c in self.clients if c.fileno() != -1] continue current_time = time.time() if current_time - last_heartbeat_log > 60.0: - silence_duration = current_time - self.last_target_activity - logging.info(f"Proxy Heartbeat: Connected. Last data from radio {silence_duration:.1f}s ago. Clients: {len(self.clients)}") + with self.clients_lock: + client_count = len(self.clients) + logging.info(f"Proxy Heartbeat: Connected. Last data from radio {current_time - self.last_target_activity:.1f}s ago. Clients: {client_count}") last_heartbeat_log = current_time if current_time - self.last_target_activity > watchdog_timeout: @@ -166,22 +183,35 @@ def _run(self): try: client_socket, addr = self.server_socket.accept() logging.info(f"New proxy connection from {addr}") - self.clients.append(client_socket) - # Replay full packets with pacing + # Add to clients as NOT ready + with self.clients_lock: + self.clients.append((client_socket, False)) + + # Snapshot packets to replay + history = self.handshake_packets + list(self.rolling_packets) + + # Replay thread def replay(target_sock, packets_to_send, client_addr): try: for i, p in enumerate(packets_to_send): target_sock.sendall(p) # Pacing: 50ms for first few handshake packets, 10ms for history time.sleep(0.05 if i < 10 else 0.01) - logging.info(f"Replayed {len(packets_to_send)} full packets to {client_addr}") + + # Mark as READY for live broadcasts + with self.clients_lock: + for i, (c_sock, _) in enumerate(self.clients): + if c_sock is target_sock: + self.clients[i] = (c_sock, True) + break + + logging.info(f"Replayed {len(packets_to_send)} packets to {client_addr}. Now receiving live data.") except Exception as e: logging.debug(f"Client {client_addr} disconnected during replay: {e}") + self._remove_client(target_sock) - all_packets = self.handshake_packets + list(self.rolling_packets) - if all_packets: - threading.Thread(target=replay, args=(client_socket, all_packets, addr), daemon=True).start() + threading.Thread(target=replay, args=(client_socket, history, addr), daemon=True).start() except Exception as e: logging.error(f"Error accepting connection: {e}") @@ -209,8 +239,7 @@ def replay(target_sock, packets_to_send, client_addr): try: data = sock.recv(16384) if not data: - if sock in self.clients: self.clients.remove(sock) - sock.close() + self._remove_client(sock) else: try: self.target_socket.sendall(data) @@ -219,9 +248,7 @@ def replay(target_sock, packets_to_send, client_addr): self.target_socket.close() self._connect_to_target() except: - if sock in self.clients: self.clients.remove(sock) - try: sock.close() - except: pass + self._remove_client(sock) # Cleanup if self.server_socket: @@ -230,6 +257,7 @@ def replay(target_sock, packets_to_send, client_addr): if self.target_socket: try: self.target_socket.close() except: pass - for c in self.clients: - try: c.close() - except: pass + with self.clients_lock: + for c_sock, _ in self.clients: + try: c_sock.close() + except: pass From 032a965f8ea6f0bf67c20ddae3579f7b86bf6cc0 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Feb 2026 17:04:03 +0000 Subject: [PATCH 06/93] Fix message sending: remove client ready states to prevent dropped ACKs and increase handshake cache --- src/tcp_proxy.py | 66 ++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 4259897..625006b 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -14,19 +14,17 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.server_socket = None self.target_socket = None - # List of (socket, is_ready) tuples - # is_ready=False means the client is still receiving history replay self.clients = [] self.clients_lock = threading.Lock() self.running = False - # We now store full packets instead of raw bytes to ensure stream integrity + # Increased handshake cache to ensure full config is captured self.handshake_packets = [] - self.handshake_max_count = 20 # First 20 packets are usually the config sync + self.handshake_max_count = 40 - # Rolling history of last 20 packets (enough for a brief disconnect) - self.rolling_packets = deque(maxlen=20) + # Rolling history of last 50 packets + self.rolling_packets = deque(maxlen=50) # Buffer for incoming raw bytes from the radio self.in_buffer = b'' @@ -64,13 +62,12 @@ def get_status(self): } def _connect_to_target(self): - """Internal helper to connect and reset buffers""" + """Internal helper to connect to radio""" backoff = 1 while self.running: try: - # Reset buffers on new connection to ensure we capture fresh handshake - self.handshake_packets = [] - self.rolling_packets.clear() + # We NO LONGER clear handshake/rolling buffers here + # so that a radio reboot doesn't break client history. self.in_buffer = b'' self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -91,7 +88,6 @@ def _process_radio_data(self, data): while len(self.in_buffer) >= 4: # Check for magic header if self.in_buffer[0:2] != b'\x94\xc3': - # Out of sync, find next magic header idx = self.in_buffer.find(b'\x94\xc3') if idx == -1: self.in_buffer = b'' @@ -104,7 +100,6 @@ def _process_radio_data(self, data): total_len = length + 4 if len(self.in_buffer) < total_len: - # Need more data for a full packet break # Extract full packet @@ -117,11 +112,9 @@ def _process_radio_data(self, data): else: self.rolling_packets.append(packet) - # Broadcast to all READY clients + # Broadcast to all clients immediately with self.clients_lock: - for client_sock, is_ready in self.clients[:]: - if not is_ready: - continue # Skip clients still receiving history + for client_sock in self.clients[:]: try: client_sock.sendall(packet) except: @@ -129,7 +122,8 @@ def _process_radio_data(self, data): def _remove_client(self, sock): with self.clients_lock: - self.clients = [c for c in self.clients if c[0] is not sock] + if sock in self.clients: + self.clients.remove(sock) try: sock.close() except: pass @@ -156,7 +150,7 @@ def _run(self): while self.running: try: with self.clients_lock: - client_socks = [c[0] for c in self.clients if c[0].fileno() != -1] + client_socks = [s for s in self.clients if s.fileno() != -1] inputs = [self.server_socket, self.target_socket] + client_socks readable, _, _ = select.select(inputs, [], [], 1.0) @@ -184,34 +178,30 @@ def _run(self): client_socket, addr = self.server_socket.accept() logging.info(f"New proxy connection from {addr}") - # Add to clients as NOT ready with self.clients_lock: - self.clients.append((client_socket, False)) + self.clients.append(client_socket) - # Snapshot packets to replay - history = self.handshake_packets + list(self.rolling_packets) - - # Replay thread - def replay(target_sock, packets_to_send, client_addr): + # Replay full packets with pacing in a thread + def replay(target_sock, handshake, history, client_addr): try: - for i, p in enumerate(packets_to_send): + # Replay handshake first + for p in handshake: target_sock.sendall(p) - # Pacing: 50ms for first few handshake packets, 10ms for history - time.sleep(0.05 if i < 10 else 0.01) + time.sleep(0.02) - # Mark as READY for live broadcasts - with self.clients_lock: - for i, (c_sock, _) in enumerate(self.clients): - if c_sock is target_sock: - self.clients[i] = (c_sock, True) - break - - logging.info(f"Replayed {len(packets_to_send)} packets to {client_addr}. Now receiving live data.") + # Replay recent history + for p in history: + target_sock.sendall(p) + time.sleep(0.01) + + logging.info(f"Replayed {len(handshake) + len(history)} packets to {client_addr}") except Exception as e: logging.debug(f"Client {client_addr} disconnected during replay: {e}") self._remove_client(target_sock) - threading.Thread(target=replay, args=(client_socket, history, addr), daemon=True).start() + h_snapshot = list(self.handshake_packets) + r_snapshot = list(self.rolling_packets) + threading.Thread(target=replay, args=(client_socket, h_snapshot, r_snapshot, addr), daemon=True).start() except Exception as e: logging.error(f"Error accepting connection: {e}") @@ -258,6 +248,6 @@ def replay(target_sock, packets_to_send, client_addr): try: self.target_socket.close() except: pass with self.clients_lock: - for c_sock, _ in self.clients: + for c_sock in self.clients: try: c_sock.close() except: pass From c180175bf099de9db1e0266e49bc724c34707c73 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Feb 2026 17:10:24 +0000 Subject: [PATCH 07/93] Fix NoneType crash and improve connection stability: implement 2s replay delay and handshake pacing --- src/tcp_proxy.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 625006b..2904c17 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -19,11 +19,11 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.running = False - # Increased handshake cache to ensure full config is captured + # Handshake: The first 40 packets from a fresh radio connection self.handshake_packets = [] self.handshake_max_count = 40 - # Rolling history of last 50 packets + # History: The last 50 packets seen self.rolling_packets = deque(maxlen=50) # Buffer for incoming raw bytes from the radio @@ -66,8 +66,10 @@ def _connect_to_target(self): backoff = 1 while self.running: try: - # We NO LONGER clear handshake/rolling buffers here - # so that a radio reboot doesn't break client history. + # If we are reconnecting to the radio, the handshake MUST be cleared + # because the radio will start a new session with new IDs. + # We keep rolling_packets (history) to bridge the gap for apps. + self.handshake_packets = [] self.in_buffer = b'' self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -86,7 +88,6 @@ def _process_radio_data(self, data): self.in_buffer += data while len(self.in_buffer) >= 4: - # Check for magic header if self.in_buffer[0:2] != b'\x94\xc3': idx = self.in_buffer.find(b'\x94\xc3') if idx == -1: @@ -95,24 +96,23 @@ def _process_radio_data(self, data): self.in_buffer = self.in_buffer[idx:] continue - # Read length (big-endian) length = (self.in_buffer[2] << 8) | self.in_buffer[3] total_len = length + 4 if len(self.in_buffer) < total_len: break - # Extract full packet packet = self.in_buffer[:total_len] self.in_buffer = self.in_buffer[total_len:] - # Update cache + # Update handshake cache if we're still in the start of the session if len(self.handshake_packets) < self.handshake_max_count: self.handshake_packets.append(packet) - else: - self.rolling_packets.append(packet) - # Broadcast to all clients immediately + # Always update rolling history + self.rolling_packets.append(packet) + + # Broadcast to all clients with self.clients_lock: for client_sock in self.clients[:]: try: @@ -181,18 +181,22 @@ def _run(self): with self.clients_lock: self.clients.append(client_socket) - # Replay full packets with pacing in a thread + # Replay thread with DELAY and PACING def replay(target_sock, handshake, history, client_addr): try: - # Replay handshake first + # DELAY: Give the client library 2 seconds to initialize its internal + # structures before we flood it with data. Fixes NoneType errors. + time.sleep(2.0) + + # PACING: Send handshake packets slowly for p in handshake: target_sock.sendall(p) - time.sleep(0.02) + time.sleep(0.1) # 100ms pacing for handshake - # Replay recent history + # Send history for p in history: target_sock.sendall(p) - time.sleep(0.01) + time.sleep(0.02) # 20ms pacing for history logging.info(f"Replayed {len(handshake) + len(history)} packets to {client_addr}") except Exception as e: From c7ca99ba88020f2a1701f471a05ff42c00026434 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 13 Feb 2026 15:37:49 +0000 Subject: [PATCH 08/93] Improve Proxy stability: enable TCP Keep-Alives and implement paced output to radio to prevent buffer overflow --- src/tcp_proxy.py | 52 +++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 2904c17..44f8030 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -19,9 +19,9 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.running = False - # Handshake: The first 40 packets from a fresh radio connection + # Handshake: The first 50 packets from a fresh radio connection self.handshake_packets = [] - self.handshake_max_count = 40 + self.handshake_max_count = 50 # History: The last 50 packets seen self.rolling_packets = deque(maxlen=50) @@ -62,18 +62,26 @@ def get_status(self): } def _connect_to_target(self): - """Internal helper to connect to radio""" + """Internal helper to connect to radio with Keep-Alives""" backoff = 1 while self.running: try: - # If we are reconnecting to the radio, the handshake MUST be cleared - # because the radio will start a new session with new IDs. - # We keep rolling_packets (history) to bridge the gap for apps. - self.handshake_packets = [] + self.handshake_packets = [] self.in_buffer = b'' - self.target_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.target_socket.connect((self.target_host, self.target_port)) + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + # Enable TCP Keep-Alives to prevent the 60s timeout + sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) + # Linux-specific keepalive settings (if available) + try: + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30) + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10) + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3) + except: pass + + sock.connect((self.target_host, self.target_port)) + self.target_socket = sock logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") self.last_target_activity = time.time() return True @@ -105,14 +113,10 @@ def _process_radio_data(self, data): packet = self.in_buffer[:total_len] self.in_buffer = self.in_buffer[total_len:] - # Update handshake cache if we're still in the start of the session if len(self.handshake_packets) < self.handshake_max_count: self.handshake_packets.append(packet) - - # Always update rolling history self.rolling_packets.append(packet) - # Broadcast to all clients with self.clients_lock: for client_sock in self.clients[:]: try: @@ -181,23 +185,15 @@ def _run(self): with self.clients_lock: self.clients.append(client_socket) - # Replay thread with DELAY and PACING def replay(target_sock, handshake, history, client_addr): try: - # DELAY: Give the client library 2 seconds to initialize its internal - # structures before we flood it with data. Fixes NoneType errors. time.sleep(2.0) - - # PACING: Send handshake packets slowly for p in handshake: target_sock.sendall(p) - time.sleep(0.1) # 100ms pacing for handshake - - # Send history + time.sleep(0.1) for p in history: target_sock.sendall(p) - time.sleep(0.02) # 20ms pacing for history - + time.sleep(0.02) logging.info(f"Replayed {len(handshake) + len(history)} packets to {client_addr}") except Exception as e: logging.debug(f"Client {client_addr} disconnected during replay: {e}") @@ -219,9 +215,7 @@ def replay(target_sock, handshake, history, client_addr): self.target_socket.close() self._connect_to_target() break - self._process_radio_data(data) - except Exception as e: logging.error(f"Error reading from target: {e}") self.target_socket.close() @@ -229,14 +223,18 @@ def replay(target_sock, handshake, history, client_addr): self._connect_to_target() else: - # Data from a client forwarded to target + # Data from a client forwarded to radio with PACING try: data = sock.recv(16384) if not data: self._remove_client(sock) else: try: - self.target_socket.sendall(data) + # Chunk data to the radio (Meshtastic buffers are small) + chunk_size = 512 + for i in range(0, len(data), chunk_size): + self.target_socket.sendall(data[i:i+chunk_size]) + time.sleep(0.01) # 10ms delay between chunks except Exception as e: logging.error(f"Error sending to target: {e}") self.target_socket.close() From 9a9166213748a524d9c6597ce50ed6e84d399d59 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 13 Feb 2026 15:42:43 +0000 Subject: [PATCH 09/93] Fix proxy sync: disconnect all clients on radio loss to force re-sync, and skip history replay for local bot client --- src/tcp_proxy.py | 51 +++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 44f8030..b96a94f 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -39,6 +39,7 @@ def start(self): def stop(self): self.running = False + self._disconnect_all_clients() if self.server_socket: try: self.server_socket.close() except: pass @@ -61,19 +62,28 @@ def get_status(self): "cached_packets": len(self.handshake_packets) + len(self.rolling_packets) } + def _disconnect_all_clients(self): + """Force all clients to disconnect so they can re-sync with a new radio session""" + with self.clients_lock: + for sock in self.clients: + try: sock.close() + except: pass + self.clients = [] + logging.info("Disconnected all proxy clients to force re-sync.") + def _connect_to_target(self): """Internal helper to connect to radio with Keep-Alives""" + # If we are reconnecting, we MUST clear handshake and drop clients + # because the new session will have different internal IDs. + self.handshake_packets = [] + self.in_buffer = b'' + self._disconnect_all_clients() + backoff = 1 while self.running: try: - self.handshake_packets = [] - self.in_buffer = b'' - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - - # Enable TCP Keep-Alives to prevent the 60s timeout sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) - # Linux-specific keepalive settings (if available) try: sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30) sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10) @@ -185,15 +195,22 @@ def _run(self): with self.clients_lock: self.clients.append(client_socket) + # Replay logic def replay(target_sock, handshake, history, client_addr): + # Don't replay history to the Bot itself (localhost) + # The bot handles its own sync and history can cause crashes + if client_addr[0] in ('127.0.0.1', 'localhost'): + logging.info(f"Skipping history replay for local bot client {client_addr}") + return + try: - time.sleep(2.0) + time.sleep(1.0) for p in handshake: target_sock.sendall(p) - time.sleep(0.1) + time.sleep(0.05) for p in history: target_sock.sendall(p) - time.sleep(0.02) + time.sleep(0.01) logging.info(f"Replayed {len(handshake) + len(history)} packets to {client_addr}") except Exception as e: logging.debug(f"Client {client_addr} disconnected during replay: {e}") @@ -211,7 +228,7 @@ def replay(target_sock, handshake, history, client_addr): try: data = self.target_socket.recv(16384) if not data: - logging.warning("Target closed connection. Reconnecting...") + logging.warning("Target closed connection. Reconnecting radio and clients...") self.target_socket.close() self._connect_to_target() break @@ -230,11 +247,10 @@ def replay(target_sock, handshake, history, client_addr): self._remove_client(sock) else: try: - # Chunk data to the radio (Meshtastic buffers are small) chunk_size = 512 for i in range(0, len(data), chunk_size): self.target_socket.sendall(data[i:i+chunk_size]) - time.sleep(0.01) # 10ms delay between chunks + time.sleep(0.01) except Exception as e: logging.error(f"Error sending to target: {e}") self.target_socket.close() @@ -243,13 +259,4 @@ def replay(target_sock, handshake, history, client_addr): self._remove_client(sock) # Cleanup - if self.server_socket: - try: self.server_socket.close() - except: pass - if self.target_socket: - try: self.target_socket.close() - except: pass - with self.clients_lock: - for c_sock in self.clients: - try: c_sock.close() - except: pass + self.stop() From 42938e11b82a1532d36e7d04fb9e8ddbc09fd755 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 14 Feb 2026 09:24:09 +0000 Subject: [PATCH 10/93] Fix node timestamp bug and implement non-blocking proxy reconnection to prevent loop hangs --- src/bot.py | 14 ++++-- src/tcp_proxy.py | 121 ++++++++++++++++++++++++++--------------------- 2 files changed, 78 insertions(+), 57 deletions(-) diff --git a/src/bot.py b/src/bot.py index 9dfdc4d..4cfa2a3 100644 --- a/src/bot.py +++ b/src/bot.py @@ -301,9 +301,16 @@ def on_node_updated(self, node, interface): if node['user'] is not None: mesh_node = MeshNode.from_dict(node) last_heard_int = node.get('lastHeard', 0) - last_heard = datetime.fromtimestamp(last_heard_int, tz=timezone.utc) + + # Fix: Don't update if timestamp is 0 or older than what we have + if last_heard_int > 0: + last_heard = datetime.fromtimestamp(last_heard_int, tz=timezone.utc) + existing_last_heard = self.node_info.get_last_heard(mesh_node.user.id) + + if not existing_last_heard or last_heard > existing_last_heard: + self.node_info.update_last_heard(mesh_node.user.id, last_heard) + self.node_db.store_node(mesh_node) - self.node_info.update_last_heard(mesh_node.user.id, last_heard) for storage_api in self.storage_apis: try: @@ -316,7 +323,8 @@ def on_node_updated(self, node, interface): pass if self.init_complete: - last_heard_str = pretty_print_last_heard(last_heard) + current_last_heard = self.node_info.get_last_heard(mesh_node.user.id) + last_heard_str = pretty_print_last_heard(current_last_heard) if current_last_heard else "unknown" logging.info(f"New user: {mesh_node.user.long_name} (last heard {last_heard_str})") def print_nodes(self): diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index b96a94f..441c3dc 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -30,6 +30,7 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.in_buffer = b'' self.last_target_activity = time.time() + self.reconnecting = False def start(self): self.running = True @@ -55,8 +56,11 @@ def get_status(self): with self.clients_lock: client_count = len(self.clients) + state = "Reconnecting" if self.reconnecting else ("Online" if self.target_socket else "Offline") + return { - "connected": self.target_socket is not None and self.target_socket.fileno() != -1, + "state": state, + "connected": self.target_socket is not None and not self.reconnecting, "clients": client_count, "silence_secs": int(silence), "cached_packets": len(self.handshake_packets) + len(self.rolling_packets) @@ -72,34 +76,34 @@ def _disconnect_all_clients(self): logging.info("Disconnected all proxy clients to force re-sync.") def _connect_to_target(self): - """Internal helper to connect to radio with Keep-Alives""" - # If we are reconnecting, we MUST clear handshake and drop clients - # because the new session will have different internal IDs. + """Helper to connect to radio with Keep-Alives (Non-blocking retry)""" + # Clear state for new connection self.handshake_packets = [] self.in_buffer = b'' self._disconnect_all_clients() + self.reconnecting = True - backoff = 1 - while self.running: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(5.0) # 5s timeout for connection attempt + sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) try: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) - try: - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30) - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10) - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3) - except: pass + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30) + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10) + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3) + except: pass - sock.connect((self.target_host, self.target_port)) - self.target_socket = sock - logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") - self.last_target_activity = time.time() - return True - except Exception as e: - logging.error(f"Failed to connect to target ({self.target_host}): {e}. Retrying in {backoff}s...") - time.sleep(backoff) - backoff = min(backoff * 2, 30) - return False + sock.connect((self.target_host, self.target_port)) + sock.settimeout(None) # Reset to blocking for select() + self.target_socket = sock + self.last_target_activity = time.time() + self.reconnecting = False + logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") + return True + except Exception as e: + logging.error(f"Failed to connect to target ({self.target_host}): {e}") + self.target_socket = None + return False def _process_radio_data(self, data): """Frames raw bytes into Meshtastic packets and caches them""" @@ -155,36 +159,53 @@ def _run(self): self.server_socket.listen(5) - if not self._connect_to_target(): - return - - watchdog_timeout = 300.0 last_heartbeat_log = time.time() + last_reconnect_attempt = 0 + watchdog_timeout = 300.0 while self.running: + current_time = time.time() + + # Reconnection logic (non-blocking) + if not self.target_socket or self.reconnecting: + if current_time - last_reconnect_attempt > 10.0: + last_reconnect_attempt = current_time + self._connect_to_target() + + # Sleep a bit to not peg CPU while radio is down + if not self.target_socket: + time.sleep(1.0) + try: with self.clients_lock: client_socks = [s for s in self.clients if s.fileno() != -1] - inputs = [self.server_socket, self.target_socket] + client_socks + inputs = [self.server_socket] + client_socks + if self.target_socket and not self.reconnecting: + inputs.append(self.target_socket) + readable, _, _ = select.select(inputs, [], [], 1.0) except Exception as e: logging.error(f"Select error: {e}") + time.sleep(0.5) continue - current_time = time.time() - + # Heartbeat Logging if current_time - last_heartbeat_log > 60.0: with self.clients_lock: client_count = len(self.clients) - logging.info(f"Proxy Heartbeat: Connected. Last data from radio {current_time - self.last_target_activity:.1f}s ago. Clients: {client_count}") + status = "Connected" if self.target_socket and not self.reconnecting else "RECONNECTING" + silence = current_time - self.last_target_activity + logging.info(f"Proxy Heartbeat: {status}. Last radio data {silence:.1f}s ago. Clients: {client_count}") last_heartbeat_log = current_time - if current_time - self.last_target_activity > watchdog_timeout: - logging.warning(f"Watchdog: No data from radio for {watchdog_timeout}s. Forcing reconnect...") - try: self.target_socket.close() - except: pass - self._connect_to_target() + # Watchdog: Force reconnect if silence is too long on an "active" connection + if self.target_socket and not self.reconnecting: + if current_time - self.last_target_activity > watchdog_timeout: + logging.warning(f"Watchdog: No data from radio for {watchdog_timeout}s. Forcing reconnect...") + try: self.target_socket.close() + except: pass + self.target_socket = None # Trigger reconnect logic for sock in readable: if sock is self.server_socket: @@ -195,16 +216,11 @@ def _run(self): with self.clients_lock: self.clients.append(client_socket) - # Replay logic def replay(target_sock, handshake, history, client_addr): - # Don't replay history to the Bot itself (localhost) - # The bot handles its own sync and history can cause crashes if client_addr[0] in ('127.0.0.1', 'localhost'): - logging.info(f"Skipping history replay for local bot client {client_addr}") return - try: - time.sleep(1.0) + time.sleep(2.0) for p in handshake: target_sock.sendall(p) time.sleep(0.05) @@ -213,7 +229,6 @@ def replay(target_sock, handshake, history, client_addr): time.sleep(0.01) logging.info(f"Replayed {len(handshake) + len(history)} packets to {client_addr}") except Exception as e: - logging.debug(f"Client {client_addr} disconnected during replay: {e}") self._remove_client(target_sock) h_snapshot = list(self.handshake_packets) @@ -223,40 +238,38 @@ def replay(target_sock, handshake, history, client_addr): except Exception as e: logging.error(f"Error accepting connection: {e}") - elif sock is self.target_socket: + elif self.target_socket and sock is self.target_socket: self.last_target_activity = time.time() try: data = self.target_socket.recv(16384) if not data: - logging.warning("Target closed connection. Reconnecting radio and clients...") + logging.warning("Radio closed connection. Triggering re-sync...") self.target_socket.close() - self._connect_to_target() + self.target_socket = None break self._process_radio_data(data) except Exception as e: - logging.error(f"Error reading from target: {e}") + logging.error(f"Error reading from radio: {e}") self.target_socket.close() - time.sleep(2) - self._connect_to_target() + self.target_socket = None else: - # Data from a client forwarded to radio with PACING + # Data from a client forwarded to radio try: data = sock.recv(16384) if not data: self._remove_client(sock) - else: + elif self.target_socket and not self.reconnecting: try: chunk_size = 512 for i in range(0, len(data), chunk_size): self.target_socket.sendall(data[i:i+chunk_size]) time.sleep(0.01) except Exception as e: - logging.error(f"Error sending to target: {e}") + logging.error(f"Error sending to radio: {e}") self.target_socket.close() - self._connect_to_target() + self.target_socket = None except: self._remove_client(sock) - # Cleanup self.stop() From 295cf3d4cb65ca16a48f09b27ff6071e50934b11 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 14 Feb 2026 09:28:04 +0000 Subject: [PATCH 11/93] Fix: Respect ENABLE_FEATURE_NODE_TOTALS setting for connection-time reports --- src/bot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bot.py b/src/bot.py index 4cfa2a3..8a12f32 100644 --- a/src/bot.py +++ b/src/bot.py @@ -122,7 +122,8 @@ def on_connection(self, interface, topic=pub.AUTO_TOPIC): # Send an immediate node count report upon connection # We use a timer to delay slightly to ensure everything settles - threading.Timer(10.0, self.report_node_count).start() + if get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True): + threading.Timer(10.0, self.report_node_count).start() def on_receive_text(self, packet: MeshPacket, interface): """Callback function triggered when a text message is received.""" From 138321b61677fb5b1e2c6b508be6b0c4cd2331b4 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 14 Feb 2026 09:29:21 +0000 Subject: [PATCH 12/93] Fix: Add missing environment variables to docker-compose.yaml and improve config logging --- docker-compose.yaml | 2 ++ src/main.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index c27965c..37054f2 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -13,6 +13,8 @@ services: - STORAGE_API_TOKEN=${STORAGE_API_TOKEN} - STORAGE_API_VERSION=${STORAGE_API_VERSION} - ENABLE_TCP_PROXY=${ENABLE_TCP_PROXY:-true} + - ENABLE_FEATURE_NODE_TOTALS=${ENABLE_FEATURE_NODE_TOTALS:-true} + - CHANNEL_FOR_NODE_TOTAL_BROADCAST=${CHANNEL_FOR_NODE_TOTAL_BROADCAST:-2} - ENABLE_COMMAND_PING=${ENABLE_COMMAND_PING:-true} - ENABLE_COMMAND_TR=${ENABLE_COMMAND_TR:-true} - ENABLE_COMMAND_HELLO=${ENABLE_COMMAND_HELLO:-true} diff --git a/src/main.py b/src/main.py index 9a121d6..2b181e0 100644 --- a/src/main.py +++ b/src/main.py @@ -60,6 +60,8 @@ def main(): logging.info(f"--- Configuration ---") logging.info(f"MESHTASTIC_IP: {MESHTASTIC_IP}") logging.info(f"ENABLE_TCP_PROXY: {ENABLE_TCP_PROXY}") + logging.info(f"ENABLE_FEATURE_NODE_TOTALS: {get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True)}") + logging.info(f"CHANNEL_FOR_NODE_TOTAL_BROADCAST: {os.getenv('CHANNEL_FOR_NODE_TOTAL_BROADCAST', '2')}") logging.info(f"ENABLE_COMMAND_PING: {get_env_bool('ENABLE_COMMAND_PING', True)}") logging.info(f"ENABLE_COMMAND_TR: {get_env_bool('ENABLE_COMMAND_TR', True)}") logging.info(f"---------------------") From db330edb0134d53e37112d5aa90f8e6a0510901b Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 14 Feb 2026 09:37:45 +0000 Subject: [PATCH 13/93] Add configurable node report frequency via FREQUENCY_OF_NODE_REPORTS environment variable --- .env.example | 1 + docker-compose.yaml | 1 + src/bot.py | 3 ++- src/main.py | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index ee29a54..051a1d7 100644 --- a/.env.example +++ b/.env.example @@ -7,6 +7,7 @@ STORAGE_API_TOKEN=... # Features ENABLE_TCP_PROXY=true ENABLE_FEATURE_NODE_TOTALS=true +FREQUENCY_OF_NODE_REPORTS=3 CHANNEL_FOR_NODE_TOTAL_BROADCAST=2 # Commands diff --git a/docker-compose.yaml b/docker-compose.yaml index 37054f2..df7301b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -14,6 +14,7 @@ services: - STORAGE_API_VERSION=${STORAGE_API_VERSION} - ENABLE_TCP_PROXY=${ENABLE_TCP_PROXY:-true} - ENABLE_FEATURE_NODE_TOTALS=${ENABLE_FEATURE_NODE_TOTALS:-true} + - FREQUENCY_OF_NODE_REPORTS=${FREQUENCY_OF_NODE_REPORTS:-3} - CHANNEL_FOR_NODE_TOTAL_BROADCAST=${CHANNEL_FOR_NODE_TOTAL_BROADCAST:-2} - ENABLE_COMMAND_PING=${ENABLE_COMMAND_PING:-true} - ENABLE_COMMAND_TR=${ENABLE_COMMAND_TR:-true} diff --git a/src/bot.py b/src/bot.py index 8a12f32..e76e2de 100644 --- a/src/bot.py +++ b/src/bot.py @@ -400,7 +400,8 @@ def get_global_context(self): def start_scheduler(self): schedule.every().day.at("00:00").do(self.node_info.reset_packets_today) if get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True): - schedule.every(3).hours.do(self.report_node_count) + report_frequency = get_env_int('FREQUENCY_OF_NODE_REPORTS', 3) + schedule.every(report_frequency).hours.do(self.report_node_count) schedule.every(1).minutes.do(self.check_for_zero_nodes) while True: schedule.run_pending() diff --git a/src/main.py b/src/main.py index 2b181e0..ab28c3c 100644 --- a/src/main.py +++ b/src/main.py @@ -61,6 +61,7 @@ def main(): logging.info(f"MESHTASTIC_IP: {MESHTASTIC_IP}") logging.info(f"ENABLE_TCP_PROXY: {ENABLE_TCP_PROXY}") logging.info(f"ENABLE_FEATURE_NODE_TOTALS: {get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True)}") + logging.info(f"FREQUENCY_OF_NODE_REPORTS: {os.getenv('FREQUENCY_OF_NODE_REPORTS', '3')} hours") logging.info(f"CHANNEL_FOR_NODE_TOTAL_BROADCAST: {os.getenv('CHANNEL_FOR_NODE_TOTAL_BROADCAST', '2')}") logging.info(f"ENABLE_COMMAND_PING: {get_env_bool('ENABLE_COMMAND_PING', True)}") logging.info(f"ENABLE_COMMAND_TR: {get_env_bool('ENABLE_COMMAND_TR', True)}") From 78bf2401def37ab3777c223f78097166521d0894 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Mon, 16 Feb 2026 15:49:25 +0000 Subject: [PATCH 14/93] Add detailed logging for Storage API troubleshooting --- src/api/StorageAPI.py | 3 ++- src/bot.py | 1 + src/main.py | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index cda35d2..57ce822 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -65,6 +65,7 @@ def store_raw_packet(self, packet: dict): """ Store a raw packet in the storage API """ + logging.info(f"store_raw_packet called for portnum: {packet.get('decoded', {}).get('portnum')}") # Filter out packet types that the API doesn't support or we don't want to store ignored_ports = [345, 'ROUTING_APP', 'TRACEROUTE_APP', 'ADMIN_APP', 'NEIGHBORINFO_APP'] portnum = packet.get('decoded', {}).get('portnum') @@ -89,7 +90,7 @@ def store_raw_packet(self, packet: dict): if 'channel' not in packet: packet['channel'] = raw_packet.channel - logging.debug(f"Storing packet: {packet}") + logging.info(f"Storing packet: {packet}") try: response = self._post(self._get_url('raw_packet'), json=packet) except HTTPError as ex: diff --git a/src/bot.py b/src/bot.py index e76e2de..55f82a6 100644 --- a/src/bot.py +++ b/src/bot.py @@ -254,6 +254,7 @@ def on_traceroute(self, packet, route): self.interface.sendText(response_in, destinationId=requester_id) def on_receive(self, packet: MeshPacket, interface): + logging.info(f"on_receive: Incoming packet from {packet.get('fromId')}") if packet.get('fromId') == '!69828b98': logging.debug(f"Received ANY packet from mte4: {packet}") diff --git a/src/main.py b/src/main.py index ab28c3c..e3cd660 100644 --- a/src/main.py +++ b/src/main.py @@ -65,6 +65,9 @@ def main(): logging.info(f"CHANNEL_FOR_NODE_TOTAL_BROADCAST: {os.getenv('CHANNEL_FOR_NODE_TOTAL_BROADCAST', '2')}") logging.info(f"ENABLE_COMMAND_PING: {get_env_bool('ENABLE_COMMAND_PING', True)}") logging.info(f"ENABLE_COMMAND_TR: {get_env_bool('ENABLE_COMMAND_TR', True)}") + logging.info(f"STORAGE_API_ROOT: {STORAGE_API_ROOT}") + if STORAGE_API_2_ROOT: + logging.info(f"STORAGE_API_2_ROOT: {STORAGE_API_2_ROOT}") logging.info(f"---------------------") proxy = None From 0ffb8cfb0ac0563fac980f7a6a70ac55a0c2e7a5 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Mon, 16 Feb 2026 15:58:43 +0000 Subject: [PATCH 15/93] Log Storage API responses at INFO level --- src/api/StorageAPI.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index 57ce822..c6c3997 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -104,10 +104,10 @@ def store_raw_packet(self, packet: dict): try: response_json = response.json() - logging.debug(f"Response: {response_json}") + logging.info(f"API Response ({response.status_code}): {response_json}") return response_json except JSONDecodeError: - logging.debug(f"Response (not JSON): {response.text}") + logging.info(f"API Response ({response.status_code}, not JSON): {response.text}") return {'text': response.text} def list_nodes(self) -> list[MeshNode]: From b3d35540653d2036934f0193b25aebd928f074ac Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Mon, 16 Feb 2026 16:01:06 +0000 Subject: [PATCH 16/93] Fix 404 errors by adding guards for uninitialized node ID --- src/api/StorageAPI.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index c6c3997..0c465cd 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -29,6 +29,7 @@ def _get_url(self, path: str, args: dict = None): if args is None: args = {} + my_nodenum = self.bot.my_nodenum if self.api_version == 1: api_paths = { 'raw_packet': '/api/raw-packet/', @@ -36,7 +37,6 @@ def _get_url(self, path: str, args: dict = None): 'node_by_id': f'/api/nodes/{args.get("node_id", "")}', } else: - my_nodenum = self.bot.my_nodenum api_paths = { 'raw_packet': f'/api/packets/{my_nodenum}/ingest/', 'nodes': f'/api/packets/{my_nodenum}/nodes/', @@ -65,6 +65,10 @@ def store_raw_packet(self, packet: dict): """ Store a raw packet in the storage API """ + if self.api_version == 2 and (self.bot.my_nodenum is None or self.bot.my_nodenum <= 0): + logging.debug("Skipping store_raw_packet: Bot node number not yet initialized.") + return + logging.info(f"store_raw_packet called for portnum: {packet.get('decoded', {}).get('portnum')}") # Filter out packet types that the API doesn't support or we don't want to store ignored_ports = [345, 'ROUTING_APP', 'TRACEROUTE_APP', 'ADMIN_APP', 'NEIGHBORINFO_APP'] @@ -114,6 +118,9 @@ def list_nodes(self) -> list[MeshNode]: """ Get a list of all nodes stored in the storage API. This list generally does not include position or metrics data. """ + if self.api_version == 2 and (self.bot.my_nodenum is None or self.bot.my_nodenum <= 0): + return [] + response = self._get(self._get_url('nodes')) response_json = response.json() @@ -125,6 +132,9 @@ def store_node(self, node: MeshNode): If the node contains position or metrics data, it will be stored as well """ + if self.api_version == 2 and (self.bot.my_nodenum is None or self.bot.my_nodenum <= 0): + logging.debug("Skipping store_node: Bot node number not yet initialized.") + return node_data = MeshNodeSerializer.to_api_dict(node) From f8ea10120f703429e30ff7ccad19e4b5e435377e Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Tue, 17 Feb 2026 10:08:51 +0000 Subject: [PATCH 17/93] Fix watchtower Docker API version mismatch --- docker-compose.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index df7301b..239a91e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -32,4 +32,6 @@ services: restart: unless-stopped volumes: - /var/run/docker.sock:/var/run/docker.sock + environment: + - DOCKER_API_VERSION=1.44 command: --interval 3600 meshtastic-bot # Check for updates every hour From 357d950dbc3e3682fdb6b1f1ffb3d7987a830b03 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Tue, 17 Feb 2026 11:33:54 +0000 Subject: [PATCH 18/93] Fix Meshflow API compatibility, update MTEK IP, and resolve NoneType date crashes --- src/api/StorageAPI.py | 4 ++++ src/api/serializers.py | 44 ++++++++++++++++++++++++------------ src/commands/nodes.py | 5 ++-- src/helpers.py | 5 +++- src/persistence/node_info.py | 6 ++--- 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index 0c465cd..1fd232b 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -93,6 +93,10 @@ def store_raw_packet(self, packet: dict): if raw_packet: if 'channel' not in packet: packet['channel'] = raw_packet.channel + if 'id' not in packet: + packet['id'] = raw_packet.id + if 'from' not in packet: + packet['from'] = raw_packet.from_node logging.info(f"Storing packet: {packet}") try: diff --git a/src/api/serializers.py b/src/api/serializers.py index a5e8426..7476704 100644 --- a/src/api/serializers.py +++ b/src/api/serializers.py @@ -27,22 +27,25 @@ class PositionSerializer(AbstractModelSerializer): def to_api_dict(cls, position: MeshNode.Position) -> dict: return { "logged_time": cls.date_to_api(position.logged_time), # api v1 compatibility + "loggedTime": cls.date_to_api(position.logged_time), "reported_time": cls.date_to_api(position.reported_time), # api v2 compatibility + "reportedTime": cls.date_to_api(position.reported_time), "latitude": position.latitude, "longitude": position.longitude, "altitude": position.altitude, "location_source": position.location_source or "LOC_UNKNOWN", + "locationSource": position.location_source or "LOC_UNKNOWN", } @classmethod def from_api_dict(cls, position_data: dict) -> MeshNode.Position: return MeshNode.Position( - logged_time=cls.date_from_api(position_data['logged_time']), - reported_time=cls.date_from_api(position_data['reported_time']), + logged_time=cls.date_from_api(position_data.get('logged_time') or position_data.get('loggedTime')), + reported_time=cls.date_from_api(position_data.get('reported_time') or position_data.get('reportedTime')), latitude=position_data['latitude'], longitude=position_data['longitude'], altitude=position_data['altitude'], - location_source=position_data['location_source'] + location_source=position_data.get('location_source') or position_data.get('locationSource') ) @@ -51,23 +54,29 @@ class DeviceMetricsSerializer(AbstractModelSerializer): def to_api_dict(cls, device_metrics: MeshNode.DeviceMetrics) -> dict: return { "logged_time": cls.date_to_api(device_metrics.logged_time), # api v1 compatibility + "loggedTime": cls.date_to_api(device_metrics.logged_time), "reported_time": cls.date_to_api(device_metrics.logged_time), # api v2 compatibility + "reportedTime": cls.date_to_api(device_metrics.logged_time), "battery_level": device_metrics.battery_level, + "batteryLevel": device_metrics.battery_level, "voltage": device_metrics.voltage, "channel_utilization": device_metrics.channel_utilization, + "channelUtilization": device_metrics.channel_utilization, "air_util_tx": device_metrics.air_util_tx, - "uptime_seconds": device_metrics.uptime_seconds + "airUtilTx": device_metrics.air_util_tx, + "uptime_seconds": device_metrics.uptime_seconds, + "uptimeSeconds": device_metrics.uptime_seconds } @classmethod def from_api_dict(cls, device_metrics_data: dict) -> MeshNode.DeviceMetrics: return MeshNode.DeviceMetrics( - logged_time=cls.date_from_api(device_metrics_data['logged_time']), - battery_level=device_metrics_data['battery_level'], + logged_time=cls.date_from_api(device_metrics_data.get('logged_time') or device_metrics_data.get('loggedTime') or device_metrics_data.get('reported_time') or device_metrics_data.get('reportedTime')), + battery_level=device_metrics_data.get('battery_level') or device_metrics_data.get('batteryLevel'), voltage=device_metrics_data['voltage'], - channel_utilization=device_metrics_data['channel_utilization'], - air_util_tx=device_metrics_data['air_util_tx'], - uptime_seconds=device_metrics_data['uptime_seconds'] + channel_utilization=device_metrics_data.get('channel_utilization') or device_metrics_data.get('channelUtilization'), + air_util_tx=device_metrics_data.get('air_util_tx') or device_metrics_data.get('airUtilTx'), + uptime_seconds=device_metrics_data.get('uptime_seconds') or device_metrics_data.get('uptimeSeconds') ) @@ -80,10 +89,14 @@ def to_api_dict(cls, node: MeshNode) -> dict: "id": node.user.id, "macaddr": node.user.macaddr, "hw_model": node.user.hw_model, + "hwModel": node.user.hw_model, "public_key": node.user.public_key, + "publicKey": node.user.public_key, 'user': { "long_name": node.user.long_name, - "short_name": node.user.short_name + "longName": node.user.long_name, + "short_name": node.user.short_name, + "shortName": node.user.short_name } } @@ -94,6 +107,7 @@ def to_api_dict(cls, node: MeshNode) -> dict: if node.device_metrics: node_data['device_metrics'] = DeviceMetricsSerializer.to_api_dict(node.device_metrics) + node_data['deviceMetrics'] = DeviceMetricsSerializer.to_api_dict(node.device_metrics) return node_data @@ -103,10 +117,10 @@ def from_api_dict(cls, node_data: dict) -> MeshNode: user = MeshNode.User( node_id=node_data['id'], macaddr=node_data['macaddr'], - hw_model=node_data['hw_model'], - public_key=node_data['public_key'], - long_name=user_data['long_name'], - short_name=user_data['short_name'] + hw_model=node_data.get('hw_model') or node_data.get('hwModel'), + public_key=node_data.get('public_key') or node_data.get('publicKey'), + long_name=user_data.get('long_name') or user_data.get('longName'), + short_name=user_data.get('short_name') or user_data.get('shortName') ) position_data = node_data.get('position') @@ -114,7 +128,7 @@ def from_api_dict(cls, node_data: dict) -> MeshNode: if position_data: position = PositionSerializer.from_api_dict(position_data) - device_metrics_data = node_data.get('device_metrics') + device_metrics_data = node_data.get('device_metrics') or node_data.get('deviceMetrics') device_metrics = None if device_metrics_data: device_metrics = DeviceMetricsSerializer.from_api_dict(device_metrics_data) diff --git a/src/commands/nodes.py b/src/commands/nodes.py index dcd5872..0b14120 100644 --- a/src/commands/nodes.py +++ b/src/commands/nodes.py @@ -1,3 +1,4 @@ +from datetime import datetime, timezone from meshtastic.protobuf.mesh_pb2 import MeshPacket from src.bot import MeshtasticBot @@ -24,8 +25,8 @@ def handle_base_command(self, packet: MeshPacket, args: list[str]) -> None: online_nodes = self.bot.node_info.get_online_nodes() offline_nodes = self.bot.node_info.get_offline_nodes() - # get nodes sorted by last_head - sorted_nodes = sorted(nodes, key=lambda n: self.bot.node_info.get_last_heard(n.id), reverse=True) + # get nodes sorted by last_head, handling None values (sort them to the bottom) + sorted_nodes = sorted(nodes, key=lambda n: self.bot.node_info.get_last_heard(n.id) or datetime.min.replace(tzinfo=timezone.utc), reverse=True) response = f"{len(online_nodes)} nodes online, {len(offline_nodes)} offline." # Add up to 10 nodes with the most packets received today diff --git a/src/helpers.py b/src/helpers.py index 764cd45..fd5935f 100644 --- a/src/helpers.py +++ b/src/helpers.py @@ -21,7 +21,10 @@ def get_env_int(name: str, default: int) -> int: return default -def pretty_print_last_heard(last_heard_timestamp: int | datetime) -> str: +def pretty_print_last_heard(last_heard_timestamp: int | datetime | None) -> str: + if not last_heard_timestamp: + return "never" + if not isinstance(last_heard_timestamp, datetime): last_heard = datetime.fromtimestamp(last_heard_timestamp, timezone.utc) else: diff --git a/src/persistence/node_info.py b/src/persistence/node_info.py index 960c7ce..9a4e2c0 100644 --- a/src/persistence/node_info.py +++ b/src/persistence/node_info.py @@ -100,11 +100,11 @@ def reset_packets_today(self) -> None: def get_online_nodes(self) -> dict[str, datetime]: return {node_id: last_heard for node_id, last_heard in self.nodes_last_heard.items() - if last_heard > datetime.now(timezone.utc) - timedelta(seconds=self.online_threshold_sec)} + if last_heard and last_heard > datetime.now(timezone.utc) - timedelta(seconds=self.online_threshold_sec)} def get_offline_nodes(self) -> dict[str, datetime]: return {node_id: last_heard for node_id, last_heard in self.nodes_last_heard.items() - if last_heard <= datetime.now(timezone.utc) - timedelta(seconds=self.online_threshold_sec)} + if not last_heard or last_heard <= datetime.now(timezone.utc) - timedelta(seconds=self.online_threshold_sec)} def get_all_nodes(self) -> dict[str, datetime]: return self.nodes_last_heard @@ -115,7 +115,7 @@ def load_from_file(self, node_info_file: str) -> None: with open(node_info_file, 'r') as file: data = json.load(file) - self.nodes_last_heard = {k: datetime.fromisoformat(v) for k, v in data['nodes_last_heard'].items()} + self.nodes_last_heard = {k: (datetime.fromisoformat(v) if v else None) for k, v in data['nodes_last_heard'].items()} self.node_packets_today = data['node_packets_today'] self.node_packets_today_breakdown = data['node_packets_today_breakdown'] From 611059c2ba2f0d7e3e96d01c7f9ab05fb6442daa Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Tue, 17 Feb 2026 15:24:50 +0000 Subject: [PATCH 19/93] Enhance proxy heartbeat to log client IPs --- src/tcp_proxy.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 441c3dc..8e776a2 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -194,9 +194,17 @@ def _run(self): if current_time - last_heartbeat_log > 60.0: with self.clients_lock: client_count = len(self.clients) + client_info = [] + for s in self.clients: + try: + peer = s.getpeername() + client_info.append(f"{peer[0]}:{peer[1]}") + except: + client_info.append("unknown") + status = "Connected" if self.target_socket and not self.reconnecting else "RECONNECTING" silence = current_time - self.last_target_activity - logging.info(f"Proxy Heartbeat: {status}. Last radio data {silence:.1f}s ago. Clients: {client_count}") + logging.info(f"Proxy Heartbeat: {status}. Last radio data {silence:.1f}s ago. Clients: {client_count} ({', '.join(client_info)})") last_heartbeat_log = current_time # Watchdog: Force reconnect if silence is too long on an "active" connection From dcb518397b5411c4e7d54cc8cf7fd804fe0c3215 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Tue, 17 Feb 2026 15:27:34 +0000 Subject: [PATCH 20/93] Fix onResponseTraceRoute signature for newer meshtastic library --- src/tcp_interface.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/tcp_interface.py b/src/tcp_interface.py index c0db8ed..a9f0b07 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -62,12 +62,18 @@ def __init__(self, *args, # Store packets in a queue and resend them after reconnecting # This will involve exposing the queue, and reloading the queue in bot.py since we create a new interface object - def onResponseTraceRoute(self, packet, routeDiscovery): + def onResponseTraceRoute(self, packet): """ Callback for when a traceroute response is received. """ - super().onResponseTraceRoute(packet, routeDiscovery) - pub.sendMessage("meshtastic.traceroute", packet=packet, route=routeDiscovery) + # In newer versions of the library, the route is part of the packet.decoded.routing + # We pass the packet and extract the route discovery object if present + route_discovery = None + if hasattr(packet, 'decoded') and hasattr(packet.decoded, 'routing'): + route_discovery = packet.decoded.routing + + super().onResponseTraceRoute(packet) + pub.sendMessage("meshtastic.traceroute", packet=packet, route=route_discovery) def sendHeartbeat(self): try: From b174aa0e53da865c31d43813119c32f242923b5d Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Tue, 17 Feb 2026 15:50:04 +0000 Subject: [PATCH 21/93] Refine onResponseTraceRoute to handle dictionary packets --- src/tcp_interface.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/tcp_interface.py b/src/tcp_interface.py index a9f0b07..95790b7 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -66,11 +66,12 @@ def onResponseTraceRoute(self, packet): """ Callback for when a traceroute response is received. """ - # In newer versions of the library, the route is part of the packet.decoded.routing - # We pass the packet and extract the route discovery object if present + # In newer versions of the library, the route is part of the packet['decoded']['routing'] route_discovery = None - if hasattr(packet, 'decoded') and hasattr(packet.decoded, 'routing'): - route_discovery = packet.decoded.routing + if isinstance(packet, dict): + route_discovery = packet.get('decoded', {}).get('routing') + elif hasattr(packet, 'decoded'): + route_discovery = getattr(packet.decoded, 'routing', None) super().onResponseTraceRoute(packet) pub.sendMessage("meshtastic.traceroute", packet=packet, route=route_discovery) From 2c1ebdca8fbf589b201c07319fa82c02eb4059d7 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Tue, 17 Feb 2026 16:23:40 +0000 Subject: [PATCH 22/93] Add timeout notification for traceroute requests --- src/commands/tr.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/commands/tr.py b/src/commands/tr.py index b7312db..bbc6a66 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -1,4 +1,6 @@ import logging +import threading +import time from meshtastic.protobuf.mesh_pb2 import MeshPacket from src.commands.command import AbstractCommand @@ -29,12 +31,27 @@ def handle_packet(self, packet: MeshPacket) -> None: # Initiate actual traceroute self.bot.pending_traces[sender_id] = sender_id + + # Start a timeout timer (90 seconds) + def check_timeout(): + time.sleep(90) + if sender_id in self.bot.pending_traces: + # If still in pending_traces, we never got a response + del self.bot.pending_traces[sender_id] + logging.info(f"Traceroute to {sender_id} timed out.") + timeout_msg = f"Traceroute to {sender_id} timed out (no response from mesh)." + self.message_in_dm(sender_id, timeout_msg) + + threading.Thread(target=check_timeout, daemon=True).start() + try: logging.info(f"Initiating traceroute to {sender_id}") # hopLimit=7 is standard max self.bot.interface.sendTraceRoute(sender_id, hopLimit=7) except Exception as e: logging.error(f"Failed to send traceroute to {sender_id}: {e}") + if sender_id in self.bot.pending_traces: + del self.bot.pending_traces[sender_id] self.reply_in_dm(packet, f"Error starting traceroute: {e}") def get_command_for_logging(self, message: str) -> (str, list[str] | None, str | None): From c5e38b17cfc693a8e488a5c115f1aa7e70f601f8 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Wed, 18 Feb 2026 08:49:07 +0000 Subject: [PATCH 23/93] Add verbose connection/disconnection logging to proxy --- src/tcp_proxy.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 8e776a2..52d6e89 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -139,11 +139,20 @@ def _process_radio_data(self, data): self._remove_client(client_sock) def _remove_client(self, sock): + try: + addr = sock.getpeername() + logging.info(f"--- PROXY: Removing client {addr}") + except: + logging.info("--- PROXY: Removing unknown client") + with self.clients_lock: if sock in self.clients: self.clients.remove(sock) try: sock.close() except: pass + + with self.clients_lock: + logging.info(f"--- PROXY: Remaining clients: {len(self.clients)}") def _run(self): logging.info(f"Starting TCP Proxy on {self.listen_host}:{self.listen_port} -> {self.target_host}:{self.target_port}") @@ -219,10 +228,11 @@ def _run(self): if sock is self.server_socket: try: client_socket, addr = self.server_socket.accept() - logging.info(f"New proxy connection from {addr}") + logging.info(f"+++ PROXY: New connection accepted from {addr}") with self.clients_lock: self.clients.append(client_socket) + logging.info(f"--- PROXY: Total active clients now: {len(self.clients)}") def replay(target_sock, handshake, history, client_addr): if client_addr[0] in ('127.0.0.1', 'localhost'): From 4722b83a4b48bf55b5440e6c755ffd9f983a935e Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 16:06:13 +0000 Subject: [PATCH 24/93] Add support for targeted traceroute via !tr --- src/commands/tr.py | 76 +++++++++++++++++++++----------- test/commands/test_tr.py | 93 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 25 deletions(-) create mode 100644 test/commands/test_tr.py diff --git a/src/commands/tr.py b/src/commands/tr.py index bbc6a66..1fad313 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -11,48 +11,74 @@ def __init__(self, bot): super().__init__(bot, 'tr') def handle_packet(self, packet: MeshPacket) -> None: - hop_start = packet.get('hopStart', 0) - hop_limit = packet.get('hopLimit', 0) - hops_away = hop_start - hop_limit + message = packet['decoded']['text'] + words = message.split() - snr = packet.get('rxSnr', 0.0) - - sender_id = packet['fromId'] - sender = self.bot.node_db.get_by_id(sender_id) - sender_name = sender.long_name if sender else sender_id + requester_id = packet['fromId'] + requester = self.bot.node_db.get_by_id(requester_id) + requester_name = requester.long_name if requester else requester_id - if hops_away == 0: - response = f"{sender_name} you are Zero Hops from me. No traceroute required!" - self.reply_in_dm(packet, response) + target_node = None + if len(words) > 1: + target_short = words[1] + target_node = self.bot.get_node_by_short_name(target_short) + if not target_node: + self.reply_in_dm(packet, f"Could not find node with short name '{target_short}'") + return + target_id = target_node.id + target_long_name = target_node.long_name + else: + target_id = requester_id + target_long_name = requester_name + + if target_id == self.bot.my_id: + self.reply_in_dm(packet, "I am already here! No traceroute required.") return - response = f"{sender_name} you are {hops_away} hops away (Signal: {snr} dB). Starting full traceroute..." - self.reply_in_dm(packet, response) + # If tracing back to requester, we can show hops_away/SNR from the incoming packet + if target_id == requester_id: + hop_start = packet.get('hopStart', 0) + hop_limit = packet.get('hopLimit', 0) + hops_away = hop_start - hop_limit + snr = packet.get('rxSnr', 0.0) + + if hops_away == 0: + response = f"{requester_name} you are Zero Hops from me. No traceroute required!" + self.reply_in_dm(packet, response) + return + + response = f"{requester_name} you are {hops_away} hops away (Signal: {snr} dB). Starting full traceroute..." + self.reply_in_dm(packet, response) + else: + # Tracing to a different node + response = f"Starting traceroute to {target_long_name} ({target_id}) for you..." + self.reply_in_dm(packet, response) # Initiate actual traceroute - self.bot.pending_traces[sender_id] = sender_id + # Map target_id -> requester_id so bot.on_traceroute knows who to reply to + self.bot.pending_traces[target_id] = requester_id # Start a timeout timer (90 seconds) def check_timeout(): time.sleep(90) - if sender_id in self.bot.pending_traces: + if target_id in self.bot.pending_traces and self.bot.pending_traces[target_id] == requester_id: # If still in pending_traces, we never got a response - del self.bot.pending_traces[sender_id] - logging.info(f"Traceroute to {sender_id} timed out.") - timeout_msg = f"Traceroute to {sender_id} timed out (no response from mesh)." - self.message_in_dm(sender_id, timeout_msg) + del self.bot.pending_traces[target_id] + logging.info(f"Traceroute to {target_id} (requested by {requester_id}) timed out.") + timeout_msg = f"Traceroute to {target_long_name} ({target_id}) timed out (no response from mesh)." + self.message_in_dm(requester_id, timeout_msg) threading.Thread(target=check_timeout, daemon=True).start() try: - logging.info(f"Initiating traceroute to {sender_id}") + logging.info(f"Initiating traceroute to {target_id} requested by {requester_id}") # hopLimit=7 is standard max - self.bot.interface.sendTraceRoute(sender_id, hopLimit=7) + self.bot.interface.sendTraceRoute(target_id, hopLimit=7) except Exception as e: - logging.error(f"Failed to send traceroute to {sender_id}: {e}") - if sender_id in self.bot.pending_traces: - del self.bot.pending_traces[sender_id] + logging.error(f"Failed to send traceroute to {target_id}: {e}") + if target_id in self.bot.pending_traces: + del self.bot.pending_traces[target_id] self.reply_in_dm(packet, f"Error starting traceroute: {e}") def get_command_for_logging(self, message: str) -> (str, list[str] | None, str | None): - return self._gcfl_just_base_command(message) + return self._gcfl_base_command_and_args(message) diff --git a/test/commands/test_tr.py b/test/commands/test_tr.py new file mode 100644 index 0000000..4ca62a4 --- /dev/null +++ b/test/commands/test_tr.py @@ -0,0 +1,93 @@ +import unittest +from unittest.mock import MagicMock, call +from src.commands.tr import TracerouteCommand +from test.commands import CommandTestCase +from test.test_setup_data import build_test_text_packet + +class TestTracerouteCommand(CommandTestCase): + command: TracerouteCommand + + def setUp(self): + super().setUp() + self.command = TracerouteCommand(bot=self.bot) + # Mock sendTraceRoute since it's used in handle_packet + self.bot.interface.sendTraceRoute = MagicMock() + + def test_handle_packet_basic(self): + # !tr from node 1 + sender_id = self.test_nodes[1].user.id + packet = build_test_text_packet('!tr', sender_id, self.bot.my_id) + packet['hopStart'] = 3 + packet['hopLimit'] = 2 + # Ensure we know the SNR for the test + packet['rxSnr'] = 5.5 + + self.command.handle_packet(packet) + + # Check starting message sent to sender + expected_msg = f"{self.test_nodes[1].user.long_name} you are 1 hops away (Signal: 5.5 dB). Starting full traceroute..." + self.mock_interface.sendText.assert_any_call(expected_msg, destinationId=sender_id, wantAck=True) + + # Check sendTraceRoute called for sender + self.bot.interface.sendTraceRoute.assert_called_once_with(sender_id, hopLimit=7) + + # Check pending_traces entry + self.assertEqual(self.bot.pending_traces[sender_id], sender_id) + + def test_handle_packet_zero_hops(self): + sender_id = self.test_nodes[1].user.id + packet = build_test_text_packet('!tr', sender_id, self.bot.my_id) + packet['hopStart'] = 3 + packet['hopLimit'] = 3 + + self.command.handle_packet(packet) + + # Check zero hops message + expected_msg = f"{self.test_nodes[1].user.long_name} you are Zero Hops from me. No traceroute required!" + self.mock_interface.sendText.assert_any_call(expected_msg, destinationId=sender_id, wantAck=True) + self.bot.interface.sendTraceRoute.assert_not_called() + + def test_handle_packet_to_specific_node(self): + # Requester is node 1, Target is node 2 + requester_id = self.test_nodes[1].user.id + target_node = self.test_nodes[2] + target_short = target_node.user.short_name + + packet = build_test_text_packet(f'!tr {target_short}', requester_id, self.bot.my_id) + + self.command.handle_packet(packet) + + expected_msg = f"Starting traceroute to {target_node.user.long_name} ({target_node.user.id}) for you..." + self.mock_interface.sendText.assert_any_call(expected_msg, destinationId=requester_id, wantAck=True) + + self.bot.interface.sendTraceRoute.assert_called_once_with(target_node.user.id, hopLimit=7) + self.assertEqual(self.bot.pending_traces[target_node.user.id], requester_id) + + def test_handle_packet_unknown_shortname(self): + requester_id = self.test_nodes[1].user.id + packet = build_test_text_packet('!tr NONEXIST', requester_id, self.bot.my_id) + + self.command.handle_packet(packet) + + expected_msg = "Could not find node with short name 'NONEXIST'" + self.mock_interface.sendText.assert_any_call(expected_msg, destinationId=requester_id, wantAck=True) + self.bot.interface.sendTraceRoute.assert_not_called() + + def test_handle_packet_to_self(self): + # Bot's ID is typically !00000001 in test setup + requester_id = self.test_nodes[1].user.id + # We need the bot's short name if we want to test by shortname, + # but the command specifically checks against self.bot.my_id. + # Let's find a way to trigger the "I am already here" message. + + # Manually find/set a short name for the bot if needed, or just use words[1] + self.bot.get_node_by_short_name = MagicMock(return_value=MagicMock(id=self.bot.my_id, long_name="Bot")) + + packet = build_test_text_packet('!tr BOT', requester_id, self.bot.my_id) + self.command.handle_packet(packet) + + expected_msg = "I am already here! No traceroute required." + self.mock_interface.sendText.assert_any_call(expected_msg, destinationId=requester_id, wantAck=True) + +if __name__ == '__main__': + unittest.main() From 14fd1d56664d8589d442eb356da5fa0498581b75 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 16:16:22 +0000 Subject: [PATCH 25/93] Fix traceroute hang by supporting multiple concurrent requesters per target node --- src/bot.py | 22 ++++++++++++---------- src/commands/tr.py | 24 +++++++++++++++++------- test/commands/test_tr.py | 4 ++-- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/bot.py b/src/bot.py index 55f82a6..7376425 100644 --- a/src/bot.py +++ b/src/bot.py @@ -215,7 +215,9 @@ def on_traceroute(self, packet, route): logging.debug(f"Received traceroute from {target_id} but no pending request found.") return - requester_id = self.pending_traces.pop(target_id) + requesters = self.pending_traces.pop(target_id) + if not isinstance(requesters, list): + requesters = [requesters] # Format the OUTBOUND route route_ids = route.route @@ -230,12 +232,10 @@ def on_traceroute(self, packet, route): hops.append(f"{node_id_str}") route_str = " -> ".join(hops) if hops else "Direct (or unknown)" - response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" - logging.info(f"Sending traceroute OUT result to {requester_id}: {response_out}") - self.interface.sendText(response_out, destinationId=requester_id) - + # Format the INBOUND route (if available) + response_in = None if hasattr(route, 'route_back') and route.route_back: hops_back = [] for node_id_int in route.route_back: @@ -246,12 +246,14 @@ def on_traceroute(self, packet, route): else: hops_back.append(f"{node_id_str}") back_str = " -> ".join(hops_back) - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" - logging.info(f"Sending traceroute IN result to {requester_id}: {response_in}") - # Small delay to ensure order - time.sleep(1) - self.interface.sendText(response_in, destinationId=requester_id) + + for requester_id in requesters: + logging.info(f"Sending traceroute result to {requester_id}: {response_out}") + self.interface.sendText(response_out, destinationId=requester_id) + if response_in: + time.sleep(1) + self.interface.sendText(response_in, destinationId=requester_id) def on_receive(self, packet: MeshPacket, interface): logging.info(f"on_receive: Incoming packet from {packet.get('fromId')}") diff --git a/src/commands/tr.py b/src/commands/tr.py index 1fad313..d3b81bd 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -55,15 +55,23 @@ def handle_packet(self, packet: MeshPacket) -> None: self.reply_in_dm(packet, response) # Initiate actual traceroute - # Map target_id -> requester_id so bot.on_traceroute knows who to reply to - self.bot.pending_traces[target_id] = requester_id + # Map target_id -> list of requester_ids + if target_id not in self.bot.pending_traces: + self.bot.pending_traces[target_id] = [] + + if requester_id not in self.bot.pending_traces[target_id]: + self.bot.pending_traces[target_id].append(requester_id) # Start a timeout timer (90 seconds) def check_timeout(): time.sleep(90) - if target_id in self.bot.pending_traces and self.bot.pending_traces[target_id] == requester_id: - # If still in pending_traces, we never got a response - del self.bot.pending_traces[target_id] + if target_id in self.bot.pending_traces and requester_id in self.bot.pending_traces[target_id]: + # Remove this specific requester from the pending list + self.bot.pending_traces[target_id].remove(requester_id) + # If no more requesters for this target, clean up the key + if not self.bot.pending_traces[target_id]: + del self.bot.pending_traces[target_id] + logging.info(f"Traceroute to {target_id} (requested by {requester_id}) timed out.") timeout_msg = f"Traceroute to {target_long_name} ({target_id}) timed out (no response from mesh)." self.message_in_dm(requester_id, timeout_msg) @@ -76,8 +84,10 @@ def check_timeout(): self.bot.interface.sendTraceRoute(target_id, hopLimit=7) except Exception as e: logging.error(f"Failed to send traceroute to {target_id}: {e}") - if target_id in self.bot.pending_traces: - del self.bot.pending_traces[target_id] + if target_id in self.bot.pending_traces and requester_id in self.bot.pending_traces[target_id]: + self.bot.pending_traces[target_id].remove(requester_id) + if not self.bot.pending_traces[target_id]: + del self.bot.pending_traces[target_id] self.reply_in_dm(packet, f"Error starting traceroute: {e}") def get_command_for_logging(self, message: str) -> (str, list[str] | None, str | None): diff --git a/test/commands/test_tr.py b/test/commands/test_tr.py index 4ca62a4..2be4fb3 100644 --- a/test/commands/test_tr.py +++ b/test/commands/test_tr.py @@ -32,7 +32,7 @@ def test_handle_packet_basic(self): self.bot.interface.sendTraceRoute.assert_called_once_with(sender_id, hopLimit=7) # Check pending_traces entry - self.assertEqual(self.bot.pending_traces[sender_id], sender_id) + self.assertEqual(self.bot.pending_traces[sender_id], [sender_id]) def test_handle_packet_zero_hops(self): sender_id = self.test_nodes[1].user.id @@ -61,7 +61,7 @@ def test_handle_packet_to_specific_node(self): self.mock_interface.sendText.assert_any_call(expected_msg, destinationId=requester_id, wantAck=True) self.bot.interface.sendTraceRoute.assert_called_once_with(target_node.user.id, hopLimit=7) - self.assertEqual(self.bot.pending_traces[target_node.user.id], requester_id) + self.assertEqual(self.bot.pending_traces[target_node.user.id], [requester_id]) def test_handle_packet_unknown_shortname(self): requester_id = self.test_nodes[1].user.id From cbdf8b0fda6be7d20a315c8c47c2beeae919a86a Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 16:23:24 +0000 Subject: [PATCH 26/93] Add debug logging to diagnose message receipt issues after traceroute --- src/bot.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bot.py b/src/bot.py index 7376425..803bbf3 100644 --- a/src/bot.py +++ b/src/bot.py @@ -127,6 +127,9 @@ def on_connection(self, interface, topic=pub.AUTO_TOPIC): def on_receive_text(self, packet: MeshPacket, interface): """Callback function triggered when a text message is received.""" + from_id = packet.get('fromId') + text = packet.get('decoded', {}).get('text', '') + logging.info(f"on_receive_text: Incoming text from {from_id}: {text}") to_id = packet['toId'] @@ -256,8 +259,11 @@ def on_traceroute(self, packet, route): self.interface.sendText(response_in, destinationId=requester_id) def on_receive(self, packet: MeshPacket, interface): - logging.info(f"on_receive: Incoming packet from {packet.get('fromId')}") - if packet.get('fromId') == '!69828b98': + from_id = packet.get('fromId') + portnum = packet['decoded']['portnum'] if 'decoded' in packet else 'unknown' + logging.debug(f"on_receive: Incoming packet from {from_id} (Port: {portnum})") + + if from_id == '!69828b98': logging.debug(f"Received ANY packet from mte4: {packet}") # dump the packet to disk (if enabled) From 3af6b707b1a3a458b7b8e98d5553521ec7c2d31b Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 16:31:16 +0000 Subject: [PATCH 27/93] Fix bot hang by making traceroute callback defensive against NoneType route data --- src/bot.py | 99 ++++++++++++++++++++++++-------------------- src/tcp_interface.py | 7 +++- 2 files changed, 60 insertions(+), 46 deletions(-) diff --git a/src/bot.py b/src/bot.py index 803bbf3..3260b24 100644 --- a/src/bot.py +++ b/src/bot.py @@ -212,51 +212,62 @@ def handle_public_message(self, packet: MeshPacket): def on_traceroute(self, packet, route): """Callback for when a traceroute response is received.""" - target_id = packet.get('fromId') - - if target_id not in self.pending_traces: - logging.debug(f"Received traceroute from {target_id} but no pending request found.") - return + try: + target_id = packet.get('fromId') + logging.debug(f"on_traceroute: Received response from {target_id}. Route data: {route}") + + if target_id not in self.pending_traces: + logging.debug(f"Received traceroute from {target_id} but no pending request found.") + return - requesters = self.pending_traces.pop(target_id) - if not isinstance(requesters, list): - requesters = [requesters] - - # Format the OUTBOUND route - route_ids = route.route - hops = [] - for node_id_int in route_ids: - # Convert int to !hex string - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops.append(f"{node.short_name}") - else: - hops.append(f"{node_id_str}") - - route_str = " -> ".join(hops) if hops else "Direct (or unknown)" - response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" - - # Format the INBOUND route (if available) - response_in = None - if hasattr(route, 'route_back') and route.route_back: - hops_back = [] - for node_id_int in route.route_back: - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops_back.append(f"{node.short_name}") - else: - hops_back.append(f"{node_id_str}") - back_str = " -> ".join(hops_back) - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" - - for requester_id in requesters: - logging.info(f"Sending traceroute result to {requester_id}: {response_out}") - self.interface.sendText(response_out, destinationId=requester_id) - if response_in: - time.sleep(1) - self.interface.sendText(response_in, destinationId=requester_id) + requesters = self.pending_traces.pop(target_id) + if not isinstance(requesters, list): + requesters = [requesters] + + if route is None: + logging.warning(f"Traceroute response from {target_id} contained no route data.") + for requester_id in requesters: + self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) + return + + # Format the OUTBOUND route + route_ids = getattr(route, 'route', []) + hops = [] + for node_id_int in route_ids: + # Convert int to !hex string + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops.append(f"{node.short_name}") + else: + hops.append(f"{node_id_str}") + + route_str = " -> ".join(hops) if hops else "Direct (or unknown)" + response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" + + # Format the INBOUND route (if available) + response_in = None + route_back_ids = getattr(route, 'route_back', []) + if route_back_ids: + hops_back = [] + for node_id_int in route_back_ids: + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops_back.append(f"{node.short_name}") + else: + hops_back.append(f"{node_id_str}") + back_str = " -> ".join(hops_back) + response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + + for requester_id in requesters: + logging.info(f"Sending traceroute result to {requester_id}: {response_out}") + self.interface.sendText(response_out, destinationId=requester_id) + if response_in: + time.sleep(1) + self.interface.sendText(response_in, destinationId=requester_id) + except Exception as e: + logging.error(f"Error in on_traceroute callback: {e}", exc_info=True) def on_receive(self, packet: MeshPacket, interface): from_id = packet.get('fromId') diff --git a/src/tcp_interface.py b/src/tcp_interface.py index 95790b7..7e4a2c8 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -66,13 +66,16 @@ def onResponseTraceRoute(self, packet): """ Callback for when a traceroute response is received. """ - # In newer versions of the library, the route is part of the packet['decoded']['routing'] route_discovery = None if isinstance(packet, dict): - route_discovery = packet.get('decoded', {}).get('routing') + decoded = packet.get('decoded', {}) + # It might be in 'routing' or 'routing_app' depending on library version/packet type + route_discovery = decoded.get('routing') elif hasattr(packet, 'decoded'): route_discovery = getattr(packet.decoded, 'routing', None) + logging.debug(f"onResponseTraceRoute: Extracted route_discovery: {route_discovery}") + super().onResponseTraceRoute(packet) pub.sendMessage("meshtastic.traceroute", packet=packet, route=route_discovery) From cd091da52b5a835e3d56a76a1d3da9314d4a79fc Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 16:37:12 +0000 Subject: [PATCH 28/93] Add processing reaction and improve timeout responsiveness --- src/commands/tr.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/commands/tr.py b/src/commands/tr.py index d3b81bd..4f71b44 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -14,6 +14,9 @@ def handle_packet(self, packet: MeshPacket) -> None: message = packet['decoded']['text'] words = message.split() + # Add a reaction to show we are working on it + self.bot.interface.sendReaction("⌛", messageId=packet['id'], destinationId=packet['fromId']) + requester_id = packet['fromId'] requester = self.bot.node_db.get_by_id(requester_id) requester_name = requester.long_name if requester else requester_id @@ -74,7 +77,12 @@ def check_timeout(): logging.info(f"Traceroute to {target_id} (requested by {requester_id}) timed out.") timeout_msg = f"Traceroute to {target_long_name} ({target_id}) timed out (no response from mesh)." - self.message_in_dm(requester_id, timeout_msg) + + # Send the timeout message in a separate thread to avoid blocking the timer/interface + def send_timeout(): + self.message_in_dm(requester_id, timeout_msg) + + threading.Thread(target=send_timeout, daemon=True).start() threading.Thread(target=check_timeout, daemon=True).start() From 0f9830cbe0d17702f85473089b99aefc66101fb5 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 17:19:59 +0000 Subject: [PATCH 29/93] Improve traceroute data extraction and diagnostic logging --- src/bot.py | 3 ++- src/tcp_interface.py | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/bot.py b/src/bot.py index 3260b24..54aa962 100644 --- a/src/bot.py +++ b/src/bot.py @@ -225,7 +225,8 @@ def on_traceroute(self, packet, route): requesters = [requesters] if route is None: - logging.warning(f"Traceroute response from {target_id} contained no route data.") + decoded_keys = packet.get('decoded', {}).keys() + logging.warning(f"Traceroute response from {target_id} contained no route data. Decoded keys: {list(decoded_keys)}") for requester_id in requesters: self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) return diff --git a/src/tcp_interface.py b/src/tcp_interface.py index 7e4a2c8..b02fad1 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -69,10 +69,14 @@ def onResponseTraceRoute(self, packet): route_discovery = None if isinstance(packet, dict): decoded = packet.get('decoded', {}) - # It might be in 'routing' or 'routing_app' depending on library version/packet type - route_discovery = decoded.get('routing') + # Try multiple common locations for the route data + route_discovery = decoded.get('routing') or decoded.get('routing_app') + + if not route_discovery and 'payload' in decoded: + # Some versions might not have parsed the payload yet + logging.debug(f"onResponseTraceRoute: Route not found in decoded, full packet: {packet}") elif hasattr(packet, 'decoded'): - route_discovery = getattr(packet.decoded, 'routing', None) + route_discovery = getattr(packet.decoded, 'routing', getattr(packet.decoded, 'routing_app', None)) logging.debug(f"onResponseTraceRoute: Extracted route_discovery: {route_discovery}") From 9b5c768492ca8790286a1f250d992a2066f7605b Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 17:27:47 +0000 Subject: [PATCH 30/93] Fix traceroute data extraction by checking 'traceroute' key --- src/tcp_interface.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tcp_interface.py b/src/tcp_interface.py index b02fad1..bcb8250 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -70,13 +70,16 @@ def onResponseTraceRoute(self, packet): if isinstance(packet, dict): decoded = packet.get('decoded', {}) # Try multiple common locations for the route data - route_discovery = decoded.get('routing') or decoded.get('routing_app') + # It might be in 'routing', 'routing_app', or 'traceroute' + route_discovery = decoded.get('routing') or decoded.get('routing_app') or decoded.get('traceroute') if not route_discovery and 'payload' in decoded: # Some versions might not have parsed the payload yet logging.debug(f"onResponseTraceRoute: Route not found in decoded, full packet: {packet}") elif hasattr(packet, 'decoded'): - route_discovery = getattr(packet.decoded, 'routing', getattr(packet.decoded, 'routing_app', None)) + route_discovery = getattr(packet.decoded, 'routing', + getattr(packet.decoded, 'routing_app', + getattr(packet.decoded, 'traceroute', None))) logging.debug(f"onResponseTraceRoute: Extracted route_discovery: {route_discovery}") From c5443cb72ba94a0c959334420b9c8f3c4189dda2 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 18:19:39 +0000 Subject: [PATCH 31/93] Further improvements to traceroute stability and diagnostics --- src/bot.py | 17 +++++++++++------ src/tcp_interface.py | 40 ++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/bot.py b/src/bot.py index 54aa962..cacf1f7 100644 --- a/src/bot.py +++ b/src/bot.py @@ -214,10 +214,10 @@ def on_traceroute(self, packet, route): """Callback for when a traceroute response is received.""" try: target_id = packet.get('fromId') - logging.debug(f"on_traceroute: Received response from {target_id}. Route data: {route}") + logging.info(f"on_traceroute: Processing response from {target_id}. Route data type: {type(route)}") if target_id not in self.pending_traces: - logging.debug(f"Received traceroute from {target_id} but no pending request found.") + logging.info(f"Received traceroute from {target_id} but no pending request found.") return requesters = self.pending_traces.pop(target_id) @@ -231,8 +231,13 @@ def on_traceroute(self, packet, route): self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) return + def get_route_hops(r, key='route'): + if isinstance(r, dict): + return r.get(key, []) + return getattr(r, key, []) + # Format the OUTBOUND route - route_ids = getattr(route, 'route', []) + route_ids = get_route_hops(route, 'route') hops = [] for node_id_int in route_ids: # Convert int to !hex string @@ -248,7 +253,7 @@ def on_traceroute(self, packet, route): # Format the INBOUND route (if available) response_in = None - route_back_ids = getattr(route, 'route_back', []) + route_back_ids = get_route_hops(route, 'route_back') if route_back_ids: hops_back = [] for node_id_int in route_back_ids: @@ -272,8 +277,8 @@ def on_traceroute(self, packet, route): def on_receive(self, packet: MeshPacket, interface): from_id = packet.get('fromId') - portnum = packet['decoded']['portnum'] if 'decoded' in packet else 'unknown' - logging.debug(f"on_receive: Incoming packet from {from_id} (Port: {portnum})") + portnum = packet.get('decoded', {}).get('portnum', 'unknown') + logging.info(f"on_receive: Incoming packet from {from_id} (Port: {portnum})") if from_id == '!69828b98': logging.debug(f"Received ANY packet from mte4: {packet}") diff --git a/src/tcp_interface.py b/src/tcp_interface.py index bcb8250..1c147ba 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -66,25 +66,29 @@ def onResponseTraceRoute(self, packet): """ Callback for when a traceroute response is received. """ - route_discovery = None - if isinstance(packet, dict): - decoded = packet.get('decoded', {}) - # Try multiple common locations for the route data - # It might be in 'routing', 'routing_app', or 'traceroute' - route_discovery = decoded.get('routing') or decoded.get('routing_app') or decoded.get('traceroute') + try: + route_discovery = None + if isinstance(packet, dict): + decoded = packet.get('decoded', {}) + # It might be in 'routing', 'routing_app', or 'traceroute' + route_discovery = decoded.get('routing') or decoded.get('routing_app') or decoded.get('traceroute') + + if not route_discovery and 'payload' in decoded: + logging.debug(f"onResponseTraceRoute: Route not found in decoded, full packet: {packet}") + elif hasattr(packet, 'decoded'): + route_discovery = getattr(packet.decoded, 'routing', + getattr(packet.decoded, 'routing_app', + getattr(packet.decoded, 'traceroute', None))) + + logging.info(f"onResponseTraceRoute: Received traceroute response. Route data present: {route_discovery is not None}") + + # Always call super to allow library internal processing (printing to stdout etc) + super().onResponseTraceRoute(packet) - if not route_discovery and 'payload' in decoded: - # Some versions might not have parsed the payload yet - logging.debug(f"onResponseTraceRoute: Route not found in decoded, full packet: {packet}") - elif hasattr(packet, 'decoded'): - route_discovery = getattr(packet.decoded, 'routing', - getattr(packet.decoded, 'routing_app', - getattr(packet.decoded, 'traceroute', None))) - - logging.debug(f"onResponseTraceRoute: Extracted route_discovery: {route_discovery}") - - super().onResponseTraceRoute(packet) - pub.sendMessage("meshtastic.traceroute", packet=packet, route=route_discovery) + # Notify bot logic + pub.sendMessage("meshtastic.traceroute", packet=packet, route=route_discovery) + except Exception as e: + logging.error(f"Error in onResponseTraceRoute: {e}", exc_info=True) def sendHeartbeat(self): try: From ffe63b0ee2d703c121fcb856edd1ee4ed0942dd4 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 18:22:00 +0000 Subject: [PATCH 32/93] Offload blocking command and traceroute processing to threads to prevent reader thread hang --- src/bot.py | 154 +++++++++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 69 deletions(-) diff --git a/src/bot.py b/src/bot.py index cacf1f7..e9c8dfe 100644 --- a/src/bot.py +++ b/src/bot.py @@ -151,10 +151,16 @@ def handle_private_message(self, packet: MeshPacket): command_instance = CommandFactory.create_command(command_name, self) if command_instance: self.command_logger.log_command(from_id, command_instance, message) - try: - command_instance.handle_packet(packet) - except Exception as e: - logging.error(f"Error handling message: {e}") + + def run_command(): + try: + logging.info(f"Running command {command_name} in thread for {from_id}") + command_instance.handle_packet(packet) + logging.info(f"Finished command {command_name} for {from_id}") + except Exception as e: + logging.error(f"Error handling private command {command_name}: {e}", exc_info=True) + + threading.Thread(target=run_command, daemon=True).start() else: self.command_logger.log_unknown_request(from_id, message) @@ -191,12 +197,17 @@ def handle_public_message(self, packet: MeshPacket): from src.commands.factory import CommandFactory command_instance = CommandFactory.create_command(command_name, self) if command_instance: - try: - # Commands by default reply via DM (reply_in_dm). - command_instance.handle_packet(packet) - return # Stop processing responders - except Exception as e: - logging.error(f"Error handling public command {command_name}: {e}") + def run_command(): + try: + logging.info(f"Running public command {command_name} in thread for {from_id}") + # Commands by default reply via DM (reply_in_dm). + command_instance.handle_packet(packet) + logging.info(f"Finished public command {command_name} for {from_id}") + except Exception as e: + logging.error(f"Error handling public command {command_name}: {e}", exc_info=True) + + threading.Thread(target=run_command, daemon=True).start() + return # Stop processing responders responder = ResponderFactory.match_responder(message, self) if responder: @@ -212,68 +223,73 @@ def handle_public_message(self, packet: MeshPacket): def on_traceroute(self, packet, route): """Callback for when a traceroute response is received.""" - try: - target_id = packet.get('fromId') - logging.info(f"on_traceroute: Processing response from {target_id}. Route data type: {type(route)}") - - if target_id not in self.pending_traces: - logging.info(f"Received traceroute from {target_id} but no pending request found.") - return + def process_traceroute(): + try: + target_id = packet.get('fromId') + logging.info(f"on_traceroute: Processing response from {target_id} in thread. Route data type: {type(route)}") + + if target_id not in self.pending_traces: + logging.info(f"Received traceroute from {target_id} but no pending request found.") + return + + requesters = self.pending_traces.pop(target_id) + if not isinstance(requesters, list): + requesters = [requesters] + + if route is None: + decoded_keys = packet.get('decoded', {}).keys() + logging.warning(f"Traceroute response from {target_id} contained no route data. Decoded keys: {list(decoded_keys)}") + for requester_id in requesters: + self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) + return + + def get_route_hops(r, key='route'): + if isinstance(r, dict): + return r.get(key, []) + return getattr(r, key, []) + + # Format the OUTBOUND route + route_ids = get_route_hops(route, 'route') + hops = [] + for node_id_int in route_ids: + # Convert int to !hex string + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops.append(f"{node.short_name}") + else: + hops.append(f"{node_id_str}") + + route_str = " -> ".join(hops) if hops else "Direct (or unknown)" + response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" + + # Format the INBOUND route (if available) + response_in = None + route_back_ids = get_route_hops(route, 'route_back') + if route_back_ids: + hops_back = [] + for node_id_int in route_back_ids: + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops_back.append(f"{node.short_name}") + else: + hops_back.append(f"{node_id_str}") + back_str = " -> ".join(hops_back) + response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" - requesters = self.pending_traces.pop(target_id) - if not isinstance(requesters, list): - requesters = [requesters] - - if route is None: - decoded_keys = packet.get('decoded', {}).keys() - logging.warning(f"Traceroute response from {target_id} contained no route data. Decoded keys: {list(decoded_keys)}") for requester_id in requesters: - self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) - return + logging.info(f"Sending traceroute result to {requester_id}: {response_out}") + self.interface.sendText(response_out, destinationId=requester_id) + if response_in: + time.sleep(1) + self.interface.sendText(response_in, destinationId=requester_id) + + logging.info(f"Finished processing traceroute for {target_id}") + except Exception as e: + logging.error(f"Error in on_traceroute thread: {e}", exc_info=True) - def get_route_hops(r, key='route'): - if isinstance(r, dict): - return r.get(key, []) - return getattr(r, key, []) - - # Format the OUTBOUND route - route_ids = get_route_hops(route, 'route') - hops = [] - for node_id_int in route_ids: - # Convert int to !hex string - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops.append(f"{node.short_name}") - else: - hops.append(f"{node_id_str}") - - route_str = " -> ".join(hops) if hops else "Direct (or unknown)" - response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" - - # Format the INBOUND route (if available) - response_in = None - route_back_ids = get_route_hops(route, 'route_back') - if route_back_ids: - hops_back = [] - for node_id_int in route_back_ids: - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops_back.append(f"{node.short_name}") - else: - hops_back.append(f"{node_id_str}") - back_str = " -> ".join(hops_back) - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" - - for requester_id in requesters: - logging.info(f"Sending traceroute result to {requester_id}: {response_out}") - self.interface.sendText(response_out, destinationId=requester_id) - if response_in: - time.sleep(1) - self.interface.sendText(response_in, destinationId=requester_id) - except Exception as e: - logging.error(f"Error in on_traceroute callback: {e}", exc_info=True) + threading.Thread(target=process_traceroute, daemon=True).start() def on_receive(self, packet: MeshPacket, interface): from_id = packet.get('fromId') From 0768928a45d43d580aecb35a14968ab94912064a Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 18:24:12 +0000 Subject: [PATCH 33/93] Small cleanup of redundant import in bot.py --- src/bot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bot.py b/src/bot.py index e9c8dfe..11ef8a2 100644 --- a/src/bot.py +++ b/src/bot.py @@ -194,7 +194,6 @@ def handle_public_message(self, packet: MeshPacket): env_var_name = f"ENABLE_COMMAND_{command_name.lstrip('!').upper()}" if get_env_bool(env_var_name, True): logging.info(f"Received public {command_name} from {sender_name}") - from src.commands.factory import CommandFactory command_instance = CommandFactory.create_command(command_name, self) if command_instance: def run_command(): From 8496cc8d0954faaafbe58bc034cd1bfc03715406 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 18:26:48 +0000 Subject: [PATCH 34/93] Cumulative stability fixes: formatting, robust ID detection, and better logging --- src/bot.py | 7 +++++-- src/commands/tr.py | 4 ++-- src/tcp_interface.py | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/bot.py b/src/bot.py index 11ef8a2..aed2e3d 100644 --- a/src/bot.py +++ b/src/bot.py @@ -114,10 +114,10 @@ def disconnect(self): def on_connection(self, interface, topic=pub.AUTO_TOPIC): self.my_nodenum = interface.localNode.nodeNum # in dec - self.my_id = f"!{hex(self.my_nodenum)[2:]}" + self.my_id = f"!{self.my_nodenum:08x}" self.init_complete = True - logging.info('Connected to Meshtastic node') + logging.info(f'Connected to Meshtastic node as {self.my_id}') self.print_nodes() # Send an immediate node count report upon connection @@ -292,6 +292,9 @@ def get_route_hops(r, key='route'): def on_receive(self, packet: MeshPacket, interface): from_id = packet.get('fromId') + if from_id is None and 'from' in packet: + from_id = f"!{packet['from']:08x}" + portnum = packet.get('decoded', {}).get('portnum', 'unknown') logging.info(f"on_receive: Incoming packet from {from_id} (Port: {portnum})") diff --git a/src/commands/tr.py b/src/commands/tr.py index 4f71b44..14d5d0d 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -65,9 +65,9 @@ def handle_packet(self, packet: MeshPacket) -> None: if requester_id not in self.bot.pending_traces[target_id]: self.bot.pending_traces[target_id].append(requester_id) - # Start a timeout timer (90 seconds) + # Start a timeout timer (120 seconds) def check_timeout(): - time.sleep(90) + time.sleep(120) if target_id in self.bot.pending_traces and requester_id in self.bot.pending_traces[target_id]: # Remove this specific requester from the pending list self.bot.pending_traces[target_id].remove(requester_id) diff --git a/src/tcp_interface.py b/src/tcp_interface.py index 1c147ba..192ff28 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -108,7 +108,7 @@ def _sendPacket( pkiEncrypted: Optional[bool] = False, publicKey: Optional[bytes] = None, ): - logging.debug(f"Sending packet to {destinationId} (Payload: {meshPacket.decoded.payload})") + logging.info(f"Sending packet to {destinationId} (Port: {meshPacket.decoded.portnum})") try: super()._sendPacket( meshPacket=meshPacket, From f7939e0024240d0e4743e6f0e43627e4c1f67734 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 19 Feb 2026 18:30:27 +0000 Subject: [PATCH 35/93] FIX: Restore missing methods in bot.py --- src/bot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bot.py b/src/bot.py index aed2e3d..8a9e4fe 100644 --- a/src/bot.py +++ b/src/bot.py @@ -340,7 +340,7 @@ def on_receive(self, packet: MeshPacket, interface): def on_node_updated(self, node, interface): if interface.localNode and self.my_nodenum is None: self.my_nodenum = interface.localNode.nodeNum - self.my_id = f"!{hex(self.my_nodenum)[2:]}" + self.my_id = f"!{self.my_nodenum:08x}" # Check if the node is a new user if node['user'] is not None: From 9fd858cdecfa14595165dbbf2a4dee8fa60eae5a Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 20 Feb 2026 13:08:45 +0000 Subject: [PATCH 36/93] Reduce congestion and improve traceroute result delivery by setting wantAck=False --- src/bot.py | 7 ++++--- src/tcp_interface.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/bot.py b/src/bot.py index 8a9e4fe..03457fb 100644 --- a/src/bot.py +++ b/src/bot.py @@ -279,10 +279,11 @@ def get_route_hops(r, key='route'): for requester_id in requesters: logging.info(f"Sending traceroute result to {requester_id}: {response_out}") - self.interface.sendText(response_out, destinationId=requester_id) + # Use wantAck=False for result delivery to reduce congestion + self.interface.sendText(response_out, destinationId=requester_id, wantAck=False) if response_in: - time.sleep(1) - self.interface.sendText(response_in, destinationId=requester_id) + time.sleep(2) # Increased delay for radio to settle + self.interface.sendText(response_in, destinationId=requester_id, wantAck=False) logging.info(f"Finished processing traceroute for {target_id}") except Exception as e: diff --git a/src/tcp_interface.py b/src/tcp_interface.py index 192ff28..8601627 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -108,7 +108,8 @@ def _sendPacket( pkiEncrypted: Optional[bool] = False, publicKey: Optional[bytes] = None, ): - logging.info(f"Sending packet to {destinationId} (Port: {meshPacket.decoded.portnum})") + port_val = meshPacket.decoded.portnum + logging.info(f"_sendPacket: Attempting to send Port {port_val} to {destinationId} (wantAck={wantAck})") try: super()._sendPacket( meshPacket=meshPacket, @@ -118,11 +119,15 @@ def _sendPacket( pkiEncrypted=pkiEncrypted, publicKey=publicKey ) + logging.info(f"_sendPacket: Successfully handed Port {port_val} to {destinationId} to meshtastic library") except (OSError, BrokenPipeError) as e: - logging.error(f"sendPacket failed: {e}") + logging.error(f"_sendPacket failed (connection error): {e}") self.packet_queue.put((meshPacket, destinationId, wantAck, hopLimit, pkiEncrypted, publicKey)) - # self._reconnect_with_backoff() self._shutdown_and_call_error_handler(e) + except Exception as e: + logging.error(f"_sendPacket failed (unexpected error): {e}", exc_info=True) + # We still queue it just in case it's recoverable + self.packet_queue.put((meshPacket, destinationId, wantAck, hopLimit, pkiEncrypted, publicKey)) def _shutdown_and_call_error_handler(self, conn_error: Optional[Exception] = None): try: From f4ddada771fd26908eb0309ee51f3b8b6de0245f Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 20 Feb 2026 13:11:47 +0000 Subject: [PATCH 37/93] Silence watchtower errors and increase radio settle time for traceroute results --- docker-compose.yaml | 4 +++- src/bot.py | 13 ++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 239a91e..91b1ca6 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -25,6 +25,8 @@ services: - ENABLE_COMMAND_PREFS=${ENABLE_COMMAND_PREFS:-true} - ENABLE_COMMAND_ADMIN=${ENABLE_COMMAND_ADMIN:-true} - ENABLE_COMMAND_STATUS=${ENABLE_COMMAND_STATUS:-true} + labels: + - "com.centurylinklabs.watchtower.enable=false" watchtower: image: containrrr/watchtower @@ -34,4 +36,4 @@ services: - /var/run/docker.sock:/var/run/docker.sock environment: - DOCKER_API_VERSION=1.44 - command: --interval 3600 meshtastic-bot # Check for updates every hour + command: --interval 3600 --label-enable diff --git a/src/bot.py b/src/bot.py index 03457fb..7da5111 100644 --- a/src/bot.py +++ b/src/bot.py @@ -265,18 +265,13 @@ def get_route_hops(r, key='route'): # Format the INBOUND route (if available) response_in = None route_back_ids = get_route_hops(route, 'route_back') - if route_back_ids: - hops_back = [] - for node_id_int in route_back_ids: - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops_back.append(f"{node.short_name}") - else: - hops_back.append(f"{node_id_str}") + if response_in: back_str = " -> ".join(hops_back) response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + # Wait for radio to settle after receiving the traceroute response + time.sleep(5) + for requester_id in requesters: logging.info(f"Sending traceroute result to {requester_id}: {response_out}") # Use wantAck=False for result delivery to reduce congestion From 231513b49b0b9c83caa8504dfe925294ff8200c4 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 20 Feb 2026 13:19:57 +0000 Subject: [PATCH 38/93] Append target/local node to traceroute output for full path visibility --- src/bot.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/bot.py b/src/bot.py index 7da5111..156cd73 100644 --- a/src/bot.py +++ b/src/bot.py @@ -259,17 +259,27 @@ def get_route_hops(r, key='route'): else: hops.append(f"{node_id_str}") - route_str = " -> ".join(hops) if hops else "Direct (or unknown)" - response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" - - # Format the INBOUND route (if available) + route_str = " -> ".join(hops) if hops else "Direct (or unknown)" + + # Append target to the end of the TO route + target_node = self.node_db.get_by_id(target_id) + target_name = target_node.short_name if target_node else target_id + route_str += f" -> {target_name}" + + response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" + # Format the INBOUND route (if available) response_in = None route_back_ids = get_route_hops(route, 'route_back') if response_in: - back_str = " -> ".join(hops_back) - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" - - # Wait for radio to settle after receiving the traceroute response + back_str = " -> ".join(hops_back) + + # Append bot to the end of the FROM route + my_node = self.node_db.get_by_id(self.my_id) + my_name = my_node.short_name if my_node else self.my_id + back_str += f" -> {my_name}" + + response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + # Wait for radio to settle after receiving the traceroute response time.sleep(5) for requester_id in requesters: From 0186cb4cb117aeb0d2a14a809648f0acb49dc873 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 20 Feb 2026 13:22:13 +0000 Subject: [PATCH 39/93] FIX: Correct indentation in bot.py and ensure full traceroute path visibility --- src/bot.py | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/src/bot.py b/src/bot.py index 156cd73..1a035ed 100644 --- a/src/bot.py +++ b/src/bot.py @@ -251,7 +251,6 @@ def get_route_hops(r, key='route'): route_ids = get_route_hops(route, 'route') hops = [] for node_id_int in route_ids: - # Convert int to !hex string node_id_str = f"!{node_id_int:08x}" node = self.node_db.get_by_id(node_id_str) if node: @@ -259,27 +258,37 @@ def get_route_hops(r, key='route'): else: hops.append(f"{node_id_str}") - route_str = " -> ".join(hops) if hops else "Direct (or unknown)" - - # Append target to the end of the TO route - target_node = self.node_db.get_by_id(target_id) - target_name = target_node.short_name if target_node else target_id - route_str += f" -> {target_name}" - - response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" - # Format the INBOUND route (if available) + route_str = " -> ".join(hops) if hops else "Direct (or unknown)" + + # Append target to the end of the TO route + target_node = self.node_db.get_by_id(target_id) + target_name = target_node.short_name if target_node else target_id + route_str += f" -> {target_name}" + + response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" + + # Format the INBOUND route (if available) response_in = None route_back_ids = get_route_hops(route, 'route_back') - if response_in: - back_str = " -> ".join(hops_back) - - # Append bot to the end of the FROM route - my_node = self.node_db.get_by_id(self.my_id) - my_name = my_node.short_name if my_node else self.my_id - back_str += f" -> {my_name}" - - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" - # Wait for radio to settle after receiving the traceroute response + if route_back_ids: + hops_back = [] + for node_id_int in route_back_ids: + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops_back.append(f"{node.short_name}") + else: + hops_back.append(f"{node_id_str}") + back_str = " -> ".join(hops_back) + + # Append bot to the end of the FROM route + my_node = self.node_db.get_by_id(self.my_id) + my_name = my_node.short_name if my_node else self.my_id + back_str += f" -> {my_name}" + + response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + + # Wait for radio to settle after receiving the traceroute response time.sleep(5) for requester_id in requesters: From ccb07ab5c937e2465c8524c77b16c201f8fbd4a6 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 20 Feb 2026 13:36:34 +0000 Subject: [PATCH 40/93] Update README to document targeted traceroute feature --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 076057d..9ca1709 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ The bot listens for messages and responds to commands. You can interact with it | `!nodes totals` | Manually triggers a node count report | | `!whoami` | Displays information about the sender | | `!tr` | Performs a traceroute to the sender (outbound & inbound) | +| `!tr ` | Performs a traceroute to a specific node by its short name | | `!status` | Displays bot status and radio connection details | ## Features @@ -153,9 +154,11 @@ The bot now includes a built-in TCP proxy to manage the connection to the Meshta Messages received on named Group Channels (e.g., 'LongRange', 'PrivateChat') are now logged with their specific channel name, making it easier to track conversations across different mesh networks. ### Advanced Traceroute -The `!tr` command has been upgraded to show the full path: -- **Outbound:** The route from the bot to your node. -- **Inbound:** The route back from your node to the bot (if available). +The `!tr` command provides visibility into the mesh topology: +- **Full Path visibility:** Shows the complete route including the target node. +- **Targeted Trace:** Use `!tr ` (e.g., `!tr NUMC`) to trace the route to a specific node. The results will be sent back to you. +- **Outbound:** The route from the bot to the destination. +- **Inbound:** The route back from the destination to the bot (if available). --- From 6d10ea711fc1221cea92f548c192f3327cb8ac73 Mon Sep 17 00:00:00 2001 From: Mitchcom <160724610+Mitchcom@users.noreply.github.com> Date: Fri, 20 Feb 2026 13:38:02 +0000 Subject: [PATCH 41/93] Update traceroute command description in README Clarified traceroute command description for management node. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ca1709..771f072 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ The bot listens for messages and responds to commands. You can interact with it | `!nodes totals` | Manually triggers a node count report | | `!whoami` | Displays information about the sender | | `!tr` | Performs a traceroute to the sender (outbound & inbound) | -| `!tr ` | Performs a traceroute to a specific node by its short name | +| `!tr ` | Performs a traceroute to a specific node by its short name from management node | | `!status` | Displays bot status and radio connection details | ## Features From 68770f096c11c030c8cfb57e66fbd20a0e8bb9b1 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 20 Feb 2026 13:48:23 +0000 Subject: [PATCH 42/93] Ensure traceroute always sends two messages (TO and FROM) even for direct routes --- src/bot.py | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/bot.py b/src/bot.py index 1a035ed..1dcd2eb 100644 --- a/src/bot.py +++ b/src/bot.py @@ -247,10 +247,11 @@ def get_route_hops(r, key='route'): return r.get(key, []) return getattr(r, key, []) - # Format the OUTBOUND route + # 1. Format the OUTBOUND route (TO target) route_ids = get_route_hops(route, 'route') hops = [] for node_id_int in route_ids: + # Convert int to !hex string node_id_str = f"!{node_id_int:08x}" node = self.node_db.get_by_id(node_id_str) if node: @@ -258,7 +259,7 @@ def get_route_hops(r, key='route'): else: hops.append(f"{node_id_str}") - route_str = " -> ".join(hops) if hops else "Direct (or unknown)" + route_str = " -> ".join(hops) if hops else "Direct" # Append target to the end of the TO route target_node = self.node_db.get_by_id(target_id) @@ -267,26 +268,25 @@ def get_route_hops(r, key='route'): response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" - # Format the INBOUND route (if available) - response_in = None + # 2. Format the INBOUND route (FROM target) route_back_ids = get_route_hops(route, 'route_back') - if route_back_ids: - hops_back = [] - for node_id_int in route_back_ids: - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops_back.append(f"{node.short_name}") - else: - hops_back.append(f"{node_id_str}") - back_str = " -> ".join(hops_back) - - # Append bot to the end of the FROM route - my_node = self.node_db.get_by_id(self.my_id) - my_name = my_node.short_name if my_node else self.my_id - back_str += f" -> {my_name}" - - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + hops_back = [] + for node_id_int in route_back_ids: + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops_back.append(f"{node.short_name}") + else: + hops_back.append(f"{node_id_str}") + + back_str = " -> ".join(hops_back) if hops_back else "Direct" + + # Append bot to the end of the FROM route + my_node = self.node_db.get_by_id(self.my_id) + my_name = my_node.short_name if my_node else self.my_id + back_str += f" -> {my_name}" + + response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" # Wait for radio to settle after receiving the traceroute response time.sleep(5) @@ -295,9 +295,11 @@ def get_route_hops(r, key='route'): logging.info(f"Sending traceroute result to {requester_id}: {response_out}") # Use wantAck=False for result delivery to reduce congestion self.interface.sendText(response_out, destinationId=requester_id, wantAck=False) - if response_in: - time.sleep(2) # Increased delay for radio to settle - self.interface.sendText(response_in, destinationId=requester_id, wantAck=False) + + # Always send the return path message for consistency + time.sleep(2) + logging.info(f"Sending traceroute result to {requester_id}: {response_in}") + self.interface.sendText(response_in, destinationId=requester_id, wantAck=False) logging.info(f"Finished processing traceroute for {target_id}") except Exception as e: From 141a0eb2224bd35a3f78d293196cd01d55b4d6c2 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 20 Feb 2026 13:57:27 +0000 Subject: [PATCH 43/93] Update README: add (outbound & inbound) to targeted traceroute description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 771f072..4e3e424 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ The bot listens for messages and responds to commands. You can interact with it | `!nodes totals` | Manually triggers a node count report | | `!whoami` | Displays information about the sender | | `!tr` | Performs a traceroute to the sender (outbound & inbound) | -| `!tr ` | Performs a traceroute to a specific node by its short name from management node | +| `!tr ` | Performs a traceroute to a specific node by its short name from management node (outbound & inbound) | | `!status` | Displays bot status and radio connection details | ## Features From 6d1861141a6bfd7e210f4d7ebb9704a44c9ce9ce Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 21 Feb 2026 09:32:20 +0000 Subject: [PATCH 44/93] feat: implement !nodes totals subcommand and fix tests --- src/commands/nodes.py | 5 +++++ test/commands/test_nodes.py | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/commands/nodes.py b/src/commands/nodes.py index 0b14120..ffa2571 100644 --- a/src/commands/nodes.py +++ b/src/commands/nodes.py @@ -14,6 +14,7 @@ class NodesCommand(AbstractCommandWithSubcommands): def __init__(self, bot: MeshtasticBot): super().__init__(bot, 'nodes') self.sub_commands['busy'] = self.handle_busy + self.sub_commands['totals'] = self.handle_totals def get_busy_nodes(self) -> list[MeshNode.User]: return sorted(self.bot.node_db.list_nodes(), @@ -37,6 +38,9 @@ def handle_base_command(self, packet: MeshPacket, args: list[str]) -> None: self.reply(packet, response) + def handle_totals(self, packet: MeshPacket, args: list[str]) -> None: + self.bot.report_node_count(destination=packet['fromId']) + def handle_busy(self, packet: MeshPacket, args: list[str]) -> None: sender = packet['fromId'] @@ -96,6 +100,7 @@ def send_detailed_nodeinfo(self, sender: str, node_id: str): def show_help(self, packet: MeshPacket, args: list[str]) -> None: help_text = "!nodes: details about nodes this device has seen\n" + help_text += "!nodes totals: report the current node count\n" help_text += "!nodes busy: summary of busiest nodes\n" help_text += "!nodes busy detailed: detailed info about busiest nodes\n" self.reply(packet, help_text) diff --git a/test/commands/test_nodes.py b/test/commands/test_nodes.py index f5823d0..43e12c9 100644 --- a/test/commands/test_nodes.py +++ b/test/commands/test_nodes.py @@ -11,6 +11,7 @@ class TestNodesCommand(CommandWSCTestCase): def setUp(self): super().setUp() + self.bot.init_complete = True self.command = NodesCommand(self.bot) self.online_count = len(self.bot.node_info.get_online_nodes()) @@ -32,7 +33,7 @@ def test_handle_base_command(self): friendly_time = pretty_print_last_heard(last_heard) expected_response += f"- {node.user.short_name} ({friendly_time})\n" - self.assert_message_sent(expected_response, self.test_nodes[1]) + self.assert_message_sent(expected_response, self.test_nodes[1], want_ack=True) def test_handle_busy_command(self): packet = build_test_text_packet('!nodes busy', self.test_nodes[1].user.id, self.bot.my_id) @@ -52,7 +53,7 @@ def test_handle_busy_command(self): expected_response += f"(last reset at {last_reset_time})" - self.assert_message_sent(expected_response, self.test_nodes[1]) + self.assert_message_sent(expected_response, self.test_nodes[1], want_ack=True) def test_handle_busy_detailed_command(self): packet = build_test_text_packet('!nodes busy detailed', self.test_nodes[1].user.id, self.bot.my_id) @@ -81,7 +82,19 @@ def test_handle_busy_specific_node(self): for packet_type, count in sorted_breakdown: expected_response += f"- {packet_type}: {count}\n" - self.assert_message_sent(expected_response, self.test_nodes[1]) + self.assert_message_sent(expected_response, self.test_nodes[1], want_ack=True) + + def test_handle_totals_command(self): + packet = build_test_text_packet('!nodes totals', self.test_nodes[1].user.id, self.bot.my_id) + self.command.handle_packet(packet) + + # report_node_count is called on the bot + # In this test environment, we expect it to send a message via the interface + online_nodes = self.bot.node_info.get_online_nodes() + count = len(online_nodes) + expected_message = f"MTEK has a node count of {count}" + + self.assert_message_sent(expected_message, self.test_nodes[1], want_ack=True) if __name__ == '__main__': From f73467c9e3a7f89886a793c6a612886e58bb356c Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 21 Feb 2026 09:48:38 +0000 Subject: [PATCH 45/93] perf: optimize node sync to reduce redundant logs and storage writes --- src/bot.py | 33 ++++++++++++++++++++------------- src/data_classes.py | 10 ++++++++++ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/bot.py b/src/bot.py index 1dcd2eb..6bf1165 100644 --- a/src/bot.py +++ b/src/bot.py @@ -372,19 +372,26 @@ def on_node_updated(self, node, interface): if not existing_last_heard or last_heard > existing_last_heard: self.node_info.update_last_heard(mesh_node.user.id, last_heard) - self.node_db.store_node(mesh_node) - - for storage_api in self.storage_apis: - try: - storage_api.store_node(mesh_node) - except HTTPError as ex: - logging.warning(f"Error storing node: {ex.response.text}") - pass - except Exception as ex: - logging.warning(f"Error storing node: {ex}") - pass - - if self.init_complete: + # Optimization: Only store and notify if the node has actually changed + # or if it's the first time we've seen it. + existing_user = self.node_db.get_by_id(mesh_node.user.id) + is_new = existing_user is None + has_changed = is_new or existing_user != mesh_node.user + + if has_changed: + self.node_db.store_node(mesh_node) + + for storage_api in self.storage_apis: + try: + storage_api.store_node(mesh_node) + except HTTPError as ex: + logging.warning(f"Error storing node: {ex.response.text}") + pass + except Exception as ex: + logging.warning(f"Error storing node: {ex}") + pass + + if self.init_complete and is_new: current_last_heard = self.node_info.get_last_heard(mesh_node.user.id) last_heard_str = pretty_print_last_heard(current_last_heard) if current_last_heard else "unknown" logging.info(f"New user: {mesh_node.user.long_name} (last heard {last_heard_str})") diff --git a/src/data_classes.py b/src/data_classes.py index 9ed28b3..4171c7e 100644 --- a/src/data_classes.py +++ b/src/data_classes.py @@ -19,6 +19,16 @@ def __init__(self, self.hw_model = hw_model self.public_key = public_key + def __eq__(self, other): + if not isinstance(other, MeshNode.User): + return False + return (self.id == other.id and + self.long_name == other.long_name and + self.short_name == other.short_name and + self.macaddr == other.macaddr and + self.hw_model == other.hw_model and + self.public_key == other.public_key) + id: str long_name: str short_name: str From 60dec9151e52e4743c880109d46a3c47a4de4054 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 21 Feb 2026 13:13:40 +0000 Subject: [PATCH 46/93] Fix TcpProxy deadlock: Implement RLock and synchronized snapshots. Add unit test. --- src/tcp_proxy.py | 42 +++++++++++++++-------------- test/test_tcp_proxy.py | 60 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 20 deletions(-) create mode 100644 test/test_tcp_proxy.py diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 52d6e89..a602afc 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -15,7 +15,7 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.target_socket = None self.clients = [] - self.clients_lock = threading.Lock() + self.lock = threading.RLock() self.running = False @@ -53,8 +53,9 @@ def get_status(self): return "Proxy: Offline" silence = time.time() - self.last_target_activity - with self.clients_lock: + with self.lock: client_count = len(self.clients) + cached_count = len(self.handshake_packets) + len(self.rolling_packets) state = "Reconnecting" if self.reconnecting else ("Online" if self.target_socket else "Offline") @@ -63,12 +64,12 @@ def get_status(self): "connected": self.target_socket is not None and not self.reconnecting, "clients": client_count, "silence_secs": int(silence), - "cached_packets": len(self.handshake_packets) + len(self.rolling_packets) + "cached_packets": cached_count } def _disconnect_all_clients(self): """Force all clients to disconnect so they can re-sync with a new radio session""" - with self.clients_lock: + with self.lock: for sock in self.clients: try: sock.close() except: pass @@ -127,16 +128,17 @@ def _process_radio_data(self, data): packet = self.in_buffer[:total_len] self.in_buffer = self.in_buffer[total_len:] - if len(self.handshake_packets) < self.handshake_max_count: - self.handshake_packets.append(packet) - self.rolling_packets.append(packet) + with self.lock: + if len(self.handshake_packets) < self.handshake_max_count: + self.handshake_packets.append(packet) + self.rolling_packets.append(packet) + targets = self.clients[:] - with self.clients_lock: - for client_sock in self.clients[:]: - try: - client_sock.sendall(packet) - except: - self._remove_client(client_sock) + for client_sock in targets: + try: + client_sock.sendall(packet) + except: + self._remove_client(client_sock) def _remove_client(self, sock): try: @@ -145,13 +147,13 @@ def _remove_client(self, sock): except: logging.info("--- PROXY: Removing unknown client") - with self.clients_lock: + with self.lock: if sock in self.clients: self.clients.remove(sock) try: sock.close() except: pass - with self.clients_lock: + with self.lock: logging.info(f"--- PROXY: Remaining clients: {len(self.clients)}") def _run(self): @@ -186,7 +188,7 @@ def _run(self): time.sleep(1.0) try: - with self.clients_lock: + with self.lock: client_socks = [s for s in self.clients if s.fileno() != -1] inputs = [self.server_socket] + client_socks @@ -201,7 +203,7 @@ def _run(self): # Heartbeat Logging if current_time - last_heartbeat_log > 60.0: - with self.clients_lock: + with self.lock: client_count = len(self.clients) client_info = [] for s in self.clients: @@ -230,9 +232,11 @@ def _run(self): client_socket, addr = self.server_socket.accept() logging.info(f"+++ PROXY: New connection accepted from {addr}") - with self.clients_lock: + with self.lock: self.clients.append(client_socket) logging.info(f"--- PROXY: Total active clients now: {len(self.clients)}") + h_snapshot = list(self.handshake_packets) + r_snapshot = list(self.rolling_packets) def replay(target_sock, handshake, history, client_addr): if client_addr[0] in ('127.0.0.1', 'localhost'): @@ -249,8 +253,6 @@ def replay(target_sock, handshake, history, client_addr): except Exception as e: self._remove_client(target_sock) - h_snapshot = list(self.handshake_packets) - r_snapshot = list(self.rolling_packets) threading.Thread(target=replay, args=(client_socket, h_snapshot, r_snapshot, addr), daemon=True).start() except Exception as e: diff --git a/test/test_tcp_proxy.py b/test/test_tcp_proxy.py new file mode 100644 index 0000000..0328200 --- /dev/null +++ b/test/test_tcp_proxy.py @@ -0,0 +1,60 @@ +import unittest +from unittest.mock import MagicMock, patch +import threading +import time +import socket +from src.tcp_proxy import TcpProxy + +class TestTcpProxy(unittest.TestCase): + def setUp(self): + self.proxy = TcpProxy("127.0.0.1", 4403, "127.0.0.1", 4404) + + def test_lock_is_rlock(self): + # threading.RLock() might be a factory function returning a platform-specific class + self.assertTrue(hasattr(self.proxy.lock, 'acquire') and hasattr(self.proxy.lock, '_count') or isinstance(self.proxy.lock, type(threading.RLock()))) + + def test_remove_client_no_deadlock(self): + # Mock a client socket + mock_client = MagicMock() + mock_client.getpeername.return_value = ("127.0.0.1", 12345) + + self.proxy.clients.append(mock_client) + + # This should not deadlock now + self.proxy._remove_client(mock_client) + + self.assertEqual(len(self.proxy.clients), 0) + mock_client.close.assert_called_once() + + def test_process_radio_data_deadlock_fix(self): + # This test simulates the exact deadlock condition: + # _process_radio_data holds the lock and calls _remove_client (via sendall failure) + # which tries to acquire the lock again. + + mock_client = MagicMock() + mock_client.getpeername.return_value = ("127.0.0.1", 12345) + # Force sendall to fail + mock_client.sendall.side_effect = Exception("Broken pipe") + + self.proxy.clients.append(mock_client) + + # Valid Meshtastic packet header \x94\xc3 + length 0001 + 1 byte data + packet_data = b'\x94\xc3\x00\x01\x00' + + # This call should not hang + self.proxy._process_radio_data(packet_data) + + # Verify client was removed + self.assertEqual(len(self.proxy.clients), 0) + mock_client.close.assert_called_once() + + def test_get_status_thread_safety(self): + # Ensure get_status can be called while holding the lock elsewhere + self.proxy.running = True + self.proxy.target_socket = MagicMock() # To make it look "Online" + with self.proxy.lock: + status = self.proxy.get_status() + self.assertEqual(status["state"], "Online") + +if __name__ == "__main__": + unittest.main() From 54c555fca89b6ec9911626e34b092b58ee09012c Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 21 Feb 2026 15:35:36 +0000 Subject: [PATCH 47/93] Fix TcpProxy socket blocking: Implement 10s timeouts for all sockets and improve error logging. --- src/tcp_proxy.py | 13 +++++++++---- test/test_tcp_proxy.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index a602afc..cdf3616 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -95,7 +95,7 @@ def _connect_to_target(self): except: pass sock.connect((self.target_host, self.target_port)) - sock.settimeout(None) # Reset to blocking for select() + sock.settimeout(10.0) # 10s timeout for all operations self.target_socket = sock self.last_target_activity = time.time() self.reconnecting = False @@ -136,8 +136,10 @@ def _process_radio_data(self, data): for client_sock in targets: try: + # logging.debug(f"Forwarding packet to {client_sock.getpeername()}") client_sock.sendall(packet) - except: + except Exception as e: + logging.debug(f"Failed to forward packet to client: {e}") self._remove_client(client_sock) def _remove_client(self, sock): @@ -230,6 +232,7 @@ def _run(self): if sock is self.server_socket: try: client_socket, addr = self.server_socket.accept() + client_socket.settimeout(10.0) # 10s timeout for client sends logging.info(f"+++ PROXY: New connection accepted from {addr}") with self.lock: @@ -287,9 +290,11 @@ def replay(target_sock, handshake, history, client_addr): time.sleep(0.01) except Exception as e: logging.error(f"Error sending to radio: {e}") - self.target_socket.close() + try: self.target_socket.close() + except: pass self.target_socket = None - except: + except Exception as e: + logging.debug(f"Error receiving from client: {e}") self._remove_client(sock) self.stop() diff --git a/test/test_tcp_proxy.py b/test/test_tcp_proxy.py index 0328200..1efd43d 100644 --- a/test/test_tcp_proxy.py +++ b/test/test_tcp_proxy.py @@ -56,5 +56,34 @@ def test_get_status_thread_safety(self): status = self.proxy.get_status() self.assertEqual(status["state"], "Online") + @patch('socket.socket') + def test_client_socket_has_timeout(self, mock_socket_class): + # Mock the server socket instance + mock_server_sock = MagicMock() + mock_socket_class.return_value = mock_server_sock + + # Mock accept() to return a mock client socket + mock_client = MagicMock() + mock_server_sock.accept.return_value = (mock_client, ("1.2.3.4", 5555)) + + # We need to mock select.select to return the server_socket as readable + with patch('select.select') as mock_select: + # First call: return server socket as readable + # Second call: flip self.running to False to exit loop + def select_side_effect(*args, **kwargs): + if self.proxy.running: + self.proxy.running = False + return ([mock_server_sock], [], []) + return ([], [], []) + + mock_select.side_effect = select_side_effect + + self.proxy.running = True + self.proxy.target_socket = MagicMock() # Avoid reconnect logic + self.proxy._run() + + # Verify timeout was set on the client socket + mock_client.settimeout.assert_called_with(10.0) + if __name__ == "__main__": unittest.main() From 84a25caade62b4643ea2adbf61f2e7fd7a3310d9 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Wed, 4 Mar 2026 13:17:02 +0000 Subject: [PATCH 48/93] Fix redundant 30 minute timeout response on !tr --- src/commands/tr.py | 25 ++++++++++++++++++------- src/tcp_interface.py | 4 ++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/commands/tr.py b/src/commands/tr.py index 14d5d0d..660deae 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -14,6 +14,14 @@ def handle_packet(self, packet: MeshPacket) -> None: message = packet['decoded']['text'] words = message.split() + is_public = packet.get('toId') == '^all' or 'channel' in packet + + def send_reply(msg): + if is_public: + self.reply_in_channel(packet, msg, want_ack=False) + else: + self.reply_in_dm(packet, msg, want_ack=False) + # Add a reaction to show we are working on it self.bot.interface.sendReaction("⌛", messageId=packet['id'], destinationId=packet['fromId']) @@ -26,7 +34,7 @@ def handle_packet(self, packet: MeshPacket) -> None: target_short = words[1] target_node = self.bot.get_node_by_short_name(target_short) if not target_node: - self.reply_in_dm(packet, f"Could not find node with short name '{target_short}'") + send_reply(f"Could not find node with short name '{target_short}'") return target_id = target_node.id target_long_name = target_node.long_name @@ -35,7 +43,7 @@ def handle_packet(self, packet: MeshPacket) -> None: target_long_name = requester_name if target_id == self.bot.my_id: - self.reply_in_dm(packet, "I am already here! No traceroute required.") + send_reply("I am already here! No traceroute required.") return # If tracing back to requester, we can show hops_away/SNR from the incoming packet @@ -47,15 +55,15 @@ def handle_packet(self, packet: MeshPacket) -> None: if hops_away == 0: response = f"{requester_name} you are Zero Hops from me. No traceroute required!" - self.reply_in_dm(packet, response) + send_reply(response) return response = f"{requester_name} you are {hops_away} hops away (Signal: {snr} dB). Starting full traceroute..." - self.reply_in_dm(packet, response) + send_reply(response) else: # Tracing to a different node response = f"Starting traceroute to {target_long_name} ({target_id}) for you..." - self.reply_in_dm(packet, response) + send_reply(response) # Initiate actual traceroute # Map target_id -> list of requester_ids @@ -80,7 +88,10 @@ def check_timeout(): # Send the timeout message in a separate thread to avoid blocking the timer/interface def send_timeout(): - self.message_in_dm(requester_id, timeout_msg) + if is_public: + self.message_in_channel(packet.get('channel', 0), timeout_msg, want_ack=False) + else: + self.message_in_dm(requester_id, timeout_msg, want_ack=False) threading.Thread(target=send_timeout, daemon=True).start() @@ -96,7 +107,7 @@ def send_timeout(): self.bot.pending_traces[target_id].remove(requester_id) if not self.bot.pending_traces[target_id]: del self.bot.pending_traces[target_id] - self.reply_in_dm(packet, f"Error starting traceroute: {e}") + send_reply(f"Error starting traceroute: {e}") def get_command_for_logging(self, message: str) -> (str, list[str] | None, str | None): return self._gcfl_base_command_and_args(message) diff --git a/src/tcp_interface.py b/src/tcp_interface.py index 8601627..4422ce6 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -95,8 +95,8 @@ def sendHeartbeat(self): super().sendHeartbeat() except (OSError, BrokenPipeError) as e: logging.error(f"Heartbeat failed: {e}") - # TODO: Decide if we want to handle the error on this thread - # self._reconnect_with_backoff() + # Shutdown and notify the error handler to trigger a clean restart from the main thread. + # This avoids nested reconnection attempts on the heartbeat thread. self._shutdown_and_call_error_handler() def _sendPacket( From 7180994841b883fbca327662b8d60c951a233ee9 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Wed, 4 Mar 2026 22:40:18 +0000 Subject: [PATCH 49/93] Expose TCP proxy port 4403 on remote compose --- docker-compose-remote.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose-remote.yaml b/docker-compose-remote.yaml index 3ce8378..d20ae94 100644 --- a/docker-compose-remote.yaml +++ b/docker-compose-remote.yaml @@ -4,6 +4,8 @@ services: image: ghcr.io/pskillen/meshtastic-bot:latest container_name: meshtastic-bot restart: unless-stopped + ports: + - "4403:4403" env_file: - meshtastic-bot.env volumes: From 324202f170541572a057b23ba4a0fc61f311959a Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 10:18:54 +0000 Subject: [PATCH 50/93] Refactor Core Services: Asyncio Proxy, Thread-Safe DBs, Narrowed Errors - **TCP Proxy**: Completely rewritten to use Python's `asyncio` event loop, replacing blocking sockets and `time.sleep` loops. Resolves thread exhaustion and packet framing deadlocks. - **Database Thread-Safety**: SQLite connections now use `check_same_thread=False` protected by a global `threading.RLock()` across the Data Access Objects to prevent 'database is locked' corruption. - **Error Handling**: Narrowed overly broad `Exception` catch-alls in proxy, api wrappers, and pub/sub interfaces to log `exc_info=True` for unexpected errors, and handle timeouts gracefully. - **Tests**: Updated `test_tcp_proxy.py` to test asyncio coroutines properly. --- src/bot.py | 980 +++++++++++++++-------------- src/persistence/__init__.py | 47 +- src/persistence/commands_logger.py | 14 +- src/persistence/node_db.py | 22 +- src/persistence/user_prefs.py | 6 +- src/tcp_proxy.py | 559 ++++++++-------- test/test_tcp_proxy.py | 137 ++-- 7 files changed, 850 insertions(+), 915 deletions(-) diff --git a/src/bot.py b/src/bot.py index 6bf1165..1482747 100644 --- a/src/bot.py +++ b/src/bot.py @@ -1,485 +1,495 @@ -import logging -import sys -import time -import threading -from datetime import datetime, timezone - -import schedule -from meshtastic.protobuf.mesh_pb2 import MeshPacket -from pubsub import pub -from requests import HTTPError - -from src.api.StorageAPI import StorageAPIWrapper -from src.commands.factory import CommandFactory -from src.data_classes import MeshNode -from src.helpers import pretty_print_last_heard, safe_encode_node_name, get_env_bool, get_env_int -from src.persistence.commands_logger import AbstractCommandLogger -from src.persistence.node_db import AbstractNodeDB -from src.persistence.node_info import AbstractNodeInfoStore -from src.persistence.packet_dump import dump_packet -from src.persistence.user_prefs import AbstractUserPrefsPersistence -from src.responders.responder_factory import ResponderFactory -from src.tcp_interface import AutoReconnectTcpInterface, SupportsMessageReactionInterface - - -class MeshtasticBot: - admin_nodes: list[str] - - interface: SupportsMessageReactionInterface - init_complete: bool - - my_id: str - my_nodenum: int - node_db: AbstractNodeDB - node_info: AbstractNodeInfoStore - command_logger: AbstractCommandLogger - - user_prefs_persistence: AbstractUserPrefsPersistence - - storage_apis: list[StorageAPIWrapper] - - def __init__(self, address: str): - self.address = address - self.start_time = datetime.now(timezone.utc) - self.proxy = None - - self.admin_nodes = [] - - self.interface = None - self.init_complete = False - - self.my_id = None - self.my_nodenum = None - self.node_db = None - self.node_info = None - self.command_logger = None - self.user_prefs_persistence = None - self.storage_apis = [] - self.pending_traces = {} - self.last_report_zero = False - - pub.subscribe(self.on_receive, "meshtastic.receive") - pub.subscribe(self.on_traceroute, "meshtastic.traceroute") - pub.subscribe(self.on_receive_text, "meshtastic.receive.text") - pub.subscribe(self.on_node_updated, "meshtastic.node.updated") - pub.subscribe(self.on_connection, "meshtastic.connection.established") - - def connect(self): - logging.info(f"Connecting to Meshtastic node at {self.address}...") - self.init_complete = False - - old_packet_queue = None - if self.interface and hasattr(self.interface, 'packet_queue'): - old_packet_queue = self.interface.packet_queue - - self.interface = AutoReconnectTcpInterface( - hostname=self.address, - error_handler=self._handle_interface_error, - packet_queue=old_packet_queue, - ) - - logging.info("Connected. Listening for messages...") - - def _handle_interface_error(self, error): - self.disconnect() - - logging.error(f"Handling interface error: {error}") - backoff_time = 5 # Initial back-off time in seconds - max_backoff_time = 300 # Maximum back-off time in seconds (5 minutes) - backoff_rate = 1.5 # Exponential back-off rate - - while True: - try: - self.connect() - self.init_complete = True - logging.info("Reconnected successfully") - break - except Exception as e: - logging.error(f"Reconnection attempt failed: {e}") - if backoff_time == max_backoff_time: - logging.error("Max backoff time reached. Exiting.") - sys.exit(1) - backoff_time = min(backoff_time * backoff_rate, max_backoff_time) # Exponential back-off - logging.info(f"Next reconnection attempt in {backoff_time} seconds") - time.sleep(backoff_time) - - def disconnect(self): - self.init_complete = False - try: - if self.interface: - self.interface.close() - self.interface._disconnected() - except OSError as ex: - logging.warning(f"Failed to close connection. Continuing anyway: {ex}") - - def on_connection(self, interface, topic=pub.AUTO_TOPIC): - self.my_nodenum = interface.localNode.nodeNum # in dec - self.my_id = f"!{self.my_nodenum:08x}" - - self.init_complete = True - logging.info(f'Connected to Meshtastic node as {self.my_id}') - self.print_nodes() - - # Send an immediate node count report upon connection - # We use a timer to delay slightly to ensure everything settles - if get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True): - threading.Timer(10.0, self.report_node_count).start() - - def on_receive_text(self, packet: MeshPacket, interface): - """Callback function triggered when a text message is received.""" - from_id = packet.get('fromId') - text = packet.get('decoded', {}).get('text', '') - logging.info(f"on_receive_text: Incoming text from {from_id}: {text}") - - to_id = packet['toId'] - - if to_id == self.my_id: - self.handle_private_message(packet) - else: - self.handle_public_message(packet) - - def handle_private_message(self, packet: MeshPacket): - """Handle private messages.""" - message = packet['decoded']['text'] - from_id = packet['fromId'] - - sender = self.node_db.get_by_id(from_id) - logging.info(f"Received private message: '{message}' from {sender.long_name if sender else from_id}") - - words = message.split() - command_name = words[0] - command_instance = CommandFactory.create_command(command_name, self) - if command_instance: - self.command_logger.log_command(from_id, command_instance, message) - - def run_command(): - try: - logging.info(f"Running command {command_name} in thread for {from_id}") - command_instance.handle_packet(packet) - logging.info(f"Finished command {command_name} for {from_id}") - except Exception as e: - logging.error(f"Error handling private command {command_name}: {e}", exc_info=True) - - threading.Thread(target=run_command, daemon=True).start() - else: - self.command_logger.log_unknown_request(from_id, message) - - def get_channel_name(self, packet: MeshPacket) -> str: - """Get the name of the channel for a packet.""" - channel_index = packet.get('channel', 0) - try: - if self.interface and self.interface.localNode: - channel = self.interface.localNode.channels[channel_index] - if channel and channel.settings and channel.settings.name: - return channel.settings.name - except (AttributeError, IndexError): - pass - return "Primary" if channel_index == 0 else f"Channel {channel_index}" - - def handle_public_message(self, packet: MeshPacket): - """Handle public (group channel) messages.""" - message = packet['decoded']['text'] - from_id = packet['fromId'] - sender = self.node_db.get_by_id(from_id) - sender_name = sender.long_name if sender else from_id - channel_name = self.get_channel_name(packet) - - logging.info(f"Received group message on channel '{channel_name}' from {sender_name}: {message}") - - # Allow certain commands in public channels - words = message.split() - if words: - command_name = words[0].lower() - if command_name in ["!tr", "!ping", "!hello", "!nodes", "!status", "!whoami"]: - env_var_name = f"ENABLE_COMMAND_{command_name.lstrip('!').upper()}" - if get_env_bool(env_var_name, True): - logging.info(f"Received public {command_name} from {sender_name}") - command_instance = CommandFactory.create_command(command_name, self) - if command_instance: - def run_command(): - try: - logging.info(f"Running public command {command_name} in thread for {from_id}") - # Commands by default reply via DM (reply_in_dm). - command_instance.handle_packet(packet) - logging.info(f"Finished public command {command_name} for {from_id}") - except Exception as e: - logging.error(f"Error handling public command {command_name}: {e}", exc_info=True) - - threading.Thread(target=run_command, daemon=True).start() - return # Stop processing responders - - responder = ResponderFactory.match_responder(message, self) - if responder: - try: - outcome = responder.handle_packet(packet) - - if outcome: - logging.info( - f"Handled message from {sender.long_name if sender else from_id} with responder {responder.__class__.__name__}: {message}") - self.command_logger.log_responder_handled(from_id, responder, message) - except Exception as e: - logging.error(f"Error handling message: {e}") - - def on_traceroute(self, packet, route): - """Callback for when a traceroute response is received.""" - def process_traceroute(): - try: - target_id = packet.get('fromId') - logging.info(f"on_traceroute: Processing response from {target_id} in thread. Route data type: {type(route)}") - - if target_id not in self.pending_traces: - logging.info(f"Received traceroute from {target_id} but no pending request found.") - return - - requesters = self.pending_traces.pop(target_id) - if not isinstance(requesters, list): - requesters = [requesters] - - if route is None: - decoded_keys = packet.get('decoded', {}).keys() - logging.warning(f"Traceroute response from {target_id} contained no route data. Decoded keys: {list(decoded_keys)}") - for requester_id in requesters: - self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) - return - - def get_route_hops(r, key='route'): - if isinstance(r, dict): - return r.get(key, []) - return getattr(r, key, []) - - # 1. Format the OUTBOUND route (TO target) - route_ids = get_route_hops(route, 'route') - hops = [] - for node_id_int in route_ids: - # Convert int to !hex string - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops.append(f"{node.short_name}") - else: - hops.append(f"{node_id_str}") - - route_str = " -> ".join(hops) if hops else "Direct" - - # Append target to the end of the TO route - target_node = self.node_db.get_by_id(target_id) - target_name = target_node.short_name if target_node else target_id - route_str += f" -> {target_name}" - - response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" - - # 2. Format the INBOUND route (FROM target) - route_back_ids = get_route_hops(route, 'route_back') - hops_back = [] - for node_id_int in route_back_ids: - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops_back.append(f"{node.short_name}") - else: - hops_back.append(f"{node_id_str}") - - back_str = " -> ".join(hops_back) if hops_back else "Direct" - - # Append bot to the end of the FROM route - my_node = self.node_db.get_by_id(self.my_id) - my_name = my_node.short_name if my_node else self.my_id - back_str += f" -> {my_name}" - - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" - - # Wait for radio to settle after receiving the traceroute response - time.sleep(5) - - for requester_id in requesters: - logging.info(f"Sending traceroute result to {requester_id}: {response_out}") - # Use wantAck=False for result delivery to reduce congestion - self.interface.sendText(response_out, destinationId=requester_id, wantAck=False) - - # Always send the return path message for consistency - time.sleep(2) - logging.info(f"Sending traceroute result to {requester_id}: {response_in}") - self.interface.sendText(response_in, destinationId=requester_id, wantAck=False) - - logging.info(f"Finished processing traceroute for {target_id}") - except Exception as e: - logging.error(f"Error in on_traceroute thread: {e}", exc_info=True) - - threading.Thread(target=process_traceroute, daemon=True).start() - - def on_receive(self, packet: MeshPacket, interface): - from_id = packet.get('fromId') - if from_id is None and 'from' in packet: - from_id = f"!{packet['from']:08x}" - - portnum = packet.get('decoded', {}).get('portnum', 'unknown') - logging.info(f"on_receive: Incoming packet from {from_id} (Port: {portnum})") - - if from_id == '!69828b98': - logging.debug(f"Received ANY packet from mte4: {packet}") - - # dump the packet to disk (if enabled) - dump_packet(packet) - - for storage_api in self.storage_apis: - try: - storage_api.store_raw_packet(packet) - except HTTPError as ex: - logging.warning(f"Error storing packet: {ex.response.text}") - pass - except Exception as ex: - logging.warning(f"Error storing packet in API: {ex}") - pass - - sender = packet['fromId'] - node = self.node_db.get_by_id(sender) - if not node: - # logging.warning(f"Received packet from unknown sender {sender}") - return - - if node: - portnum = packet['decoded']['portnum'] if 'decoded' in packet else 'unknown' - if sender == self.my_id and portnum == 'TELEMETRY_APP': - # Ignore telemetry packets sent by self - pass - else: - # Increment packets_today for this node - self.node_info.node_packet_received(sender, portnum) - - if sender == self.my_id: - recipient_id = packet['toId'] - recipient = self.node_db.get_by_id(recipient_id) - portnum = packet['decoded']['portnum'] - - logging.debug( - f"Received packet from self: {recipient.long_name if recipient else recipient_id} (port {portnum})") - - def on_node_updated(self, node, interface): - if interface.localNode and self.my_nodenum is None: - self.my_nodenum = interface.localNode.nodeNum - self.my_id = f"!{self.my_nodenum:08x}" - - # Check if the node is a new user - if node['user'] is not None: - mesh_node = MeshNode.from_dict(node) - last_heard_int = node.get('lastHeard', 0) - - # Fix: Don't update if timestamp is 0 or older than what we have - if last_heard_int > 0: - last_heard = datetime.fromtimestamp(last_heard_int, tz=timezone.utc) - existing_last_heard = self.node_info.get_last_heard(mesh_node.user.id) - - if not existing_last_heard or last_heard > existing_last_heard: - self.node_info.update_last_heard(mesh_node.user.id, last_heard) - - # Optimization: Only store and notify if the node has actually changed - # or if it's the first time we've seen it. - existing_user = self.node_db.get_by_id(mesh_node.user.id) - is_new = existing_user is None - has_changed = is_new or existing_user != mesh_node.user - - if has_changed: - self.node_db.store_node(mesh_node) - - for storage_api in self.storage_apis: - try: - storage_api.store_node(mesh_node) - except HTTPError as ex: - logging.warning(f"Error storing node: {ex.response.text}") - pass - except Exception as ex: - logging.warning(f"Error storing node: {ex}") - pass - - if self.init_complete and is_new: - current_last_heard = self.node_info.get_last_heard(mesh_node.user.id) - last_heard_str = pretty_print_last_heard(current_last_heard) if current_last_heard else "unknown" - logging.info(f"New user: {mesh_node.user.long_name} (last heard {last_heard_str})") - - def print_nodes(self): - # filter nodes where last heard is more than 2 hours ago - online_nodes = self.node_info.get_online_nodes() - offline_nodes = self.node_info.get_offline_nodes() - - # print all nodes, sorted by last heard descending - logging.info(f"Online nodes: ({len(online_nodes)})") - sorted_nodes = sorted(online_nodes, key=lambda x: online_nodes[x], reverse=True) - for node_id in sorted_nodes: - if node_id == self.my_id: - continue - node = self.node_db.get_by_id(node_id) - last_heard = self.node_info.get_last_heard(node_id) - last_heard = pretty_print_last_heard(last_heard) - encoded_name = safe_encode_node_name(node.long_name) - logging.info(f"- {encoded_name} (last heard {last_heard})") - - logging.info(f"- Plus {len(offline_nodes)} offline nodes") - - def report_node_count(self, destination=None, channel_index=None): - """Report the current node count to a specific channel or destination.""" - if not self.init_complete or not self.interface: - logging.warning("Skipping node count report: interface not ready.") - return - - if channel_index is None: - channel_index = get_env_int('CHANNEL_FOR_NODE_TOTAL_BROADCAST', 2) - - online_nodes = self.node_info.get_online_nodes() - count = len(online_nodes) - - if count == 0: - message = "Warning MTEK cant see any nodes" - self.last_report_zero = True - else: - message = f"MTEK has a node count of {count}" - self.last_report_zero = False - - logging.info(f"Reporting node count: {message}") - try: - if destination: - self.interface.sendText(message, destinationId=destination, wantAck=True) - else: - self.interface.sendText(message, channelIndex=channel_index, wantAck=True) - except Exception as e: - logging.error(f"Failed to report node count: {e}") - - def check_for_zero_nodes(self): - """Checks if the node count is zero and alerts immediately if it transitioned to zero.""" - if not self.init_complete or not self.interface: - return - - online_nodes = self.node_info.get_online_nodes() - count = len(online_nodes) - - if count == 0 and not self.last_report_zero: - logging.warning("Immediate alert: Node count dropped to zero!") - self.report_node_count() - elif count > 0: - # Reset flag so we can alert again if it drops to zero later - self.last_report_zero = False - - def get_global_context(self): - return { - 'nodes': self.node_db.list_nodes(), - 'online_nodes': self.node_info.get_online_nodes(), - 'offline_nodes': self.node_info.get_offline_nodes(), - } - - def start_scheduler(self): - schedule.every().day.at("00:00").do(self.node_info.reset_packets_today) - if get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True): - report_frequency = get_env_int('FREQUENCY_OF_NODE_REPORTS', 3) - schedule.every(report_frequency).hours.do(self.report_node_count) - schedule.every(1).minutes.do(self.check_for_zero_nodes) - while True: - schedule.run_pending() - try: - time.sleep(1) - except KeyboardInterrupt: - return - - def get_node_by_short_name(self, short_name: str) -> MeshNode.User | None: - for node in self.node_db.list_nodes(): - if node.short_name.lower() == short_name.lower(): - return node - return None +import logging +import sys +import time +import threading +from datetime import datetime, timezone + +import schedule +from meshtastic.protobuf.mesh_pb2 import MeshPacket +from pubsub import pub +from requests import HTTPError + +from src.api.StorageAPI import StorageAPIWrapper +from src.commands.factory import CommandFactory +from src.data_classes import MeshNode +from src.helpers import pretty_print_last_heard, safe_encode_node_name, get_env_bool, get_env_int +from src.persistence.commands_logger import AbstractCommandLogger +from src.persistence.node_db import AbstractNodeDB +from src.persistence.node_info import AbstractNodeInfoStore +from src.persistence.packet_dump import dump_packet +from src.persistence.user_prefs import AbstractUserPrefsPersistence +from src.responders.responder_factory import ResponderFactory +from src.tcp_interface import AutoReconnectTcpInterface, SupportsMessageReactionInterface + + +class MeshtasticBot: + admin_nodes: list[str] + + interface: SupportsMessageReactionInterface + init_complete: bool + + my_id: str + my_nodenum: int + node_db: AbstractNodeDB + node_info: AbstractNodeInfoStore + command_logger: AbstractCommandLogger + + user_prefs_persistence: AbstractUserPrefsPersistence + + storage_apis: list[StorageAPIWrapper] + + def __init__(self, address: str): + self.address = address + self.start_time = datetime.now(timezone.utc) + self.proxy = None + + self.admin_nodes = [] + + self.interface = None + self.init_complete = False + + self.my_id = None + self.my_nodenum = None + self.node_db = None + self.node_info = None + self.command_logger = None + self.user_prefs_persistence = None + self.storage_apis = [] + self.pending_traces = {} + self.last_report_zero = False + + pub.subscribe(self.on_receive, "meshtastic.receive") + pub.subscribe(self.on_traceroute, "meshtastic.traceroute") + pub.subscribe(self.on_receive_text, "meshtastic.receive.text") + pub.subscribe(self.on_node_updated, "meshtastic.node.updated") + pub.subscribe(self.on_connection, "meshtastic.connection.established") + + def connect(self): + logging.info(f"Connecting to Meshtastic node at {self.address}...") + self.init_complete = False + + old_packet_queue = None + if self.interface and hasattr(self.interface, 'packet_queue'): + old_packet_queue = self.interface.packet_queue + + self.interface = AutoReconnectTcpInterface( + hostname=self.address, + error_handler=self._handle_interface_error, + packet_queue=old_packet_queue, + ) + + logging.info("Connected. Listening for messages...") + + def _handle_interface_error(self, error): + self.disconnect() + + logging.error(f"Handling interface error: {error}") + backoff_time = 5 # Initial back-off time in seconds + max_backoff_time = 300 # Maximum back-off time in seconds (5 minutes) + backoff_rate = 1.5 # Exponential back-off rate + + while True: + try: + self.connect() + self.init_complete = True + logging.info("Reconnected successfully") + break + except Exception as e: + logging.error(f"Reconnection attempt failed: {e}") + if backoff_time == max_backoff_time: + logging.error("Max backoff time reached. Exiting.") + sys.exit(1) + backoff_time = min(backoff_time * backoff_rate, max_backoff_time) # Exponential back-off + logging.info(f"Next reconnection attempt in {backoff_time} seconds") + time.sleep(backoff_time) + + def disconnect(self): + self.init_complete = False + try: + if self.interface: + self.interface.close() + self.interface._disconnected() + except OSError as ex: + logging.warning(f"Failed to close connection. Continuing anyway: {ex}") + + def on_connection(self, interface, topic=pub.AUTO_TOPIC): + self.my_nodenum = interface.localNode.nodeNum # in dec + self.my_id = f"!{self.my_nodenum:08x}" + + self.init_complete = True + logging.info(f'Connected to Meshtastic node as {self.my_id}') + self.print_nodes() + + # Send an immediate node count report upon connection + # We use a timer to delay slightly to ensure everything settles + if get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True): + threading.Timer(10.0, self.report_node_count).start() + + def on_receive_text(self, packet: MeshPacket, interface): + """Callback function triggered when a text message is received.""" + from_id = packet.get('fromId') + text = packet.get('decoded', {}).get('text', '') + logging.info(f"on_receive_text: Incoming text from {from_id}: {text}") + + to_id = packet['toId'] + + if to_id == self.my_id: + self.handle_private_message(packet) + else: + self.handle_public_message(packet) + + def handle_private_message(self, packet: MeshPacket): + """Handle private messages.""" + message = packet['decoded']['text'] + from_id = packet['fromId'] + + sender = self.node_db.get_by_id(from_id) + logging.info(f"Received private message: '{message}' from {sender.long_name if sender else from_id}") + + words = message.split() + command_name = words[0] + command_instance = CommandFactory.create_command(command_name, self) + if command_instance: + self.command_logger.log_command(from_id, command_instance, message) + + def run_command(): + try: + logging.info(f"Running command {command_name} in thread for {from_id}") + command_instance.handle_packet(packet) + logging.info(f"Finished command {command_name} for {from_id}") + except Exception as e: + logging.error(f"Error handling private command {command_name}: {e}", exc_info=True) + + threading.Thread(target=run_command, daemon=True).start() + else: + self.command_logger.log_unknown_request(from_id, message) + + def get_channel_name(self, packet: MeshPacket) -> str: + """Get the name of the channel for a packet.""" + channel_index = packet.get('channel', 0) + try: + if self.interface and self.interface.localNode: + channel = self.interface.localNode.channels[channel_index] + if channel and channel.settings and channel.settings.name: + return channel.settings.name + except (AttributeError, IndexError): + pass + return "Primary" if channel_index == 0 else f"Channel {channel_index}" + + def handle_public_message(self, packet: MeshPacket): + """Handle public (group channel) messages.""" + message = packet['decoded']['text'] + from_id = packet['fromId'] + sender = self.node_db.get_by_id(from_id) + sender_name = sender.long_name if sender else from_id + channel_name = self.get_channel_name(packet) + + logging.info(f"Received group message on channel '{channel_name}' from {sender_name}: {message}") + + # Allow certain commands in public channels + words = message.split() + if words: + command_name = words[0].lower() + if command_name in ["!tr", "!ping", "!hello", "!nodes", "!status", "!whoami"]: + env_var_name = f"ENABLE_COMMAND_{command_name.lstrip('!').upper()}" + if get_env_bool(env_var_name, True): + logging.info(f"Received public {command_name} from {sender_name}") + command_instance = CommandFactory.create_command(command_name, self) + if command_instance: + def run_command(): + try: + logging.info(f"Running public command {command_name} in thread for {from_id}") + # Commands by default reply via DM (reply_in_dm). + command_instance.handle_packet(packet) + logging.info(f"Finished public command {command_name} for {from_id}") + except Exception as e: + logging.error(f"Error handling public command {command_name}: {e}", exc_info=True) + + threading.Thread(target=run_command, daemon=True).start() + return # Stop processing responders + + responder = ResponderFactory.match_responder(message, self) + if responder: + try: + outcome = responder.handle_packet(packet) + + if outcome: + logging.info( + f"Handled message from {sender.long_name if sender else from_id} with responder {responder.__class__.__name__}: {message}") + self.command_logger.log_responder_handled(from_id, responder, message) + except (KeyError, ValueError) as e: + logging.error(f"Packet format error handling message: {e}", exc_info=True) + except Exception as e: + logging.error(f"Error handling message: {e}", exc_info=True) + + def on_traceroute(self, packet, route): + """Callback for when a traceroute response is received.""" + def process_traceroute(): + try: + target_id = packet.get('fromId') + logging.info(f"on_traceroute: Processing response from {target_id} in thread. Route data type: {type(route)}") + + if target_id not in self.pending_traces: + logging.info(f"Received traceroute from {target_id} but no pending request found.") + return + + requesters = self.pending_traces.pop(target_id) + if not isinstance(requesters, list): + requesters = [requesters] + + if route is None: + decoded_keys = packet.get('decoded', {}).keys() + logging.warning(f"Traceroute response from {target_id} contained no route data. Decoded keys: {list(decoded_keys)}") + for requester_id in requesters: + self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) + return + + def get_route_hops(r, key='route'): + if isinstance(r, dict): + return r.get(key, []) + return getattr(r, key, []) + + # 1. Format the OUTBOUND route (TO target) + route_ids = get_route_hops(route, 'route') + hops = [] + for node_id_int in route_ids: + # Convert int to !hex string + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops.append(f"{node.short_name}") + else: + hops.append(f"{node_id_str}") + + route_str = " -> ".join(hops) if hops else "Direct" + + # Append target to the end of the TO route + target_node = self.node_db.get_by_id(target_id) + target_name = target_node.short_name if target_node else target_id + route_str += f" -> {target_name}" + + response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" + + # 2. Format the INBOUND route (FROM target) + route_back_ids = get_route_hops(route, 'route_back') + hops_back = [] + for node_id_int in route_back_ids: + node_id_str = f"!{node_id_int:08x}" + node = self.node_db.get_by_id(node_id_str) + if node: + hops_back.append(f"{node.short_name}") + else: + hops_back.append(f"{node_id_str}") + + back_str = " -> ".join(hops_back) if hops_back else "Direct" + + # Append bot to the end of the FROM route + my_node = self.node_db.get_by_id(self.my_id) + my_name = my_node.short_name if my_node else self.my_id + back_str += f" -> {my_name}" + + response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + + # Wait for radio to settle after receiving the traceroute response + time.sleep(5) + + for requester_id in requesters: + logging.info(f"Sending traceroute result to {requester_id}: {response_out}") + # Use wantAck=False for result delivery to reduce congestion + self.interface.sendText(response_out, destinationId=requester_id, wantAck=False) + + # Always send the return path message for consistency + time.sleep(2) + logging.info(f"Sending traceroute result to {requester_id}: {response_in}") + self.interface.sendText(response_in, destinationId=requester_id, wantAck=False) + + logging.info(f"Finished processing traceroute for {target_id}") + except Exception as e: + logging.error(f"Error in on_traceroute thread: {e}", exc_info=True) + + threading.Thread(target=process_traceroute, daemon=True).start() + + def on_receive(self, packet: MeshPacket, interface): + from_id = packet.get('fromId') + if from_id is None and 'from' in packet: + from_id = f"!{packet['from']:08x}" + + portnum = packet.get('decoded', {}).get('portnum', 'unknown') + logging.info(f"on_receive: Incoming packet from {from_id} (Port: {portnum})") + + if from_id == '!69828b98': + logging.debug(f"Received ANY packet from mte4: {packet}") + + # dump the packet to disk (if enabled) + dump_packet(packet) + + for storage_api in self.storage_apis: + try: + storage_api.store_raw_packet(packet) + except HTTPError as ex: + logging.warning(f"Error storing packet: {ex.response.text}") + pass + except (ConnectionError, TimeoutError) as ex: + logging.warning(f"Network error storing packet in API: {ex}") + pass + except Exception as ex: + logging.warning(f"Unexpected error storing packet in API: {ex}", exc_info=True) + pass + + sender = packet['fromId'] + node = self.node_db.get_by_id(sender) + if not node: + # logging.warning(f"Received packet from unknown sender {sender}") + return + + if node: + portnum = packet['decoded']['portnum'] if 'decoded' in packet else 'unknown' + if sender == self.my_id and portnum == 'TELEMETRY_APP': + # Ignore telemetry packets sent by self + pass + else: + # Increment packets_today for this node + self.node_info.node_packet_received(sender, portnum) + + if sender == self.my_id: + recipient_id = packet['toId'] + recipient = self.node_db.get_by_id(recipient_id) + portnum = packet['decoded']['portnum'] + + logging.debug( + f"Received packet from self: {recipient.long_name if recipient else recipient_id} (port {portnum})") + + def on_node_updated(self, node, interface): + if interface.localNode and self.my_nodenum is None: + self.my_nodenum = interface.localNode.nodeNum + self.my_id = f"!{self.my_nodenum:08x}" + + # Check if the node is a new user + if node['user'] is not None: + mesh_node = MeshNode.from_dict(node) + last_heard_int = node.get('lastHeard', 0) + + # Fix: Don't update if timestamp is 0 or older than what we have + if last_heard_int > 0: + last_heard = datetime.fromtimestamp(last_heard_int, tz=timezone.utc) + existing_last_heard = self.node_info.get_last_heard(mesh_node.user.id) + + if not existing_last_heard or last_heard > existing_last_heard: + self.node_info.update_last_heard(mesh_node.user.id, last_heard) + + # Optimization: Only store and notify if the node has actually changed + # or if it's the first time we've seen it. + existing_user = self.node_db.get_by_id(mesh_node.user.id) + is_new = existing_user is None + has_changed = is_new or existing_user != mesh_node.user + + if has_changed: + self.node_db.store_node(mesh_node) + + for storage_api in self.storage_apis: + try: + storage_api.store_node(mesh_node) + except HTTPError as ex: + logging.warning(f"Error storing node: {ex.response.text}") + pass + except (ConnectionError, TimeoutError) as ex: + logging.warning(f"Network error storing node: {ex}") + pass + except Exception as ex: + logging.warning(f"Unexpected error storing node: {ex}", exc_info=True) + pass + + if self.init_complete and is_new: + current_last_heard = self.node_info.get_last_heard(mesh_node.user.id) + last_heard_str = pretty_print_last_heard(current_last_heard) if current_last_heard else "unknown" + logging.info(f"New user: {mesh_node.user.long_name} (last heard {last_heard_str})") + + def print_nodes(self): + # filter nodes where last heard is more than 2 hours ago + online_nodes = self.node_info.get_online_nodes() + offline_nodes = self.node_info.get_offline_nodes() + + # print all nodes, sorted by last heard descending + logging.info(f"Online nodes: ({len(online_nodes)})") + sorted_nodes = sorted(online_nodes, key=lambda x: online_nodes[x], reverse=True) + for node_id in sorted_nodes: + if node_id == self.my_id: + continue + node = self.node_db.get_by_id(node_id) + last_heard = self.node_info.get_last_heard(node_id) + last_heard = pretty_print_last_heard(last_heard) + encoded_name = safe_encode_node_name(node.long_name) + logging.info(f"- {encoded_name} (last heard {last_heard})") + + logging.info(f"- Plus {len(offline_nodes)} offline nodes") + + def report_node_count(self, destination=None, channel_index=None): + """Report the current node count to a specific channel or destination.""" + if not self.init_complete or not self.interface: + logging.warning("Skipping node count report: interface not ready.") + return + + if channel_index is None: + channel_index = get_env_int('CHANNEL_FOR_NODE_TOTAL_BROADCAST', 2) + + online_nodes = self.node_info.get_online_nodes() + count = len(online_nodes) + + if count == 0: + message = "Warning MTEK cant see any nodes" + self.last_report_zero = True + else: + message = f"MTEK has a node count of {count}" + self.last_report_zero = False + + logging.info(f"Reporting node count: {message}") + try: + if destination: + self.interface.sendText(message, destinationId=destination, wantAck=True) + else: + self.interface.sendText(message, channelIndex=channel_index, wantAck=True) + except (OSError, ConnectionError) as e: + logging.error(f"Network failure reporting node count: {e}", exc_info=True) + except Exception as e: + logging.error(f"Unexpected error reporting node count: {e}", exc_info=True) + + def check_for_zero_nodes(self): + """Checks if the node count is zero and alerts immediately if it transitioned to zero.""" + if not self.init_complete or not self.interface: + return + + online_nodes = self.node_info.get_online_nodes() + count = len(online_nodes) + + if count == 0 and not self.last_report_zero: + logging.warning("Immediate alert: Node count dropped to zero!") + self.report_node_count() + elif count > 0: + # Reset flag so we can alert again if it drops to zero later + self.last_report_zero = False + + def get_global_context(self): + return { + 'nodes': self.node_db.list_nodes(), + 'online_nodes': self.node_info.get_online_nodes(), + 'offline_nodes': self.node_info.get_offline_nodes(), + } + + def start_scheduler(self): + schedule.every().day.at("00:00").do(self.node_info.reset_packets_today) + if get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True): + report_frequency = get_env_int('FREQUENCY_OF_NODE_REPORTS', 3) + schedule.every(report_frequency).hours.do(self.report_node_count) + schedule.every(1).minutes.do(self.check_for_zero_nodes) + while True: + schedule.run_pending() + try: + time.sleep(1) + except KeyboardInterrupt: + return + + def get_node_by_short_name(self, short_name: str) -> MeshNode.User | None: + for node in self.node_db.list_nodes(): + if node.short_name.lower() == short_name.lower(): + return node + return None diff --git a/src/persistence/__init__.py b/src/persistence/__init__.py index a7d2976..f5209c9 100644 --- a/src/persistence/__init__.py +++ b/src/persistence/__init__.py @@ -1,20 +1,27 @@ -import abc -import logging -from pathlib import Path - - -class BaseSqlitePersistenceStore(abc.ABC): - db_path: Path - - def __init__(self, db_path: str): - self.db_path = Path(db_path) - self._initialize_db() - if self.db_path.is_relative_to(Path.cwd()): - path_string = self.db_path.relative_to(Path.cwd()) - else: - path_string = self.db_path - logging.info(f"Connected to {self.__class__.__name__} DB at {path_string}") - - @abc.abstractmethod - def _initialize_db(self): - pass +import abc +import logging +import sqlite3 +import threading +from pathlib import Path + + +class BaseSqlitePersistenceStore(abc.ABC): + db_path: Path + + def __init__(self, db_path: str): + self.db_path = Path(db_path) + self._lock = threading.RLock() + self._initialize_db() + if self.db_path.is_relative_to(Path.cwd()): + path_string = self.db_path.relative_to(Path.cwd()) + else: + path_string = self.db_path + logging.info(f"Connected to {self.__class__.__name__} DB at {path_string}") + + def _get_connection(self): + """Returns a thread-safe sqlite3 connection.""" + return sqlite3.connect(self.db_path, check_same_thread=False) + + @abc.abstractmethod + def _initialize_db(self): + pass diff --git a/src/persistence/commands_logger.py b/src/persistence/commands_logger.py index ca57d89..09edc5e 100644 --- a/src/persistence/commands_logger.py +++ b/src/persistence/commands_logger.py @@ -37,7 +37,7 @@ def get_responder_history(self, since: datetime, sender_id: str = None) -> pd.Da class SqliteCommandLogger(AbstractCommandLogger, BaseSqlitePersistenceStore): def _initialize_db(self): - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' CREATE TABLE IF NOT EXISTS command_log ( @@ -70,7 +70,7 @@ def log_command(self, sender_id: str, command_instance, message: str) -> None: base_cmd, subcommands, args = command_instance.get_command_for_logging(message) subcommands_str = ' '.join(subcommands) if subcommands else None - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' INSERT INTO command_log (sender_id, base_command, sub_commands, args, timestamp, handler_class) @@ -80,7 +80,7 @@ def log_command(self, sender_id: str, command_instance, message: str) -> None: conn.commit() def log_responder_handled(self, sender_id: str, responder_instance, message_text: str) -> None: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' INSERT INTO responder_log (sender_id, message, timestamp, responder_class) @@ -89,7 +89,7 @@ def log_responder_handled(self, sender_id: str, responder_instance, message_text conn.commit() def log_unknown_request(self, sender_id: str, message: str) -> None: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' INSERT INTO unknown_requests (sender_id, message, timestamp) @@ -98,7 +98,7 @@ def log_unknown_request(self, sender_id: str, message: str) -> None: conn.commit() def get_command_history(self, since: datetime, sender_id: str = None) -> pd.DataFrame: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() if sender_id: cursor.execute(''' @@ -114,7 +114,7 @@ def get_command_history(self, since: datetime, sender_id: str = None) -> pd.Data return pd.DataFrame(rows, columns=['sender_id', 'base_command', 'timestamp']) def get_unknown_command_history(self, since: datetime, sender_id: str = None) -> pd.DataFrame: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() if sender_id: cursor.execute(''' @@ -130,7 +130,7 @@ def get_unknown_command_history(self, since: datetime, sender_id: str = None) -> return pd.DataFrame(rows, columns=['sender_id', 'message', 'timestamp']) def get_responder_history(self, since: datetime, sender_id: str = None) -> pd.DataFrame: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() if sender_id: cursor.execute(''' diff --git a/src/persistence/node_db.py b/src/persistence/node_db.py index 955cb23..e6d3726 100644 --- a/src/persistence/node_db.py +++ b/src/persistence/node_db.py @@ -117,7 +117,7 @@ def get_device_metrics_log(self, node_id: str, start: datetime, end: datetime) - class SqliteNodeDB(BaseSqlitePersistenceStore, AbstractNodeDB): def _initialize_db(self): - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' CREATE TABLE IF NOT EXISTS nodes ( @@ -156,7 +156,7 @@ def _initialize_db(self): conn.commit() def store_user(self, node_user: MeshNode.User): - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' INSERT OR REPLACE INTO nodes (id, short_name, long_name, macaddr, hw_model, public_key) @@ -166,7 +166,7 @@ def store_user(self, node_user: MeshNode.User): conn.commit() def store_position(self, node_id: str, position: MeshNode.Position): - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' INSERT INTO positions (node_id, logged_time, reported_time, latitude, longitude, altitude, location_source) @@ -176,7 +176,7 @@ def store_position(self, node_id: str, position: MeshNode.Position): conn.commit() def store_device_metrics(self, node_id: str, device_metrics: MeshNode.DeviceMetrics): - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' INSERT INTO device_metrics (node_id, logged_time, battery_level, voltage, channel_utilization, air_util_tx, uptime_seconds) @@ -186,7 +186,7 @@ def store_device_metrics(self, node_id: str, device_metrics: MeshNode.DeviceMetr conn.commit() def get_by_id(self, node_id: str) -> MeshNode.User | None: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute('SELECT id, short_name, long_name, macaddr, hw_model, public_key FROM nodes WHERE id = ?', (node_id,)) @@ -197,7 +197,7 @@ def get_by_id(self, node_id: str) -> MeshNode.User | None: return None def get_by_short_name(self, short_name: str) -> MeshNode.User | None: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute( 'SELECT id, short_name, long_name, macaddr, hw_model, public_key FROM nodes WHERE short_name = ? COLLATE NOCASE', @@ -209,7 +209,7 @@ def get_by_short_name(self, short_name: str) -> MeshNode.User | None: return None def list_nodes(self) -> list[MeshNode.User]: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute('SELECT id, short_name, long_name, macaddr, hw_model, public_key FROM nodes') rows = cursor.fetchall() @@ -217,7 +217,7 @@ def list_nodes(self) -> list[MeshNode.User]: hw_model=row[4], public_key=row[5]) for row in rows] def get_last_position(self, node_id: str) -> MeshNode.Position | None: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT logged_time, reported_time, latitude, longitude, altitude, location_source @@ -234,7 +234,7 @@ def get_last_position(self, node_id: str) -> MeshNode.Position | None: def get_position_log(self, node_id: str, start: datetime, end: datetime) -> list[ MeshNode.Position]: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT logged_time, reported_time, latitude, longitude, altitude, location_source @@ -247,7 +247,7 @@ def get_position_log(self, node_id: str, start: datetime, end: datetime) -> list altitude=row[4], location_source=row[5]) for row in rows] def get_last_device_metrics(self, node_id: str) -> MeshNode.DeviceMetrics | None: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT logged_time, battery_level, voltage, channel_utilization, air_util_tx, uptime_seconds @@ -264,7 +264,7 @@ def get_last_device_metrics(self, node_id: str) -> MeshNode.DeviceMetrics | None def get_device_metrics_log(self, node_id: str, start: datetime, end: datetime) -> list[ MeshNode.DeviceMetrics]: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT logged_time, battery_level, voltage, channel_utilization, air_util_tx, uptime_seconds diff --git a/src/persistence/user_prefs.py b/src/persistence/user_prefs.py index 5888fbd..af3bc70 100644 --- a/src/persistence/user_prefs.py +++ b/src/persistence/user_prefs.py @@ -51,7 +51,7 @@ def persist_user_prefs(self, user_id: str, user_prefs: UserPrefs): class SqliteUserPrefsPersistence(AbstractUserPrefsPersistence, BaseSqlitePersistenceStore): def _initialize_db(self): - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() cursor.execute(''' CREATE TABLE IF NOT EXISTS user_prefs ( @@ -66,7 +66,7 @@ def _initialize_db(self): conn.commit() def get_user_prefs(self, user_id: str) -> UserPrefs: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: # Fetch the data cursor = conn.cursor() cursor.execute(''' @@ -91,7 +91,7 @@ def get_user_prefs(self, user_id: str) -> UserPrefs: return user_prefs def persist_user_prefs(self, user_id: str, user_prefs: UserPrefs) -> UserPrefs: - with sqlite3.connect(self.db_path) as conn: + with self._lock, self._get_connection() as conn: cursor = conn.cursor() for key, preference in user_prefs.__dict__.items(): if key == 'user_id': diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index cdf3616..a2c05f5 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -1,300 +1,259 @@ -import socket -import select -import threading -import logging -import time -from collections import deque - -class TcpProxy: - def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_port=4403): - self.target_host = target_host - self.target_port = int(target_port) - self.listen_host = listen_host - self.listen_port = int(listen_port) - self.server_socket = None - self.target_socket = None - - self.clients = [] - self.lock = threading.RLock() - - self.running = False - - # Handshake: The first 50 packets from a fresh radio connection - self.handshake_packets = [] - self.handshake_max_count = 50 - - # History: The last 50 packets seen - self.rolling_packets = deque(maxlen=50) - - # Buffer for incoming raw bytes from the radio - self.in_buffer = b'' - - self.last_target_activity = time.time() - self.reconnecting = False - - def start(self): - self.running = True - self.thread = threading.Thread(target=self._run) - self.thread.daemon = True - self.thread.start() - - def stop(self): - self.running = False - self._disconnect_all_clients() - if self.server_socket: - try: self.server_socket.close() - except: pass - if self.target_socket: - try: self.target_socket.close() - except: pass - - def get_status(self): - if not self.running: - return "Proxy: Offline" - - silence = time.time() - self.last_target_activity - with self.lock: - client_count = len(self.clients) - cached_count = len(self.handshake_packets) + len(self.rolling_packets) - - state = "Reconnecting" if self.reconnecting else ("Online" if self.target_socket else "Offline") - - return { - "state": state, - "connected": self.target_socket is not None and not self.reconnecting, - "clients": client_count, - "silence_secs": int(silence), - "cached_packets": cached_count - } - - def _disconnect_all_clients(self): - """Force all clients to disconnect so they can re-sync with a new radio session""" - with self.lock: - for sock in self.clients: - try: sock.close() - except: pass - self.clients = [] - logging.info("Disconnected all proxy clients to force re-sync.") - - def _connect_to_target(self): - """Helper to connect to radio with Keep-Alives (Non-blocking retry)""" - # Clear state for new connection - self.handshake_packets = [] - self.in_buffer = b'' - self._disconnect_all_clients() - self.reconnecting = True - - try: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.settimeout(5.0) # 5s timeout for connection attempt - sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) - try: - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30) - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10) - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3) - except: pass - - sock.connect((self.target_host, self.target_port)) - sock.settimeout(10.0) # 10s timeout for all operations - self.target_socket = sock - self.last_target_activity = time.time() - self.reconnecting = False - logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") - return True - except Exception as e: - logging.error(f"Failed to connect to target ({self.target_host}): {e}") - self.target_socket = None - return False - - def _process_radio_data(self, data): - """Frames raw bytes into Meshtastic packets and caches them""" - self.in_buffer += data - - while len(self.in_buffer) >= 4: - if self.in_buffer[0:2] != b'\x94\xc3': - idx = self.in_buffer.find(b'\x94\xc3') - if idx == -1: - self.in_buffer = b'' - break - self.in_buffer = self.in_buffer[idx:] - continue - - length = (self.in_buffer[2] << 8) | self.in_buffer[3] - total_len = length + 4 - - if len(self.in_buffer) < total_len: - break - - packet = self.in_buffer[:total_len] - self.in_buffer = self.in_buffer[total_len:] - - with self.lock: - if len(self.handshake_packets) < self.handshake_max_count: - self.handshake_packets.append(packet) - self.rolling_packets.append(packet) - targets = self.clients[:] - - for client_sock in targets: - try: - # logging.debug(f"Forwarding packet to {client_sock.getpeername()}") - client_sock.sendall(packet) - except Exception as e: - logging.debug(f"Failed to forward packet to client: {e}") - self._remove_client(client_sock) - - def _remove_client(self, sock): - try: - addr = sock.getpeername() - logging.info(f"--- PROXY: Removing client {addr}") - except: - logging.info("--- PROXY: Removing unknown client") - - with self.lock: - if sock in self.clients: - self.clients.remove(sock) - try: sock.close() - except: pass - - with self.lock: - logging.info(f"--- PROXY: Remaining clients: {len(self.clients)}") - - def _run(self): - logging.info(f"Starting TCP Proxy on {self.listen_host}:{self.listen_port} -> {self.target_host}:{self.target_port}") - - self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - try: - self.server_socket.bind((self.listen_host, self.listen_port)) - except Exception as e: - logging.error(f"Failed to bind proxy port {self.listen_port}: {e}") - self.running = False - return - - self.server_socket.listen(5) - - last_heartbeat_log = time.time() - last_reconnect_attempt = 0 - watchdog_timeout = 300.0 - - while self.running: - current_time = time.time() - - # Reconnection logic (non-blocking) - if not self.target_socket or self.reconnecting: - if current_time - last_reconnect_attempt > 10.0: - last_reconnect_attempt = current_time - self._connect_to_target() - - # Sleep a bit to not peg CPU while radio is down - if not self.target_socket: - time.sleep(1.0) - - try: - with self.lock: - client_socks = [s for s in self.clients if s.fileno() != -1] - - inputs = [self.server_socket] + client_socks - if self.target_socket and not self.reconnecting: - inputs.append(self.target_socket) - - readable, _, _ = select.select(inputs, [], [], 1.0) - except Exception as e: - logging.error(f"Select error: {e}") - time.sleep(0.5) - continue - - # Heartbeat Logging - if current_time - last_heartbeat_log > 60.0: - with self.lock: - client_count = len(self.clients) - client_info = [] - for s in self.clients: - try: - peer = s.getpeername() - client_info.append(f"{peer[0]}:{peer[1]}") - except: - client_info.append("unknown") - - status = "Connected" if self.target_socket and not self.reconnecting else "RECONNECTING" - silence = current_time - self.last_target_activity - logging.info(f"Proxy Heartbeat: {status}. Last radio data {silence:.1f}s ago. Clients: {client_count} ({', '.join(client_info)})") - last_heartbeat_log = current_time - - # Watchdog: Force reconnect if silence is too long on an "active" connection - if self.target_socket and not self.reconnecting: - if current_time - self.last_target_activity > watchdog_timeout: - logging.warning(f"Watchdog: No data from radio for {watchdog_timeout}s. Forcing reconnect...") - try: self.target_socket.close() - except: pass - self.target_socket = None # Trigger reconnect logic - - for sock in readable: - if sock is self.server_socket: - try: - client_socket, addr = self.server_socket.accept() - client_socket.settimeout(10.0) # 10s timeout for client sends - logging.info(f"+++ PROXY: New connection accepted from {addr}") - - with self.lock: - self.clients.append(client_socket) - logging.info(f"--- PROXY: Total active clients now: {len(self.clients)}") - h_snapshot = list(self.handshake_packets) - r_snapshot = list(self.rolling_packets) - - def replay(target_sock, handshake, history, client_addr): - if client_addr[0] in ('127.0.0.1', 'localhost'): - return - try: - time.sleep(2.0) - for p in handshake: - target_sock.sendall(p) - time.sleep(0.05) - for p in history: - target_sock.sendall(p) - time.sleep(0.01) - logging.info(f"Replayed {len(handshake) + len(history)} packets to {client_addr}") - except Exception as e: - self._remove_client(target_sock) - - threading.Thread(target=replay, args=(client_socket, h_snapshot, r_snapshot, addr), daemon=True).start() - - except Exception as e: - logging.error(f"Error accepting connection: {e}") - - elif self.target_socket and sock is self.target_socket: - self.last_target_activity = time.time() - try: - data = self.target_socket.recv(16384) - if not data: - logging.warning("Radio closed connection. Triggering re-sync...") - self.target_socket.close() - self.target_socket = None - break - self._process_radio_data(data) - except Exception as e: - logging.error(f"Error reading from radio: {e}") - self.target_socket.close() - self.target_socket = None - - else: - # Data from a client forwarded to radio - try: - data = sock.recv(16384) - if not data: - self._remove_client(sock) - elif self.target_socket and not self.reconnecting: - try: - chunk_size = 512 - for i in range(0, len(data), chunk_size): - self.target_socket.sendall(data[i:i+chunk_size]) - time.sleep(0.01) - except Exception as e: - logging.error(f"Error sending to radio: {e}") - try: self.target_socket.close() - except: pass - self.target_socket = None - except Exception as e: - logging.debug(f"Error receiving from client: {e}") - self._remove_client(sock) - - self.stop() +import asyncio +import logging +import time +from collections import deque +import threading + +class TcpProxy: + def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_port=4403): + self.target_host = target_host + self.target_port = int(target_port) + self.listen_host = listen_host + self.listen_port = int(listen_port) + + self.server = None + self.target_reader = None + self.target_writer = None + + self.clients = set() + + self.running = False + self.loop = None + self.thread = None + + self.handshake_packets = [] + self.handshake_max_count = 50 + self.rolling_packets = deque(maxlen=50) + + self.last_target_activity = time.time() + self.reconnecting = False + + def start(self): + self.running = True + self.thread = threading.Thread(target=self._run_loop, daemon=True) + self.thread.start() + + def stop(self): + self.running = False + if self.loop: + self.loop.call_soon_threadsafe(self._stop_loop) + + def _stop_loop(self): + if self.server: + self.server.close() + for writer in self.clients: + try: writer.close() + except: pass + if self.target_writer: + try: self.target_writer.close() + except: pass + + def get_status(self): + if not self.running: + return "Proxy: Offline" + + silence = time.time() - self.last_target_activity + client_count = len(self.clients) + cached_count = len(self.handshake_packets) + len(self.rolling_packets) + + state = "Reconnecting" if self.reconnecting else ("Online" if self.target_writer else "Offline") + + return { + "state": state, + "connected": self.target_writer is not None and not self.reconnecting, + "clients": client_count, + "silence_secs": int(silence), + "cached_packets": cached_count + } + + def _run_loop(self): + self.loop = asyncio.new_event_loop() + asyncio.set_event_loop(self.loop) + self.loop.run_until_complete(self._async_run()) + + async def _async_run(self): + logging.info(f"Starting TCP Proxy on {self.listen_host}:{self.listen_port} -> {self.target_host}:{self.target_port}") + + try: + self.server = await asyncio.start_server( + self._handle_client, self.listen_host, self.listen_port) + except Exception as e: + logging.error(f"Failed to bind proxy port {self.listen_port}: {e}") + self.running = False + return + + asyncio.create_task(self._target_connection_manager()) + asyncio.create_task(self._watchdog()) + + try: + async with self.server: + while self.running: + await asyncio.sleep(1) + except asyncio.CancelledError: + pass + finally: + self._stop_loop() + + async def _watchdog(self): + last_heartbeat_log = time.time() + while self.running: + current_time = time.time() + if self.target_writer and not self.reconnecting: + if current_time - self.last_target_activity > 300.0: + logging.warning(f"Watchdog: No data from radio for 300s. Forcing reconnect...") + try: self.target_writer.close() + except: pass + self.target_reader = None + self.target_writer = None + + if current_time - last_heartbeat_log > 60.0: + client_count = len(self.clients) + status = "Connected" if self.target_writer and not self.reconnecting else "RECONNECTING" + silence = current_time - self.last_target_activity + logging.info(f"Proxy Heartbeat: {status}. Last radio data {silence:.1f}s ago. Clients: {client_count}") + last_heartbeat_log = current_time + + await asyncio.sleep(5) + + async def _target_connection_manager(self): + while self.running: + if self.target_writer is None or self.target_reader is None: + self.reconnecting = True + self._disconnect_all_clients() + self.handshake_packets.clear() + self.rolling_packets.clear() + + try: + reader, writer = await asyncio.wait_for( + asyncio.open_connection(self.target_host, self.target_port), + timeout=5.0 + ) + self.target_reader = reader + self.target_writer = writer + self.last_target_activity = time.time() + self.reconnecting = False + logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") + asyncio.create_task(self._read_from_target()) + except Exception as e: + logging.error(f"Failed to connect to target ({self.target_host}): {e}") + await asyncio.sleep(5.0) + else: + await asyncio.sleep(1) + + def _disconnect_all_clients(self): + for writer in list(self.clients): + try: writer.close() + except: pass + self.clients.clear() + logging.info("Disconnected all proxy clients to force re-sync.") + + async def _read_from_target(self): + reader = self.target_reader + writer = self.target_writer + + in_buffer = b'' + while self.running and self.target_reader == reader: + try: + data = await reader.read(16384) + if not data: + logging.warning("Radio closed connection. Triggering re-sync...") + break + self.last_target_activity = time.time() + + in_buffer += data + + while len(in_buffer) >= 4: + if in_buffer[0:2] != b'\x94\xc3': + idx = in_buffer.find(b'\x94\xc3') + if idx == -1: + in_buffer = b'' + break + in_buffer = in_buffer[idx:] + continue + + length = (in_buffer[2] << 8) | in_buffer[3] + total_len = length + 4 + + if len(in_buffer) < total_len: + break + + packet = in_buffer[:total_len] + in_buffer = in_buffer[total_len:] + + if len(self.handshake_packets) < self.handshake_max_count: + self.handshake_packets.append(packet) + self.rolling_packets.append(packet) + + for client_writer in list(self.clients): + try: + client_writer.write(packet) + await client_writer.drain() + except Exception as e: + logging.debug(f"Failed to forward packet to client: {e}") + self._remove_client(client_writer) + except Exception as e: + logging.error(f"Error reading from radio: {e}") + break + + if self.target_writer == writer: + try: writer.close() + except: pass + self.target_writer = None + self.target_reader = None + + async def _handle_client(self, reader, writer): + addr = writer.get_extra_info('peername') + logging.info(f"+++ PROXY: New connection accepted from {addr}") + self.clients.add(writer) + + h_snapshot = list(self.handshake_packets) + r_snapshot = list(self.rolling_packets) + + if addr[0] not in ('127.0.0.1', 'localhost'): + try: + await asyncio.sleep(2.0) + for p in h_snapshot: + writer.write(p) + await writer.drain() + await asyncio.sleep(0.05) + for p in r_snapshot: + writer.write(p) + await writer.drain() + await asyncio.sleep(0.01) + logging.info(f"Replayed {len(h_snapshot) + len(r_snapshot)} packets to {addr}") + except Exception as e: + self._remove_client(writer) + return + + while self.running: + try: + data = await reader.read(16384) + if not data: + break + if self.target_writer and not self.reconnecting: + try: + self.target_writer.write(data) + await self.target_writer.drain() + except Exception as e: + logging.error(f"Error sending to radio: {e}") + try: self.target_writer.close() + except: pass + self.target_writer = None + except Exception as e: + logging.debug(f"Error receiving from client: {e}") + break + + self._remove_client(writer) + + def _remove_client(self, writer): + addr = None + try: + addr = writer.get_extra_info('peername') + logging.info(f"--- PROXY: Removing client {addr}") + except: + logging.info("--- PROXY: Removing unknown client") + + if writer in self.clients: + self.clients.remove(writer) + try: writer.close() + except: pass diff --git a/test/test_tcp_proxy.py b/test/test_tcp_proxy.py index 1efd43d..d8a65a6 100644 --- a/test/test_tcp_proxy.py +++ b/test/test_tcp_proxy.py @@ -1,89 +1,48 @@ -import unittest -from unittest.mock import MagicMock, patch -import threading -import time -import socket -from src.tcp_proxy import TcpProxy - -class TestTcpProxy(unittest.TestCase): - def setUp(self): - self.proxy = TcpProxy("127.0.0.1", 4403, "127.0.0.1", 4404) - - def test_lock_is_rlock(self): - # threading.RLock() might be a factory function returning a platform-specific class - self.assertTrue(hasattr(self.proxy.lock, 'acquire') and hasattr(self.proxy.lock, '_count') or isinstance(self.proxy.lock, type(threading.RLock()))) - - def test_remove_client_no_deadlock(self): - # Mock a client socket - mock_client = MagicMock() - mock_client.getpeername.return_value = ("127.0.0.1", 12345) - - self.proxy.clients.append(mock_client) - - # This should not deadlock now - self.proxy._remove_client(mock_client) - - self.assertEqual(len(self.proxy.clients), 0) - mock_client.close.assert_called_once() - - def test_process_radio_data_deadlock_fix(self): - # This test simulates the exact deadlock condition: - # _process_radio_data holds the lock and calls _remove_client (via sendall failure) - # which tries to acquire the lock again. - - mock_client = MagicMock() - mock_client.getpeername.return_value = ("127.0.0.1", 12345) - # Force sendall to fail - mock_client.sendall.side_effect = Exception("Broken pipe") - - self.proxy.clients.append(mock_client) - - # Valid Meshtastic packet header \x94\xc3 + length 0001 + 1 byte data - packet_data = b'\x94\xc3\x00\x01\x00' - - # This call should not hang - self.proxy._process_radio_data(packet_data) - - # Verify client was removed - self.assertEqual(len(self.proxy.clients), 0) - mock_client.close.assert_called_once() - - def test_get_status_thread_safety(self): - # Ensure get_status can be called while holding the lock elsewhere - self.proxy.running = True - self.proxy.target_socket = MagicMock() # To make it look "Online" - with self.proxy.lock: - status = self.proxy.get_status() - self.assertEqual(status["state"], "Online") - - @patch('socket.socket') - def test_client_socket_has_timeout(self, mock_socket_class): - # Mock the server socket instance - mock_server_sock = MagicMock() - mock_socket_class.return_value = mock_server_sock - - # Mock accept() to return a mock client socket - mock_client = MagicMock() - mock_server_sock.accept.return_value = (mock_client, ("1.2.3.4", 5555)) - - # We need to mock select.select to return the server_socket as readable - with patch('select.select') as mock_select: - # First call: return server socket as readable - # Second call: flip self.running to False to exit loop - def select_side_effect(*args, **kwargs): - if self.proxy.running: - self.proxy.running = False - return ([mock_server_sock], [], []) - return ([], [], []) - - mock_select.side_effect = select_side_effect - - self.proxy.running = True - self.proxy.target_socket = MagicMock() # Avoid reconnect logic - self.proxy._run() - - # Verify timeout was set on the client socket - mock_client.settimeout.assert_called_with(10.0) - -if __name__ == "__main__": - unittest.main() +import unittest +import asyncio +from unittest.mock import MagicMock, AsyncMock, patch +from src.tcp_proxy import TcpProxy + +class TestTcpProxy(unittest.TestCase): + def setUp(self): + self.proxy = TcpProxy("127.0.0.1", 4403, "127.0.0.1", 4404) + + def test_status_fields(self): + status = self.proxy.get_status() + self.assertIn("Offline", status) + + self.proxy.running = True + self.proxy.target_writer = MagicMock() + status = self.proxy.get_status() + self.assertEqual(status["state"], "Online") + self.assertEqual(status["clients"], 0) + + def test_remove_client(self): + mock_writer = MagicMock() + mock_writer.get_extra_info.return_value = ("127.0.0.1", 12345) + + self.proxy.clients.add(mock_writer) + self.proxy._remove_client(mock_writer) + + self.assertEqual(len(self.proxy.clients), 0) + mock_writer.close.assert_called_once() + + @patch('asyncio.start_server', new_callable=AsyncMock) + def test_async_run_binds_server(self, mock_start_server): + async def run_test(): + self.proxy.running = True + + # Cancel the watchdog and connection manager immediately to avoid hang + async def stop_soon(): + await asyncio.sleep(0.1) + self.proxy.running = False + + asyncio.create_task(stop_soon()) + await self.proxy._async_run() + + mock_start_server.assert_called_once() + + asyncio.run(run_test()) + +if __name__ == "__main__": + unittest.main() From 38f486bf2daafad3f1ba16a4f1c842879ba80440 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 10:48:54 +0000 Subject: [PATCH 51/93] Fix emoji reaction protobuf type mismatch: expect int instead of bool --- src/tcp_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcp_interface.py b/src/tcp_interface.py index 4422ce6..7487a57 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -30,7 +30,7 @@ def sendReaction( packet.decoded.portnum = portNum packet.decoded.payload = emoji_bytes packet.decoded.reply_id = messageId - packet.decoded.emoji = True + packet.decoded.emoji = ord(emoji) if isinstance(emoji, str) else 1 self._sendPacket(packet, destinationId, wantAck=wantAck, From 5cfa693e637a39ba0efbba766ff6d8670070a263 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:02:53 +0000 Subject: [PATCH 52/93] Add detailed traceroute debugging logs --- src/api/StorageAPI.py | 11 ++++++++++- src/commands/tr.py | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index 1fd232b..13d212c 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -71,13 +71,22 @@ def store_raw_packet(self, packet: dict): logging.info(f"store_raw_packet called for portnum: {packet.get('decoded', {}).get('portnum')}") # Filter out packet types that the API doesn't support or we don't want to store - ignored_ports = [345, 'ROUTING_APP', 'TRACEROUTE_APP', 'ADMIN_APP', 'NEIGHBORINFO_APP'] + ignored_ports = [345, 'TRACEROUTE_APP', 'ADMIN_APP', 'NEIGHBORINFO_APP'] portnum = packet.get('decoded', {}).get('portnum') if portnum in ignored_ports: return # Additional filtering for Telemetry packets to avoid API errors # The API requires either 'deviceMetrics' or 'localStats' + if portnum == 'ROUTING_APP': + from_id = packet.get('from') + logging.info(f"DEBUG: ROUTING_APP Packet from {from_id}: {packet}") + + # Log all text messages + if portnum == 'TEXT_MESSAGE_APP': + from_id = packet.get('from') + logging.info(f"DEBUG: TEXT_MESSAGE_APP Packet from {from_id}: {packet}") + if portnum == 'TELEMETRY_APP': telemetry = packet.get('decoded', {}).get('telemetry', {}) if 'deviceMetrics' not in telemetry and 'localStats' not in telemetry: diff --git a/src/commands/tr.py b/src/commands/tr.py index 660deae..92dbfd8 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -23,9 +23,10 @@ def send_reply(msg): self.reply_in_dm(packet, msg, want_ack=False) # Add a reaction to show we are working on it + logging.info(f"Adding reaction ⌛ for packet {packet.get('id')} from {packet.get('fromId')}") self.bot.interface.sendReaction("⌛", messageId=packet['id'], destinationId=packet['fromId']) - requester_id = packet['fromId'] + requester_id = packet.get('fromId') requester = self.bot.node_db.get_by_id(requester_id) requester_name = requester.long_name if requester else requester_id From 9f3215dc83b7f6c03aaa9b30e99c4a2970f05282 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:10:19 +0000 Subject: [PATCH 53/93] Add more traceroute debugging logs --- src/bot.py | 1 + src/tcp_interface.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/bot.py b/src/bot.py index 1482747..82447cd 100644 --- a/src/bot.py +++ b/src/bot.py @@ -224,6 +224,7 @@ def run_command(): def on_traceroute(self, packet, route): """Callback for when a traceroute response is received.""" + logging.info(f"on_traceroute: Received signal. Packet: {packet.get('id') if isinstance(packet, dict) else 'obj'}") def process_traceroute(): try: target_id = packet.get('fromId') diff --git a/src/tcp_interface.py b/src/tcp_interface.py index 7487a57..ab87eeb 100644 --- a/src/tcp_interface.py +++ b/src/tcp_interface.py @@ -81,6 +81,7 @@ def onResponseTraceRoute(self, packet): getattr(packet.decoded, 'traceroute', None))) logging.info(f"onResponseTraceRoute: Received traceroute response. Route data present: {route_discovery is not None}") + logging.info(f"DEBUG: Traceroute packet keys: {packet.keys() if isinstance(packet, dict) else 'not a dict'}") # Always call super to allow library internal processing (printing to stdout etc) super().onResponseTraceRoute(packet) From 8c778e2bd88d85849cfbc9e8722eaa637fb7e930 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:35:10 +0000 Subject: [PATCH 54/93] Enable INFO logging for core interfaces --- src/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index e3cd660..2a5fea3 100644 --- a/src/main.py +++ b/src/main.py @@ -15,9 +15,9 @@ stream=sys.stdout) # Set the log level for specific modules -logging.getLogger('tcp_interface').setLevel(logging.WARNING) -logging.getLogger('stream_interface').setLevel(logging.WARNING) -logging.getLogger('mesh_interface').setLevel(logging.WARNING) +logging.getLogger('tcp_interface').setLevel(logging.INFO) +logging.getLogger('stream_interface').setLevel(logging.INFO) +logging.getLogger('mesh_interface').setLevel(logging.INFO) # Now we can import the rest of our local files from src.api.StorageAPI import StorageAPIWrapper From fa7ce24f17a04fd16f984e72af972673f45ef6b1 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:38:04 +0000 Subject: [PATCH 55/93] Add packet ID and request ID tracking for traceroute --- src/bot.py | 7 +++++-- src/commands/tr.py | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/bot.py b/src/bot.py index 82447cd..d6b310a 100644 --- a/src/bot.py +++ b/src/bot.py @@ -318,8 +318,11 @@ def on_receive(self, packet: MeshPacket, interface): portnum = packet.get('decoded', {}).get('portnum', 'unknown') logging.info(f"on_receive: Incoming packet from {from_id} (Port: {portnum})") - if from_id == '!69828b98': - logging.debug(f"Received ANY packet from mte4: {packet}") + if from_id == '!69828b98' or from_id == '!6985f59c': + logging.info(f"Received ANY packet from {from_id}: {packet}") + if portnum == 'ROUTING_APP': + decoded = packet.get('decoded', {}) + logging.info(f" ROUTING_APP Detail: requestId={decoded.get('requestId')}, routing={decoded.get('routing')}") # dump the packet to disk (if enabled) dump_packet(packet) diff --git a/src/commands/tr.py b/src/commands/tr.py index 92dbfd8..f545c05 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -101,7 +101,11 @@ def send_timeout(): try: logging.info(f"Initiating traceroute to {target_id} requested by {requester_id}") # hopLimit=7 is standard max - self.bot.interface.sendTraceRoute(target_id, hopLimit=7) + p = self.bot.interface.sendTraceRoute(target_id, hopLimit=7) + if p: + logging.info(f"Sent traceroute packet to {target_id}. Packet ID: {p.id}") + else: + logging.warning(f"sendTraceRoute returned None for {target_id}") except Exception as e: logging.error(f"Failed to send traceroute to {target_id}: {e}") if target_id in self.bot.pending_traces and requester_id in self.bot.pending_traces[target_id]: From 2e9d95ffaf8dba8571c729dd4fda1efc6f1b2213 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:47:23 +0000 Subject: [PATCH 56/93] Force full traceroute even for 0-hop nodes and add logging --- src/commands/tr.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/commands/tr.py b/src/commands/tr.py index f545c05..63d11dc 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -54,16 +54,13 @@ def send_reply(msg): hops_away = hop_start - hop_limit snr = packet.get('rxSnr', 0.0) - if hops_away == 0: - response = f"{requester_name} you are Zero Hops from me. No traceroute required!" - send_reply(response) - return - - response = f"{requester_name} you are {hops_away} hops away (Signal: {snr} dB). Starting full traceroute..." - send_reply(response) + status_msg = f"{requester_name} you are {hops_away} hops away (Signal: {snr} dB). Initiating full traceroute for verification..." + logging.info(f"Detected {hops_away} hops for {target_id}. {status_msg}") + send_reply(status_msg) else: # Tracing to a different node response = f"Starting traceroute to {target_long_name} ({target_id}) for you..." + logging.info(response) send_reply(response) # Initiate actual traceroute From 414460068d382cdb7482e1d5b8f74c2da6f0b6a4 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:51:56 +0000 Subject: [PATCH 57/93] Final cleanup: Revert log levels and restore Zero Hops optimization --- src/commands/tr.py | 8 +++++++- src/main.py | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/commands/tr.py b/src/commands/tr.py index 63d11dc..baae383 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -54,7 +54,13 @@ def send_reply(msg): hops_away = hop_start - hop_limit snr = packet.get('rxSnr', 0.0) - status_msg = f"{requester_name} you are {hops_away} hops away (Signal: {snr} dB). Initiating full traceroute for verification..." + if hops_away == 0: + response = f"{requester_name} you are Zero Hops from me. No traceroute required!" + logging.info(f"Detected 0 hops for {target_id}. {response}") + send_reply(response) + return + + status_msg = f"{requester_name} you are {hops_away} hops away (Signal: {snr} dB). Starting full traceroute..." logging.info(f"Detected {hops_away} hops for {target_id}. {status_msg}") send_reply(status_msg) else: diff --git a/src/main.py b/src/main.py index 2a5fea3..e3cd660 100644 --- a/src/main.py +++ b/src/main.py @@ -15,9 +15,9 @@ stream=sys.stdout) # Set the log level for specific modules -logging.getLogger('tcp_interface').setLevel(logging.INFO) -logging.getLogger('stream_interface').setLevel(logging.INFO) -logging.getLogger('mesh_interface').setLevel(logging.INFO) +logging.getLogger('tcp_interface').setLevel(logging.WARNING) +logging.getLogger('stream_interface').setLevel(logging.WARNING) +logging.getLogger('mesh_interface').setLevel(logging.WARNING) # Now we can import the rest of our local files from src.api.StorageAPI import StorageAPIWrapper From 2a7038bc381a1184502f443c1cfedfc5ac4bdc5d Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:57:21 +0000 Subject: [PATCH 58/93] Consolidate traceroute response into a single message for reliability --- src/bot.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/bot.py b/src/bot.py index d6b310a..d69793b 100644 --- a/src/bot.py +++ b/src/bot.py @@ -291,18 +291,17 @@ def get_route_hops(r, key='route'): response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + # Consolidate into a single message to ensure delivery (less radio congestion) + combined_response = f"{response_out}\n{response_in}" + # Wait for radio to settle after receiving the traceroute response - time.sleep(5) + time.sleep(3) for requester_id in requesters: - logging.info(f"Sending traceroute result to {requester_id}: {response_out}") + logging.info(f"Sending consolidated traceroute result to {requester_id}:\n{combined_response}") # Use wantAck=False for result delivery to reduce congestion - self.interface.sendText(response_out, destinationId=requester_id, wantAck=False) - - # Always send the return path message for consistency - time.sleep(2) - logging.info(f"Sending traceroute result to {requester_id}: {response_in}") - self.interface.sendText(response_in, destinationId=requester_id, wantAck=False) + self.interface.sendText(combined_response, destinationId=requester_id, wantAck=False) + time.sleep(1) logging.info(f"Finished processing traceroute for {target_id}") except Exception as e: From 0ec15d7745dc884433a66c751fd44dd26d2ae178 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:58:19 +0000 Subject: [PATCH 59/93] Optimize traceroute response for 200 char limit --- src/bot.py | 56 +++++++++++++++++++----------------------------------- 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/src/bot.py b/src/bot.py index d69793b..561b6f1 100644 --- a/src/bot.py +++ b/src/bot.py @@ -250,46 +250,30 @@ def get_route_hops(r, key='route'): return r.get(key, []) return getattr(r, key, []) - # 1. Format the OUTBOUND route (TO target) - route_ids = get_route_hops(route, 'route') - hops = [] - for node_id_int in route_ids: - # Convert int to !hex string - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops.append(f"{node.short_name}") - else: - hops.append(f"{node_id_str}") - - route_str = " -> ".join(hops) if hops else "Direct" - - # Append target to the end of the TO route + # Format compact routes target_node = self.node_db.get_by_id(target_id) - target_name = target_node.short_name if target_node else target_id - route_str += f" -> {target_name}" + t_name = target_node.short_name if target_node else target_id[-4:] - response_out = f"Trace TO {target_id} ({len(hops)} hops):\n{route_str}" + my_node = self.node_db.get_by_id(self.my_id) + m_name = my_node.short_name if my_node else self.my_id[-4:] + + # Outbound + route_ids = get_route_hops(route, 'route') + hops_to = [] + for nid in route_ids: + n = self.node_db.get_by_id(f"!{nid:08x}") + hops_to.append(n.short_name if n else f"{nid:08x}"[-4:]) + route_to_str = ">".join(hops_to) + (">" if hops_to else "") + t_name - # 2. Format the INBOUND route (FROM target) + # Inbound route_back_ids = get_route_hops(route, 'route_back') - hops_back = [] - for node_id_int in route_back_ids: - node_id_str = f"!{node_id_int:08x}" - node = self.node_db.get_by_id(node_id_str) - if node: - hops_back.append(f"{node.short_name}") - else: - hops_back.append(f"{node_id_str}") - - back_str = " -> ".join(hops_back) if hops_back else "Direct" - - # Append bot to the end of the FROM route - my_node = self.node_db.get_by_id(self.my_id) - my_name = my_node.short_name if my_node else self.my_id - back_str += f" -> {my_name}" - - response_in = f"Trace FROM {target_id} ({len(hops_back)} hops):\n{back_str}" + hops_fr = [] + for nid in route_back_ids: + n = self.node_db.get_by_id(f"!{nid:08x}") + hops_fr.append(n.short_name if n else f"{nid:08x}"[-4:]) + route_fr_str = ">".join(hops_fr) + (">" if hops_fr else "") + m_name + + combined_response = f"!tr {t_name}:\nTO: {route_to_str}\nFR: {route_fr_str}" # Consolidate into a single message to ensure delivery (less radio congestion) combined_response = f"{response_out}\n{response_in}" From fa7b2655baed69e0f82056aa9d4209198100afa4 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 11:58:31 +0000 Subject: [PATCH 60/93] Fix combined_response variable error --- src/bot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bot.py b/src/bot.py index 561b6f1..a2e5863 100644 --- a/src/bot.py +++ b/src/bot.py @@ -276,7 +276,7 @@ def get_route_hops(r, key='route'): combined_response = f"!tr {t_name}:\nTO: {route_to_str}\nFR: {route_fr_str}" # Consolidate into a single message to ensure delivery (less radio congestion) - combined_response = f"{response_out}\n{response_in}" + # combined_response already defined above as: f"!tr {t_name}:\nTO: {route_to_str}\nFR: {route_fr_str}" # Wait for radio to settle after receiving the traceroute response time.sleep(3) From 8d3f22643ba33450f3d7297272a93fbf4ed00812 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 12:02:19 +0000 Subject: [PATCH 61/93] Permanently remove Zero Hops shortcut to force real trace verification --- src/commands/tr.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/commands/tr.py b/src/commands/tr.py index baae383..33f54aa 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -54,13 +54,7 @@ def send_reply(msg): hops_away = hop_start - hop_limit snr = packet.get('rxSnr', 0.0) - if hops_away == 0: - response = f"{requester_name} you are Zero Hops from me. No traceroute required!" - logging.info(f"Detected 0 hops for {target_id}. {response}") - send_reply(response) - return - - status_msg = f"{requester_name} you are {hops_away} hops away (Signal: {snr} dB). Starting full traceroute..." + status_msg = f"{requester_name} ({hops_away} hops, {snr}dB). Starting real-time trace..." logging.info(f"Detected {hops_away} hops for {target_id}. {status_msg}") send_reply(status_msg) else: From e3a4a483dd6cab83e5dac162fa045c456784a69a Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 12:15:22 +0000 Subject: [PATCH 62/93] Fix traceroute response context and enable ACKs for reliability --- src/bot.py | 41 ++++++++++++++++++++++++++++------------- src/commands/tr.py | 26 +++++++++++++++----------- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/src/bot.py b/src/bot.py index a2e5863..b47cff8 100644 --- a/src/bot.py +++ b/src/bot.py @@ -241,8 +241,17 @@ def process_traceroute(): if route is None: decoded_keys = packet.get('decoded', {}).keys() logging.warning(f"Traceroute response from {target_id} contained no route data. Decoded keys: {list(decoded_keys)}") - for requester_id in requesters: - self.interface.sendText(f"Traceroute response received from {target_id}, but no route data was provided.", destinationId=requester_id) + for ctx in requesters: + # Handle both old (string) and new (tuple) formats during transition + r_id = ctx[0] if isinstance(ctx, tuple) else ctx + is_pub = ctx[1] if isinstance(ctx, tuple) else False + c_idx = ctx[3] if isinstance(ctx, tuple) else 0 + + msg = f"Traceroute response received from {target_id}, but no route data was provided." + if is_pub: + self.interface.sendText(msg, destinationId=requesters[0][2] if isinstance(requesters[0], tuple) else "!all", channelIndex=c_idx, wantAck=True) + else: + self.interface.sendText(msg, destinationId=r_id, wantAck=True) return def get_route_hops(r, key='route'): @@ -273,19 +282,25 @@ def get_route_hops(r, key='route'): hops_fr.append(n.short_name if n else f"{nid:08x}"[-4:]) route_fr_str = ">".join(hops_fr) + (">" if hops_fr else "") + m_name + # Consolidate into a single message combined_response = f"!tr {t_name}:\nTO: {route_to_str}\nFR: {route_fr_str}" - # Consolidate into a single message to ensure delivery (less radio congestion) - # combined_response already defined above as: f"!tr {t_name}:\nTO: {route_to_str}\nFR: {route_fr_str}" - - # Wait for radio to settle after receiving the traceroute response - time.sleep(3) - - for requester_id in requesters: - logging.info(f"Sending consolidated traceroute result to {requester_id}:\n{combined_response}") - # Use wantAck=False for result delivery to reduce congestion - self.interface.sendText(combined_response, destinationId=requester_id, wantAck=False) - time.sleep(1) + # Longer wait for radio to settle after receiving the traceroute response + time.sleep(8) + + for ctx in requesters: + if isinstance(ctx, tuple): + r_id, is_pub, to_id, c_idx = ctx + dest_id = to_id if is_pub else r_id + else: + dest_id = ctx # Fallback for old pending traces + is_pub = False + c_idx = 0 + + logging.info(f"Sending consolidated traceroute result to {dest_id} (Pub: {is_pub}):\n{combined_response}") + # Use wantAck=True for result delivery to ensure it gets through + self.interface.sendText(combined_response, destinationId=dest_id, channelIndex=c_idx, wantAck=True) + time.sleep(2) logging.info(f"Finished processing traceroute for {target_id}") except Exception as e: diff --git a/src/commands/tr.py b/src/commands/tr.py index 33f54aa..a40bfd9 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -18,9 +18,9 @@ def handle_packet(self, packet: MeshPacket) -> None: def send_reply(msg): if is_public: - self.reply_in_channel(packet, msg, want_ack=False) + self.reply_in_channel(packet, msg, want_ack=True) else: - self.reply_in_dm(packet, msg, want_ack=False) + self.reply_in_dm(packet, msg, want_ack=True) # Add a reaction to show we are working on it logging.info(f"Adding reaction ⌛ for packet {packet.get('id')} from {packet.get('fromId')}") @@ -63,20 +63,24 @@ def send_reply(msg): logging.info(response) send_reply(response) - # Initiate actual traceroute - # Map target_id -> list of requester_ids + # Store for the callback if target_id not in self.bot.pending_traces: self.bot.pending_traces[target_id] = [] - if requester_id not in self.bot.pending_traces[target_id]: - self.bot.pending_traces[target_id].append(requester_id) + # Store context: (requester_id, is_public, to_id, channel_index) + to_id = packet.get('toId') + channel_index = packet.get('channel', 0) + context = (requester_id, is_public, to_id, channel_index) + + if context not in self.bot.pending_traces[target_id]: + self.bot.pending_traces[target_id].append(context) # Start a timeout timer (120 seconds) def check_timeout(): time.sleep(120) - if target_id in self.bot.pending_traces and requester_id in self.bot.pending_traces[target_id]: - # Remove this specific requester from the pending list - self.bot.pending_traces[target_id].remove(requester_id) + if target_id in self.bot.pending_traces: + # Find and remove this specific context from the pending list + self.bot.pending_traces[target_id] = [c for c in self.bot.pending_traces[target_id] if c[0] != requester_id] # If no more requesters for this target, clean up the key if not self.bot.pending_traces[target_id]: del self.bot.pending_traces[target_id] @@ -87,9 +91,9 @@ def check_timeout(): # Send the timeout message in a separate thread to avoid blocking the timer/interface def send_timeout(): if is_public: - self.message_in_channel(packet.get('channel', 0), timeout_msg, want_ack=False) + self.message_in_channel(packet.get('channel', 0), timeout_msg, want_ack=True) else: - self.message_in_dm(requester_id, timeout_msg, want_ack=False) + self.message_in_dm(requester_id, timeout_msg, want_ack=True) threading.Thread(target=send_timeout, daemon=True).start() From 80e75cc735774338170fb9656dca2284fceef266 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 12:20:51 +0000 Subject: [PATCH 63/93] Add hop counts to consolidated traceroute message --- src/bot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bot.py b/src/bot.py index b47cff8..fc9b897 100644 --- a/src/bot.py +++ b/src/bot.py @@ -282,8 +282,8 @@ def get_route_hops(r, key='route'): hops_fr.append(n.short_name if n else f"{nid:08x}"[-4:]) route_fr_str = ">".join(hops_fr) + (">" if hops_fr else "") + m_name - # Consolidate into a single message - combined_response = f"!tr {t_name}:\nTO: {route_to_str}\nFR: {route_fr_str}" + # Consolidate into a single message with hop counts + combined_response = f"!tr {t_name}:\nTO({len(route_ids)}h): {route_to_str}\nFR({len(route_back_ids)}h): {route_fr_str}" # Longer wait for radio to settle after receiving the traceroute response time.sleep(8) From c2e1a67b8079d679777a1315090015c801501b72 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 13:01:59 +0000 Subject: [PATCH 64/93] Update traceroute to reply in DM only and use thumbs up for public requests --- src/commands/tr.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/commands/tr.py b/src/commands/tr.py index a40bfd9..29b5ce7 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -17,14 +17,14 @@ def handle_packet(self, packet: MeshPacket) -> None: is_public = packet.get('toId') == '^all' or 'channel' in packet def send_reply(msg): - if is_public: - self.reply_in_channel(packet, msg, want_ack=True) - else: - self.reply_in_dm(packet, msg, want_ack=True) + # Always reply in DM + self.reply_in_dm(packet, msg, want_ack=True) - # Add a reaction to show we are working on it - logging.info(f"Adding reaction ⌛ for packet {packet.get('id')} from {packet.get('fromId')}") - self.bot.interface.sendReaction("⌛", messageId=packet['id'], destinationId=packet['fromId']) + # Add a reaction (thumbs up for public to acknowledge without spamming, hourglass for DM) + reaction_emoji = "👍" if is_public else "⌛" + reaction_dest = packet.get('toId') if is_public else packet.get('fromId') + logging.info(f"Adding reaction {reaction_emoji} for packet {packet.get('id')} to {reaction_dest}") + self.bot.interface.sendReaction(reaction_emoji, messageId=packet['id'], destinationId=reaction_dest) requester_id = packet.get('fromId') requester = self.bot.node_db.get_by_id(requester_id) @@ -67,10 +67,10 @@ def send_reply(msg): if target_id not in self.bot.pending_traces: self.bot.pending_traces[target_id] = [] - # Store context: (requester_id, is_public, to_id, channel_index) + # Store context: force is_public=False so bot.py always replies via DM to_id = packet.get('toId') channel_index = packet.get('channel', 0) - context = (requester_id, is_public, to_id, channel_index) + context = (requester_id, False, to_id, channel_index) if context not in self.bot.pending_traces[target_id]: self.bot.pending_traces[target_id].append(context) @@ -90,10 +90,7 @@ def check_timeout(): # Send the timeout message in a separate thread to avoid blocking the timer/interface def send_timeout(): - if is_public: - self.message_in_channel(packet.get('channel', 0), timeout_msg, want_ack=True) - else: - self.message_in_dm(requester_id, timeout_msg, want_ack=True) + self.message_in_dm(requester_id, timeout_msg, want_ack=True) threading.Thread(target=send_timeout, daemon=True).start() From e3bb6a1d853f612f1640a02ffddbc9635441aa17 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 5 Mar 2026 13:05:02 +0000 Subject: [PATCH 65/93] Ignore ROUTING_APP in StorageAPI to prevent backend API errors --- src/api/StorageAPI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index 13d212c..d64a98c 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -71,7 +71,7 @@ def store_raw_packet(self, packet: dict): logging.info(f"store_raw_packet called for portnum: {packet.get('decoded', {}).get('portnum')}") # Filter out packet types that the API doesn't support or we don't want to store - ignored_ports = [345, 'TRACEROUTE_APP', 'ADMIN_APP', 'NEIGHBORINFO_APP'] + ignored_ports = [345, 'TRACEROUTE_APP', 'ADMIN_APP', 'NEIGHBORINFO_APP', 'ROUTING_APP'] portnum = packet.get('decoded', {}).get('portnum') if portnum in ignored_ports: return From 266c9b9205fc1582890f1db965d00d1f633c0ffe Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Thu, 5 Mar 2026 14:40:02 +0000 Subject: [PATCH 66/93] chore: add macos files to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 20ee143..0fa11a9 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,9 @@ all_state.json data/ docker-data/ +# OS files +.DS_Store + ### Python template # Byte-compiled / optimized / DLL files __pycache__/ From fc359e06f7d238d21821c5eba5dd05793ed5d2ce Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Thu, 5 Mar 2026 14:40:30 +0000 Subject: [PATCH 67/93] ci: run unit tests on more modern python versions --- .github/workflows/unit-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index ce5eca1..5619db2 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -16,7 +16,7 @@ jobs: strategy: matrix: - python-version: [ "3.10", "3.11", "3.12", "3.13" ] + python-version: [ "3.12", "3.13", "3.14" ] steps: - name: Checkout Repository From d15074133851da1c8c52f19780813e55ba6c3790 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 6 Mar 2026 09:25:16 +0000 Subject: [PATCH 68/93] Enhance logging output and documentation updates --- README.md | 12 +++++------- src/bot.py | 20 ++++++++++---------- src/commands/nodes.py | 5 ----- src/commands/tr.py | 11 +++++------ 4 files changed, 20 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 4e3e424..333e607 100644 --- a/README.md +++ b/README.md @@ -132,8 +132,7 @@ The bot listens for messages and responds to commands. You can interact with it | `!help` | Displays a list of available commands | | `!hello` | Displays information about the bot | | `!ping` | Responds with "Pong!" | -| `!nodes` | Displays a list of connected nodes, stats, etc | -| `!nodes totals` | Manually triggers a node count report | +| `!nodes busy` | Displays a summary of the busiest nodes | | `!whoami` | Displays information about the sender | | `!tr` | Performs a traceroute to the sender (outbound & inbound) | | `!tr ` | Performs a traceroute to a specific node by its short name from management node (outbound & inbound) | @@ -141,11 +140,10 @@ The bot listens for messages and responds to commands. You can interact with it ## Features -### Node Count Reporting -The bot monitors mesh visibility and provides automated reporting: -- **Scheduled Reports:** Every 3 hours, a status update is sent to a configurable channel (defaulting to Channel 2) with the current online node count. This can be adjusted using `CHANNEL_FOR_NODE_TOTAL_BROADCAST` in your `.env` file. -- **Immediate Alerts:** If the visible node count drops to zero, the bot sends an immediate warning. -- **Manual Check:** Use `!nodes totals` to get an instant report via DM. +### Usage Statistics +- **Busy Nodes:** Use `!nodes busy` to see a summary of the most active nodes on your mesh. +- **Detailed Stats:** Use `!nodes busy detailed` for an in-depth breakdown of packet types for those busiest nodes. +- **Specific Node:** Use `!nodes busy ` to see stats for a particular node. ### Enhanced Connectivity (TCP Proxy) The bot now includes a built-in TCP proxy to manage the connection to the Meshtastic node. This improves stability and allows for automatic reconnection if the radio connection is lost. diff --git a/src/bot.py b/src/bot.py index fc9b897..1e130dc 100644 --- a/src/bot.py +++ b/src/bot.py @@ -144,7 +144,7 @@ def handle_private_message(self, packet: MeshPacket): from_id = packet['fromId'] sender = self.node_db.get_by_id(from_id) - logging.info(f"Received private message: '{message}' from {sender.long_name if sender else from_id}") + logging.info(f"✉️ [PRIVATE MSG] '{message}' from {sender.long_name if sender else from_id}") words = message.split() command_name = words[0] @@ -154,11 +154,11 @@ def handle_private_message(self, packet: MeshPacket): def run_command(): try: - logging.info(f"Running command {command_name} in thread for {from_id}") + logging.info(f"🤖 [BOT CMD] Running private command {command_name} in thread for {from_id}") command_instance.handle_packet(packet) - logging.info(f"Finished command {command_name} for {from_id}") + logging.info(f"✅ [BOT CMD] Finished private command {command_name} for {from_id}") except Exception as e: - logging.error(f"Error handling private command {command_name}: {e}", exc_info=True) + logging.error(f"❌ [BOT CMD] Error handling private command {command_name}: {e}", exc_info=True) threading.Thread(target=run_command, daemon=True).start() else: @@ -184,7 +184,7 @@ def handle_public_message(self, packet: MeshPacket): sender_name = sender.long_name if sender else from_id channel_name = self.get_channel_name(packet) - logging.info(f"Received group message on channel '{channel_name}' from {sender_name}: {message}") + logging.info(f"📢 [GROUP MSG] Channel '{channel_name}' from {sender_name}: {message}") # Allow certain commands in public channels words = message.split() @@ -193,17 +193,17 @@ def handle_public_message(self, packet: MeshPacket): if command_name in ["!tr", "!ping", "!hello", "!nodes", "!status", "!whoami"]: env_var_name = f"ENABLE_COMMAND_{command_name.lstrip('!').upper()}" if get_env_bool(env_var_name, True): - logging.info(f"Received public {command_name} from {sender_name}") + logging.info(f"🤖 [BOT CMD] Received public {command_name} from {sender_name}") command_instance = CommandFactory.create_command(command_name, self) if command_instance: def run_command(): try: - logging.info(f"Running public command {command_name} in thread for {from_id}") + logging.info(f"🤖 [BOT CMD] Running public command {command_name} in thread for {from_id}") # Commands by default reply via DM (reply_in_dm). command_instance.handle_packet(packet) - logging.info(f"Finished public command {command_name} for {from_id}") + logging.info(f"✅ [BOT CMD] Finished public command {command_name} for {from_id}") except Exception as e: - logging.error(f"Error handling public command {command_name}: {e}", exc_info=True) + logging.error(f"❌ [BOT CMD] Error handling public command {command_name}: {e}", exc_info=True) threading.Thread(target=run_command, daemon=True).start() return # Stop processing responders @@ -215,7 +215,7 @@ def run_command(): if outcome: logging.info( - f"Handled message from {sender.long_name if sender else from_id} with responder {responder.__class__.__name__}: {message}") + f"🤖 [RESPONDER] Handled message from {sender.long_name if sender else from_id} with responder {responder.__class__.__name__}: {message}") self.command_logger.log_responder_handled(from_id, responder, message) except (KeyError, ValueError) as e: logging.error(f"Packet format error handling message: {e}", exc_info=True) diff --git a/src/commands/nodes.py b/src/commands/nodes.py index ffa2571..0b14120 100644 --- a/src/commands/nodes.py +++ b/src/commands/nodes.py @@ -14,7 +14,6 @@ class NodesCommand(AbstractCommandWithSubcommands): def __init__(self, bot: MeshtasticBot): super().__init__(bot, 'nodes') self.sub_commands['busy'] = self.handle_busy - self.sub_commands['totals'] = self.handle_totals def get_busy_nodes(self) -> list[MeshNode.User]: return sorted(self.bot.node_db.list_nodes(), @@ -38,9 +37,6 @@ def handle_base_command(self, packet: MeshPacket, args: list[str]) -> None: self.reply(packet, response) - def handle_totals(self, packet: MeshPacket, args: list[str]) -> None: - self.bot.report_node_count(destination=packet['fromId']) - def handle_busy(self, packet: MeshPacket, args: list[str]) -> None: sender = packet['fromId'] @@ -100,7 +96,6 @@ def send_detailed_nodeinfo(self, sender: str, node_id: str): def show_help(self, packet: MeshPacket, args: list[str]) -> None: help_text = "!nodes: details about nodes this device has seen\n" - help_text += "!nodes totals: report the current node count\n" help_text += "!nodes busy: summary of busiest nodes\n" help_text += "!nodes busy detailed: detailed info about busiest nodes\n" self.reply(packet, help_text) diff --git a/src/commands/tr.py b/src/commands/tr.py index 29b5ce7..c1b3623 100644 --- a/src/commands/tr.py +++ b/src/commands/tr.py @@ -54,14 +54,11 @@ def send_reply(msg): hops_away = hop_start - hop_limit snr = packet.get('rxSnr', 0.0) - status_msg = f"{requester_name} ({hops_away} hops, {snr}dB). Starting real-time trace..." - logging.info(f"Detected {hops_away} hops for {target_id}. {status_msg}") - send_reply(status_msg) + # We can log this, but no need to send it explicitly over the radio to save airtime + logging.info(f"Detected {hops_away} hops for {target_id}. SNR: {snr}dB.") else: # Tracing to a different node - response = f"Starting traceroute to {target_long_name} ({target_id}) for you..." - logging.info(response) - send_reply(response) + logging.info(f"Starting traceroute to {target_long_name} ({target_id}) for you...") # Store for the callback if target_id not in self.bot.pending_traces: @@ -97,6 +94,8 @@ def send_timeout(): threading.Thread(target=check_timeout, daemon=True).start() try: + # Let the reaction settle before firing the trace + time.sleep(2) logging.info(f"Initiating traceroute to {target_id} requested by {requester_id}") # hopLimit=7 is standard max p = self.bot.interface.sendTraceRoute(target_id, hopLimit=7) From 0d3c3be66d285879907294311982df387a52dad7 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 6 Mar 2026 12:18:46 +0000 Subject: [PATCH 69/93] Update README with new logging formats --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 333e607..d243b32 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,13 @@ The bot now includes a built-in TCP proxy to manage the connection to the Meshta ### Improved Logging Messages received on named Group Channels (e.g., 'LongRange', 'PrivateChat') are now logged with their specific channel name, making it easier to track conversations across different mesh networks. +**Log Format Details:** +The bot uses emojis and badges in its standard output logs to easily identify incoming requests: +- **Private Messages**: `✉️ [PRIVATE MSG]` +- **Group Messages**: `📢 [GROUP MSG]` +- **Bot Commands**: `🤖 [BOT CMD]` +- **Responder Actions**: `🤖 [RESPONDER]` + ### Advanced Traceroute The `!tr` command provides visibility into the mesh topology: - **Full Path visibility:** Shows the complete route including the target node. From a6f77d83d83557f52279ed862e847fb4415e2dfe Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Fri, 6 Mar 2026 16:13:30 +0000 Subject: [PATCH 70/93] Add undocumented commands to README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d243b32..d7a1c4c 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,8 @@ The bot listens for messages and responds to commands. You can interact with it | `!tr` | Performs a traceroute to the sender (outbound & inbound) | | `!tr ` | Performs a traceroute to a specific node by its short name from management node (outbound & inbound) | | `!status` | Displays bot status and radio connection details | +| `!prefs` | Configure bot settings related to your node | +| `!admin` | (Admin only) Admin commands like `reset packets` and `users` | ## Features From 7bcbaa4fc7c26e45f71a7d569300b4ba1a799ddc Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 6 Mar 2026 23:23:26 +0000 Subject: [PATCH 71/93] ci: bump dorny to v2 --- .github/workflows/unit-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 5619db2..c389df9 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -36,7 +36,7 @@ jobs: pytest test/ --doctest-modules --junitxml=reports/test-results-${{ matrix.python-version }}.xml - name: Test Report - uses: dorny/test-reporter@v1 + uses: dorny/test-reporter@v2 with: name: pytest-results-${{ matrix.python-version }} path: reports/test-results-${{ matrix.python-version }}.xml From 17fa017bb5e0ad74612c0cf761138e9536f4484f Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 6 Mar 2026 23:23:30 +0000 Subject: [PATCH 72/93] chore: add github pr template --- .github/pull_request_template.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..4c0c392 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,26 @@ +# Summary + + + + + +## Testing performed + + From f7301a08cb28337290db00a120a53819948c992d Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Mon, 9 Mar 2026 17:11:36 +0000 Subject: [PATCH 73/93] feat: add admin command to help menu --- src/commands/help.py | 6 ++++++ test/commands/test_help.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/commands/help.py b/src/commands/help.py index 42a0991..0f44568 100644 --- a/src/commands/help.py +++ b/src/commands/help.py @@ -22,6 +22,8 @@ def __init__(self, bot: MeshtasticBot): self.sub_commands['prefs'] = self.handle_prefs if get_env_bool('ENABLE_COMMAND_STATUS', True): self.sub_commands['status'] = self.handle_status + if get_env_bool('ENABLE_COMMAND_ADMIN', True): + self.sub_commands['admin'] = self.handle_admin # if get_env_bool('ENABLE_COMMAND_ENROLL', True): # self.sub_commands['enroll'] = self.handle_enroll # if get_env_bool('ENABLE_COMMAND_LEAVE', True): @@ -92,5 +94,9 @@ def handle_status(self, packet: MeshPacket, args: list[str]) -> None: response = "!status: show current bot and proxy health status" self.reply(packet, response) + def handle_admin(self, packet: MeshPacket, args: list[str]) -> None: + response = "!admin: admin commands (restricted)" + self.reply(packet, response) + def get_command_for_logging(self, message: str) -> (str, list[str] | None, str | None): return self._gcfl_base_command_and_args(message) diff --git a/test/commands/test_help.py b/test/commands/test_help.py index db04c36..0fac70d 100644 --- a/test/commands/test_help.py +++ b/test/commands/test_help.py @@ -19,7 +19,7 @@ def test_handle_packet_no_additional_message(self): response = self.mock_interface.sendText.call_args[0][0] - skipped_commands = ['!admin'] + skipped_commands = [] # Ensure every command in CommandFactory is mentioned in the response for command in CommandFactory.commands.keys(): From 5856d3ff51d9a04b8061a1e9fa39e71cff1549f4 Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Wed, 11 Mar 2026 22:25:16 +0000 Subject: [PATCH 74/93] chore: add tmp/ and temp/ to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 0fa11a9..db8f5e0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ nodes.json all_state.json data/ docker-data/ +tmp/ +temp/ # OS files .DS_Store From 2fa1ad8dc8cf892bb404fad85e28df89cdfea4fe Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Wed, 11 Mar 2026 22:48:49 +0000 Subject: [PATCH 75/93] docs: add AGENTS.md for Meshtastic Bot project structure and key concepts --- AGENTS.md | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..4f5a722 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,105 @@ +# Meshtastic Bot – Agent Context + +Python bot for interacting with Meshtastic devices. Connects to a Meshtastic node over TCP, listens for messages, processes commands, and reports packets to the Meshflow API. Part of the Meshflow system alongside meshflow-api and meshtastic-bot-ui. + +## Project Structure + +``` +src/ +├── main.py # Entry point, env config, bot setup +├── bot.py # MeshtasticBot: pubsub handlers, connection, commands +├── tcp_interface.py # AutoReconnectTcpInterface (Meshtastic TCP connection) +├── ws_client.py # MeshflowWSClient – receives commands from API (e.g. traceroute) +├── traceroute.py # Traceroute command (triggered via WebSocket) +├── data_classes.py # MeshNode, packet data structures +├── helpers.py # pretty_print_last_heard, safe_encode_node_name, etc. +├── base_feature.py # AbstractBaseFeature – reply_in_channel, message_in_dm, etc. +├── commands/ # Text commands (!help, !nodes, !ping, etc.) +│ ├── factory.py # CommandFactory – registers commands +│ ├── command.py # AbstractCommand base class +│ ├── help.py, hello.py, nodes.py, ping.py, prefs.py, admin.py, template.py +│ └── enroll.py # (commented out) +├── responders/ # Non-command message handlers +│ ├── responder_factory.py # ResponderFactory +│ ├── responder.py # AbstractResponder base class +│ └── message_reaction_responder.py +├── api/ # Meshflow API integration +│ ├── StorageAPI.py # StorageAPIWrapper – packet ingestion, node sync +│ ├── BaseAPIWrapper.py # Base HTTP client +│ └── serializers.py # MeshNodeSerializer +└── persistence/ # Local storage + ├── node_db.py # AbstractNodeDB, SqliteNodeDB + ├── node_info.py # AbstractNodeInfoStore, InMemoryNodeInfoStore + ├── commands_logger.py # AbstractCommandLogger, SqliteCommandLogger + ├── user_prefs.py # AbstractUserPrefsPersistence, SqliteUserPrefsPersistence + └── packet_dump.py # Packet dump utilities + +test/ # pytest unit tests +deploy/ # Deployment scripts (Raspberry Pi, Docker) +``` + +## Key Concepts + +- **MeshtasticBot**: Central class. Subscribes to pubsub (`meshtastic.receive`, `meshtastic.receive.text`, `meshtastic.node.updated`, `meshtastic.connection.established`). Owns interface, node_db, node_info, storage_apis, ws_client. +- **Commands**: Text messages starting with `!` (e.g. `!help`, `!nodes`). Registered in `CommandFactory`; extend `AbstractCommand`. +- **Responders**: Handle public channel messages without `!` prefix. Extend `AbstractResponder`. +- **StorageAPIWrapper**: Reports raw packets and node data to Meshflow API. Supports v1 and v2 API paths. Uses `STORAGE_API_*` or `STORAGE_API_2_*` env vars. +- **MeshflowWSClient**: Connects to `ws/nodes/?api_key=...` to receive remote commands (e.g. traceroute). Started after connection; uses same token as storage API. + +## API Integration + +- **Packet ingestion**: `StorageAPIWrapper` posts to `/api/packets/{my_nodenum}/ingest/` (v2) or `/api/raw-packet/` (v1). +- **Node sync**: `StorageAPIWrapper` fetches nodes from API for reconciliation. +- **WebSocket**: `MeshflowWSClient` connects to Meshflow API; receives JSON commands (e.g. `{"type": "traceroute", "target_node_id": 123}`). Invokes `on_traceroute_command` on the bot. + +## Development + +```bash +# activate venv +source venv/bin/activate + +pip install -r requirements.txt +# Copy .env.example to .env and configure +python main.py +# or: python -m src.main (from project root) +``` + +## Testing + +- **Unit tests**: `pytest test/ --doctest-modules` +- Tests live under `test/` (commands, persistence, responders, etc.) +- CI runs on Python 3.12, 3.13, 3.14 + +## Tech Stack + +- Python 3.12+ +- meshtastic (protobuf, TCP interface) +- Pypubsub (pub/sub events) +- requests (HTTP to Meshflow API) +- websockets (MeshflowWSClient) +- schedule (periodic tasks) +- pytest + +## Configuration + +Environment variables (see `.env.example`): + +- `MESHTASTIC_IP` – Meshtastic node IP (TCP connection) +- `ADMIN_NODES` – Comma-separated node IDs (e.g. `!aae8900d`) for admin commands +- `STORAGE_API_ROOT`, `STORAGE_API_TOKEN`, `STORAGE_API_VERSION` – Primary Meshflow API +- `STORAGE_API_2_*` – Optional second API +- `MESHFLOW_WS_URL` – WebSocket URL (optional; derived from storage API if unset) +- `DATA_DIR` – Data directory (default `data/`) + +## Conventions + +- Commands: Add class to `src/commands/`, register in `CommandFactory.commands`. +- Responders: Add class to `src/responders/`, register in `ResponderFactory`. +- Use `reply_in_channel` / `reply_in_dm` from `AbstractBaseFeature`; avoid deprecated `reply` / `reply_to`. +- Node IDs: hex string format (e.g. `!12345678`). `my_nodenum` is decimal. +- Data persisted in `data/` (node_info.json, SQLite DBs, failed_packets). + +## Source control + +When asked to create a pull request description, follow the template at +.github/pull_request_template.md, and output a markdown file named `tmp/PR.md` From f9c1725d16dfbda384b4f3d7ef9b97760da984cf Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Thu, 12 Mar 2026 12:21:07 +0000 Subject: [PATCH 76/93] build(local): add vscode launch.json file --- .vscode/launch.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..f6c8b54 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,13 @@ +{ + "configurations": [ + { + "name": "Meshtastic Bot", + "type": "debugpy", + "request": "launch", + "module": "src.main", + "python": "venv/bin/python", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}" + } + ] +} \ No newline at end of file From d9fe8e8fd0174b7066f3be1959823af40c4c1fc5 Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Thu, 12 Mar 2026 12:21:29 +0000 Subject: [PATCH 77/93] feat: implement WebSocket client for Meshflow API commands - Added MeshflowWSClient to handle WebSocket connections and receive commands. - Integrated traceroute command handling in MeshtasticBot. - Updated .env.example with new configuration options for Meshflow API. - Added websockets dependency to requirements.txt. - Created traceroute.py for managing traceroute requests and responses. --- .env.example | 11 +++ requirements.txt | 1 + src/bot.py | 10 +++ src/main.py | 22 ++++++ src/traceroute.py | 50 ++++++++++++++ src/ws_client.py | 166 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 260 insertions(+) create mode 100644 src/traceroute.py create mode 100644 src/ws_client.py diff --git a/.env.example b/.env.example index b7e1be9..930ce9f 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,16 @@ +# The IP address of your Meshtastic node MESHTASTIC_IP=192.168.123.123 ADMIN_NODES='!aae8900d' +# The root URL of the Meshflow API STORAGE_API_ROOT='http://localhost:8000' STORAGE_API_TOKEN=... +STORAGE_API_VERSION=2 + +# Use these if you want to upload to a second API (usually used during testing) +# STORAGE_API_2_ROOT=... +# STORAGE_API_2_VERSION=2 +# STORAGE_API_2_TOKEN=... + +# Use this if you want to receive commands from the Meshflow server (e.g. traceroute) +MESHFLOW_WS_URL=ws://localhost:8000 diff --git a/requirements.txt b/requirements.txt index c10f372..88ced90 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # dependencies meshtastic>=2.5.0,<3.0.0 +websockets>=14.0 Pypubsub~=4.0.3 jinja2~=3.1.6 schedule~=1.2.2 diff --git a/src/bot.py b/src/bot.py index 8a017d5..e6f7bc8 100644 --- a/src/bot.py +++ b/src/bot.py @@ -10,6 +10,7 @@ from src.api.StorageAPI import StorageAPIWrapper from src.commands.factory import CommandFactory +from src.traceroute import on_traceroute_command from src.data_classes import MeshNode from src.helpers import pretty_print_last_heard, safe_encode_node_name from src.persistence.commands_logger import AbstractCommandLogger @@ -36,6 +37,7 @@ class MeshtasticBot: user_prefs_persistence: AbstractUserPrefsPersistence storage_apis: list[StorageAPIWrapper] + ws_client: object | None # MeshflowWSClient when configured def __init__(self, address: str): self.address = address @@ -52,6 +54,7 @@ def __init__(self, address: str): self.command_logger = None self.user_prefs_persistence = None self.storage_apis = [] + self.ws_client = None pub.subscribe(self.on_receive, "meshtastic.receive") pub.subscribe(self.on_receive_text, "meshtastic.receive.text") @@ -106,6 +109,10 @@ def disconnect(self): except OSError as ex: logging.warning(f"Failed to close connection. Continuing anyway: {ex}") + def on_traceroute_command(self, target_node_id: int): + """Handle traceroute command from WebSocket (e.g. from Meshflow API).""" + on_traceroute_command(self, target_node_id) + def on_connection(self, interface, topic=pub.AUTO_TOPIC): self.my_nodenum = interface.localNode.nodeNum # in dec self.my_id = f"!{hex(self.my_nodenum)[2:]}" @@ -114,6 +121,9 @@ def on_connection(self, interface, topic=pub.AUTO_TOPIC): logging.info('Connected to Meshtastic node') self.print_nodes() + if self.ws_client: + self.ws_client.start() + def on_receive_text(self, packet: MeshPacket, interface): """Callback function triggered when a text message is received.""" diff --git a/src/main.py b/src/main.py index 2d9014d..a6b5fee 100644 --- a/src/main.py +++ b/src/main.py @@ -21,6 +21,7 @@ # Now we can import the rest of our local files from src.api.StorageAPI import StorageAPIWrapper from src.bot import MeshtasticBot +from src.ws_client import MeshflowWSClient from src.persistence.commands_logger import SqliteCommandLogger from src.persistence.node_info import InMemoryNodeInfoStore from src.persistence.node_db import SqliteNodeDB @@ -36,6 +37,7 @@ STORAGE_API_2_ROOT = os.getenv("STORAGE_API_2_ROOT") STORAGE_API_2_TOKEN = os.getenv("STORAGE_API_2_TOKEN", None) STORAGE_API_2_VERSION = int(os.getenv("STORAGE_API_2_VERSION", 1)) +MESHFLOW_WS_URL = os.getenv("MESHFLOW_WS_URL") # e.g. ws://localhost:8000; derived from storage API if unset def main(): @@ -62,6 +64,24 @@ def main(): if STORAGE_API_2_ROOT: bot.storage_apis.append(StorageAPIWrapper(bot, STORAGE_API_2_ROOT, STORAGE_API_2_TOKEN, STORAGE_API_2_VERSION, failed_packets_dir)) + # WebSocket client for receiving commands (e.g. traceroute) + ws_url = MESHFLOW_WS_URL + ws_token = None + if not ws_url: + base = STORAGE_API_ROOT + if base: + ws_url = base \ + .replace("http://", "ws://") \ + .replace("https://", "wss://") + if STORAGE_API_ROOT and STORAGE_API_TOKEN: + ws_token = STORAGE_API_TOKEN + if ws_url and ws_token: + bot.ws_client = MeshflowWSClient( + ws_url=ws_url, + api_key=ws_token, + on_traceroute=bot.on_traceroute_command, + ) + try: node_info.load_from_file(str(node_info_file)) bot.connect() @@ -70,6 +90,8 @@ def main(): except Exception as e: logging.error(f"Error: {e}") finally: + if bot.ws_client: + bot.ws_client.stop() bot.disconnect() node_info.persist_to_file(str(node_info_file)) diff --git a/src/traceroute.py b/src/traceroute.py new file mode 100644 index 0000000..602cc33 --- /dev/null +++ b/src/traceroute.py @@ -0,0 +1,50 @@ +""" +Traceroute command handling: send traceroute requests and upload TRACEROUTE_APP responses. +""" + +import logging +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from src.bot import MeshtasticBot + +logger = logging.getLogger(__name__) + + +def on_traceroute_command(bot: "MeshtasticBot", target_node_id: int, hop_limit: int = 5, channel_index: int = 0): + """ + Send a traceroute request to the target node. + + Args: + bot: The MeshtasticBot instance + target_node_id: Target node ID (integer, e.g. 1623194643) + hop_limit: Maximum hops for the traceroute (default 5) + channel_index: Channel index (default 0) + """ + if not bot.interface or not bot.init_complete: + logger.warning("Traceroute: bot not connected, skipping") + return + + try: + bot.interface.sendTraceRoute(target_node_id, hop_limit, channelIndex=channel_index) + logger.info(f"Traceroute: sent to target={target_node_id}") + except Exception as e: + logger.error(f"Traceroute: failed to send to {target_node_id}: {e}") + + +def setup_traceroute_handler(bot: "MeshtasticBot"): + """ + Subscribe to TRACEROUTE_APP packets and upload them to storage APIs. + + Call this once when the bot is initialized. TRACEROUTE_APP packets + received via meshtastic.receive are already passed to storage_apis in + bot.on_receive, so no extra subscription is needed for upload. + + This function exists for any traceroute-specific setup (e.g. filtering + or logging). The main packet flow is: receive -> on_receive -> storage_apis. + """ + # TRACEROUTE_APP packets are handled by bot.on_receive which forwards + # all packets to storage_apis. No additional subscription needed. + # We could add a dedicated handler here if we needed traceroute-specific + # logic (e.g. only upload TR packets, or different handling). + pass diff --git a/src/ws_client.py b/src/ws_client.py new file mode 100644 index 0000000..63c364c --- /dev/null +++ b/src/ws_client.py @@ -0,0 +1,166 @@ +""" +WebSocket client for receiving commands from the Meshflow API. + +Connects to ws/nodes/?api_key= and invokes callbacks when commands +(e.g. traceroute) are received. +""" + +import asyncio +import json +import logging +from typing import Callable, Optional + +logger = logging.getLogger(__name__) + + +class MeshflowWSClient: + """ + WebSocket client that connects to the Meshflow API node command endpoint. + + Runs in a background thread and invokes callbacks for received commands. + Reconnects with exponential backoff on disconnect. + """ + + def __init__( + self, + ws_url: str, + api_key: str, + on_traceroute: Callable[[int], None], + on_connect: Optional[Callable[[], None]] = None, + on_disconnect: Optional[Callable[[], None]] = None, + ): + """ + Args: + ws_url: Base WebSocket URL (e.g. ws://localhost:8000) + api_key: NodeAPIKey for authentication + on_traceroute: Callback(target_node_id: int) when traceroute command received + on_connect: Optional callback when connected + on_disconnect: Optional callback when disconnected + """ + self.ws_url = ws_url.rstrip("/") + self.api_key = api_key + self.on_traceroute = on_traceroute + self.on_connect = on_connect + self.on_disconnect = on_disconnect + + self._running = False + self._task: Optional[asyncio.Task] = None + self._loop: Optional[asyncio.AbstractEventLoop] = None + self._backoff = 1.0 # Reset on successful connect so reconnects start fast + + def _get_ws_endpoint(self) -> str: + return f"{self.ws_url}/ws/nodes/?api_key={self.api_key}" + + def start(self): + """Start the WebSocket client in a background thread.""" + if self._running: + return + self._running = True + import threading + + def run(): + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + try: + self._loop.run_until_complete(self._run()) + finally: + self._loop.close() + + thread = threading.Thread(target=run, daemon=True) + thread.start() + logger.info("MeshflowWSClient: started") + + def stop(self): + """Stop the WebSocket client.""" + self._running = False + if self._loop and self._task: + self._loop.call_soon_threadsafe(self._task.cancel) + + async def _run(self): + """Main loop with reconnection.""" + backoff = 1.0 + max_backoff = 300.0 + + while self._running: + try: + await self._connect_and_receive() + except asyncio.CancelledError: + logger.info("MeshflowWSClient: stopped") + break + except Exception as e: + logger.warning( + f"MeshflowWSClient: connection lost ({type(e).__name__}: {e}). " + f"Reconnecting in {backoff:.0f}s..." + ) + if self.on_disconnect: + try: + self.on_disconnect() + except Exception: + pass + + if not self._running: + break + + await asyncio.sleep(backoff) + backoff = getattr(self, "_backoff", backoff) # Use reset value from successful connect + backoff = min(backoff * 1.5, max_backoff) + + logger.info("MeshflowWSClient: run loop ended") + + async def _connect_and_receive(self): + """Connect to WebSocket and receive messages until disconnect.""" + try: + import websockets + from websockets.exceptions import ConnectionClosed + except ImportError: + raise ImportError("websockets package required. Install with: pip install websockets") + + endpoint = self._get_ws_endpoint() + # Django Channels AllowedHostsOriginValidator requires Origin header. + # Derive from ws_url (e.g. ws://localhost:8000 -> http://localhost:8000) + origin = self.ws_url.replace("ws://", "http://").replace("wss://", "https://") + async with websockets.connect( + endpoint, + origin=origin, + close_timeout=5, + ping_interval=20, + ping_timeout=10, + ) as ws: + self._backoff = 1.0 # Reset so next reconnect starts with short delay + logger.info("MeshflowWSClient: connected") + if self.on_connect: + try: + self.on_connect() + except Exception as e: + logger.warning(f"MeshflowWSClient: on_connect error: {e}") + + while self._running: + try: + msg = await asyncio.wait_for(ws.recv(), timeout=60.0) + except asyncio.TimeoutError: + continue + except ConnectionClosed as e: + code = getattr(getattr(e, "rcvd", None), "code", None) + logger.info(f"MeshflowWSClient: connection closed by server (code={code})") + raise + + try: + data = json.loads(msg) + except json.JSONDecodeError: + logger.warning(f"MeshflowWSClient: invalid JSON: {msg[:100]}") + continue + + cmd_type = data.get("type") + if cmd_type == "traceroute": + target = data.get("target") + if target is not None: + try: + target_id = int(target) + logger.info(f"MeshflowWSClient: received traceroute command, target={target_id}") + self.on_traceroute(target_id) + except (TypeError, ValueError): + logger.warning(f"MeshflowWSClient: invalid traceroute target: {target}") + else: + logger.warning("MeshflowWSClient: traceroute command missing target") + else: + logger.debug(f"MeshflowWSClient: ignored command type: {cmd_type}") From 04bc77d882857257d0e81321444e74e04c4ee008 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Thu, 12 Mar 2026 13:35:16 +0000 Subject: [PATCH 78/93] feat: implement exponential backoff and improved logging in TcpProxy --- src/tcp_proxy.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index a2c05f5..9a5e50f 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -116,6 +116,10 @@ async def _watchdog(self): await asyncio.sleep(5) async def _target_connection_manager(self): + backoff_time = 5.0 + max_backoff_time = 60.0 + backoff_rate = 2.0 + while self.running: if self.target_writer is None or self.target_reader is None: self.reconnecting = True @@ -124,6 +128,7 @@ async def _target_connection_manager(self): self.rolling_packets.clear() try: + logging.info(f"Proxy attempting to connect to target device at {self.target_host}:{self.target_port}...") reader, writer = await asyncio.wait_for( asyncio.open_connection(self.target_host, self.target_port), timeout=5.0 @@ -132,11 +137,17 @@ async def _target_connection_manager(self): self.target_writer = writer self.last_target_activity = time.time() self.reconnecting = False - logging.info(f"Proxy connected to target device at {self.target_host}:{self.target_port}") + backoff_time = 5.0 # Reset backoff on success + logging.info(f"Proxy successfully connected to target device at {self.target_host}:{self.target_port}") asyncio.create_task(self._read_from_target()) + except (asyncio.TimeoutError, ConnectionError, OSError) as e: + logging.error(f"Failed to connect to target ({self.target_host}): {e}. Retrying in {backoff_time:.1f}s...") + await asyncio.sleep(backoff_time) + backoff_time = min(backoff_time * backoff_rate, max_backoff_time) except Exception as e: - logging.error(f"Failed to connect to target ({self.target_host}): {e}") - await asyncio.sleep(5.0) + logging.error(f"Unexpected error in target connection manager: {e}", exc_info=True) + await asyncio.sleep(backoff_time) + backoff_time = min(backoff_time * backoff_rate, max_backoff_time) else: await asyncio.sleep(1) From dfaad66b5f9313e29e0ff0f1f7df32ad6effd39e Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Thu, 12 Mar 2026 13:54:23 +0000 Subject: [PATCH 79/93] feat: run traceroute command in a separate thread to prevent blocking - Updated the traceroute command handling in MeshflowWSClient to execute in a separate thread. - Added a callback to log warnings if the traceroute task fails. --- src/ws_client.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/ws_client.py b/src/ws_client.py index 63c364c..8641dce 100644 --- a/src/ws_client.py +++ b/src/ws_client.py @@ -157,7 +157,18 @@ async def _connect_and_receive(self): try: target_id = int(target) logger.info(f"MeshflowWSClient: received traceroute command, target={target_id}") - self.on_traceroute(target_id) + # Run in thread so a blocking/long-running TR doesn't block receiving + # further commands (e.g. multiple TRs in quick succession, or TR that never returns) + task = asyncio.create_task(asyncio.to_thread(self.on_traceroute, target_id)) + + def _task_done(t): + if t.cancelled(): + return + exc = t.exception() + if exc: + logger.warning(f"MeshflowWSClient: traceroute task failed: {exc}") + + task.add_done_callback(_task_done) except (TypeError, ValueError): logger.warning(f"MeshflowWSClient: invalid traceroute target: {target}") else: From ad031a4512697af82c200237efcee4d739735ef0 Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Wed, 11 Mar 2026 22:34:54 +0000 Subject: [PATCH 80/93] chore: bump github action versions --- .github/workflows/armv7-docker-base-image-build.yaml | 4 ++-- .github/workflows/docker-build.yaml | 4 ++-- .github/workflows/unit-tests.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/armv7-docker-base-image-build.yaml b/.github/workflows/armv7-docker-base-image-build.yaml index cceafff..3e682d2 100644 --- a/.github/workflows/armv7-docker-base-image-build.yaml +++ b/.github/workflows/armv7-docker-base-image-build.yaml @@ -20,10 +20,10 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Log in to GitHub Container Registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $GITHUB_ACTOR --password-stdin diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index a12ea71..2783f2d 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -38,10 +38,10 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Log in to GitHub Container Registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $GITHUB_ACTOR --password-stdin diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index c389df9..6659fbf 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -20,10 +20,10 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} From 07bf6072d25bc1cb331f65e5d12c7570c59c93dc Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Thu, 12 Mar 2026 23:23:13 +0000 Subject: [PATCH 81/93] build: update Dockerfile and GitHub workflows for improved image building - Bumped base image version from python:3.12 to python:3.14 in Dockerfile. - Introduced VERSION argument in Dockerfile for better version management. - Refactored docker-build workflow to use new VERSION_LABEL input and removed unnecessary inputs. - Enhanced multi-architecture image building and tagging logic in workflows. - Deleted obsolete manual-release workflow to streamline CI/CD process. --- .github/workflows/docker-build.yaml | 127 +++++++++++++++----------- .github/workflows/main.yaml | 34 +++++++ .github/workflows/manual-release.yaml | 29 ------ .github/workflows/release.yaml | 15 ++- Dockerfile | 6 +- 5 files changed, 118 insertions(+), 93 deletions(-) create mode 100644 .github/workflows/main.yaml delete mode 100644 .github/workflows/manual-release.yaml diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 2783f2d..83b6c9e 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -3,39 +3,22 @@ name: docker-build-and-push on: workflow_call: inputs: - VERSION_TAG: + VERSION_LABEL: required: true type: string - IS_LATEST: - required: false - type: boolean - default: false - IS_PRERELEASE: - required: false - type: boolean - default: false - + ENVIRONMENT: + required: true + type: string + description: "dev | preprod | prod" permissions: contents: read packages: write - jobs: build-and-push: runs-on: ubuntu-latest - strategy: - matrix: - platform: [ linux/amd64, linux/arm/v7, linux/arm64 ] - include: - - platform: linux/amd64 - base_image: "python:3.12" - - platform: linux/arm/v7 - base_image: "ghcr.io/pskillen/meshtastic-bot-armv7-base:py3.12" - - platform: linux/arm64 - base_image: "arm64v8/python:3.12" - steps: - name: Checkout Repository uses: actions/checkout@v6 @@ -44,47 +27,85 @@ jobs: uses: docker/setup-buildx-action@v4 - name: Log in to GitHub Container Registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $GITHUB_ACTOR --password-stdin + uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Setup vars - run: | - PLATFORM_TAG=$(echo "${{ matrix.platform }}" | sed 's|/|-|g') - echo "PLATFORM_TAG=$PLATFORM_TAG" >> $GITHUB_ENV - - - name: Build and Push Docker Image - run: | - docker buildx build \ - --platform ${{ matrix.platform }} \ - --build-arg BASE_IMAGE=${{ matrix.base_image }} \ - -t ghcr.io/${{ github.repository }}:${{ inputs.VERSION_TAG }}-${{ env.PLATFORM_TAG }} \ - --push . + - name: Build and Push Multi-Arch Image + uses: docker/build-push-action@v6 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: true + tags: | + ghcr.io/${{ github.repository }}:${{ inputs.ENVIRONMENT == 'dev' && 'latest-dev' || inputs.VERSION_LABEL }} + build-args: | + VERSION=${{ inputs.VERSION_LABEL }} + cache-from: type=gha + cache-to: type=gha,mode=max - create-manifest: + tag-and-cleanup: runs-on: ubuntu-latest needs: - build-and-push + if: inputs.ENVIRONMENT != 'dev' steps: - name: Log in to GitHub Container Registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $GITHUB_ACTOR --password-stdin + uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Create and Push Multi-Arch Manifest + - name: Tag for pre-release (latest-rc + version) + if: inputs.ENVIRONMENT == 'preprod' run: | docker buildx imagetools create \ - -t ghcr.io/${{ github.repository }}:${{ inputs.VERSION_TAG }} \ - ghcr.io/${{ github.repository }}:${{ inputs.VERSION_TAG }}-linux-amd64 \ - ghcr.io/${{ github.repository }}:${{ inputs.VERSION_TAG }}-linux-arm-v7 \ - ghcr.io/${{ github.repository }}:${{ inputs.VERSION_TAG }}-linux-arm64 + -t ghcr.io/${{ github.repository }}:latest-rc \ + ghcr.io/${{ github.repository }}:${{ inputs.VERSION_LABEL }} - - name: Tag as Latest or RC - if: ${{ inputs.IS_LATEST == true }} + - name: Tag for release (latest + semver components) + if: inputs.ENVIRONMENT == 'prod' run: | - if [[ "${{ inputs.IS_PRERELEASE }}" == "true" ]]; then - docker buildx imagetools create \ - -t ghcr.io/${{ github.repository }}:latest-rc \ - ghcr.io/${{ github.repository }}:${{ inputs.VERSION_TAG }} - else - docker buildx imagetools create \ - -t ghcr.io/${{ github.repository }}:latest \ - ghcr.io/${{ github.repository }}:${{ inputs.VERSION_TAG }} - fi + VERSION="${{ inputs.VERSION_LABEL }}" + docker buildx imagetools create \ + -t ghcr.io/${{ github.repository }}:latest \ + -t ghcr.io/${{ github.repository }}:${VERSION} \ + ghcr.io/${{ github.repository }}:${VERSION} + # Parse semver and create component tags + MAJOR=$(echo "${VERSION}" | cut -d. -f1) + MINOR=$(echo "${VERSION}" | cut -d. -f2) + docker buildx imagetools create \ + -t ghcr.io/${{ github.repository }}:${MAJOR} \ + ghcr.io/${{ github.repository }}:${VERSION} + docker buildx imagetools create \ + -t ghcr.io/${{ github.repository }}:${MAJOR}.${MINOR} \ + ghcr.io/${{ github.repository }}:${VERSION} + + - name: Delete untagged images + uses: quartx-analytics/ghcr-cleaner@v1 + with: + owner-type: user + token: ${{ secrets.GITHUB_TOKEN }} + repository_owner: ${{ github.repository_owner }} + repository-name: ${{ github.repository }} + delete-untagged: true + + cleanup-dev: + runs-on: ubuntu-latest + needs: + - build-and-push + if: inputs.ENVIRONMENT == 'dev' + + steps: + - name: Delete untagged images + uses: quartx-analytics/ghcr-cleaner@v1 + with: + owner-type: user + token: ${{ secrets.GITHUB_TOKEN }} + repository_owner: ${{ github.repository_owner }} + repository-name: ${{ github.repository }} + delete-untagged: true diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..062f10f --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,34 @@ +name: Release main + +on: + push: + branches: + - main + +permissions: + contents: read + packages: write + +jobs: + extract-dev-tag: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v6 + + - name: Get short SHA + id: get_sha + run: | + SHORT_SHA=$(echo $GITHUB_SHA | cut -c1-7) + echo "VERSION_LABEL=main-$SHORT_SHA" >> "$GITHUB_OUTPUT" + + outputs: + VERSION_LABEL: ${{ steps.get_sha.outputs.VERSION_LABEL }} + + build: + needs: + - extract-dev-tag + uses: ./.github/workflows/docker-build.yaml + with: + VERSION_LABEL: ${{ needs.extract-dev-tag.outputs.VERSION_LABEL }} + ENVIRONMENT: dev diff --git a/.github/workflows/manual-release.yaml b/.github/workflows/manual-release.yaml deleted file mode 100644 index d61044d..0000000 --- a/.github/workflows/manual-release.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: Manual release - -on: - workflow_dispatch: - inputs: - VERSION_TAG: - required: true - type: string - IS_LATEST: - required: true - type: boolean - default: false - IS_PRERELEASE: - required: true - type: boolean - default: false - -jobs: - unit-tests: - uses: ./.github/workflows/unit-tests.yaml - - build-and-push: - needs: - - unit-tests - uses: ./.github/workflows/docker-build.yaml - with: - VERSION_TAG: ${{ inputs.VERSION_TAG }} - IS_PRERELEASE: ${{ inputs.IS_PRERELEASE }} - IS_LATEST: ${{ inputs.IS_LATEST }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 1617a6f..7b58496 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -2,7 +2,7 @@ name: Release on: release: - types: [ published ] + types: [published] jobs: unit-tests: @@ -13,13 +13,11 @@ jobs: steps: - name: Extract Release Tag id: get_tag - run: | - echo "VERSION_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT" - echo "IS_PRERELEASE=${{ github.event.release.prerelease }}" >> "$GITHUB_OUTPUT" + run: echo "VERSION_LABEL=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT" outputs: - VERSION_TAG: ${{ steps.get_tag.outputs.VERSION_TAG }} - IS_PRERELEASE: ${{ steps.get_tag.outputs.IS_PRERELEASE }} + VERSION_LABEL: ${{ steps.get_tag.outputs.VERSION_LABEL }} + IS_PRERELEASE: ${{ github.event.release.prerelease }} build-and-push: needs: @@ -27,6 +25,5 @@ jobs: - extract-tag uses: ./.github/workflows/docker-build.yaml with: - VERSION_TAG: ${{ needs.extract-tag.outputs.VERSION_TAG }} - IS_PRERELEASE: ${{ fromJson(needs.extract-tag.outputs.IS_PRERELEASE) }} # must be boolean - IS_LATEST: true + VERSION_LABEL: ${{ needs.extract-tag.outputs.VERSION_LABEL }} + ENVIRONMENT: ${{ needs.extract-tag.outputs.IS_PRERELEASE == 'true' && 'preprod' || 'prod' }} diff --git a/Dockerfile b/Dockerfile index 6329ff0..69d02b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ -ARG BASE_IMAGE=python:3.12 -FROM ${BASE_IMAGE} +FROM python:3.14 + +ARG VERSION=development +ENV APP_VERSION=${VERSION} WORKDIR /app From 87c838705245ea35286f3f00bce4cc7e69b9c500 Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 15:10:35 +0000 Subject: [PATCH 82/93] ci: add dev workflow and refactor docker build process - Introduced a new GitHub Actions workflow for the 'dev' branch to automate the build process. - Added a job to extract a short SHA for version labeling. - Refactored the tagging job in the docker build workflow for clarity and updated the image cleanup action to a new version. - Streamlined the cleanup process to ensure it runs conditionally based on the success of the build. --- .github/workflows/dev.yaml | 34 +++++++++++++++++++++++++++++ .github/workflows/docker-build.yaml | 25 +++++++-------------- 2 files changed, 42 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/dev.yaml diff --git a/.github/workflows/dev.yaml b/.github/workflows/dev.yaml new file mode 100644 index 0000000..c27c251 --- /dev/null +++ b/.github/workflows/dev.yaml @@ -0,0 +1,34 @@ +name: dev + +on: + push: + branches: + - dev + +permissions: + contents: read + packages: write + +jobs: + extract-dev-tag: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v6 + + - name: Get short SHA + id: get_sha + run: | + SHORT_SHA=$(echo $GITHUB_SHA | cut -c1-7) + echo "VERSION_LABEL=dev-$SHORT_SHA" >> "$GITHUB_OUTPUT" + + outputs: + VERSION_LABEL: ${{ steps.get_sha.outputs.VERSION_LABEL }} + + build: + needs: + - extract-dev-tag + uses: ./.github/workflows/docker-build.yaml + with: + VERSION_LABEL: ${{ needs.extract-dev-tag.outputs.VERSION_LABEL }} + ENVIRONMENT: dev diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 83b6c9e..1197883 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -46,7 +46,7 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max - tag-and-cleanup: + tagging: runs-on: ubuntu-latest needs: - build-and-push @@ -85,27 +85,18 @@ jobs: -t ghcr.io/${{ github.repository }}:${MAJOR}.${MINOR} \ ghcr.io/${{ github.repository }}:${VERSION} - - name: Delete untagged images - uses: quartx-analytics/ghcr-cleaner@v1 - with: - owner-type: user - token: ${{ secrets.GITHUB_TOKEN }} - repository_owner: ${{ github.repository_owner }} - repository-name: ${{ github.repository }} - delete-untagged: true - - cleanup-dev: + cleanup: runs-on: ubuntu-latest needs: - build-and-push - if: inputs.ENVIRONMENT == 'dev' - + - tagging + if: always() && !cancelled() && needs.build-and-push.result == 'success' steps: - name: Delete untagged images - uses: quartx-analytics/ghcr-cleaner@v1 + uses: Chizkiyahu/delete-untagged-ghcr-action@v6.1.0 with: - owner-type: user token: ${{ secrets.GITHUB_TOKEN }} repository_owner: ${{ github.repository_owner }} - repository-name: ${{ github.repository }} - delete-untagged: true + package_name: meshtastic-bot + owner_type: user + untagged_only: true From f8cbdbebe451ad7914064964588f22851113e42c Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 16:46:37 +0000 Subject: [PATCH 83/93] ci: refactor Docker build workflow for multi-architecture support - Updated the GitHub Actions workflow to support multi-architecture builds for both linux/amd64 and linux/arm64. - Introduced a matrix strategy for build jobs to enhance flexibility and maintainability. - Added steps to export and upload image digests as artifacts. - Streamlined tagging logic for different environments (dev, preprod, prod) based on digest references. --- .github/workflows/docker-build.yaml | 112 ++++++++++++++++++---------- Dockerfile | 7 +- 2 files changed, 74 insertions(+), 45 deletions(-) diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 1197883..5ca59ad 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -11,21 +11,31 @@ on: type: string description: "dev | preprod | prod" +env: + IMAGE: ghcr.io/${{ github.repository }} + permissions: contents: read packages: write jobs: - build-and-push: - runs-on: ubuntu-latest + build: + strategy: + fail-fast: false + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + artifact: linux-amd64 + - platform: linux/arm64 + runner: ubuntu-24.04-arm + artifact: linux-arm64 + runs-on: ${{ matrix.runner }} steps: - name: Checkout Repository uses: actions/checkout@v6 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v4 - - name: Log in to GitHub Container Registry uses: docker/login-action@v4 with: @@ -33,24 +43,36 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build and Push Multi-Arch Image + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + + - name: Build and push by digest + id: build uses: docker/build-push-action@v6 with: context: . - platforms: linux/amd64,linux/arm64 - push: true - tags: | - ghcr.io/${{ github.repository }}:${{ inputs.ENVIRONMENT == 'dev' && 'latest-dev' || inputs.VERSION_LABEL }} + platforms: ${{ matrix.platform }} + outputs: type=image,name=${{ env.IMAGE }},push-by-digest=true,name-canonical=true,push=true build-args: | VERSION=${{ inputs.VERSION_LABEL }} - cache-from: type=gha - cache-to: type=gha,mode=max + cache-from: type=gha,scope=${{ github.ref_name }}-meshtastic-bot + cache-to: type=gha,mode=max,scope=${{ github.ref_name }}-meshtastic-bot + + - name: Export digest + run: | + mkdir -p ${{ runner.temp }}/digests + echo "${{ steps.build.outputs.digest }}" | sed 's/^sha256://' | xargs -I{} touch "${{ runner.temp }}/digests/{}" + + - name: Upload digest artifact + uses: actions/upload-artifact@v4 + with: + name: digests-${{ matrix.artifact }} + path: ${{ runner.temp }}/digests/* + retention-days: 1 - tagging: + merge: + needs: build runs-on: ubuntu-latest - needs: - - build-and-push - if: inputs.ENVIRONMENT != 'dev' steps: - name: Log in to GitHub Container Registry @@ -60,37 +82,45 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Tag for pre-release (latest-rc + version) - if: inputs.ENVIRONMENT == 'preprod' - run: | - docker buildx imagetools create \ - -t ghcr.io/${{ github.repository }}:latest-rc \ - ghcr.io/${{ github.repository }}:${{ inputs.VERSION_LABEL }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + + - name: Download digest artifacts + uses: actions/download-artifact@v4 + with: + path: ${{ runner.temp }}/digests + pattern: digests-* + merge-multiple: true - - name: Tag for release (latest + semver components) - if: inputs.ENVIRONMENT == 'prod' + - name: Create and push multi-arch manifest + working-directory: ${{ runner.temp }}/digests run: | - VERSION="${{ inputs.VERSION_LABEL }}" - docker buildx imagetools create \ - -t ghcr.io/${{ github.repository }}:latest \ - -t ghcr.io/${{ github.repository }}:${VERSION} \ - ghcr.io/${{ github.repository }}:${VERSION} - # Parse semver and create component tags - MAJOR=$(echo "${VERSION}" | cut -d. -f1) - MINOR=$(echo "${VERSION}" | cut -d. -f2) - docker buildx imagetools create \ - -t ghcr.io/${{ github.repository }}:${MAJOR} \ - ghcr.io/${{ github.repository }}:${VERSION} - docker buildx imagetools create \ - -t ghcr.io/${{ github.repository }}:${MAJOR}.${MINOR} \ - ghcr.io/${{ github.repository }}:${VERSION} + DIGEST_REFS=$(printf "${{ env.IMAGE }}@sha256:%s " *) + if [ "${{ inputs.ENVIRONMENT }}" = "dev" ]; then + docker buildx imagetools create -t ${{ env.IMAGE }}:latest-dev ${DIGEST_REFS} + elif [ "${{ inputs.ENVIRONMENT }}" = "preprod" ]; then + docker buildx imagetools create \ + -t ${{ env.IMAGE }}:${{ inputs.VERSION_LABEL }} \ + -t ${{ env.IMAGE }}:latest-rc \ + ${DIGEST_REFS} + else + VERSION="${{ inputs.VERSION_LABEL }}" + MAJOR=$(echo "${VERSION}" | cut -d. -f1) + MINOR=$(echo "${VERSION}" | cut -d. -f2) + docker buildx imagetools create \ + -t ${{ env.IMAGE }}:${VERSION} \ + -t ${{ env.IMAGE }}:latest \ + -t ${{ env.IMAGE }}:${MAJOR} \ + -t ${{ env.IMAGE }}:${MAJOR}.${MINOR} \ + ${DIGEST_REFS} + fi cleanup: runs-on: ubuntu-latest needs: - - build-and-push - - tagging - if: always() && !cancelled() && needs.build-and-push.result == 'success' + - build + - merge + if: always() && !cancelled() && needs.merge.result == 'success' steps: - name: Delete untagged images uses: Chizkiyahu/delete-untagged-ghcr-action@v6.1.0 diff --git a/Dockerfile b/Dockerfile index 69d02b1..8d4832b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,12 +8,11 @@ WORKDIR /app # Copy only the requirements file first to leverage Docker layer caching COPY requirements.txt . -# Install dependencies -RUN pip install --no-cache-dir -r requirements.txt +# Install dependencies (cache mount speeds up repeat builds) +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r requirements.txt # Copy the rest of the application files COPY . . -RUN pip install -r requirements.txt - CMD ["python", "-m", "src.main"] From 883a5d26a42bee48f39a62373c55d87deb4c0820 Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 16:56:03 +0000 Subject: [PATCH 84/93] ci: cancel other PR builds on push - Added concurrency settings to the pull request workflow to group and cancel in-progress jobs based on the pull request number, improving resource management and efficiency. --- .github/workflows/pull-request.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pull-request.yaml b/.github/workflows/pull-request.yaml index c0aeaf3..8ec7353 100644 --- a/.github/workflows/pull-request.yaml +++ b/.github/workflows/pull-request.yaml @@ -3,6 +3,10 @@ name: pull request on: pull_request: +concurrency: + group: pr-${{ github.event.pull_request.number }} + cancel-in-progress: true + jobs: UnitTest: uses: ./.github/workflows/unit-tests.yaml From a5124892dc365ace3440b354d289d1609322018d Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 16:50:25 +0000 Subject: [PATCH 85/93] feat: add support for ignoring specific portnums in API submissions - Updated .env.example to include IGNORE_PORTNUMS for configurable portnums to skip. - Refactored MeshtasticBot to utilize IGNORE_PORTNUMS, preventing API submissions for specified portnums. - Enhanced main.py to initialize ignore_portnums from environment variable. --- .env.example | 3 +++ src/bot.py | 26 +++++++++++++++++--------- src/main.py | 7 +++++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index 930ce9f..6139719 100644 --- a/.env.example +++ b/.env.example @@ -14,3 +14,6 @@ STORAGE_API_VERSION=2 # Use this if you want to receive commands from the Meshflow server (e.g. traceroute) MESHFLOW_WS_URL=ws://localhost:8000 + +# Comma-separated portnums to skip when submitting packets to the API (e.g. custom or rejected ports) +IGNORE_PORTNUMS=345,ROUTING_APP diff --git a/src/bot.py b/src/bot.py index e6f7bc8..6000e7c 100644 --- a/src/bot.py +++ b/src/bot.py @@ -24,6 +24,7 @@ class MeshtasticBot: admin_nodes: list[str] + ignore_portnums: frozenset # Portnums to skip when submitting to API (from IGNORE_PORTNUMS env) interface: SupportsMessageReactionInterface init_complete: bool @@ -43,6 +44,7 @@ def __init__(self, address: str): self.address = address self.admin_nodes = [] + self.ignore_portnums = frozenset() self.interface = None self.init_complete = False @@ -176,15 +178,21 @@ def on_receive(self, packet: MeshPacket, interface): # dump the packet to disk (if enabled) dump_packet(packet) - for storage_api in self.storage_apis: - try: - storage_api.store_raw_packet(packet) - except HTTPError as ex: - logging.warning(f"Error storing packet: {ex.response.text}") - pass - except Exception as ex: - logging.warning(f"Error storing packet in API: {ex}") - pass + portnum = packet.get("decoded", {}).get("portnum", "unknown") + portnum_key = str(portnum).upper() + if self.ignore_portnums and portnum_key in self.ignore_portnums: + logging.info(f"Skipping API submission for packet with portnum {portnum} (in IGNORE_PORTNUMS)") + # Continue with node_info etc. below, just skip storage API + else: + for storage_api in self.storage_apis: + try: + storage_api.store_raw_packet(packet) + except HTTPError as ex: + logging.warning(f"Error storing packet: {ex.response.text}") + pass + except Exception as ex: + logging.warning(f"Error storing packet in API: {ex}") + pass sender = packet['fromId'] node = self.node_db.get_by_id(sender) diff --git a/src/main.py b/src/main.py index a6b5fee..69c3bb9 100644 --- a/src/main.py +++ b/src/main.py @@ -39,6 +39,12 @@ STORAGE_API_2_VERSION = int(os.getenv("STORAGE_API_2_VERSION", 1)) MESHFLOW_WS_URL = os.getenv("MESHFLOW_WS_URL") # e.g. ws://localhost:8000; derived from storage API if unset +# Comma-separated portnums to skip when submitting to API (e.g. 345,ROUTING_APP) +_ignore_portnums_raw = os.getenv("IGNORE_PORTNUMS", "") +IGNORE_PORTNUMS = frozenset( + p.strip().upper() for p in _ignore_portnums_raw.split(",") if p.strip() +) + def main(): # Ensure data dir exists @@ -53,6 +59,7 @@ def main(): # Connect to the Meshtastic node over WiFi bot = MeshtasticBot(MESHTASTIC_IP) + bot.ignore_portnums = IGNORE_PORTNUMS bot.admin_nodes = ADMIN_NODES bot.user_prefs_persistence = SqliteUserPrefsPersistence(str(user_prefs_file)) bot.command_logger = SqliteCommandLogger(str(command_log_file)) From b875dc330302b0a119666b3cdb5a4d707861d9fe Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 19:01:39 +0000 Subject: [PATCH 86/93] fix: enhance error handling and logging in StorageAPI and MeshtasticBot - Added checks for decoded data in MeshtasticBot to skip API submissions when necessary. - Improved error logging in StorageAPI to provide clearer messages for HTTP errors and general exceptions. - Ensured failed packets are dumped to a .json file upon encountering errors during storage. --- src/api/StorageAPI.py | 14 +++++++++++--- src/bot.py | 3 +++ test/test_bot.py | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index ff38e64..fb588ed 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -76,18 +76,26 @@ def store_raw_packet(self, packet: dict): logging.debug(f"Storing packet: {packet}") try: response = self._post(self._get_url('raw_packet'), json=packet) + + response_json = response.json() + return response_json except HTTPError as ex: - logging.error(f"Error storing packet: {ex.response.text}") + logging.error(f"HTTP error storing packet: {ex.response.text}") logging.error(f"Packet: {packet}") # Dump the packet to a .json file if self.failed_packets_dir: self._dump_failed_packet(packet, ex) return + except Exception as ex: + logging.error(f"Error storing packet: {ex}") + logging.error(f"Packet: {packet}") - logging.debug(f"Response: {response.json()}") + # Dump the packet to a .json file + if self.failed_packets_dir: + self._dump_failed_packet(packet, ex) - return response.json() + return def list_nodes(self) -> list[MeshNode]: """ diff --git a/src/bot.py b/src/bot.py index 6000e7c..6b073de 100644 --- a/src/bot.py +++ b/src/bot.py @@ -180,9 +180,12 @@ def on_receive(self, packet: MeshPacket, interface): portnum = packet.get("decoded", {}).get("portnum", "unknown") portnum_key = str(portnum).upper() + has_decoded = 'decoded' in packet or 'decrypted' in packet if self.ignore_portnums and portnum_key in self.ignore_portnums: logging.info(f"Skipping API submission for packet with portnum {portnum} (in IGNORE_PORTNUMS)") # Continue with node_info etc. below, just skip storage API + elif not has_decoded: + pass # Skip API submission for packets with no decoded data else: for storage_api in self.storage_apis: try: diff --git a/test/test_bot.py b/test/test_bot.py index cadbb00..d86c315 100644 --- a/test/test_bot.py +++ b/test/test_bot.py @@ -15,8 +15,8 @@ def setUp(self): def test_connect(self, mock_pub): self.bot.connect() self.bot.interface.connect.assert_called_once() + mock_pub.subscribe.assert_any_call(self.bot.on_receive, "meshtastic.receive") mock_pub.subscribe.assert_any_call(self.bot.on_receive_text, "meshtastic.receive.text") - mock_pub.subscribe.assert_any_call(self.bot.on_receive_user, "meshtastic.receive.user") mock_pub.subscribe.assert_any_call(self.bot.on_node_updated, "meshtastic.node.updated") mock_pub.subscribe.assert_any_call(self.bot.on_connection, "meshtastic.connection.established") From 9fd5af4a4e1dc1962bf4b300350c292abc07913d Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 22:17:16 +0000 Subject: [PATCH 87/93] ci: fix docker tag on release --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7b58496..78a702b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,7 +13,7 @@ jobs: steps: - name: Extract Release Tag id: get_tag - run: echo "VERSION_LABEL=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT" + run: echo "VERSION_LABEL=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT" outputs: VERSION_LABEL: ${{ steps.get_tag.outputs.VERSION_LABEL }} From 48fd9d9ab6ec287849aa0ac7d6c514b5349a0afb Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 22:11:37 +0000 Subject: [PATCH 88/93] feat: add TR_HOPS_LIMIT env var --- .env.example | 3 +++ src/traceroute.py | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index 6139719..d2adefa 100644 --- a/.env.example +++ b/.env.example @@ -17,3 +17,6 @@ MESHFLOW_WS_URL=ws://localhost:8000 # Comma-separated portnums to skip when submitting packets to the API (e.g. custom or rejected ports) IGNORE_PORTNUMS=345,ROUTING_APP + +# Tracerouteconfig +TR_HOPS_LIMIT=5 diff --git a/src/traceroute.py b/src/traceroute.py index 602cc33..18a4ac9 100644 --- a/src/traceroute.py +++ b/src/traceroute.py @@ -5,20 +5,31 @@ import logging from typing import TYPE_CHECKING +import os + if TYPE_CHECKING: from src.bot import MeshtasticBot logger = logging.getLogger(__name__) +TR_HOPS_LIMIT = int(os.getenv("TR_HOPS_LIMIT", '5')) +if TR_HOPS_LIMIT < 3: + logger.warning(f"TR_HOPS_LIMIT is less than 3, traceroutes are likely to fail. Capping at 3.") + TR_HOPS_LIMIT = 3 +elif TR_HOPS_LIMIT < 5: + logger.warning(f"TR_HOPS_LIMIT is less than 5, traceroutes are likely to fail") + +if TR_HOPS_LIMIT > 7: + logger.warning(f"TR_HOPS_LIMIT is greater than the Meshtastic limit of 7. Capping at 7.") + TR_HOPS_LIMIT = 7 -def on_traceroute_command(bot: "MeshtasticBot", target_node_id: int, hop_limit: int = 5, channel_index: int = 0): +def on_traceroute_command(bot: "MeshtasticBot", target_node_id: int, channel_index: int = 0): """ Send a traceroute request to the target node. Args: bot: The MeshtasticBot instance target_node_id: Target node ID (integer, e.g. 1623194643) - hop_limit: Maximum hops for the traceroute (default 5) channel_index: Channel index (default 0) """ if not bot.interface or not bot.init_complete: @@ -26,7 +37,7 @@ def on_traceroute_command(bot: "MeshtasticBot", target_node_id: int, hop_limit: return try: - bot.interface.sendTraceRoute(target_node_id, hop_limit, channelIndex=channel_index) + bot.interface.sendTraceRoute(target_node_id, TR_HOPS_LIMIT, channelIndex=channel_index) logger.info(f"Traceroute: sent to target={target_node_id}") except Exception as e: logger.error(f"Traceroute: failed to send to {target_node_id}: {e}") From e05fc7032d3115b5e06126ed9c6573f38e7d544d Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 22:27:49 +0000 Subject: [PATCH 89/93] feat: add TEXT_MESSAGE_MAX_HOPS env var - Added TEXT_MESSAGE_MAX_HOPS to .env.example for configurable max hops in text messages. - Updated base_feature.py to utilize TEXT_MESSAGE_MAX_HOPS, enforcing limits between 1 and 7. - Modified sendText calls to include hopLimit based on the new configuration. - Enhanced unit tests to verify hopLimit functionality in message sending. --- .env.example | 5 ++++- src/base_feature.py | 20 ++++++++++++++++++-- test/__init__.py | 4 +++- test/test_base_feature.py | 17 ++++++++++++----- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index d2adefa..6e76c66 100644 --- a/.env.example +++ b/.env.example @@ -18,5 +18,8 @@ MESHFLOW_WS_URL=ws://localhost:8000 # Comma-separated portnums to skip when submitting packets to the API (e.g. custom or rejected ports) IGNORE_PORTNUMS=345,ROUTING_APP -# Tracerouteconfig +# Traceroute config TR_HOPS_LIMIT=5 + +# Max hops for text messages sent by the bot (1-7, default 5) +TEXT_MESSAGE_MAX_HOPS=5 diff --git a/src/base_feature.py b/src/base_feature.py index 28d1e2a..27d6879 100644 --- a/src/base_feature.py +++ b/src/base_feature.py @@ -1,10 +1,19 @@ import logging +import os from abc import ABC from meshtastic.protobuf.mesh_pb2 import MeshPacket from src.bot import MeshtasticBot +TEXT_MESSAGE_MAX_HOPS = int(os.getenv("TEXT_MESSAGE_MAX_HOPS", "5")) +if TEXT_MESSAGE_MAX_HOPS < 1: + logging.warning("TEXT_MESSAGE_MAX_HOPS is less than 1, capping at 1.") + TEXT_MESSAGE_MAX_HOPS = 1 +elif TEXT_MESSAGE_MAX_HOPS > 7: + logging.warning("TEXT_MESSAGE_MAX_HOPS is greater than the Meshtastic limit of 7. Capping at 7.") + TEXT_MESSAGE_MAX_HOPS = 7 + class AbstractBaseFeature(ABC): """ @@ -27,7 +36,9 @@ def message_in_channel(self, channel: int, message: str, want_ack=False) -> None Send a message in a channel """ logging.debug(f"Sending message: '{message}'") - self.bot.interface.sendText(message, channelIndex=channel, wantAck=want_ack) + self.bot.interface.sendText( + message, channelIndex=channel, wantAck=want_ack, hopLimit=TEXT_MESSAGE_MAX_HOPS + ) def reply_in_dm(self, packet: MeshPacket, message: str, want_ack=False) -> None: """ @@ -41,7 +52,12 @@ def message_in_dm(self, destination_id: str, message: str, want_ack=False) -> No Reply in a direct message to a user """ logging.debug(f"Sending DM: '{message}'") - self.bot.interface.sendText(message, destinationId=destination_id, wantAck=want_ack) + self.bot.interface.sendText( + message, + destinationId=destination_id, + wantAck=want_ack, + hopLimit=TEXT_MESSAGE_MAX_HOPS, + ) def react_in_channel(self, packet: MeshPacket, emoji: str) -> None: """ diff --git a/test/__init__.py b/test/__init__.py index 1b6ba50..55313a0 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -30,6 +30,7 @@ def assert_message_sent(self, expected_response: str, to: MeshNode, want_ack: bo for call_args in self.mock_interface.sendText.call_args_list: if (call_args[1]['destinationId'] == to.user.id and call_args[1]['wantAck'] == want_ack + and call_args[1].get('hopLimit') == 5 and call_args[0][0].strip() == expected_response): return @@ -43,7 +44,8 @@ def assert_message_sent(self, expected_response: str, to: MeshNode, want_ack: bo self.mock_interface.sendText.assert_called_once_with( expected_response, destinationId=to.user.id, - wantAck=want_ack + wantAck=want_ack, + hopLimit=5, ) def assert_reaction_sent(self, emoji: str, reply_id: int, channel=0, sender_id: str = None): diff --git a/test/test_base_feature.py b/test/test_base_feature.py index c0910d9..57ce3ef 100644 --- a/test/test_base_feature.py +++ b/test/test_base_feature.py @@ -21,23 +21,30 @@ def test_reply_in_channel(self): sender = self.test_non_admin_nodes[1] packet = build_test_text_packet('!test', sender.user.id, self.bot.my_id, channel=1) self.feature.reply_in_channel(packet, "Test message") - self.mock_interface.sendText.assert_called_once_with("Test message", channelIndex=1, wantAck=False) + self.mock_interface.sendText.assert_called_once_with( + "Test message", channelIndex=1, wantAck=False, hopLimit=5 + ) def test_message_in_channel(self): self.feature.message_in_channel(1, "Test message") - self.mock_interface.sendText.assert_called_once_with("Test message", channelIndex=1, wantAck=False) + self.mock_interface.sendText.assert_called_once_with( + "Test message", channelIndex=1, wantAck=False, hopLimit=5 + ) def test_reply_in_dm(self): sender = self.test_non_admin_nodes[1] packet = build_test_text_packet('!test', sender.user.id, self.bot.my_id) self.feature.reply_in_dm(packet, "Test message") - self.mock_interface.sendText.assert_called_once_with("Test message", destinationId=sender.user.id, wantAck=False) + self.mock_interface.sendText.assert_called_once_with( + "Test message", destinationId=sender.user.id, wantAck=False, hopLimit=5 + ) def test_message_in_dm(self): sender = self.test_non_admin_nodes[1] self.feature.message_in_dm(sender.user.id, "Test message") - self.mock_interface.sendText.assert_called_once_with("Test message", destinationId=sender.user.id, - wantAck=False) + self.mock_interface.sendText.assert_called_once_with( + "Test message", destinationId=sender.user.id, wantAck=False, hopLimit=5 + ) def test_react_in_channel(self): sender = self.test_non_admin_nodes[1] From a7a4d7884fe474f9b249371c3abf16b0f8c498bc Mon Sep 17 00:00:00 2001 From: Patrick Skillen Date: Fri, 13 Mar 2026 23:22:06 +0000 Subject: [PATCH 90/93] feat: implement rate limiting for traceroute requests - Added TR_MIN_INTERVAL_SEC to .env.example to configure minimum interval between traceroute requests. - Updated traceroute.py to enforce rate limiting based on TR_MIN_INTERVAL_SEC, preventing rapid successive requests. - Refactored traceroute command handling to use sendData instead of sendTraceRoute for improved response handling. --- .env.example | 2 ++ src/traceroute.py | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 6e76c66..e88d6c2 100644 --- a/.env.example +++ b/.env.example @@ -20,6 +20,8 @@ IGNORE_PORTNUMS=345,ROUTING_APP # Traceroute config TR_HOPS_LIMIT=5 +# Min seconds between traceroutes (firmware enforces ~30s; we rate-limit client-side) +TR_MIN_INTERVAL_SEC=30 # Max hops for text messages sent by the bot (1-7, default 5) TEXT_MESSAGE_MAX_HOPS=5 diff --git a/src/traceroute.py b/src/traceroute.py index 18a4ac9..631ee69 100644 --- a/src/traceroute.py +++ b/src/traceroute.py @@ -3,15 +3,24 @@ """ import logging +import os +import threading +import time from typing import TYPE_CHECKING -import os +from meshtastic.protobuf import mesh_pb2, portnums_pb2 if TYPE_CHECKING: from src.bot import MeshtasticBot logger = logging.getLogger(__name__) +# Firmware enforces ~30s minimum between traceroutes. We rate-limit client-side to avoid +# sending requests the radio will reject (no ROUTING_APP packet). +TR_MIN_INTERVAL_SEC = int(os.getenv("TR_MIN_INTERVAL_SEC", "30")) +_last_tr_time: float = 0 +_tr_lock = threading.Lock() + TR_HOPS_LIMIT = int(os.getenv("TR_HOPS_LIMIT", '5')) if TR_HOPS_LIMIT < 3: logger.warning(f"TR_HOPS_LIMIT is less than 3, traceroutes are likely to fail. Capping at 3.") @@ -32,12 +41,36 @@ def on_traceroute_command(bot: "MeshtasticBot", target_node_id: int, channel_ind target_node_id: Target node ID (integer, e.g. 1623194643) channel_index: Channel index (default 0) """ + global _last_tr_time + if not bot.interface or not bot.init_complete: logger.warning("Traceroute: bot not connected, skipping") return + with _tr_lock: + now = time.monotonic() + elapsed = now - _last_tr_time + if elapsed < TR_MIN_INTERVAL_SEC: + logger.info( + f"Traceroute: rate limited (target={target_node_id}, " + f"{TR_MIN_INTERVAL_SEC - int(elapsed)}s remaining)" + ) + return + _last_tr_time = now + try: - bot.interface.sendTraceRoute(target_node_id, TR_HOPS_LIMIT, channelIndex=channel_index) + # Use sendData directly instead of sendTraceRoute: sendTraceRoute blocks until response + # (or timeout ~2min), causing a backlog when responses are slow/lost. TR responses + # arrive via meshtastic.receive and are handled by bot.on_receive. + r = mesh_pb2.RouteDiscovery() + bot.interface.sendData( + r, + destinationId=target_node_id, + portNum=portnums_pb2.PortNum.TRACEROUTE_APP, + wantResponse=True, + channelIndex=channel_index, + hopLimit=TR_HOPS_LIMIT, + ) logger.info(f"Traceroute: sent to target={target_node_id}") except Exception as e: logger.error(f"Traceroute: failed to send to {target_node_id}: {e}") From ef277b2b5b52cfb4cbd0d72bf69da4375ff3a82d Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Sat, 14 Mar 2026 16:46:51 +0000 Subject: [PATCH 91/93] feat: make TCP proxy cache sizes configurable via env vars --- .env.example | 2 ++ README.md | 2 ++ src/main.py | 6 +++++- src/tcp_proxy.py | 6 +++--- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.env.example b/.env.example index 051a1d7..f398baf 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,8 @@ STORAGE_API_TOKEN=... # Features ENABLE_TCP_PROXY=true +PROXY_HANDSHAKE_CACHE_SIZE=100 +PROXY_ROLLING_CACHE_SIZE=100 ENABLE_FEATURE_NODE_TOTALS=true FREQUENCY_OF_NODE_REPORTS=3 CHANNEL_FOR_NODE_TOTAL_BROADCAST=2 diff --git a/README.md b/README.md index d7a1c4c..f843d62 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,8 @@ You can enable or disable specific features and commands using environment varia ### Feature Toggles - `ENABLE_TCP_PROXY`: Set to `false` to disable the internal TCP proxy. The bot will connect directly to `MESHTASTIC_IP`. +- `PROXY_HANDSHAKE_CACHE_SIZE`: Number of initial packets to cache for connecting proxy clients (default `100`). +- `PROXY_ROLLING_CACHE_SIZE`: Number of recent packets to cache in a rolling queue for connecting proxy clients (default `100`). ### Command Toggles Set any of the following to `false` to disable the command and hide it from the `!help` menu: diff --git a/src/main.py b/src/main.py index e3cd660..466ab37 100644 --- a/src/main.py +++ b/src/main.py @@ -36,6 +36,8 @@ ADMIN_NODES = [node.strip() for node in admin_nodes_raw.split(',') if node.strip()] ENABLE_TCP_PROXY = get_env_bool("ENABLE_TCP_PROXY", True) +PROXY_HANDSHAKE_CACHE_SIZE = int(os.getenv("PROXY_HANDSHAKE_CACHE_SIZE", 100)) +PROXY_ROLLING_CACHE_SIZE = int(os.getenv("PROXY_ROLLING_CACHE_SIZE", 100)) DATA_DIR = os.getenv("DATA_DIR", "data") STORAGE_API_ROOT = os.getenv("STORAGE_API_ROOT") @@ -60,6 +62,8 @@ def main(): logging.info(f"--- Configuration ---") logging.info(f"MESHTASTIC_IP: {MESHTASTIC_IP}") logging.info(f"ENABLE_TCP_PROXY: {ENABLE_TCP_PROXY}") + logging.info(f"PROXY_HANDSHAKE_CACHE_SIZE: {PROXY_HANDSHAKE_CACHE_SIZE}") + logging.info(f"PROXY_ROLLING_CACHE_SIZE: {PROXY_ROLLING_CACHE_SIZE}") logging.info(f"ENABLE_FEATURE_NODE_TOTALS: {get_env_bool('ENABLE_FEATURE_NODE_TOTALS', True)}") logging.info(f"FREQUENCY_OF_NODE_REPORTS: {os.getenv('FREQUENCY_OF_NODE_REPORTS', '3')} hours") logging.info(f"CHANNEL_FOR_NODE_TOTAL_BROADCAST: {os.getenv('CHANNEL_FOR_NODE_TOTAL_BROADCAST', '2')}") @@ -74,7 +78,7 @@ def main(): if ENABLE_TCP_PROXY: # Start the TCP Proxy # It listens on 0.0.0.0:4403 and forwards to MESHTASTIC_IP:4403 - proxy = TcpProxy(target_host=MESHTASTIC_IP, target_port=4403, listen_host='0.0.0.0', listen_port=4403) + proxy = TcpProxy(target_host=MESHTASTIC_IP, target_port=4403, listen_host='0.0.0.0', listen_port=4403, handshake_cache_size=PROXY_HANDSHAKE_CACHE_SIZE, rolling_cache_size=PROXY_ROLLING_CACHE_SIZE) proxy.start() # Give the proxy a moment to bind to the port before the bot tries to connect diff --git a/src/tcp_proxy.py b/src/tcp_proxy.py index 9a5e50f..8a6e9ee 100644 --- a/src/tcp_proxy.py +++ b/src/tcp_proxy.py @@ -5,7 +5,7 @@ import threading class TcpProxy: - def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_port=4403): + def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_port=4403, handshake_cache_size=100, rolling_cache_size=100): self.target_host = target_host self.target_port = int(target_port) self.listen_host = listen_host @@ -22,8 +22,8 @@ def __init__(self, target_host, target_port=4403, listen_host='0.0.0.0', listen_ self.thread = None self.handshake_packets = [] - self.handshake_max_count = 50 - self.rolling_packets = deque(maxlen=50) + self.handshake_max_count = handshake_cache_size + self.rolling_packets = deque(maxlen=rolling_cache_size) self.last_target_activity = time.time() self.reconnecting = False From b25cc8f9d267fc0ebcb22e160988665fa1999a48 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Wed, 18 Mar 2026 10:04:28 +0000 Subject: [PATCH 92/93] Fix UnboundLocalError in StorageAPI response handling --- src/api/StorageAPI.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/api/StorageAPI.py b/src/api/StorageAPI.py index 3fec1da..c05d2d6 100644 --- a/src/api/StorageAPI.py +++ b/src/api/StorageAPI.py @@ -111,30 +111,24 @@ def store_raw_packet(self, packet: dict): try: response = self._post(self._get_url('raw_packet'), json=packet) - response_json = response.json() - return response_json + try: + response_json = response.json() + logging.info(f"API Response ({response.status_code}): {response_json}") + return response_json + except JSONDecodeError: + logging.info(f"API Response ({response.status_code}, not JSON): {response.text}") + return {'text': response.text} + except HTTPError as ex: logging.error(f"HTTP error storing packet: {ex.response.text}") logging.error(f"Packet: {packet}") - - # Dump the packet to a .json file if self.failed_packets_dir: self._dump_failed_packet(packet, ex) return + except Exception as ex: logging.error(f"Error storing packet: {ex}") logging.error(f"Packet: {packet}") - - try: - response_json = response.json() - logging.info(f"API Response ({response.status_code}): {response_json}") - return response_json - except JSONDecodeError: - logging.info(f"API Response ({response.status_code}, not JSON): {response.text}") - return {'text': response.text} - except Exception as ex: - logging.error(f"Error processing API response: {ex}") - # Dump the packet to a .json file if self.failed_packets_dir: self._dump_failed_packet(packet, ex) return From 551393872567df4a52291535bb314d1b72941fa3 Mon Sep 17 00:00:00 2001 From: Meshtastic Bot Date: Wed, 18 Mar 2026 10:13:55 +0000 Subject: [PATCH 93/93] Fix SQLite connection leak and improve IGNORE_PORTNUMS matching --- src/bot.py | 11 ++++++++++- src/main.py | 1 + src/persistence/__init__.py | 10 ++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/bot.py b/src/bot.py index 3638cec..f3ab458 100644 --- a/src/bot.py +++ b/src/bot.py @@ -310,9 +310,18 @@ def on_receive(self, packet: MeshPacket, interface): dump_packet(packet) portnum = packet.get("decoded", {}).get("portnum", "unknown") + # Ensure we check against both the string name and the integer ID if available portnum_key = str(portnum).upper() + has_decoded = 'decoded' in packet or 'decrypted' in packet - if self.ignore_portnums and portnum_key in self.ignore_portnums: + is_ignored = False + if self.ignore_portnums: + if portnum_key in self.ignore_portnums: + is_ignored = True + elif isinstance(portnum, int) and str(portnum) in self.ignore_portnums: + is_ignored = True + + if is_ignored: logging.info(f"Skipping API submission for packet with portnum {portnum} (in IGNORE_PORTNUMS)") elif not has_decoded: pass # Skip API submission for packets with no decoded data diff --git a/src/main.py b/src/main.py index 9a44b78..bfb6877 100644 --- a/src/main.py +++ b/src/main.py @@ -77,6 +77,7 @@ def main(): logging.info(f"CHANNEL_FOR_NODE_TOTAL_BROADCAST: {os.getenv('CHANNEL_FOR_NODE_TOTAL_BROADCAST', '2')}") logging.info(f"ENABLE_COMMAND_PING: {get_env_bool('ENABLE_COMMAND_PING', True)}") logging.info(f"ENABLE_COMMAND_TR: {get_env_bool('ENABLE_COMMAND_TR', True)}") + logging.info(f"IGNORE_PORTNUMS: {list(IGNORE_PORTNUMS)}") logging.info(f"STORAGE_API_ROOT: {STORAGE_API_ROOT}") if STORAGE_API_2_ROOT: logging.info(f"STORAGE_API_2_ROOT: {STORAGE_API_2_ROOT}") diff --git a/src/persistence/__init__.py b/src/persistence/__init__.py index f5209c9..2797e70 100644 --- a/src/persistence/__init__.py +++ b/src/persistence/__init__.py @@ -2,6 +2,7 @@ import logging import sqlite3 import threading +from contextlib import contextmanager from pathlib import Path @@ -18,9 +19,14 @@ def __init__(self, db_path: str): path_string = self.db_path logging.info(f"Connected to {self.__class__.__name__} DB at {path_string}") + @contextmanager def _get_connection(self): - """Returns a thread-safe sqlite3 connection.""" - return sqlite3.connect(self.db_path, check_same_thread=False) + """Returns a thread-safe sqlite3 connection and ensures it is closed.""" + conn = sqlite3.connect(self.db_path, check_same_thread=False) + try: + yield conn + finally: + conn.close() @abc.abstractmethod def _initialize_db(self):