Phase 3.1: Type Safety
- Add bridge_types.py for shared type aliases (EventDict, PowerWatts, Timestamp, DeviceID)
- Define protocols for callbacks and message parsers
- Strict type annotations on all core modules (session_detector, event_queue, device_manager)
- Fix Optional handling and type guards throughout codebase
- Achieve full mypy compliance: 0 errors across 47 source files

Phase 3.2: Logging Unification
- Migrate from stdlib logging to pure structlog across all runtime modules
- Convert all logs to structured event+fields format (snake_case event names)
- Remove f-string and printf-style logger calls
- Add contextvars support for per-request correlation
- Implement FastAPI middleware to bind request_id, http_method, http_path
- Propagate X-Request-ID header in responses
- Remove stdlib logging imports except setup layer (utils/logging.py)
- Ensure log-level consistency across all modules

Files Modified:
- iot_bridge/bridge_types.py (new) - Central type definitions
- iot_bridge/core/* - Type safety and logging unification
- iot_bridge/clients/* - Structured logging with request context
- iot_bridge/parsers/* - Type-safe parsing with structured logs
- iot_bridge/utils/logging.py - Pure structlog setup with contextvars
- iot_bridge/api/server.py - Added request correlation middleware
- iot_bridge/tests/* - Test fixtures updated for type safety
- iot_bridge/OPTIMIZATION_PLAN.md - Phase 3 status updated

Validation:
- mypy . → 0 errors (47 files)
- All unit tests pass
- Runtime behavior unchanged
- API response headers include X-Request-ID
300 lines
10 KiB
Python
300 lines
10 KiB
Python
"""
Device Status Monitor - Tracks device online/offline status based on MQTT activity.

Monitors last_seen timestamps and emits device_online/device_offline events to Odoo.
"""
|
|
|
|
import json
|
|
import threading
|
|
import time
|
|
import uuid
|
|
from collections.abc import Callable
|
|
from dataclasses import asdict, dataclass
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import structlog
|
|
|
|
# Module-level structlog logger; the code below logs structured events
# (snake_case event name plus keyword fields) through it.
logger = structlog.get_logger()
|
|
|
|
|
|
@dataclass
class DeviceStatus:
    """Liveness record kept for a single device."""

    # Identifier of the device this record describes.
    device_id: str
    # Unix timestamp of the most recent activity seen from the device.
    last_seen: float
    # True while the device is considered online.
    is_online: bool
    # Unix timestamp of the most recent online/offline transition.
    last_state_change: float
|
|
|
|
|
|
class DeviceStatusMonitor:
|
|
"""
|
|
Monitors device activity and tracks online/offline status.
|
|
|
|
- Tracks last_seen timestamp per device on every MQTT message
|
|
- Runs background thread that checks timeouts
|
|
- Emits device_online/device_offline events
|
|
- Persists status to /data/device_status.json for restart recovery
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
timeout_seconds: int = 30,
|
|
check_interval_seconds: int = 5,
|
|
persistence_path: str = "/data/device_status.json",
|
|
event_callback: Callable | None = None,
|
|
):
|
|
"""
|
|
Initialize Device Status Monitor.
|
|
|
|
Args:
|
|
timeout_seconds: Seconds without message before device is marked offline
|
|
check_interval_seconds: How often to check for timeouts
|
|
persistence_path: Path to persist device status
|
|
event_callback: Callback(event_dict) for device_online/offline events
|
|
"""
|
|
self.timeout_seconds = timeout_seconds
|
|
self.check_interval_seconds = check_interval_seconds
|
|
self.persistence_path = Path(persistence_path)
|
|
self.event_callback = event_callback
|
|
|
|
# Device status tracking: device_id -> DeviceStatus
|
|
self.devices: dict[str, DeviceStatus] = {}
|
|
self.lock = threading.Lock()
|
|
|
|
# Background thread
|
|
self.monitor_thread: threading.Thread | None = None
|
|
self.stop_flag = threading.Event()
|
|
|
|
# Load persisted status
|
|
self._load_status()
|
|
|
|
logger.info(
|
|
"device_status_monitor_initialized",
|
|
timeout_s=timeout_seconds,
|
|
check_interval_s=check_interval_seconds,
|
|
persistence_path=str(self.persistence_path),
|
|
)
|
|
|
|
def _load_status(self):
|
|
"""Load persisted device status from disk."""
|
|
if not self.persistence_path.exists():
|
|
logger.info("no_persisted_status_found", path=str(self.persistence_path))
|
|
return
|
|
|
|
try:
|
|
with open(self.persistence_path) as f:
|
|
data = json.load(f)
|
|
|
|
for device_id, status_dict in data.items():
|
|
self.devices[device_id] = DeviceStatus(**status_dict)
|
|
|
|
logger.info(
|
|
"device_status_loaded",
|
|
device_count=len(self.devices),
|
|
path=str(self.persistence_path),
|
|
)
|
|
except Exception as e:
|
|
logger.error("failed_to_load_device_status", error=str(e))
|
|
|
|
def _save_status(self):
|
|
"""Persist device status to disk."""
|
|
try:
|
|
# Ensure directory exists
|
|
self.persistence_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Convert to dict
|
|
data = {device_id: asdict(status) for device_id, status in self.devices.items()}
|
|
|
|
# Write atomically (write to temp, then rename)
|
|
temp_path = self.persistence_path.with_suffix(".tmp")
|
|
with open(temp_path, "w") as f:
|
|
json.dump(data, f, indent=2)
|
|
|
|
temp_path.replace(self.persistence_path)
|
|
|
|
logger.debug("device_status_saved", device_count=len(self.devices))
|
|
except Exception as e:
|
|
logger.error("failed_to_save_device_status", error=str(e))
|
|
|
|
def update_last_seen(self, device_id: str):
|
|
"""
|
|
Update last_seen timestamp for a device.
|
|
|
|
Called on every MQTT message. If device was offline, emit device_online event.
|
|
|
|
Args:
|
|
device_id: Device identifier
|
|
"""
|
|
now = time.time()
|
|
|
|
with self.lock:
|
|
if device_id not in self.devices:
|
|
# New device - mark as online
|
|
self.devices[device_id] = DeviceStatus(
|
|
device_id=device_id, last_seen=now, is_online=True, last_state_change=now
|
|
)
|
|
logger.info("device_registered", device_id=device_id)
|
|
self._emit_device_online(device_id, now)
|
|
self._save_status()
|
|
else:
|
|
# Existing device - update last_seen
|
|
device = self.devices[device_id]
|
|
device.last_seen = now
|
|
|
|
# If was offline, mark online and emit event
|
|
if not device.is_online:
|
|
device.is_online = True
|
|
device.last_state_change = now
|
|
logger.info("device_came_online", device_id=device_id)
|
|
self._emit_device_online(device_id, now)
|
|
self._save_status()
|
|
|
|
def _emit_device_online(self, device_id: str, timestamp: float):
|
|
"""Emit device_online event to Odoo."""
|
|
if not self.event_callback:
|
|
return
|
|
|
|
event = {
|
|
"event_uid": str(uuid.uuid4()),
|
|
"event_type": "device_online",
|
|
"device_id": device_id,
|
|
"timestamp": datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat(),
|
|
"payload": {
|
|
"last_seen": datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat(),
|
|
"reason": "activity_detected",
|
|
},
|
|
}
|
|
|
|
try:
|
|
self.event_callback(event)
|
|
except Exception as e:
|
|
logger.error("failed_to_emit_device_online", device_id=device_id, error=str(e))
|
|
|
|
def _emit_device_offline(self, device_id: str, timestamp: float, reason: str = "timeout"):
|
|
"""Emit device_offline event to Odoo."""
|
|
if not self.event_callback:
|
|
return
|
|
|
|
device = self.devices.get(device_id)
|
|
if not device:
|
|
return
|
|
|
|
event = {
|
|
"event_uid": str(uuid.uuid4()),
|
|
"event_type": "device_offline",
|
|
"device_id": device_id,
|
|
"timestamp": datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat(),
|
|
"payload": {
|
|
"last_seen": datetime.fromtimestamp(device.last_seen, tz=timezone.utc).isoformat(),
|
|
"reason": reason,
|
|
"offline_duration_s": int(timestamp - device.last_seen),
|
|
},
|
|
}
|
|
|
|
try:
|
|
self.event_callback(event)
|
|
except Exception as e:
|
|
logger.error("failed_to_emit_device_offline", device_id=device_id, error=str(e))
|
|
|
|
def _monitor_loop(self):
|
|
"""Background thread that checks for device timeouts."""
|
|
logger.info("device_status_monitor_started")
|
|
|
|
while not self.stop_flag.is_set():
|
|
try:
|
|
now = time.time()
|
|
|
|
with self.lock:
|
|
for device_id, device in self.devices.items():
|
|
# Check if device timed out
|
|
if device.is_online:
|
|
elapsed = now - device.last_seen
|
|
if elapsed > self.timeout_seconds:
|
|
# Mark offline
|
|
device.is_online = False
|
|
device.last_state_change = now
|
|
logger.warning(
|
|
"device_went_offline",
|
|
device_id=device_id,
|
|
elapsed_s=int(elapsed),
|
|
timeout_s=self.timeout_seconds,
|
|
)
|
|
self._emit_device_offline(device_id, now, reason="timeout")
|
|
self._save_status()
|
|
|
|
# Sleep with interruptible wait
|
|
self.stop_flag.wait(self.check_interval_seconds)
|
|
|
|
except Exception as e:
|
|
logger.error("monitor_loop_error", error=str(e))
|
|
self.stop_flag.wait(1)
|
|
|
|
logger.info("device_status_monitor_stopped")
|
|
|
|
def start(self):
|
|
"""Start the background monitoring thread."""
|
|
if self.monitor_thread and self.monitor_thread.is_alive():
|
|
logger.warning("monitor_already_running")
|
|
return
|
|
|
|
self.stop_flag.clear()
|
|
self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
|
|
self.monitor_thread.start()
|
|
logger.info("device_status_monitor_thread_started")
|
|
|
|
def stop(self):
|
|
"""Stop the background monitoring thread."""
|
|
if not self.monitor_thread:
|
|
return
|
|
|
|
logger.info("stopping_device_status_monitor")
|
|
self.stop_flag.set()
|
|
self.monitor_thread.join(timeout=self.check_interval_seconds + 5)
|
|
|
|
# Save final state
|
|
self._save_status()
|
|
|
|
logger.info("device_status_monitor_stopped")
|
|
|
|
def get_status(self, device_id: str) -> dict[str, Any] | None:
|
|
"""
|
|
Get status for a specific device.
|
|
|
|
Returns:
|
|
Dict with device status or None if not found
|
|
"""
|
|
with self.lock:
|
|
device = self.devices.get(device_id)
|
|
if not device:
|
|
return None
|
|
|
|
return {
|
|
"device_id": device.device_id,
|
|
"is_online": device.is_online,
|
|
"last_seen": datetime.fromtimestamp(device.last_seen, tz=timezone.utc).isoformat(),
|
|
"last_state_change": datetime.fromtimestamp(
|
|
device.last_state_change, tz=timezone.utc
|
|
).isoformat(),
|
|
"seconds_since_seen": int(time.time() - device.last_seen),
|
|
}
|
|
|
|
def get_all_status(self) -> dict[str, dict[str, Any]]:
|
|
"""Get status for all devices."""
|
|
with self.lock:
|
|
now = time.time()
|
|
return {
|
|
device_id: {
|
|
"device_id": device.device_id,
|
|
"is_online": device.is_online,
|
|
"last_seen": datetime.fromtimestamp(device.last_seen, tz=timezone.utc).isoformat(),
|
|
"last_state_change": datetime.fromtimestamp(
|
|
device.last_state_change, tz=timezone.utc
|
|
).isoformat(),
|
|
"seconds_since_seen": int(now - device.last_seen),
|
|
}
|
|
for device_id, device in self.devices.items()
|
|
}
|