fix(device-availability): fix timeout monitor and bridge-restart race conditions
- status_monitor: add an availability_managed set; _monitor_loop skips devices in this set, so the LWT/availability topic is the sole source of online/offline state.
- device_manager: register the device with status_monitor.set_availability_managed() so the monitor actually skips it (previously the monitor had no knowledge of DeviceManager.availability_managed).
- mqtt_bridge: remove the blanket 'reset all devices to offline' on bridge restart; it caused a race condition in which the cron reset the state AFTER the bridge had already sent device_online events via retained MQTT messages. Cleanup of stale running sessions is kept (still needed).
This commit is contained in:
parent
2fb45a6582
commit
0bce1e1bed
|
|
@ -191,17 +191,17 @@ class MqttBridge(models.Model):
|
|||
),
|
||||
})
|
||||
|
||||
# Bridge came back online → push fresh config and reset device states.
|
||||
# This ensures Odoo and Bridge are in sync after a restart.
|
||||
# Bridge came back online → push fresh config and close stale sessions.
|
||||
# Device states are NOT reset here - instead we rely on the bridge
|
||||
# sending device_online/device_offline events after reconnect.
|
||||
# (Resetting here would race with the device_online events that
|
||||
# the bridge sends immediately on startup via retained MQTT messages.)
|
||||
if was_offline:
|
||||
_logger.info(
|
||||
f"Bridge {bridge.name} came back online – pushing config and resetting device states"
|
||||
f"Bridge {bridge.name} came back online – pushing config and closing stale sessions"
|
||||
)
|
||||
# Reset stale device states to 'offline' so UI is consistent
|
||||
# until the bridge reports real events.
|
||||
devices = self.env['ows.mqtt.device'].sudo().search([('active', '=', True)])
|
||||
devices.write({'state': 'offline'})
|
||||
# Also close any stale 'running' sessions
|
||||
# Close any stale 'running' sessions - they couldn't have ended
|
||||
# cleanly while the bridge was offline
|
||||
stale_sessions = self.env['ows.mqtt.session'].sudo().search([
|
||||
('status', '=', 'running')
|
||||
])
|
||||
|
|
|
|||
|
|
@ -151,6 +151,8 @@ class DeviceManager:
|
|||
self.device_map[avail_topic] = device_id
|
||||
self.mqtt_client.subscribe(avail_topic)
|
||||
self.availability_managed.add(device_id)
|
||||
if self.status_monitor:
|
||||
self.status_monitor.set_availability_managed(device_id)
|
||||
logger.info("availability_topic_subscribed", device_id=device_id, topic=avail_topic)
|
||||
|
||||
status_topic = pcfg.get("status_topic", "")
|
||||
|
|
|
|||
|
|
@ -64,6 +64,9 @@ class DeviceStatusMonitor:
|
|||
self.devices: dict[str, DeviceStatus] = {}
|
||||
self.lock = threading.Lock()
|
||||
|
||||
# Devices managed by LWT/availability topic - timeout monitor skips them
|
||||
self.availability_managed: set[str] = set()
|
||||
|
||||
# Background thread
|
||||
self.monitor_thread: threading.Thread | None = None
|
||||
self.stop_flag = threading.Event()
|
||||
|
|
@ -152,6 +155,10 @@ class DeviceStatusMonitor:
|
|||
self._emit_device_online(device_id, now)
|
||||
self._save_status()
|
||||
|
||||
def set_availability_managed(self, device_id: str):
|
||||
"""Add device_id to self.availability_managed; _monitor_loop skips every device in that set before its timeout check."""
|
||||
self.availability_managed.add(device_id)
|
||||
|
||||
def mark_online_silent(self, device_id: str):
|
||||
"""Mark device as online and update last_seen WITHOUT emitting an event.
|
||||
|
||||
|
|
@ -232,6 +239,9 @@ class DeviceStatusMonitor:
|
|||
|
||||
with self.lock:
|
||||
for device_id, device in self.devices.items():
|
||||
# Skip devices whose online/offline is managed by LWT
|
||||
if device_id in self.availability_managed:
|
||||
continue
|
||||
# Check if device timed out
|
||||
if device.is_online:
|
||||
elapsed = now - device.last_seen
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user