Phase 3.3: Harden error handling and recovery patterns

- Add optional Odoo circuit-breaker for transient failures
- Unify timeout handling in Odoo and MQTT clients
- Improve transient error classification (timeout/connection/5xx/429)
- Add focused unit tests for recovery and circuit-breaker behavior
- Mark Phase 3.3 tasks as completed in optimization plan
This commit is contained in:
Matthias Lotz 2026-02-19 19:07:13 +01:00
parent f7b5a28f9a
commit 080a7275d9
4 changed files with 171 additions and 9 deletions

View File

@ -819,10 +819,10 @@ logger.info("device_added", device_id=device_id, topic=mqtt_topic)
**Dateien:** `mqtt_client.py`, `odoo_client.py`, `event_queue.py`
**Aufgaben:**
- [ ] Retry-Logic mit Exponential Backoff dokumentieren
- [ ] Circuit-Breaker-Pattern für Odoo-API (optional)
- [ ] Timeout-Handling vereinheitlichen
- [ ] Error-Recovery-Tests schreiben
- [x] Retry-Logic mit Exponential Backoff dokumentieren
- [x] Circuit-Breaker-Pattern für Odoo-API (optional)
- [x] Timeout-Handling vereinheitlichen
- [x] Error-Recovery-Tests schreiben
**Erfolgskriterien:**
- ✅ Transient Errors werden automatisch recovered

View File

@ -12,6 +12,8 @@ from exceptions import MQTTConnectionError
logger = structlog.get_logger()
MQTT_CONNECT_TIMEOUT_S = 10
class MQTTClient:
"""MQTT Client wrapper for device event reception."""
@ -144,7 +146,7 @@ class MQTTClient:
self.client.disconnect()
logger.info("mqtt_client_stopped")
def wait_for_connection(self, timeout: int = 10) -> bool:
def wait_for_connection(self, timeout: int = MQTT_CONNECT_TIMEOUT_S) -> bool:
"""
Wait for connection to be established.
@ -266,7 +268,7 @@ class MQTTClient:
self._loop_started = True
# Wait for connection to establish
connected = self.wait_for_connection(timeout=10)
connected = self.wait_for_connection(timeout=MQTT_CONNECT_TIMEOUT_S)
if connected:
logger.info(

View File

@ -1,5 +1,6 @@
"""Odoo API Client - handles communication with Odoo REST API."""
import time
from typing import Any
import requests
@ -10,6 +11,8 @@ from exceptions import OdooAPIError
logger = structlog.get_logger()
ODOO_REQUEST_TIMEOUT_S = 10
class MockOdooClient:
"""Mock Odoo client for standalone testing."""
@ -65,7 +68,17 @@ class MockOdooClient:
class OdooClient:
"""Real Odoo API client using REST API."""
def __init__(self, base_url: str, database: str, username: str, api_key: str):
def __init__(
self,
base_url: str,
database: str,
username: str,
api_key: str,
request_timeout_s: int = ODOO_REQUEST_TIMEOUT_S,
circuit_breaker_enabled: bool = True,
circuit_breaker_fail_threshold: int = 5,
circuit_breaker_recovery_s: int = 30,
):
"""
Initialize Odoo REST API client.
@ -79,8 +92,20 @@ class OdooClient:
self.database = database
self.username = username
self.api_key = api_key
self.request_timeout_s = request_timeout_s
self.circuit_breaker_enabled = circuit_breaker_enabled
self.circuit_breaker_fail_threshold = circuit_breaker_fail_threshold
self.circuit_breaker_recovery_s = circuit_breaker_recovery_s
self._consecutive_failures = 0
self._circuit_open_until_monotonic = 0.0
self.session: requests.Session = requests.Session()
logger.info("odoo_client_initialized", base_url=base_url, database=database)
logger.info(
"odoo_client_initialized",
base_url=base_url,
database=database,
timeout_s=request_timeout_s,
circuit_breaker_enabled=circuit_breaker_enabled,
)
# Initialize HTTP session
self.session.headers.update(
@ -89,6 +114,25 @@ class OdooClient:
}
)
def _is_circuit_open(self) -> bool:
if not self.circuit_breaker_enabled:
return False
return time.monotonic() < self._circuit_open_until_monotonic
def _record_success(self) -> None:
self._consecutive_failures = 0
self._circuit_open_until_monotonic = 0.0
def _record_transient_failure(self) -> None:
self._consecutive_failures += 1
if self.circuit_breaker_enabled and self._consecutive_failures >= self.circuit_breaker_fail_threshold:
self._circuit_open_until_monotonic = time.monotonic() + self.circuit_breaker_recovery_s
logger.warning(
"odoo_circuit_opened",
failures=self._consecutive_failures,
recovery_s=self.circuit_breaker_recovery_s,
)
def send_event(self, event: dict[str, Any]) -> dict[str, Any]:
"""
Send event to Odoo via POST /ows/iot/event.
@ -104,6 +148,13 @@ class OdooClient:
"""
url = f"{self.base_url}/ows/iot/event"
if self._is_circuit_open():
logger.warning("odoo_circuit_open_reject", event_type=event.get("event_type"))
raise OdooAPIError(
"odoo_circuit_open",
details={"retry_after_s": self.circuit_breaker_recovery_s},
)
try:
logger.debug(
"odoo_event_posting",
@ -116,9 +167,17 @@ class OdooClient:
response = self.session.post(
url,
json={"jsonrpc": "2.0", "method": "call", "params": event, "id": None},
timeout=10,
timeout=self.request_timeout_s,
)
status_code = response.status_code
if status_code >= 500 or status_code == 429:
self._record_transient_failure()
raise OdooAPIError(
f"Odoo transient HTTP error: {status_code}",
status_code=status_code,
)
response.raise_for_status()
data = response.json()
if not isinstance(data, dict):
@ -152,6 +211,8 @@ class OdooClient:
response=result,
)
self._record_success()
logger.info(
"odoo_event_sent",
event_uid=event.get("event_uid"),
@ -159,6 +220,10 @@ class OdooClient:
)
return result
except (requests.Timeout, requests.ConnectionError) as e:
self._record_transient_failure()
logger.error("odoo_event_timeout_or_connection_error", error=str(e))
raise OdooAPIError("Odoo request timeout/connection error", details={"error": str(e)}) from e
except Exception as e:
logger.error("odoo_event_send_failed", error=str(e))
raise

View File

@ -0,0 +1,95 @@
"""Unit tests for OdooClient error recovery and circuit-breaker behavior."""
from typing import Any, cast
from unittest.mock import Mock
import pytest
import requests
from clients.odoo_client import OdooClient
from exceptions import OdooAPIError
def _build_event() -> dict:
return {
"event_uid": "evt-1",
"event_type": "session_heartbeat",
"device_id": "device-1",
"timestamp": "2026-02-19T17:00:00Z",
"payload": {},
}
def _build_success_response() -> Mock:
response = Mock()
response.status_code = 200
response.raise_for_status = Mock()
response.json = Mock(return_value={"result": {"code": 200, "status": "success"}})
return response
def test_circuit_breaker_opens_after_transient_failures() -> None:
client = OdooClient(
base_url="http://odoo-dev:8069",
database="odoo",
username="admin",
api_key="token",
circuit_breaker_enabled=True,
circuit_breaker_fail_threshold=3,
circuit_breaker_recovery_s=30,
)
timeout_error = requests.Timeout("timeout")
post_mock = Mock(side_effect=timeout_error)
cast(Any, client.session).post = post_mock
for _ in range(3):
with pytest.raises(OdooAPIError):
client.send_event(_build_event())
assert post_mock.call_count == 3
with pytest.raises(OdooAPIError, match="odoo_circuit_open"):
client.send_event(_build_event())
assert post_mock.call_count == 3
def test_circuit_breaker_recovers_after_cooldown(monkeypatch: pytest.MonkeyPatch) -> None:
now = [100.0]
monkeypatch.setattr("clients.odoo_client.time.monotonic", lambda: now[0])
client = OdooClient(
base_url="http://odoo-dev:8069",
database="odoo",
username="admin",
api_key="token",
circuit_breaker_enabled=True,
circuit_breaker_fail_threshold=2,
circuit_breaker_recovery_s=5,
)
timeout_error = requests.Timeout("timeout")
post_mock = Mock(side_effect=timeout_error)
cast(Any, client.session).post = post_mock
with pytest.raises(OdooAPIError):
client.send_event(_build_event())
with pytest.raises(OdooAPIError):
client.send_event(_build_event())
assert post_mock.call_count == 2
now[0] = 102.0
with pytest.raises(OdooAPIError, match="odoo_circuit_open"):
client.send_event(_build_event())
assert post_mock.call_count == 2
now[0] = 106.0
success_post_mock = Mock(return_value=_build_success_response())
cast(Any, client.session).post = success_post_mock
result = client.send_event(_build_event())
assert success_post_mock.call_count == 1
assert result.get("code") == 200