feat: improve agent deletion handling by ignoring missing gateway agents

This commit is contained in:
Abhimanyu Saharan
2026-02-13 02:37:57 +05:30
parent 8db9f25db2
commit 303ce769a1
3 changed files with 177 additions and 7 deletions

View File

@@ -37,6 +37,15 @@ if TYPE_CHECKING:
from app.models.boards import Board from app.models.boards import Board
def _is_missing_gateway_agent_error(exc: OpenClawGatewayError) -> bool:
message = str(exc).lower()
if not message:
return False
if any(marker in message for marker in ("unknown agent", "no such agent", "agent does not exist")):
return True
return "agent" in message and "not found" in message
async def delete_board(session: AsyncSession, *, board: Board) -> OkResponse: async def delete_board(session: AsyncSession, *, board: Board) -> OkResponse:
"""Delete a board and all dependent records, cleaning gateway state when configured.""" """Delete a board and all dependent records, cleaning gateway state when configured."""
agents = await Agent.objects.filter_by(board_id=board.id).all(session) agents = await Agent.objects.filter_by(board_id=board.id).all(session)
@@ -46,17 +55,19 @@ async def delete_board(session: AsyncSession, *, board: Board) -> OkResponse:
gateway = await require_gateway_for_board(session, board, require_workspace_root=True) gateway = await require_gateway_for_board(session, board, require_workspace_root=True)
# Ensure URL is present (required for gateway cleanup calls). # Ensure URL is present (required for gateway cleanup calls).
gateway_client_config(gateway) gateway_client_config(gateway)
try: for agent in agents:
for agent in agents: try:
await OpenClawGatewayProvisioner().delete_agent_lifecycle( await OpenClawGatewayProvisioner().delete_agent_lifecycle(
agent=agent, agent=agent,
gateway=gateway, gateway=gateway,
) )
except OpenClawGatewayError as exc: except OpenClawGatewayError as exc:
raise HTTPException( if _is_missing_gateway_agent_error(exc):
status_code=status.HTTP_502_BAD_GATEWAY, continue
detail=f"Gateway cleanup failed: {exc}", raise HTTPException(
) from exc status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"Gateway cleanup failed: {exc}",
) from exc
if task_ids: if task_ids:
await crud.delete_where( await crud.delete_where(

View File

@@ -3,6 +3,7 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass, field from dataclasses import dataclass, field
from types import SimpleNamespace
from uuid import UUID, uuid4 from uuid import UUID, uuid4
import pytest import pytest
@@ -345,3 +346,92 @@ async def test_control_plane_upsert_agent_handles_already_exists(monkeypatch):
assert calls[0][0] == "agents.create" assert calls[0][0] == "agents.create"
assert calls[1][0] == "agents.update" assert calls[1][0] == "agents.update"
def test_is_missing_agent_error_matches_gateway_agent_not_found() -> None:
    """An 'agent ... not found' error is classified as missing; a connection error is not."""
    missing_agent = agent_provisioning.OpenClawGatewayError('agent "mc-abc" not found')
    connection_failure = agent_provisioning.OpenClawGatewayError("dial tcp: connection refused")

    assert agent_provisioning._is_missing_agent_error(missing_agent)
    assert not agent_provisioning._is_missing_agent_error(connection_failure)
@pytest.mark.asyncio
async def test_delete_agent_lifecycle_ignores_missing_gateway_agent(monkeypatch) -> None:
    """A 'not found' error from agent deletion must not stop the session deletion."""

    class _ControlPlaneStub:
        """Control-plane double: agent deletion fails as 'not found', sessions are recorded."""

        def __init__(self) -> None:
            self.deleted_sessions: list[str] = []

        async def delete_agent(self, agent_id: str, *, delete_files: bool = True) -> None:
            _ = (agent_id, delete_files)
            raise agent_provisioning.OpenClawGatewayError('agent "mc-abc" not found')

        async def delete_agent_session(self, session_key: str) -> None:
            self.deleted_sessions.append(session_key)

    control_plane = _ControlPlaneStub()

    def _stub_control_plane(_g):
        # Every gateway resolves to the same stub so its recorded calls can be inspected.
        return control_plane

    monkeypatch.setattr(agent_provisioning, "_control_plane_for_gateway", _stub_control_plane)

    gateway = _GatewayStub(
        id=uuid4(),
        name="Acme",
        url="ws://gateway.example/ws",
        token=None,
        workspace_root="/tmp/openclaw",
    )
    agent = SimpleNamespace(
        id=uuid4(),
        name="Worker",
        board_id=uuid4(),
        openclaw_session_id=None,
        is_board_lead=False,
    )

    provisioner = agent_provisioning.OpenClawGatewayProvisioner()
    await provisioner.delete_agent_lifecycle(
        agent=agent,  # type: ignore[arg-type]
        gateway=gateway,  # type: ignore[arg-type]
        delete_files=True,
        delete_session=True,
    )

    # The call returned without raising and exactly one session deletion was recorded.
    assert len(control_plane.deleted_sessions) == 1
@pytest.mark.asyncio
async def test_delete_agent_lifecycle_raises_on_non_missing_agent_error(monkeypatch) -> None:
    """A gateway error that is not a 'missing agent' error must propagate to the caller."""

    class _ControlPlaneStub:
        """Control-plane double: agent deletion times out; session deletion is forbidden."""

        async def delete_agent(self, agent_id: str, *, delete_files: bool = True) -> None:
            _ = (agent_id, delete_files)
            raise agent_provisioning.OpenClawGatewayError("gateway timeout")

        async def delete_agent_session(self, session_key: str) -> None:
            _ = session_key
            raise AssertionError("delete_agent_session should not be called")

    def _stub_control_plane(_g):
        # A fresh stub per lookup, as the stub keeps no state to inspect afterwards.
        return _ControlPlaneStub()

    monkeypatch.setattr(
        agent_provisioning,
        "_control_plane_for_gateway",
        _stub_control_plane,
    )

    gateway = _GatewayStub(
        id=uuid4(),
        name="Acme",
        url="ws://gateway.example/ws",
        token=None,
        workspace_root="/tmp/openclaw",
    )
    agent = SimpleNamespace(
        id=uuid4(),
        name="Worker",
        board_id=uuid4(),
        openclaw_session_id=None,
        is_board_lead=False,
    )

    provisioner = agent_provisioning.OpenClawGatewayProvisioner()
    with pytest.raises(agent_provisioning.OpenClawGatewayError):
        await provisioner.delete_agent_lifecycle(
            agent=agent,  # type: ignore[arg-type]
            gateway=gateway,  # type: ignore[arg-type]
            delete_files=True,
            delete_session=True,
        )

View File

@@ -4,13 +4,16 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass, field from dataclasses import dataclass, field
from types import SimpleNamespace
from typing import Any from typing import Any
from uuid import uuid4 from uuid import uuid4
import pytest import pytest
from app.api import boards from app.api import boards
import app.services.board_lifecycle as board_lifecycle
from app.models.boards import Board from app.models.boards import Board
from app.services.openclaw.gateway_rpc import OpenClawGatewayError
_NO_EXEC_RESULTS_ERROR = "No more exec_results left for session.exec" _NO_EXEC_RESULTS_ERROR = "No more exec_results left for session.exec"
@@ -85,3 +88,69 @@ async def test_delete_board_cleans_tag_assignments_before_tasks() -> None:
deleted_table_names = [statement.table.name for statement in session.executed] deleted_table_names = [statement.table.name for statement in session.executed]
assert "tag_assignments" in deleted_table_names assert "tag_assignments" in deleted_table_names
assert deleted_table_names.index("tag_assignments") < deleted_table_names.index("tasks") assert deleted_table_names.index("tag_assignments") < deleted_table_names.index("tasks")
@pytest.mark.asyncio
async def test_delete_board_ignores_missing_gateway_agent(monkeypatch: pytest.MonkeyPatch) -> None:
    """Deleting a board should continue when gateway reports agent not found."""
    session: Any = _FakeSession(exec_results=[[]])
    board = Board(
        id=uuid4(),
        organization_id=uuid4(),
        name="Demo Board",
        slug="demo-board",
        gateway_id=uuid4(),
    )
    agent = SimpleNamespace(id=uuid4(), board_id=board.id)
    gateway = SimpleNamespace(url="ws://gateway.example/ws", token=None, workspace_root="/tmp")
    # One entry is appended per call to the patched delete_agent_lifecycle.
    lifecycle_calls: list[object] = []

    async def _fake_all(_session: object) -> list[object]:
        return [agent]

    def _fake_filter_by(**_kwargs: object) -> SimpleNamespace:
        # Whatever the filter, the resulting query yields the single fake agent.
        return SimpleNamespace(all=_fake_all)

    async def _fake_require_gateway_for_board(
        _session: object,
        _board: object,
        *,
        require_workspace_root: bool,
    ) -> object:
        _ = require_workspace_root
        return gateway

    async def _fake_delete_agent_lifecycle(
        _self: object,
        *,
        agent: object,
        gateway: object,
        delete_files: bool = True,
        delete_session: bool = True,
    ) -> str | None:
        _ = (gateway, delete_files, delete_session)
        lifecycle_calls.append(agent)
        raise OpenClawGatewayError('agent "mc-worker" not found')

    monkeypatch.setattr(
        board_lifecycle.Agent,
        "objects",
        SimpleNamespace(filter_by=_fake_filter_by),
    )
    monkeypatch.setattr(
        board_lifecycle,
        "require_gateway_for_board",
        _fake_require_gateway_for_board,
    )
    monkeypatch.setattr(board_lifecycle, "gateway_client_config", lambda _gateway: None)
    monkeypatch.setattr(
        board_lifecycle.OpenClawGatewayProvisioner,
        "delete_agent_lifecycle",
        _fake_delete_agent_lifecycle,
    )

    await boards.delete_board(
        session=session,
        board=board,
    )

    # Gateway cleanup was attempted once, and the board deletion was still committed.
    assert len(lifecycle_calls) == 1
    assert board in session.deleted
    assert session.committed == 1