refactor: update provisioning service references to use OpenClawGatewayProvisioner
This commit is contained in:
@@ -1,25 +1,23 @@
|
||||
"""Provisioning, template sync, and board-lead lifecycle orchestration."""
|
||||
"""Gateway-only provisioning and lifecycle orchestration.
|
||||
|
||||
This module is the low-level layer that talks to the OpenClaw gateway RPC surface.
|
||||
DB-backed workflows (template sync, lead-agent record creation) live in
|
||||
`app.services.openclaw.provisioning_db`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Awaitable, Callable
|
||||
from contextlib import suppress
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, TypeVar
|
||||
from uuid import UUID, uuid4
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from uuid import uuid4
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader, StrictUndefined, select_autoescape
|
||||
from sqlalchemy import func
|
||||
from sqlmodel import col, select
|
||||
|
||||
from app.core.agent_tokens import generate_agent_token, hash_agent_token, verify_agent_token
|
||||
from app.core.config import settings
|
||||
from app.core.time import utcnow
|
||||
from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig
|
||||
from app.integrations.openclaw_gateway import (
|
||||
OpenClawGatewayError,
|
||||
@@ -28,15 +26,9 @@ from app.integrations.openclaw_gateway import (
|
||||
send_message,
|
||||
)
|
||||
from app.models.agents import Agent
|
||||
from app.models.board_memory import BoardMemory
|
||||
from app.models.boards import Board
|
||||
from app.models.gateways import Gateway
|
||||
from app.schemas.gateways import GatewayTemplatesSyncError, GatewayTemplatesSyncResult
|
||||
from app.services.openclaw.constants import (
|
||||
_NON_TRANSIENT_GATEWAY_ERROR_MARKERS,
|
||||
_SECURE_RANDOM,
|
||||
_TOOLS_KV_RE,
|
||||
_TRANSIENT_GATEWAY_ERROR_MARKERS,
|
||||
DEFAULT_CHANNEL_HEARTBEAT_VISIBILITY,
|
||||
DEFAULT_GATEWAY_FILES,
|
||||
DEFAULT_HEARTBEAT_CONFIG,
|
||||
@@ -50,11 +42,8 @@ from app.services.openclaw.constants import (
|
||||
)
|
||||
from app.services.openclaw.internal import agent_key as _agent_key
|
||||
from app.services.openclaw.shared import GatewayAgentIdentity
|
||||
from app.services.organizations import get_org_owner_user
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
|
||||
from app.models.users import User
|
||||
|
||||
|
||||
@@ -64,7 +53,21 @@ class ProvisionOptions:
|
||||
|
||||
action: str = "provision"
|
||||
force_bootstrap: bool = False
|
||||
reset_session: bool = False
|
||||
|
||||
|
||||
def _is_missing_session_error(exc: OpenClawGatewayError) -> bool:
    """Return True when the error text says the gateway session is absent.

    The check is a case-insensitive substring match of the exception message
    against a fixed list of phrases; an empty message never matches.
    """
    known_phrases = (
        "not found",
        "unknown session",
        "no such session",
        "session does not exist",
    )
    text = str(exc).lower()
    for phrase in known_phrases:
        if phrase in text:
            return True
    return False
|
||||
|
||||
|
||||
def _repo_root() -> Path:
|
||||
@@ -295,9 +298,11 @@ def _render_agent_files(
|
||||
else _heartbeat_template_name(agent)
|
||||
)
|
||||
heartbeat_path = _templates_root() / heartbeat_template
|
||||
if heartbeat_path.exists():
|
||||
rendered[name] = env.get_template(heartbeat_template).render(**context).strip()
|
||||
continue
|
||||
if not heartbeat_path.exists():
|
||||
msg = f"Missing template file: {heartbeat_template}"
|
||||
raise FileNotFoundError(msg)
|
||||
rendered[name] = env.get_template(heartbeat_template).render(**context).strip()
|
||||
continue
|
||||
override = overrides.get(name)
|
||||
if override:
|
||||
rendered[name] = env.from_string(override).render(**context).strip()
|
||||
@@ -306,14 +311,10 @@ def _render_agent_files(
|
||||
template_overrides[name] if template_overrides and name in template_overrides else name
|
||||
)
|
||||
path = _templates_root() / template_name
|
||||
if path.exists():
|
||||
rendered[name] = env.get_template(template_name).render(**context).strip()
|
||||
continue
|
||||
if name == "MEMORY.md":
|
||||
# Back-compat fallback for gateways that do not ship MEMORY.md.
|
||||
rendered[name] = "# MEMORY\n\nBootstrap pending.\n"
|
||||
continue
|
||||
rendered[name] = ""
|
||||
if not path.exists():
|
||||
msg = f"Missing template file: {template_name}"
|
||||
raise FileNotFoundError(msg)
|
||||
rendered[name] = env.get_template(template_name).render(**context).strip()
|
||||
return rendered
|
||||
|
||||
|
||||
@@ -330,6 +331,10 @@ class GatewayAgentRegistration:
|
||||
class GatewayControlPlane(ABC):
|
||||
"""Abstract gateway runtime interface used by agent lifecycle managers."""
|
||||
|
||||
@abstractmethod
async def health(self) -> object:
    """Probe gateway health; concrete control planes must override this."""
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def ensure_agent_session(self, session_key: str, *, label: str | None = None) -> None:
    """Ensure a session exists for `session_key`; must be overridden."""
    raise NotImplementedError
|
||||
@@ -354,6 +359,10 @@ class GatewayControlPlane(ABC):
|
||||
async def list_agent_files(self, agent_id: str) -> dict[str, dict[str, Any]]:
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def get_agent_file_payload(self, *, agent_id: str, name: str) -> object:
    """Fetch the raw payload for one agent file; must be overridden."""
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def set_agent_file(self, *, agent_id: str, name: str, content: str) -> None:
    """Write `content` to the named agent file; must be overridden."""
    raise NotImplementedError
|
||||
@@ -372,6 +381,9 @@ class OpenClawGatewayControlPlane(GatewayControlPlane):
|
||||
def __init__(self, config: GatewayClientConfig) -> None:
    """Store the gateway client configuration passed to every RPC call."""
    self._config = config
|
||||
|
||||
async def health(self) -> object:
    """Call the gateway `health` RPC and return its payload unchanged."""
    gateway_config = self._config
    return await openclaw_call("health", config=gateway_config)
|
||||
|
||||
async def ensure_agent_session(self, session_key: str, *, label: str | None = None) -> None:
|
||||
if not session_key:
|
||||
return
|
||||
@@ -389,7 +401,7 @@ class OpenClawGatewayControlPlane(GatewayControlPlane):
|
||||
|
||||
async def upsert_agent(self, registration: GatewayAgentRegistration) -> None:
|
||||
# Prefer an idempotent "create then update" flow.
|
||||
# - Avoids a dependency on `agents.list` (which may surface gateway defaults like `main`).
|
||||
# - Avoids enumerating gateway agents for existence checks.
|
||||
# - Ensures we always hit the "create" RPC first, per lifecycle expectations.
|
||||
try:
|
||||
await openclaw_call(
|
||||
@@ -402,7 +414,9 @@ class OpenClawGatewayControlPlane(GatewayControlPlane):
|
||||
)
|
||||
except OpenClawGatewayError as exc:
|
||||
message = str(exc).lower()
|
||||
if not any(marker in message for marker in ("already", "exist", "duplicate", "conflict")):
|
||||
if not any(
|
||||
marker in message for marker in ("already", "exist", "duplicate", "conflict")
|
||||
):
|
||||
raise
|
||||
await openclaw_call(
|
||||
"agents.update",
|
||||
@@ -446,6 +460,13 @@ class OpenClawGatewayControlPlane(GatewayControlPlane):
|
||||
index[name] = dict(item)
|
||||
return index
|
||||
|
||||
async def get_agent_file_payload(self, *, agent_id: str, name: str) -> object:
    """Call `agents.files.get` for one agent file and return the raw payload."""
    params = {"agentId": agent_id, "name": name}
    return await openclaw_call("agents.files.get", params, config=self._config)
|
||||
|
||||
async def set_agent_file(self, *, agent_id: str, name: str, content: str) -> None:
|
||||
await openclaw_call(
|
||||
"agents.files.set",
|
||||
@@ -654,10 +675,6 @@ class BaseAgentLifecycleManager(ABC):
|
||||
existing_files=existing_files,
|
||||
action=options.action,
|
||||
)
|
||||
if options.reset_session:
|
||||
# Session resets are useful but should never block file sync.
|
||||
with suppress(OpenClawGatewayError):
|
||||
await self._control_plane.reset_agent_session(session_key)
|
||||
|
||||
|
||||
class BoardAgentLifecycleManager(BaseAgentLifecycleManager):
|
||||
@@ -752,21 +769,8 @@ def _wakeup_text(agent: Agent, *, verb: str) -> str:
|
||||
)
|
||||
|
||||
|
||||
class OpenClawProvisioningService:
|
||||
"""High-level agent provisioning interface (create -> files -> wake).
|
||||
|
||||
This is the public entrypoint for agent lifecycle orchestration. Internals are
|
||||
implemented as module-private helpers and lifecycle manager classes.
|
||||
"""
|
||||
|
||||
def __init__(self, session: AsyncSession | None = None) -> None:
    """Remember the optional DB session used by DB-backed operations."""
    self._session = session
|
||||
|
||||
def _require_session(self) -> AsyncSession:
    """Return the stored session.

    Raises:
        ValueError: if no session was supplied at construction time.
    """
    session = self._session
    if session is not None:
        return session
    msg = "AsyncSession is required for this operation"
    raise ValueError(msg)
|
||||
class OpenClawGatewayProvisioner:
|
||||
"""Gateway-only agent lifecycle interface (create -> files -> wake)."""
|
||||
|
||||
async def sync_gateway_agent_heartbeats(self, gateway: Gateway, agents: list[Agent]) -> None:
|
||||
"""Sync current Agent.heartbeat_config values to the gateway config."""
|
||||
@@ -807,7 +811,8 @@ class OpenClawProvisioningService:
|
||||
"""
|
||||
|
||||
if not gateway.url:
|
||||
return
|
||||
msg = "Gateway url is required"
|
||||
raise ValueError(msg)
|
||||
|
||||
# Guard against accidental main-agent provisioning without a board.
|
||||
if board is None and getattr(agent, "board_id", None) is not None:
|
||||
@@ -816,7 +821,9 @@ class OpenClawProvisioningService:
|
||||
|
||||
# Resolve session key and agent type.
|
||||
if board is None:
|
||||
session_key = (agent.openclaw_session_id or GatewayAgentIdentity.session_key(gateway) or "").strip()
|
||||
session_key = (
|
||||
agent.openclaw_session_id or GatewayAgentIdentity.session_key(gateway) or ""
|
||||
).strip()
|
||||
if not session_key:
|
||||
msg = "gateway main agent session_key is required"
|
||||
raise ValueError(msg)
|
||||
@@ -833,17 +840,16 @@ class OpenClawProvisioningService:
|
||||
session_key=session_key,
|
||||
auth_token=auth_token,
|
||||
user=user,
|
||||
options=ProvisionOptions(
|
||||
action=action,
|
||||
force_bootstrap=force_bootstrap,
|
||||
reset_session=False, # handled below
|
||||
),
|
||||
options=ProvisionOptions(action=action, force_bootstrap=force_bootstrap),
|
||||
session_label=agent.name or "Gateway Agent",
|
||||
)
|
||||
|
||||
if reset_session:
|
||||
with suppress(OpenClawGatewayError):
|
||||
try:
|
||||
await control_plane.reset_agent_session(session_key)
|
||||
except OpenClawGatewayError as exc:
|
||||
if not _is_missing_session_error(exc):
|
||||
raise
|
||||
|
||||
if not wake:
|
||||
return
|
||||
@@ -869,7 +875,8 @@ class OpenClawProvisioningService:
|
||||
"""Remove agent runtime state from the gateway (agent + optional session)."""
|
||||
|
||||
if not gateway.url:
|
||||
return None
|
||||
msg = "Gateway url is required"
|
||||
raise ValueError(msg)
|
||||
if not gateway.workspace_root:
|
||||
msg = "gateway_workspace_root is required"
|
||||
raise ValueError(msg)
|
||||
@@ -885,671 +892,16 @@ class OpenClawProvisioningService:
|
||||
|
||||
if delete_session:
|
||||
if agent.board_id is None:
|
||||
session_key = (agent.openclaw_session_id or GatewayAgentIdentity.session_key(gateway) or "").strip()
|
||||
session_key = (
|
||||
agent.openclaw_session_id or GatewayAgentIdentity.session_key(gateway) or ""
|
||||
).strip()
|
||||
else:
|
||||
session_key = _session_key(agent)
|
||||
if session_key:
|
||||
with suppress(OpenClawGatewayError):
|
||||
try:
|
||||
await control_plane.delete_agent_session(session_key)
|
||||
except OpenClawGatewayError as exc:
|
||||
if not _is_missing_session_error(exc):
|
||||
raise
|
||||
|
||||
return workspace_path
|
||||
|
||||
async def sync_gateway_templates(
    self,
    gateway: Gateway,
    options: GatewayTemplateSyncOptions,
) -> GatewayTemplatesSyncResult:
    """Synchronize AGENTS/TOOLS/etc templates to gateway-connected agents.

    Problems are recorded on the returned result rather than raised: a
    missing gateway URL, a failed health ping, or a `board_id` that does not
    belong to this gateway each end the sync early. Agents on paused boards
    are counted as skipped. The main (board-less) gateway agent is synced
    last, only when `options.include_main` is set and no agent sync asked to
    stop.

    Raises:
        ValueError: if the service was constructed without a session.
    """
    db = self._require_session()
    if options.user is None:
        # No explicit user supplied: fall back to the organization owner.
        owner = await get_org_owner_user(
            db,
            organization_id=gateway.organization_id,
        )
        options = GatewayTemplateSyncOptions(
            user=owner,
            include_main=options.include_main,
            reset_sessions=options.reset_sessions,
            rotate_tokens=options.rotate_tokens,
            force_bootstrap=options.force_bootstrap,
            board_id=options.board_id,
        )

    report = _base_result(
        gateway,
        include_main=options.include_main,
        reset_sessions=options.reset_sessions,
    )
    if not gateway.url:
        _append_sync_error(
            report,
            message="Gateway URL is not configured for this gateway.",
        )
        return report

    sync_ctx = _SyncContext(
        session=db,
        gateway=gateway,
        config=GatewayClientConfig(url=gateway.url, token=gateway.token),
        backoff=_GatewayBackoff(timeout_s=10 * 60, timeout_context="template sync"),
        options=options,
        provisioner=self,
    )
    gateway_reachable = await _ping_gateway(sync_ctx, report)
    if not gateway_reachable:
        return report

    gateway_boards = await Board.objects.filter_by(gateway_id=gateway.id).all(db)
    board_index = _boards_by_id(gateway_boards, board_id=options.board_id)
    if board_index is None:
        _append_sync_error(
            report,
            message="Board does not belong to this gateway.",
        )
        return report

    paused = await _paused_board_ids(db, list(board_index))
    agents = []
    if board_index:
        agent_query = Agent.objects.by_field_in("board_id", list(board_index)).order_by(
            col(Agent.created_at).asc(),
        )
        agents = await agent_query.all(db)

    aborted = False
    for agent in agents:
        owning_board = None if agent.board_id is None else board_index.get(agent.board_id)
        if owning_board is None:
            report.agents_skipped += 1
            _append_sync_error(
                report,
                agent=agent,
                message="Skipping agent: board not found for agent.",
            )
            continue
        if owning_board.id in paused:
            report.agents_skipped += 1
            continue
        # A truthy return means the per-agent sync asked to stop the whole run.
        if await _sync_one_agent(sync_ctx, report, agent, owning_board):
            aborted = True
            break

    if options.include_main and not aborted:
        await _sync_main_agent(sync_ctx, report)
    return report
|
||||
|
||||
@staticmethod
|
||||
def lead_session_key(board: Board) -> str:
|
||||
"""Return the deterministic session key for a board lead agent."""
|
||||
return f"agent:lead-{board.id}:main"
|
||||
|
||||
@staticmethod
|
||||
def lead_agent_name(_: Board) -> str:
|
||||
"""Return the default display name for board lead agents."""
|
||||
return "Lead Agent"
|
||||
|
||||
async def ensure_board_lead_agent(
    self,
    *,
    request: LeadAgentRequest,
) -> tuple[Agent, bool]:
    """Ensure a board has a lead agent; return `(agent, created)`.

    If a lead agent already exists for the board, its name, gateway id and
    session id are reconciled with the desired values (committing only when
    something changed) and `(existing, False)` is returned. Otherwise a new
    lead agent row is committed, gateway provisioning is attempted on a
    best-effort basis, and `(agent, True)` is returned.

    Raises:
        ValueError: if the service was constructed without a session.
    """
    db = self._require_session()
    board = request.board
    gateway = request.gateway
    lead_options = request.options

    lead_query = (
        select(Agent)
        .where(Agent.board_id == board.id)
        .where(col(Agent.is_board_lead).is_(True))
    )
    current_lead = (await db.exec(lead_query)).first()
    if current_lead:
        wanted = {
            "name": lead_options.agent_name or self.lead_agent_name(board),
            "gateway_id": gateway.id,
            "openclaw_session_id": self.lead_session_key(board),
        }
        dirty = False
        for attr, value in wanted.items():
            if getattr(current_lead, attr) != value:
                setattr(current_lead, attr, value)
                dirty = True
        if dirty:
            current_lead.updated_at = utcnow()
            db.add(current_lead)
            await db.commit()
            await db.refresh(current_lead)
        return current_lead, False

    # Defaults first; caller overrides win, but blank override values are ignored.
    profile: dict[str, Any] = {
        "role": "Board Lead",
        "communication_style": "direct, concise, practical",
        "emoji": ":gear:",
    }
    overrides = lead_options.identity_profile
    if overrides:
        for key, raw in overrides.items():
            cleaned = raw.strip()
            if cleaned:
                profile[key] = cleaned

    agent = Agent(
        name=lead_options.agent_name or self.lead_agent_name(board),
        status="provisioning",
        board_id=board.id,
        gateway_id=gateway.id,
        is_board_lead=True,
        heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(),
        identity_profile=profile,
        openclaw_session_id=self.lead_session_key(board),
        provision_requested_at=utcnow(),
        provision_action=lead_options.action,
    )
    # Only the hash is stored; the raw token is handed to provisioning below.
    raw_token = generate_agent_token()
    agent.agent_token_hash = hash_agent_token(raw_token)
    db.add(agent)
    await db.commit()
    await db.refresh(agent)

    try:
        await self.apply_agent_lifecycle(
            agent=agent,
            gateway=gateway,
            board=board,
            auth_token=raw_token,
            user=request.user,
            action=lead_options.action,
            wake=True,
            deliver_wakeup=True,
        )
    except OpenClawGatewayError:
        # Best-effort provisioning. The board/agent rows should still exist.
        pass

    return agent, True
|
||||
|
||||
|
||||
_T = TypeVar("_T")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class GatewayTemplateSyncOptions:
    """Immutable switches for one gateway template synchronization run."""

    # User passed through to provisioning; may be None.
    user: User | None
    # Whether the board-less gateway agent is synced after board agents.
    include_main: bool = True
    # Forwarded as `reset_session` to each agent lifecycle call.
    reset_sessions: bool = False
    # Allow issuing a fresh agent token when the existing one is unusable.
    rotate_tokens: bool = False
    # Forwarded as `force_bootstrap` to each agent lifecycle call.
    force_bootstrap: bool = False
    # When set, restrict the sync to this single board.
    board_id: UUID | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _SyncContext:
    """Immutable bundle of state shared by the template-sync helpers."""

    # DB session used for agent lookups and token rotation.
    session: AsyncSession
    # Gateway whose agents are being synced.
    gateway: Gateway
    # Client config used for direct gateway RPC calls.
    config: GatewayClientConfig
    # Retry/backoff policy applied to gateway calls.
    backoff: _GatewayBackoff
    # Options controlling this sync run.
    options: GatewayTemplateSyncOptions
    # Service whose `apply_agent_lifecycle` performs the provisioning.
    provisioner: OpenClawProvisioningService
|
||||
|
||||
|
||||
def _is_transient_gateway_error(exc: Exception) -> bool:
    """Return True when `exc` is a gateway error worth retrying.

    Only `OpenClawGatewayError` instances with a non-empty message qualify.
    Non-transient markers take precedence; otherwise the message must either
    mention both "503" and "websocket" or contain a transient marker.
    Matching is case-insensitive.
    """
    if not isinstance(exc, OpenClawGatewayError):
        return False
    text = str(exc).lower()
    if not text:
        return False
    for blocker in _NON_TRANSIENT_GATEWAY_ERROR_MARKERS:
        if blocker in text:
            return False
    if "503" in text and "websocket" in text:
        return True
    return any(marker in text for marker in _TRANSIENT_GATEWAY_ERROR_MARKERS)
|
||||
|
||||
|
||||
def _gateway_timeout_message(
    exc: OpenClawGatewayError,
    *,
    timeout_s: float,
    context: str,
) -> str:
    """Build the message used when gateway retries exhaust their deadline.

    The timeout is truncated to whole seconds and shown in whole minutes
    (integer division) once it reaches 120 seconds.
    """
    whole_seconds = int(timeout_s)
    if whole_seconds >= 120:
        duration = f"{whole_seconds // 60} minutes"
    else:
        duration = f"{whole_seconds} seconds"
    return f"Gateway unreachable after {duration} ({context} timeout). Last error: {exc}"
|
||||
|
||||
|
||||
class _GatewayBackoff:
    """Retry helper applying exponential backoff with jitter to gateway calls.

    Each `run` call gets its own deadline of `timeout_s` seconds. Errors that
    `_is_transient_gateway_error` accepts are retried with a delay that
    doubles after every failure (capped at `max_delay_s`); any other
    `OpenClawGatewayError` is re-raised immediately. The delay is reset to
    `base_delay_s` after a successful call.
    """

    def __init__(
        self,
        *,
        timeout_s: float = 10 * 60,
        base_delay_s: float = 0.75,
        max_delay_s: float = 30.0,
        jitter: float = 0.2,
        timeout_context: str = "gateway operation",
    ) -> None:
        """Configure the retry window, delay bounds and jitter fraction.

        `timeout_context` is only used to label the timeout error message.
        """
        self._timeout_s = timeout_s
        self._base_delay_s = base_delay_s
        self._max_delay_s = max_delay_s
        self._jitter = jitter
        self._timeout_context = timeout_context
        self._delay_s = base_delay_s

    def reset(self) -> None:
        """Restore the current delay to the configured base delay."""
        self._delay_s = self._base_delay_s

    @staticmethod
    async def _attempt(
        fn: Callable[[], Awaitable[_T]],
    ) -> tuple[_T | None, OpenClawGatewayError | None]:
        """Run `fn` once and return `(value, None)` or `(None, error)`.

        Retained for backward compatibility only. `run` does not use it,
        because a `(None, None)` result cannot distinguish a legitimate
        `None` return value from a missing one.
        """
        try:
            return await fn(), None
        except OpenClawGatewayError as exc:
            return None, exc

    async def run(self, fn: Callable[[], Awaitable[_T]]) -> _T:
        """Call `fn` until it succeeds, retrying transient gateway errors.

        Returns whatever `fn` returns, including `None`.

        Raises:
            OpenClawGatewayError: when `fn` fails with a non-transient error.
            TimeoutError: when transient failures persist past the deadline.
        """
        # Use per-call deadlines so long-running syncs can still tolerate a later
        # gateway restart without having an already-expired retry window.
        loop = asyncio.get_running_loop()
        deadline_s = loop.time() + self._timeout_s
        while True:
            try:
                value = await fn()
            except OpenClawGatewayError as exc:
                if not _is_transient_gateway_error(exc):
                    raise
                remaining = deadline_s - loop.time()
                if remaining <= 0:
                    raise TimeoutError(
                        _gateway_timeout_message(
                            exc,
                            timeout_s=self._timeout_s,
                            context=self._timeout_context,
                        ),
                    ) from exc

                sleep_s = min(self._delay_s, remaining)
                if self._jitter:
                    sleep_s *= 1.0 + _SECURE_RANDOM.uniform(
                        -self._jitter,
                        self._jitter,
                    )
                # Jitter may push the delay past the deadline; clamp it back.
                sleep_s = max(0.0, min(sleep_s, remaining))
                await asyncio.sleep(sleep_s)
                self._delay_s = min(self._delay_s * 2.0, self._max_delay_s)
                continue
            self.reset()
            return value
|
||||
|
||||
|
||||
async def _with_gateway_retry(
    fn: Callable[[], Awaitable[_T]],
    *,
    backoff: _GatewayBackoff,
) -> _T:
    """Run `fn` under the given backoff policy and return its result."""
    outcome = await backoff.run(fn)
    return outcome
|
||||
|
||||
|
||||
def _parse_tools_md(content: str) -> dict[str, str]:
    """Extract key/value pairs from TOOLS.md text.

    Blank lines and lines starting with `#` are skipped, as are lines that
    `_TOOLS_KV_RE` does not match. Later duplicates overwrite earlier keys.
    """
    parsed: dict[str, str] = {}
    stripped_lines = (raw.strip() for raw in content.splitlines())
    for line in stripped_lines:
        if line == "" or line.startswith("#"):
            continue
        found = _TOOLS_KV_RE.match(line)
        if found:
            parsed[found.group("key")] = found.group("value").strip()
    return parsed
|
||||
|
||||
|
||||
async def _get_agent_file(
    *,
    agent_gateway_id: str,
    name: str,
    config: GatewayClientConfig,
    backoff: _GatewayBackoff | None = None,
) -> str | None:
    """Fetch one agent file from the gateway and return its text content.

    The `agents.files.get` call is routed through `backoff` when one is
    given. Any `OpenClawGatewayError` yields `None`. The content is taken
    from the payload itself when it is a string, else from `payload["content"]`,
    else from `payload["file"]["content"]`; anything else yields `None`.
    """

    async def _fetch() -> object:
        return await openclaw_call(
            "agents.files.get",
            {"agentId": agent_gateway_id, "name": name},
            config=config,
        )

    try:
        if backoff:
            payload = await backoff.run(_fetch)
        else:
            payload = await _fetch()
    except OpenClawGatewayError:
        return None

    if isinstance(payload, str):
        return payload
    if not isinstance(payload, dict):
        return None

    direct = payload.get("content")
    if isinstance(direct, str):
        return direct

    wrapper = payload.get("file")
    if isinstance(wrapper, dict):
        inner = wrapper.get("content")
        if isinstance(inner, str):
            return inner
    return None
|
||||
|
||||
|
||||
async def _get_existing_auth_token(
    *,
    agent_gateway_id: str,
    config: GatewayClientConfig,
    backoff: _GatewayBackoff | None = None,
) -> str | None:
    """Read the `AUTH_TOKEN` value from the agent's TOOLS.md on the gateway.

    Returns `None` when the file cannot be read, is empty, or holds no
    non-blank `AUTH_TOKEN` entry.
    """
    tools_md = await _get_agent_file(
        agent_gateway_id=agent_gateway_id,
        name="TOOLS.md",
        config=config,
        backoff=backoff,
    )
    if not tools_md:
        return None
    raw_token = _parse_tools_md(tools_md).get("AUTH_TOKEN")
    if not raw_token:
        return None
    return raw_token.strip() or None
|
||||
|
||||
|
||||
async def _paused_board_ids(session: AsyncSession, board_ids: list[UUID]) -> set[UUID]:
    """Return the boards whose most recent pause/resume chat command is `/pause`.

    Only chat memories whose trimmed, lower-cased content is `/pause` or
    `/resume` are considered. An empty `board_ids` list short-circuits to an
    empty set without querying.
    """
    if not board_ids:
        return set()

    commands = {"/pause", "/resume"}
    normalized_content = func.lower(func.trim(col(BoardMemory.content)))
    statement = (
        select(BoardMemory.board_id, BoardMemory.content)
        .where(col(BoardMemory.board_id).in_(board_ids))
        .where(col(BoardMemory.is_chat).is_(True))
        .where(normalized_content.in_(commands))
        .order_by(col(BoardMemory.board_id), col(BoardMemory.created_at).desc())
        # Postgres: DISTINCT ON (board_id) to get latest command per board.
        .distinct(col(BoardMemory.board_id))
    )

    rows = await session.exec(statement)
    return {
        board_id
        for board_id, content in rows
        if (content or "").strip().lower() == "/pause"
    }
|
||||
|
||||
|
||||
def _append_sync_error(
    result: GatewayTemplatesSyncResult,
    *,
    message: str,
    agent: Agent | None = None,
    board: Board | None = None,
) -> None:
    """Append one error entry to `result.errors`.

    Agent and board identifiers are filled in only when the corresponding
    object is provided (truthy); otherwise they are recorded as `None`.
    """
    agent_id = agent.id if agent else None
    agent_name = agent.name if agent else None
    board_id = board.id if board else None
    entry = GatewayTemplatesSyncError(
        agent_id=agent_id,
        agent_name=agent_name,
        board_id=board_id,
        message=message,
    )
    result.errors.append(entry)
|
||||
|
||||
|
||||
async def _rotate_agent_token(session: AsyncSession, agent: Agent) -> str:
    """Issue a new token for `agent`, persist its hash, and return the raw token.

    The agent's `updated_at` is refreshed and the change is committed before
    the agent is reloaded from the session.
    """
    fresh_token = generate_agent_token()
    agent.agent_token_hash = hash_agent_token(fresh_token)
    agent.updated_at = utcnow()
    session.add(agent)
    await session.commit()
    await session.refresh(agent)
    return fresh_token
|
||||
|
||||
|
||||
async def _ping_gateway(ctx: _SyncContext, result: GatewayTemplatesSyncResult) -> bool:
    """Probe the gateway `health` RPC through the context's backoff.

    Returns True on success. On `TimeoutError` or `OpenClawGatewayError` the
    error text is appended to `result` and False is returned.
    """

    async def _probe() -> object:
        # Use a lightweight health probe; avoid enumerating gateway agents.
        return await openclaw_call("health", config=ctx.config)

    try:
        await ctx.backoff.run(_probe)
    except (TimeoutError, OpenClawGatewayError) as exc:
        _append_sync_error(result, message=str(exc))
        return False
    return True
|
||||
|
||||
|
||||
def _base_result(
    gateway: Gateway,
    *,
    include_main: bool,
    reset_sessions: bool,
) -> GatewayTemplatesSyncResult:
    """Create the initial sync result with zeroed counters for `gateway`."""
    initial_counters = {
        "agents_updated": 0,
        "agents_skipped": 0,
        "main_updated": False,
    }
    return GatewayTemplatesSyncResult(
        gateway_id=gateway.id,
        include_main=include_main,
        reset_sessions=reset_sessions,
        **initial_counters,
    )
|
||||
|
||||
|
||||
def _boards_by_id(
    boards: list[Board],
    *,
    board_id: UUID | None,
) -> dict[UUID, Board] | None:
    """Index boards by id, optionally narrowed to a single board.

    With `board_id=None` the full index is returned. Otherwise the result is
    a one-entry mapping for that board, or `None` when it is not in `boards`.
    """
    index: dict[UUID, Board] = {}
    for item in boards:
        index[item.id] = item
    if board_id is None:
        return index
    selected = index.get(board_id)
    return None if selected is None else {board_id: selected}
|
||||
|
||||
|
||||
async def _resolve_agent_auth_token(
    ctx: _SyncContext,
    result: GatewayTemplatesSyncResult,
    agent: Agent,
    board: Board | None,
    *,
    agent_gateway_id: str,
) -> tuple[str | None, bool]:
    """Determine the auth token to provision `agent` with.

    Returns `(token, fatal)`. `fatal` is True only when reading the existing
    token timed out; the timeout is recorded on `result`. When no token can
    be read and rotation is disabled, the agent is counted as skipped and
    `(None, False)` is returned. A token that fails verification against the
    stored hash is rotated when allowed, otherwise a warning is recorded and
    the mismatching token is still returned.
    """
    may_rotate = ctx.options.rotate_tokens

    try:
        token = await _get_existing_auth_token(
            agent_gateway_id=agent_gateway_id,
            config=ctx.config,
            backoff=ctx.backoff,
        )
    except TimeoutError as exc:
        _append_sync_error(result, agent=agent, board=board, message=str(exc))
        return None, True

    if not token:
        if not may_rotate:
            result.agents_skipped += 1
            _append_sync_error(
                result,
                agent=agent,
                board=board,
                message=(
                    "Skipping agent: unable to read AUTH_TOKEN from TOOLS.md "
                    "(run with rotate_tokens=true to re-key)."
                ),
            )
            return None, False
        token = await _rotate_agent_token(ctx.session, agent)

    stored_hash = agent.agent_token_hash
    if stored_hash and not verify_agent_token(token, stored_hash):
        if may_rotate:
            token = await _rotate_agent_token(ctx.session, agent)
        else:
            _append_sync_error(
                result,
                agent=agent,
                board=board,
                message=(
                    "Warning: AUTH_TOKEN in TOOLS.md does not match backend "
                    "token hash (agent auth may be broken)."
                ),
            )
    return token, False
|
||||
|
||||
|
||||
async def _sync_one_agent(
    ctx: _SyncContext,
    result: GatewayTemplatesSyncResult,
    agent: Agent,
    board: Board,
) -> bool:
    """Sync templates for one board agent; return True to stop the whole sync.

    The sync stops (True) when resolving the auth token was fatal or when
    provisioning timed out. Other provisioning failures are recorded on
    `result`, counted as skipped, and do not stop the sync.
    """
    auth_token, fatal = await _resolve_agent_auth_token(
        ctx,
        result,
        agent,
        board,
        agent_gateway_id=_agent_key(agent),
    )
    if fatal:
        return True
    if not auth_token:
        return False

    async def _provision() -> bool:
        await ctx.provisioner.apply_agent_lifecycle(
            agent=agent,
            gateway=ctx.gateway,
            board=board,
            auth_token=auth_token,
            user=ctx.options.user,
            action="update",
            force_bootstrap=ctx.options.force_bootstrap,
            reset_session=ctx.options.reset_sessions,
            wake=False,
        )
        return True

    stop = False
    try:
        await _with_gateway_retry(_provision, backoff=ctx.backoff)
        result.agents_updated += 1
    except TimeoutError as exc:  # pragma: no cover - gateway/network dependent
        result.agents_skipped += 1
        _append_sync_error(result, agent=agent, board=board, message=str(exc))
        stop = True
    except (OSError, RuntimeError, ValueError) as exc:  # pragma: no cover
        result.agents_skipped += 1
        _append_sync_error(
            result,
            agent=agent,
            board=board,
            message=f"Failed to sync templates: {exc}",
        )
    return stop
|
||||
|
||||
|
||||
async def _sync_main_agent(
    ctx: _SyncContext,
    result: GatewayTemplatesSyncResult,
) -> bool:
    """Sync templates for the gateway's main (board-less) agent.

    Returns True when the agent record is missing, when no auth token could
    be resolved, or when provisioning timed out; otherwise False. On success
    `result.main_updated` is set.
    """
    main_query = (
        Agent.objects.all()
        .filter(col(Agent.gateway_id) == ctx.gateway.id)
        .filter(col(Agent.board_id).is_(None))
    )
    main_agent = await main_query.first(ctx.session)
    if main_agent is None:
        _append_sync_error(
            result,
            message="Gateway agent record not found; skipping gateway agent template sync.",
        )
        return True

    token, fatal = await _resolve_agent_auth_token(
        ctx,
        result,
        main_agent,
        board=None,
        agent_gateway_id=GatewayAgentIdentity.openclaw_agent_id(ctx.gateway),
    )
    if fatal:
        return True
    if not token:
        _append_sync_error(
            result,
            agent=main_agent,
            message="Skipping gateway agent: unable to read AUTH_TOKEN from TOOLS.md.",
        )
        return True

    async def _provision_main() -> bool:
        await ctx.provisioner.apply_agent_lifecycle(
            agent=main_agent,
            gateway=ctx.gateway,
            board=None,
            auth_token=token,
            user=ctx.options.user,
            action="update",
            force_bootstrap=ctx.options.force_bootstrap,
            reset_session=ctx.options.reset_sessions,
            wake=False,
        )
        return True

    try:
        await _with_gateway_retry(_provision_main, backoff=ctx.backoff)
    except TimeoutError as exc:  # pragma: no cover - gateway/network dependent
        _append_sync_error(result, agent=main_agent, message=str(exc))
        return True
    except (OSError, RuntimeError, ValueError) as exc:  # pragma: no cover
        _append_sync_error(
            result,
            agent=main_agent,
            message=f"Failed to sync gateway agent templates: {exc}",
        )
        return False
    result.main_updated = True
    return False
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class LeadAgentOptions:
|
||||
"""Optional overrides for board-lead provisioning behavior."""
|
||||
|
||||
agent_name: str | None = None
|
||||
identity_profile: dict[str, str] | None = None
|
||||
action: str = "provision"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class LeadAgentRequest:
    """Immutable inputs for ensuring or provisioning a board lead agent."""

    # Board that should own the lead agent.
    board: Board
    # Gateway the lead agent is attached to and provisioned on.
    gateway: Gateway
    # Gateway client configuration carried with the request.
    config: GatewayClientConfig
    # User passed through to provisioning; may be None.
    user: User | None
    # Optional overrides; defaults to a fresh `LeadAgentOptions()`.
    options: LeadAgentOptions = field(default_factory=LeadAgentOptions)
|
||||
|
||||
Reference in New Issue
Block a user