agent_deply/pam_deploy_graph/agent.py

"""PAM 部署 Agent 运行时。

本模块不强依赖 LangGraph，可独立运行；同一组节点也可在
`pam_deploy_graph.graph` 中接入 LangGraph。
"""

from __future__ import annotations

import re
import time
from dataclasses import asdict
from pathlib import Path
from typing import Any, Callable

from .action_router import ActionRouter, build_action_backends
from .checkpoint_store import save_checkpoint
from .config_writer import write_config
from .constants import DEFAULT_PARAMS, GLOBAL_ACTION_SEQUENCE, IP_ACTION_SEQUENCE, REQUIRED_PARAMS
from .fake_runner import FakeActionRunner
from .llm import LlmClient, RuleBasedLlmClient, validate_deploy_plan, validate_intent_result, validate_mode_decision
from .mcp_runner import McpActionRunner
from .models import (
    ActionResult,
    AgentExecutionMode,
    AgentState,
    ExecutionStrategy,
    LlmDeployPlan,
    LlmIntentResult,
    LlmModeDecision,
    LlmParamResult,
    SkillPolicy,
)
from .models import ActionResult, AgentState, ExecutionStrategy, LlmActionAnalysis, LlmDeployPlan, LlmIntentResult, LlmParamResult
from .script_runner import ScriptActionRunner, select_script_entry
from .skill_policy import load_skill_policy
from .tool_catalog import normalize_planned_actions, tool_summaries

REQUIRED_ACTION_VALUES = {
    "upload-package": ("HASH_CODE",),
    "get-node-url": ("NODE_URL",),
}


class PamDeployAgent:
    """PAM 部署主 Agent，串联 LLM、action 路由、确认和续跑状态。"""

    def __init__(
        self,
        *,
        skill_path: str | Path = "doc_scripts/PAM_AUTO_DEPLY_SKILL.md",
        script_base_dir: str | Path = "doc_scripts",
        mcp_runner: McpActionRunner | None = None,
        fake_runner: FakeActionRunner | None = None,
        llm_client: LlmClient | None = None,
        action_analysis_enabled: bool = False,
        progress_callback: Callable[[dict[str, Any]], None] | None = None,
    ) -> None:
        """初始化策略、脚本 runner、MCP runner、fake runner 和 LLM client。"""
        self.skill_policy = load_skill_policy(skill_path)
        self.script_base_dir = Path(script_base_dir)
        self.script_runner = ScriptActionRunner(self.script_base_dir)
        self.fake_runner = fake_runner or FakeActionRunner()
        self.mcp_runner = mcp_runner
        self.llm_client = llm_client or RuleBasedLlmClient()
        self.action_analysis_enabled = action_analysis_enabled
        self.progress_callback = progress_callback
        self.router = ActionRouter(
            script_runner=self.script_runner,
            mcp_runner=mcp_runner,
            fake_runner=self.fake_runner,
        )

    def understand_request(self, text: str) -> LlmIntentResult:
        """调用 LLM 识别用户意图，并执行基础校验。"""
        result = self.llm_client.understand_request(text)
        validate_intent_result(result)
        return result

    def extract_params(self, text: str, base_params: dict[str, Any] | None = None) -> LlmParamResult:
        """从自然语言中抽取部署参数和控制参数。"""
        return self.llm_client.extract_params(text, base_params)

    def generate_plan(
        self,
        *,
        params: dict[str, Any],
        intent: str,
        strategy: ExecutionStrategy,
    ) -> LlmDeployPlan:
        """根据参数、意图和执行策略生成部署计划。"""
        plan = self.llm_client.generate_plan(
            params=params,
            intent=intent,
            strategy=strategy,
            skill_policy=self._skill_policy_payload(self.skill_policy),
            tool_summaries=self.tool_summaries(strategy),
        )
        validate_deploy_plan(plan)
        return plan

    def decide_execution_mode(
        self,
        *,
        text: str,
        params: dict[str, Any],
        intent: str,
        strategy: ExecutionStrategy,
    ) -> LlmModeDecision:
        """根据用户请求、skill 约束和工具能力决定执行模式。"""
        allowed_modes = list(self.skill_policy.allowed_execution_modes)
        decision = self.llm_client.decide_execution_mode(
            text=text,
            params=params,
            intent=intent,
            strategy=strategy,
            allowed_modes=allowed_modes,
            tool_summaries=self.tool_summaries(strategy),
        )
        validate_mode_decision(decision, allowed_modes)
        return decision

    def tool_summaries(self, strategy: ExecutionStrategy) -> list[dict[str, str]]:
        """返回当前 skill 和策略下允许暴露给 LLM 的 tool 摘要。"""
        return tool_summaries(self.skill_policy, strategy)

    def analyze_request(self, text: str, base_params: dict[str, Any] | None = None) -> dict[str, Any]:
        """完成意图识别、参数抽取和计划生成，供 analyze/chat 使用。"""
        intent = self.understand_request(text)
        params = self.extract_params(text, base_params)
        strategy = self._choose_strategy(intent.strategy_preference)
        merged_params = {**DEFAULT_PARAMS, **params.extracted_params}
        mode_decision = self.decide_execution_mode(
            text=text,
            params=merged_params,
            intent=intent.intent,
            strategy=strategy,
        )
        plan = self.generate_plan(
            params=merged_params,
            intent=intent.intent,
            strategy=strategy,
        )
        plan.planned_actions = normalize_planned_actions(
            plan.planned_actions,
            policy=self.skill_policy,
            mode=mode_decision.mode,
        )
        return {
            "intent": intent,
            "params": params,
            "mode_decision": mode_decision,
            "plan": plan,
        }

    def normalize_params(self, params: dict[str, Any]) -> dict[str, Any]:
        """合并默认参数并校验必填参数是否齐全。"""
        normalized = {**DEFAULT_PARAMS, **params}
        missing = [key for key in REQUIRED_PARAMS if not normalized.get(key)]
        if missing:
            raise ValueError(f"缺少必填参数: {', '.join(missing)}")
        normalized["ZIP_FILE_PATH"] = _normalize_local_file_path(str(normalized["ZIP_FILE_PATH"]).strip())
        return normalized

    def _choose_strategy(self, preference: str) -> ExecutionStrategy:
        """把 LLM 给出的策略偏好转换为内部执行策略。"""
        if preference in ("hybrid_node_mcp", "script_only", "fake"):
            return preference  # type: ignore[return-value]
        return "hybrid_node_mcp"

    def create_state(
        self,
        *,
        params: dict[str, Any],
        execution_strategy: ExecutionStrategy = "hybrid_node_mcp",
        execution_mode: AgentExecutionMode = "fixed_runtime",
        run_id: str | None = None,
        script_entry: str | None = None,
        config_path: str | None = None,
        trace_file_path: str | None = None,
        checkpoint_path: str | None = None,
        target_ips: list[str] | None = None,
        planned_actions: list[str] | None = None,
        mode_reason: str = "",
        mode_risk_level: str = "medium",
        mode_requires_confirmation: bool = True,
    ) -> AgentState:
        """创建一次运行所需的 AgentState，并写入脚本配置文件。"""
        normalized = self.normalize_params(params)
        actual_run_id = run_id or time.strftime("%Y%m%d_%H%M%S")
        actual_script_entry = script_entry or select_script_entry()
        runtime_dir = Path("runtime")
        actual_config_path = _absolute_path(config_path or runtime_dir / f"config_{actual_run_id}.txt")
        actual_trace_path = _absolute_path(trace_file_path or Path("logs") / f"api_trace_{actual_run_id}.log")
        write_config(normalized, actual_config_path)
        normalized_actions = normalize_planned_actions(
            planned_actions or [],
            policy=self.skill_policy,
            mode=execution_mode,
        )
        return AgentState(
            run_id=actual_run_id,
            params=normalized,
            execution_strategy=execution_strategy,
            action_backends=build_action_backends(execution_strategy),
            script_entry=actual_script_entry,
            script_base_dir=str(self.script_base_dir),
            config_path=str(actual_config_path),
            trace_file_path=str(actual_trace_path),
            checkpoint_path=checkpoint_path or "",
            target_ips=target_ips or [],
            execution_mode=execution_mode,
            planned_actions=normalized_actions,
            mode_reason=mode_reason,
            mode_risk_level=mode_risk_level,  # type: ignore[arg-type]
            mode_requires_confirmation=mode_requires_confirmation,
        )

    def pause_state(
        self,
        state: AgentState,
        *,
        reason: str,
        review_context: dict[str, Any] | None = None,
    ) -> AgentState:
        """将当前 state 标记为暂停，并持久化 checkpoint。"""
        state.paused = True
        state.pause_reason = reason
        state.review_context = dict(review_context or {})
        self._save_checkpoint(state)
        return state

    def resume_state(self, state: AgentState) -> AgentState:
        """清理暂停标记，允许后续继续执行。"""
        state.paused = False
        state.pause_reason = ""
        state.review_context = {}
        self._save_checkpoint(state)
        return state

    def update_state_params(self, state: AgentState, updates: dict[str, Any]) -> AgentState:
        """热更新 state 中的参数，并回写 config 文件。"""
        merged = {**state.params, **updates}
        normalized = self.normalize_params(merged)
        state.params = normalized
        if state.config_path:
            write_config(normalized, state.config_path)
        self._save_checkpoint(state)
        return state

    def preview(self, params: dict[str, Any], strategy: ExecutionStrategy = "hybrid_node_mcp") -> str:
        """渲染部署预览，展示参数和 action 路由。"""
        normalized = self.normalize_params(params)
        routes = build_action_backends(strategy)
        if strategy == "hybrid_node_mcp":
            home_backend = "脚本 action"
            node_backend = "MCP"
        elif strategy == "script_only":
            home_backend = "脚本 action"
            node_backend = "脚本 action"
        else:
            home_backend = "fake"
            node_backend = "fake"
        lines = [
            "## PAM 部署预览",
            "",
            f"- 执行模式: fixed_runtime",
            f"- 执行策略: {strategy}",
            f"- PAM_HOME: {home_backend}",
            f"- PAM_NODE: {node_backend}",
            f"- 机场: {normalized['AIRPORT_CODE']}",
            f"- 应用: {normalized['APP_NAME']}",
            f"- 模块: {normalized['MODULE_NAME']}",
            f"- 版本: {normalized['VERSION_NUMBER']}",
            "",
            "| action | backend |",
            "| --- | --- |",
        ]
        for action in GLOBAL_ACTION_SEQUENCE:
            lines.append(f"| `{action}` | `{routes[action]}` |")
        return "\n".join(lines)

    def run_global_flow(self, state: AgentState) -> AgentState:
        """执行全局部署阶段，并跳过 checkpoint 中已完成的步骤。"""
        if state.paused:
            self._save_checkpoint(state)
            return state
        while True:
            action = self.next_global_action(state)
            if action is None:
                return state
            self.run_global_action(state, action)

    def next_global_action(self, state: AgentState) -> str | None:
        """返回下一个未完成的全局 action。"""
        for action in self._planned_global_actions(state):
        if state.paused:
            return None
        for action in GLOBAL_ACTION_SEQUENCE:
            if action in state.completed_global_steps:
                continue
            return action
        return None

    def run_global_action(self, state: AgentState, action: str) -> AgentState:
        """执行一个全局 action，并把结果写回 AgentState。"""
        if action in state.completed_global_steps:
            return state
        kwargs: dict[str, Any] = {}
        if action == "publish-version":
            if not state.hash_code:
                state.last_failed_step = action
                self._save_checkpoint(state)
                raise RuntimeError("publish-version 缺少 HASH_CODE，请确认 upload-package 是否成功返回 HASH_CODE")
            kwargs["hash_code"] = state.hash_code
        backend = state.action_backends.get(action, "script")
        self._emit_progress({"type": "ACTION_START", "stage": action, "backend": backend})
        try:
            result = self.router.run_action(state, action, **kwargs)
        except Exception as exc:
            result = ActionResult(
                action=action,
                backend=backend,
                ok=False,
                error_summary=str(exc),
            )
        state.events.append(
            {
                "type": "ACTION_DONE" if result.ok else "ACTION_FAIL",
                "stage": action,
                "backend": result.backend,
                "message": result.error_summary or "ok",
            }
        )
        analysis = self._append_action_analysis(state, action, result)
        if not result.ok:
            self._emit_progress(
                {
                    "type": "ACTION_FAIL",
                    "stage": action,
                    "backend": result.backend,
                    "message": result.error_summary or "action 执行失败",
                }
            )
            state.last_failed_step = action
            self.pause_state(
                state,
                reason="action_failed",
                review_context=self._review_context(action=action, analysis=analysis, result=result),
            )
            self._save_checkpoint(state)
            raise RuntimeError(f"{action} 执行失败: {result.error_summary}")
        missing_values = self._missing_required_values(action, result.values)
        if missing_values:
            message = f"{action} 返回缺少必要字段: {', '.join(missing_values)}"
            self._emit_progress(
                {
                    "type": "ACTION_FAIL",
                    "stage": action,
                    "backend": result.backend,
                    "message": message,
                }
            )
            state.last_failed_step = action
            self.pause_state(
                state,
                reason="action_missing_required_values",
                review_context={
                    "type": "action_review",
                    "stage": action,
                    "message": message,
                    "missing_values": missing_values,
                },
            )
            self._save_checkpoint(state)
            raise RuntimeError(message)
        self._apply_result(state, action, result.values)
        state.completed_global_steps.append(action)
        state.last_success_step = action
        self._emit_progress(
            {
                "type": "ACTION_DONE",
                "stage": action,
                "backend": result.backend,
                "message": result.values.get("MESSAGE", "ok"),
            }
        )
        if analysis is not None and not analysis.should_continue:
            state.last_failed_step = action
            self.pause_state(
                state,
                reason="llm_review_blocked",
                review_context=self._review_context(action=action, analysis=analysis, result=result),
            )
            return state
        self._save_checkpoint(state)
        return state

    def _missing_required_values(self, action: str, values: dict[str, Any]) -> list[str]:
        """检查 action 成功返回后是否带回后续步骤必需字段。"""
        required = REQUIRED_ACTION_VALUES.get(action, ())
        return [key for key in required if not values.get(key)]

    def run_deploy_flow(self, state: AgentState) -> AgentState:
        """执行完整部署流程：全局阶段后进入逐 IP 阶段。"""
        if state.pending_confirmation or state.paused:
            self._save_checkpoint(state)
            return state
        self.run_global_flow(state)
        self.run_ip_flow(state)
        return state

    def run_ip_flow(self, state: AgentState) -> AgentState:
        """执行逐 IP 部署流程，失败时停在人工确认点。"""
        if state.paused:
            self._save_checkpoint(state)
            return state
        while True:
            work = self.next_ip_action(state)
            if work is None:
                return state
            ip, action = work
            self.run_ip_action(state, ip, action)

    def next_ip_action(self, state: AgentState) -> tuple[str, str] | None:
        """返回下一个待执行的单 IP action，并按需初始化 IP 状态。"""
        if state.pending_confirmation or state.paused:
            self._save_checkpoint(state)
            return None
        planned_ip_actions = self._planned_ip_actions(state)
        if not planned_ip_actions:
            return None
        self._resolve_target_ips(state)
        for ip in state.target_ips:
            ip_state = state.ip_states.get(ip)
            if ip_state and ip_state.get("status") == "SUCCESS":
                continue
            if ip_state and ip_state.get("status") == "FAILED":
                if ip_state.get("rollback_status") == "PENDING_AGENT_CONFIRMATION":
                    state.pending_confirmation = f"rollback-ip:{ip}"
                    self._save_checkpoint(state)
                    return None
                continue
            if not ip_state:
                state.events.append({"type": "IP_START", "ip": ip, "message": "start"})
                ip_state = {
                    "status": "RUNNING",
                    "completed_steps": [],
                    "failed_stage": "",
                    "failure_reason": "",
                    "rollback_status": "ROLLBACK_NOT_RUN",
                    "rollback_stop_first": False,
                    "log_file": "",
                }
                state.ip_states[ip] = ip_state

            completed_steps = ip_state.setdefault("completed_steps", [])
            for action in planned_ip_actions:
                if action not in completed_steps:
                    return ip, action

            ip_state["status"] = "SUCCESS"
            state.events.append({"type": "IP_DONE", "ip": ip, "message": "success"})
            self._save_checkpoint(state)
        return None

    def run_ip_action(self, state: AgentState, ip: str, action: str) -> AgentState:
        """执行一个单 IP action，并在失败时设置人工确认点。"""
        ip_state = state.ip_states[ip]
        completed_steps = ip_state.setdefault("completed_steps", [])
        if action in completed_steps:
            return state
        self._emit_progress(
            {
                "type": "ACTION_START",
                "stage": action,
                "backend": state.action_backends.get(action, ""),
                "ip": ip,
            }
        )
        backend = state.action_backends.get(action, "script")
        try:
            result = self.router.run_action(state, action, ip=ip)
        except Exception as exc:
            result = ActionResult(
                action=action,
                backend=backend,
                ok=False,
                error_summary=str(exc),
            )
        failed = (not result.ok) or self._business_failed(action, result.values)
        state.events.append(
            {
                "type": "ACTION_FAIL" if failed else "ACTION_DONE",
                "stage": action,
                "backend": result.backend,
                "ip": ip,
                "message": result.error_summary or result.values.get("MESSAGE", "ok"),
            }
        )
        analysis = self._append_action_analysis(state, action, result, ip=ip)

        if failed:
            self._emit_progress(
                {
                    "type": "ACTION_FAIL",
                    "stage": action,
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.error_summary or result.values.get("MESSAGE", "action 执行失败"),
                }
            )
            self.pause_state(
                state,
                reason="action_failed",
                review_context=self._review_context(action=action, analysis=analysis, result=result, ip=ip),
            )
            self._record_ip_failure(state, ip, action, result.error_summary or str(result.values))
            if action != "download-log":
                self._download_log_best_effort(state, ip)
            state.pending_confirmation = f"rollback-ip:{ip}"
            self._save_checkpoint(state)
            return state

        self._apply_ip_result(ip_state, action, result.values)
        completed_steps.append(action)
        self._emit_progress(
            {
                "type": "ACTION_DONE",
                "stage": action,
                "backend": result.backend,
                "ip": ip,
                "message": result.values.get("MESSAGE", "ok"),
            }
        )
        if analysis is not None and not analysis.should_continue:
            self.pause_state(
                state,
                reason="llm_review_blocked",
                review_context=self._review_context(action=action, analysis=analysis, result=result, ip=ip),
            )
            return state
        self._save_checkpoint(state)
        return state

    def build_confirmation_request(self, state: AgentState) -> dict[str, Any]:
        """把 pending_confirmation 转换为面向用户的确认请求。"""
        if not state.pending_confirmation:
            return {}
        kind, _, value = state.pending_confirmation.partition(":")
        if kind == "rollback-ip":
            ip_state = state.ip_states.get(value, {})
            return {
                "type": "rollback-ip",
                "ip": value,
                "failed_stage": ip_state.get("failed_stage", ""),
                "failure_reason": ip_state.get("failure_reason", ""),
                "rollback_stop_first": bool(ip_state.get("rollback_stop_first", False)),
                "allowed_decisions": ["approve", "reject"],
            }
        return {
            "type": kind,
            "value": value,
            "allowed_decisions": ["approve", "reject"],
        }

    def confirm_pending(self, state: AgentState, *, approved: bool, operator_note: str = "") -> AgentState:
        """处理人工确认结果；当前支持失败 IP 的回滚确认。"""
        request = self.build_confirmation_request(state)
        if not request:
            raise ValueError("当前没有待确认事项")
        if request["type"] != "rollback-ip":
            raise ValueError(f"不支持的确认类型: {request['type']}")

        ip = request["ip"]
        ip_state = state.ip_states[ip]
        if not approved:
            ip_state["rollback_status"] = "REJECTED_BY_OPERATOR"
            state.events.append(
                {
                    "type": "CONFIRMATION_REJECTED",
                    "stage": "rollback-ip",
                    "ip": ip,
                    "message": operator_note or "rollback rejected by operator",
                }
            )
            state.pending_confirmation = ""
            state.paused = False
            state.pause_reason = ""
            state.review_context = {}
            self._save_checkpoint(state)
            return state

        backend = state.action_backends.get("rollback-ip", "script")
        self._emit_progress(
            {
                "type": "ACTION_START",
                "stage": "rollback-ip",
                "backend": backend,
                "ip": ip,
            }
        )
        try:
            result = self.router.run_action(
                state,
                "rollback-ip",
                ip=ip,
                stop_first=bool(ip_state.get("rollback_stop_first", False)),
            )
        except Exception as exc:
            result = ActionResult(
                action="rollback-ip",
                backend=backend,
                ok=False,
                error_summary=str(exc),
            )
        ip_state["rollback_status"] = "ROLLBACK_DONE" if result.ok else "ROLLBACK_FAILED"
        state.events.append(
            {
                "type": "ACTION_DONE" if result.ok else "ACTION_FAIL",
                "stage": "rollback-ip",
                "backend": result.backend,
                "ip": ip,
                "message": result.error_summary or result.values.get("MESSAGE", "ok"),
            }
        )
        self._append_action_analysis(state, "rollback-ip", result, ip=ip)
        if result.ok:
            state.pending_confirmation = ""
            state.last_success_step = "rollback-ip"
            state.last_failed_step = ""
            state.paused = False
            state.pause_reason = ""
            state.review_context = {}
            self._emit_progress(
                {
                    "type": "ACTION_DONE",
                    "stage": "rollback-ip",
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.values.get("MESSAGE", "ok"),
                }
            )
        else:
            state.pending_confirmation = f"rollback-ip:{ip}"
            state.last_failed_step = "rollback-ip"
            state.paused = True
            state.pause_reason = "rollback_failed"
            self._emit_progress(
                {
                    "type": "ACTION_FAIL",
                    "stage": "rollback-ip",
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.error_summary or result.values.get("MESSAGE", "rollback 执行失败"),
                }
            )
        self._save_checkpoint(state)
        return state

    def _emit_progress(self, payload: dict[str, Any]) -> None:
        """向 CLI/chat 回调 action 执行进度，回调失败不影响主流程。"""
        if self.progress_callback is None:
            return
        try:
            self.progress_callback(payload)
        except Exception:
            return

    def _apply_result(self, state: AgentState, action: str, values: dict[str, Any]) -> None:
        """把全局 action 返回值写回 AgentState。"""
        if "HASH_CODE" in values:
            state.hash_code = str(values["HASH_CODE"])
        if "NODE_URL" in values:
            state.node_url = str(values["NODE_URL"])
        if action == "get-online-ips":
            ips = values.get("IP", [])
            if isinstance(ips, str):
                ips = [ips]
            state.online_ips = list(ips)
            state.target_ips = state.target_ips or state.online_ips.copy()

    def _resolve_target_ips(self, state: AgentState) -> None:
        """根据在线 IP 和用户指定 IP 计算最终目标 IP。"""
        if not state.target_ips:
            state.target_ips = state.online_ips.copy()
            return
        online = set(state.online_ips)
        requested = state.target_ips
        state.target_ips = [ip for ip in requested if ip in online]
        missing = [ip for ip in requested if ip not in online]
        if missing:
            state.events.append(
                {
                    "type": "TARGET_SCOPE_CHANGED",
                    "message": "some requested IPs are not online",
                    "missing_ips": missing,
                    "target_ips": state.target_ips,
                }
            )

    def _business_failed(self, action: str, values: dict[str, Any]) -> bool:
        """识别 exit code 之外的业务失败条件。"""
        if action == "verify-ip":
            success = values.get("SUCCESS")
            if success is None:
                return False
            return str(success).lower() not in ("true", "1", "yes")
        return False

    def _apply_ip_result(self, ip_state: dict[str, Any], action: str, values: dict[str, Any]) -> None:
        """把逐 IP action 返回值写回单 IP 状态。"""
        if action == "download-log":
            ip_state["log_file"] = str(values.get("LOG_FILE", ""))

    def _record_ip_failure(self, state: AgentState, ip: str, action: str, reason: str) -> None:
        """记录单 IP 失败，并设置待回滚确认状态。"""
        ip_state = state.ip_states[ip]
        stop_first = action in ("start-ip", "verify-ip")
        ip_state.update(
            {
                "status": "FAILED",
                "failed_stage": action,
                "failure_reason": reason,
                "rollback_status": "PENDING_AGENT_CONFIRMATION",
                "rollback_stop_first": stop_first,
            }
        )
        state.last_failed_step = action
        state.events.append(
            {
                    "type": "CONFIRMATION_REQUIRED",
                    "stage": "rollback-ip",
                    "ip": ip,
                    "stop_first": stop_first,
                    "message": f"{action} 执行失败，需要确认是否回滚",
                }
            )

    def _download_log_best_effort(self, state: AgentState, ip: str) -> None:
        """失败后尽力下载日志，日志失败不覆盖原失败原因。"""
        backend = state.action_backends.get("download-log", "script")
        self._emit_progress(
            {
                "type": "ACTION_START",
                "stage": "download-log",
                "backend": backend,
                "ip": ip,
                "message": "失败后尝试下载日志",
            }
        )
        try:
            result = self.router.run_action(state, "download-log", ip=ip)
        except Exception as exc:
            result = ActionResult(
                action="download-log",
                backend=backend,
                ok=False,
                error_summary=str(exc),
            )
        ip_state = state.ip_states[ip]
        if result.ok:
            ip_state["log_file"] = str(result.values.get("LOG_FILE", ""))
            state.events.append(
                {
                    "type": "ACTION_DONE",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": "已尽力下载日志",
                }
            )
            self._emit_progress(
                {
                    "type": "ACTION_DONE",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.values.get("MESSAGE", "已尽力下载日志"),
                }
            )
        else:
            state.events.append(
                {
                    "type": "ACTION_FAIL",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.error_summary or "尽力下载日志失败",
                }
            )
            self._emit_progress(
                {
                    "type": "ACTION_FAIL",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.error_summary or "尽力下载日志失败",
                }
            )
        self._append_action_analysis(state, "download-log", result, ip=ip)

    def _save_checkpoint(self, state: AgentState) -> None:
        """如果配置了 checkpoint 路径，则保存完整运行状态。"""
        if state.checkpoint_path:
            save_checkpoint(state, state.checkpoint_path, redact=False)

    def _append_action_analysis(
        self,
        state: AgentState,
        action: str,
        result,
        *,
        ip: str | None = None,
    ) -> Any:
        """启用 action 后分析时，把诊断结果追加到 events。"""
        self._emit_progress(
            {
                "type": "ACTION_REVIEW_START",
                "stage": action,
                "ip": ip or "",
                "message": "LLM 开始分析 action 结果",
            }
        )
        try:
            analysis = self.llm_client.analyze_action_result(
                action=action,
                result=result,
                state_summary=self._state_summary_for_llm(state, ip=ip),
            )
        except Exception as exc:  # pragma: no cover - 审核失败时也要显式暂停，避免黑盒继续执行
            state.events.append(
                {
                    "type": "ACTION_ANALYSIS_FAIL",
                    "stage": action,
                    "ip": ip or "",
                    "message": str(exc),
                }
            )
            self._emit_progress(
                {
                    "type": "ACTION_REVIEW_FAIL",
                    "stage": action,
                    "ip": ip or "",
                    "message": str(exc),
                }
            )
            return LlmActionAnalysis(
                action=action,
                has_anomaly=True,
                severity="high",
                possible_reason=f"LLM 审核失败: {exc}",
                suggested_action="请检查 LLM 配置、网络或 action 审核提示词文件后再继续。",
                requires_confirmation=True,
                should_continue=False,
                notes=["action 结果未完成 LLM 审核，流程已自动暂停。"],
            )
        payload = asdict(analysis)
        payload.update({"type": "ACTION_ANALYSIS", "stage": action})
        if ip:
            payload["ip"] = ip
        if self.action_analysis_enabled:
            state.events.append(payload)
        self._emit_progress(
            {
                "type": "ACTION_REVIEW_DONE",
                "stage": action,
                "ip": ip or "",
                "message": analysis.suggested_action or analysis.possible_reason or "LLM 审核完成",
                "has_anomaly": analysis.has_anomaly,
                "severity": analysis.severity,
                "should_continue": analysis.should_continue,
            }
        )
        return analysis

    def _state_summary_for_llm(self, state: AgentState, *, ip: str | None = None) -> dict[str, Any]:
        """生成给 LLM action 分析使用的脱敏状态摘要。"""
        return {
            "run_id": state.run_id,
            "execution_mode": state.execution_mode,
            "execution_strategy": state.execution_strategy,
            "planned_actions": state.planned_actions,
            "completed_global_steps": state.completed_global_steps,
            "online_ip_count": len(state.online_ips),
            "target_ips": state.target_ips,
            "current_ip": ip or "",
            "current_ip_state": state.ip_states.get(ip, {}) if ip else {},
            "pending_confirmation": state.pending_confirmation,
            "paused": state.paused,
            "pause_reason": state.pause_reason,
            "last_success_step": state.last_success_step,
            "last_failed_step": state.last_failed_step,
        }

    def _review_context(
        self,
        *,
        action: str,
        analysis,
        result,
        ip: str | None = None,
    ) -> dict[str, Any]:
        """构造面向用户展示的审核暂停上下文。"""
        context = {
            "type": "action_review",
            "stage": action,
            "ip": ip or "",
            "backend": result.backend,
            "ok": result.ok,
            "error_summary": result.error_summary,
        }
        if analysis is not None:
            context.update(
                {
                    "severity": analysis.severity,
                    "has_anomaly": analysis.has_anomaly,
                    "possible_reason": analysis.possible_reason,
                    "suggested_action": analysis.suggested_action,
                    "should_continue": analysis.should_continue,
                    "notes": list(analysis.notes),
                }
            )
        return context

    def render_report(self, state: AgentState) -> str:
        """渲染当前部署状态报告。"""
        success = sum(1 for item in state.ip_states.values() if item.get("status") == "SUCCESS")
        failed = sum(1 for item in state.ip_states.values() if item.get("status") == "FAILED")
        lines = [
            "## PAM 智能部署报告",
            "",
            f"- 执行模式: {state.execution_mode}",
            f"- 执行策略: {state.execution_strategy}",
            f"- 模式原因: {state.mode_reason or '-'}",
            f"- 机场: {state.params['AIRPORT_CODE']}",
            f"- 应用: {state.params['APP_NAME']}",
            f"- 模块: {state.params['MODULE_NAME']}",
            f"- 版本: {state.params['VERSION_NUMBER']}",
            f"- 在线工作站数: {len(state.online_ips)}",
            f"- 目标工作站数: {len(state.target_ips)}",
            f"- 成功: {success}",
            f"- 失败: {failed}",
            f"- 待确认: {state.pending_confirmation or '-'}",
            f"- 计划动作: {', '.join(state.planned_actions) if state.planned_actions else '-'}",
            f"- 暂停状态: {'是' if state.paused else '否'}",
            f"- 暂停原因: {state.pause_reason or '-'}",
            "",
            "| IP | 状态 | 失败阶段 | 回滚状态 | 日志 |",
            "| --- | --- | --- | --- | --- |",
        ]
        for ip, ip_state in state.ip_states.items():
            lines.append(
                "| {ip} | {status} | {failed_stage} | {rollback_status} | {log_file} |".format(
                    ip=ip,
                    status=ip_state.get("status", ""),
                    failed_stage=ip_state.get("failed_stage") or "-",
                    rollback_status=ip_state.get("rollback_status") or "-",
                    log_file=ip_state.get("log_file") or "-",
                )
            )
        return "\n".join(lines)

    def _planned_global_actions(self, state: AgentState) -> list[str]:
        """返回当前 state 下应执行的全局 action 列表。"""
        if state.planned_actions:
            return [action for action in state.planned_actions if action in GLOBAL_ACTION_SEQUENCE]
        return list(GLOBAL_ACTION_SEQUENCE)

    def _planned_ip_actions(self, state: AgentState) -> list[str]:
        """返回当前 state 下应执行的逐 IP action 列表。"""
        if state.planned_actions:
            return [action for action in state.planned_actions if action in IP_ACTION_SEQUENCE]
        return list(IP_ACTION_SEQUENCE)

    def _skill_policy_payload(self, policy: SkillPolicy) -> dict[str, Any]:
        """把 SkillPolicy 转成可发送给 LLM 的简化 JSON。"""
        return {
            "name": policy.name,
            "description": policy.description,
            "allowed_execution_modes": list(policy.allowed_execution_modes),
            "allowed_modes": list(policy.allowed_modes),
            "allowed_actions": list(policy.allowed_actions),
            "required_confirmations": list(policy.required_confirmations),
            "required_params": list(policy.required_params),
            "action_sequence": list(policy.action_sequence),
            "ip_action_sequence": list(policy.ip_action_sequence),
            "forbidden_actions": list(policy.forbidden_actions),
        }


def _absolute_path(path: str | Path) -> Path:
    """把传给脚本的文件路径转换为绝对路径，避免 cwd 切换后读错文件。"""
    return Path(path).expanduser().resolve()


def _normalize_local_file_path(path: str) -> str:
    """把本地相对文件路径转换为绝对路径，Windows/Unix 绝对路径保持不变。"""
    if re.match(r"^[A-Za-z]:[\\/]", path):
        return path
    if path.startswith("/"):
        return path
    value = Path(path).expanduser()
    if value.is_absolute():
        return str(value)
    return str(value.resolve())