"""PAM deploy Agent runtime.

This is intentionally runnable without langgraph installed. The same nodes can
be wired into LangGraph later via pam_deploy_graph.graph.
"""

from __future__ import annotations

import time
from pathlib import Path
from typing import Any

from .action_router import ActionRouter, build_action_backends
from .checkpoint_store import save_checkpoint
from .config_writer import write_config
from .constants import DEFAULT_PARAMS, GLOBAL_ACTION_SEQUENCE, IP_ACTION_SEQUENCE, REQUIRED_PARAMS
from .fake_runner import FakeActionRunner
from .llm import LlmClient, RuleBasedLlmClient, validate_deploy_plan, validate_intent_result
from .mcp_runner import McpActionRunner
from .models import AgentState, ExecutionStrategy, LlmDeployPlan, LlmIntentResult, LlmParamResult
from .script_runner import ScriptActionRunner, select_script_entry
from .skill_policy import load_skill_policy


class PamDeployAgent:
    """Drive a PAM deployment from request analysis to the final report.

    The agent analyses a request through an LLM client, builds an
    ``AgentState``, runs the global action sequence followed by the per-IP
    action sequence through an ``ActionRouter``, pauses for an operator
    decision when an IP fails, and renders markdown previews and reports.
    """

    def __init__(
        self,
        *,
        skill_path: str | Path = "doc_scripts/PAM_AUTO_DEPLY_SKILL.md",
        script_base_dir: str | Path = "doc_scripts",
        mcp_runner: McpActionRunner | None = None,
        fake_runner: FakeActionRunner | None = None,
        llm_client: LlmClient | None = None,
    ) -> None:
        """Load the skill policy and wire the runners into an action router.

        Args:
            skill_path: Path handed to ``load_skill_policy``.
            script_base_dir: Base directory given to ``ScriptActionRunner``.
            mcp_runner: Optional MCP runner; passed to the router as-is
                (may be ``None``).
            fake_runner: Optional fake runner; a ``FakeActionRunner`` is
                created when omitted.
            llm_client: Optional LLM client; a ``RuleBasedLlmClient`` is
                created when omitted.
        """
        self.skill_policy = load_skill_policy(skill_path)
        self.script_base_dir = Path(script_base_dir)
        self.script_runner = ScriptActionRunner(self.script_base_dir)
        self.fake_runner = fake_runner or FakeActionRunner()
        self.mcp_runner = mcp_runner
        self.llm_client = llm_client or RuleBasedLlmClient()
        self.router = ActionRouter(
            script_runner=self.script_runner,
            mcp_runner=mcp_runner,
            fake_runner=self.fake_runner,
        )

    def understand_request(self, text: str) -> LlmIntentResult:
        """Ask the LLM client for the intent of *text* and validate the result."""
        result = self.llm_client.understand_request(text)
        validate_intent_result(result)
        return result

    def extract_params(self, text: str, base_params: dict[str, Any] | None = None) -> LlmParamResult:
        """Delegate parameter extraction for *text* to the LLM client."""
        return self.llm_client.extract_params(text, base_params)

    def generate_plan(
        self,
        *,
        params: dict[str, Any],
        intent: str,
        strategy: ExecutionStrategy,
    ) -> LlmDeployPlan:
        """Ask the LLM client for a deploy plan and validate it before returning."""
        plan = self.llm_client.generate_plan(params=params, intent=intent, strategy=strategy)
        validate_deploy_plan(plan)
        return plan

    def analyze_request(self, text: str, base_params: dict[str, Any] | None = None) -> dict[str, Any]:
        """Run intent detection, parameter extraction and planning for *text*.

        Returns:
            A dict with the keys ``"intent"``, ``"params"`` and ``"plan"``.
        """
        intent = self.understand_request(text)
        params = self.extract_params(text, base_params)
        strategy = self._choose_strategy(intent.strategy_preference)
        plan = self.generate_plan(
            # Extracted values override the defaults.
            params={**DEFAULT_PARAMS, **params.extracted_params},
            intent=intent.intent,
            strategy=strategy,
        )
        return {
            "intent": intent,
            "params": params,
            "plan": plan,
        }

    def normalize_params(self, params: dict[str, Any]) -> dict[str, Any]:
        """Merge *params* over ``DEFAULT_PARAMS`` and check required keys.

        Raises:
            ValueError: If any key in ``REQUIRED_PARAMS`` is absent or falsy
                after the merge.
        """
        normalized = {**DEFAULT_PARAMS, **params}
        missing = [key for key in REQUIRED_PARAMS if not normalized.get(key)]
        if missing:
            raise ValueError(f"Missing required params: {', '.join(missing)}")
        return normalized

    def _choose_strategy(self, preference: str) -> ExecutionStrategy:
        """Return *preference* when it is a known strategy, else ``"hybrid_node_mcp"``."""
        if preference in ("hybrid_node_mcp", "script_only", "fake"):
            return preference  # type: ignore[return-value]
        return "hybrid_node_mcp"

    def create_state(
        self,
        *,
        params: dict[str, Any],
        execution_strategy: ExecutionStrategy = "hybrid_node_mcp",
        run_id: str | None = None,
        script_entry: str | None = None,
        config_path: str | None = None,
        trace_file_path: str | None = None,
        checkpoint_path: str | None = None,
        target_ips: list[str] | None = None,
    ) -> AgentState:
        """Build the initial ``AgentState`` and write the run's config file.

        Omitted arguments get defaults: ``run_id`` is a local timestamp
        (``%Y%m%d_%H%M%S``), ``script_entry`` comes from
        ``select_script_entry()``, the config goes to
        ``runtime/config_<run_id>.txt`` and the trace file to
        ``logs/api_trace_<run_id>.log``.

        Raises:
            ValueError: If required params are missing (see
                ``normalize_params``).
        """
        normalized = self.normalize_params(params)
        actual_run_id = run_id or time.strftime("%Y%m%d_%H%M%S")
        actual_script_entry = script_entry or select_script_entry()
        runtime_dir = Path("runtime")
        actual_config_path = config_path or str(runtime_dir / f"config_{actual_run_id}.txt")
        actual_trace_path = trace_file_path or str(Path("logs") / f"api_trace_{actual_run_id}.log")
        write_config(normalized, actual_config_path)
        return AgentState(
            run_id=actual_run_id,
            params=normalized,
            execution_strategy=execution_strategy,
            action_backends=build_action_backends(execution_strategy),
            script_entry=actual_script_entry,
            script_base_dir=str(self.script_base_dir),
            config_path=actual_config_path,
            trace_file_path=actual_trace_path,
            checkpoint_path=checkpoint_path or "",
            target_ips=target_ips or [],
        )

    def preview(self, params: dict[str, Any], strategy: ExecutionStrategy = "hybrid_node_mcp") -> str:
        """Render a markdown dry-run summary for *params* under *strategy*.

        The table lists the backend chosen for every action in
        ``GLOBAL_ACTION_SEQUENCE``; per-IP actions are not listed.

        Raises:
            ValueError: If required params are missing.
        """
        normalized = self.normalize_params(params)
        routes = build_action_backends(strategy)
        if strategy == "hybrid_node_mcp":
            home_backend = "脚本 action"
            node_backend = "MCP"
        elif strategy == "script_only":
            home_backend = "脚本 action"
            node_backend = "脚本 action"
        else:
            home_backend = "fake"
            node_backend = "fake"
        lines = [
            "## PAM 部署预演",
            "",
            f"- 执行策略: {strategy}",
            f"- PAM_HOME: {home_backend}",
            f"- PAM_NODE: {node_backend}",
            f"- 机场: {normalized['AIRPORT_CODE']}",
            f"- 应用: {normalized['APP_NAME']}",
            f"- 模块: {normalized['MODULE_NAME']}",
            f"- 版本: {normalized['VERSION_NUMBER']}",
            "",
            "| action | backend |",
            "| --- | --- |",
        ]
        lines.extend(f"| `{action}` | `{routes[action]}` |" for action in GLOBAL_ACTION_SEQUENCE)
        return "\n".join(lines)

    def run_global_flow(self, state: AgentState) -> AgentState:
        """Run every not-yet-completed action of ``GLOBAL_ACTION_SEQUENCE``.

        Each action appends an ``ACTION_DONE`` / ``ACTION_FAIL`` event and a
        checkpoint is saved after every step, so a rerun skips the actions
        already listed in ``state.completed_global_steps``.

        Raises:
            RuntimeError: On the first action whose result is not ok.
        """
        for action in GLOBAL_ACTION_SEQUENCE:
            if action in state.completed_global_steps:
                continue
            kwargs: dict[str, Any] = {}
            if action == "publish-version":
                kwargs["hash_code"] = state.hash_code
            result = self.router.run_action(state, action, **kwargs)
            state.events.append(
                {
                    "type": "ACTION_DONE" if result.ok else "ACTION_FAIL",
                    "stage": action,
                    "backend": result.backend,
                    # A failure without a summary must not be reported as "ok".
                    "message": result.error_summary or ("ok" if result.ok else "failed"),
                }
            )
            if not result.ok:
                state.last_failed_step = action
                self._save_checkpoint(state)
                raise RuntimeError(f"{action} failed: {result.error_summary}")
            self._apply_result(state, action, result.values)
            state.completed_global_steps.append(action)
            state.last_success_step = action
            self._save_checkpoint(state)
        return state

    def run_deploy_flow(self, state: AgentState) -> AgentState:
        """Run the global flow followed by the per-IP flow.

        When a confirmation is pending, nothing is executed: the state is
        checkpointed and returned unchanged.
        """
        if state.pending_confirmation:
            self._save_checkpoint(state)
            return state
        self.run_global_flow(state)
        self.run_ip_flow(state)
        return state

    def run_ip_flow(self, state: AgentState) -> AgentState:
        """Run ``IP_ACTION_SEQUENCE`` for every target IP, stopping on failure.

        IPs already marked ``SUCCESS`` are skipped, as are ``FAILED`` IPs
        whose rollback is no longer awaiting confirmation. The first failing
        action (transport failure or business failure, see
        ``_business_failed``) records the failure, attempts a log download,
        sets ``state.pending_confirmation`` to ``rollback-ip:<ip>`` and
        returns without touching the remaining IPs.
        """
        if state.pending_confirmation:
            self._save_checkpoint(state)
            return state
        self._resolve_target_ips(state)
        for ip in state.target_ips:
            ip_state = state.ip_states.get(ip)
            if ip_state and ip_state.get("status") == "SUCCESS":
                continue
            if ip_state and ip_state.get("status") == "FAILED":
                if ip_state.get("rollback_status") == "PENDING_AGENT_CONFIRMATION":
                    # Re-raise the confirmation request for a failure that
                    # has not been decided yet.
                    state.pending_confirmation = f"rollback-ip:{ip}"
                    self._save_checkpoint(state)
                    return state
                continue
            if not ip_state:
                state.events.append({"type": "IP_START", "ip": ip, "message": "start"})
                ip_state = {
                    "status": "RUNNING",
                    "completed_steps": [],
                    "failed_stage": "",
                    "failure_reason": "",
                    "rollback_status": "ROLLBACK_NOT_RUN",
                    "rollback_stop_first": False,
                    "log_file": "",
                }
                state.ip_states[ip] = ip_state
            for action in IP_ACTION_SEQUENCE:
                completed_steps = ip_state.setdefault("completed_steps", [])
                if action in completed_steps:
                    continue
                result = self.router.run_action(state, action, ip=ip)
                failed = (not result.ok) or self._business_failed(action, result.values)
                state.events.append(
                    {
                        "type": "ACTION_FAIL" if failed else "ACTION_DONE",
                        "stage": action,
                        "backend": result.backend,
                        "ip": ip,
                        # Fall back to "failed" (not "ok") when a failing
                        # action carries neither a summary nor a MESSAGE.
                        "message": result.error_summary
                        or result.values.get("MESSAGE", "failed" if failed else "ok"),
                    }
                )
                if failed:
                    self._record_ip_failure(state, ip, action, result.error_summary or str(result.values))
                    # No point retrying the log download when it is the step
                    # that just failed.
                    if action != "download-log":
                        self._download_log_best_effort(state, ip)
                    state.pending_confirmation = f"rollback-ip:{ip}"
                    self._save_checkpoint(state)
                    return state
                self._apply_ip_result(ip_state, action, result.values)
                completed_steps.append(action)
                self._save_checkpoint(state)
            ip_state["status"] = "SUCCESS"
            state.events.append({"type": "IP_DONE", "ip": ip, "message": "success"})
            self._save_checkpoint(state)
        return state

    def build_confirmation_request(self, state: AgentState) -> dict[str, Any]:
        """Describe the pending confirmation as a dict, or ``{}`` when none is pending.

        ``state.pending_confirmation`` is parsed as ``<kind>:<value>``. For
        ``rollback-ip`` the request carries the IP's recorded failure details;
        any other kind yields a generic request with ``type`` and ``value``.
        """
        if not state.pending_confirmation:
            return {}
        kind, _, value = state.pending_confirmation.partition(":")
        if kind == "rollback-ip":
            ip_state = state.ip_states.get(value, {})
            return {
                "type": "rollback-ip",
                "ip": value,
                "failed_stage": ip_state.get("failed_stage", ""),
                "failure_reason": ip_state.get("failure_reason", ""),
                "rollback_stop_first": bool(ip_state.get("rollback_stop_first", False)),
                "allowed_decisions": ["approve", "reject"],
            }
        return {
            "type": kind,
            "value": value,
            "allowed_decisions": ["approve", "reject"],
        }

    def confirm_pending(self, state: AgentState, *, approved: bool, operator_note: str = "") -> AgentState:
        """Apply the operator's decision to the pending rollback confirmation.

        A rejection marks the IP ``REJECTED_BY_OPERATOR`` and clears the
        pending confirmation. An approval runs the ``rollback-ip`` action;
        the pending confirmation is cleared only when the rollback succeeds,
        otherwise it stays set so the decision can be retried.

        Raises:
            ValueError: If nothing is pending, or the pending confirmation is
                not of type ``rollback-ip``.
            KeyError: If the pending IP has no entry in ``state.ip_states``.
        """
        request = self.build_confirmation_request(state)
        if not request:
            raise ValueError("No pending confirmation")
        if request["type"] != "rollback-ip":
            raise ValueError(f"Unsupported confirmation type: {request['type']}")
        ip = request["ip"]
        ip_state = state.ip_states[ip]
        if not approved:
            ip_state["rollback_status"] = "REJECTED_BY_OPERATOR"
            state.events.append(
                {
                    "type": "CONFIRMATION_REJECTED",
                    "stage": "rollback-ip",
                    "ip": ip,
                    "message": operator_note or "rollback rejected by operator",
                }
            )
            state.pending_confirmation = ""
            self._save_checkpoint(state)
            return state
        result = self.router.run_action(
            state,
            "rollback-ip",
            ip=ip,
            stop_first=bool(ip_state.get("rollback_stop_first", False)),
        )
        ip_state["rollback_status"] = "ROLLBACK_DONE" if result.ok else "ROLLBACK_FAILED"
        state.events.append(
            {
                "type": "ACTION_DONE" if result.ok else "ACTION_FAIL",
                "stage": "rollback-ip",
                "backend": result.backend,
                "ip": ip,
                # Fall back to "failed" (not "ok") when a failed rollback
                # carries neither a summary nor a MESSAGE.
                "message": result.error_summary
                or result.values.get("MESSAGE", "ok" if result.ok else "failed"),
            }
        )
        if result.ok:
            state.pending_confirmation = ""
            state.last_success_step = "rollback-ip"
            state.last_failed_step = ""
        else:
            state.pending_confirmation = f"rollback-ip:{ip}"
            state.last_failed_step = "rollback-ip"
        self._save_checkpoint(state)
        return state

    def _apply_result(self, state: AgentState, action: str, values: dict[str, Any]) -> None:
        """Copy values returned by a global action into *state*.

        ``HASH_CODE`` and ``NODE_URL`` are stored for any action. For
        ``get-online-ips`` the ``IP`` value (a single string or an iterable
        of strings) becomes ``state.online_ips`` and, when no targets were
        requested, also ``state.target_ips``.
        """
        if "HASH_CODE" in values:
            state.hash_code = str(values["HASH_CODE"])
        if "NODE_URL" in values:
            state.node_url = str(values["NODE_URL"])
        if action == "get-online-ips":
            # A missing, None or empty "IP" value means "no online IPs";
            # without this guard None would crash list() and "" would become
            # a bogus empty-string IP.
            ips = values.get("IP") or []
            if isinstance(ips, str):
                ips = [ips]
            state.online_ips = list(ips)
            state.target_ips = state.target_ips or state.online_ips.copy()

    def _resolve_target_ips(self, state: AgentState) -> None:
        """Restrict ``state.target_ips`` to IPs that are online.

        With no requested targets, every online IP becomes a target. When
        requested IPs are dropped, a ``TARGET_SCOPE_CHANGED`` event lists them.
        """
        if not state.target_ips:
            state.target_ips = state.online_ips.copy()
            return
        online = set(state.online_ips)
        requested = state.target_ips
        state.target_ips = [ip for ip in requested if ip in online]
        missing = [ip for ip in requested if ip not in online]
        if missing:
            state.events.append(
                {
                    "type": "TARGET_SCOPE_CHANGED",
                    "message": "some requested IPs are not online",
                    "missing_ips": missing,
                    "target_ips": state.target_ips,
                }
            )

    def _business_failed(self, action: str, values: dict[str, Any]) -> bool:
        """Tell whether an action that returned ok still failed logically.

        Only ``verify-ip`` is inspected: it fails when ``SUCCESS`` is present
        and is not ``true`` / ``1`` / ``yes`` (case-insensitive). A missing
        ``SUCCESS`` value is not treated as a failure.
        """
        if action == "verify-ip":
            success = values.get("SUCCESS")
            if success is None:
                return False
            return str(success).lower() not in ("true", "1", "yes")
        return False

    def _apply_ip_result(self, ip_state: dict[str, Any], action: str, values: dict[str, Any]) -> None:
        """Store the ``LOG_FILE`` value of a ``download-log`` action on *ip_state*."""
        if action == "download-log":
            ip_state["log_file"] = str(values.get("LOG_FILE", ""))

    def _record_ip_failure(self, state: AgentState, ip: str, action: str, reason: str) -> None:
        """Mark *ip* as failed at *action* and request a rollback confirmation.

        ``rollback_stop_first`` is set when the failing action is
        ``start-ip`` or ``verify-ip``.
        """
        ip_state = state.ip_states[ip]
        stop_first = action in ("start-ip", "verify-ip")
        ip_state.update(
            {
                "status": "FAILED",
                "failed_stage": action,
                "failure_reason": reason,
                "rollback_status": "PENDING_AGENT_CONFIRMATION",
                "rollback_stop_first": stop_first,
            }
        )
        state.last_failed_step = action
        state.events.append(
            {
                "type": "CONFIRMATION_REQUIRED",
                "stage": "rollback-ip",
                "ip": ip,
                "stop_first": stop_first,
                "message": f"{action} failed; rollback confirmation required",
            }
        )

    def _download_log_best_effort(self, state: AgentState, ip: str) -> None:
        """Try to download the log for *ip* after a failure.

        The outcome is only recorded as an event (and as ``log_file`` on
        success); a failed download does not change the IP's status.
        """
        result = self.router.run_action(state, "download-log", ip=ip)
        ip_state = state.ip_states[ip]
        if result.ok:
            ip_state["log_file"] = str(result.values.get("LOG_FILE", ""))
            state.events.append(
                {
                    "type": "ACTION_DONE",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": "best effort log downloaded",
                }
            )
        else:
            state.events.append(
                {
                    "type": "ACTION_FAIL",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.error_summary or "best effort log download failed",
                }
            )

    def _save_checkpoint(self, state: AgentState) -> None:
        """Persist *state* when a checkpoint path is configured; otherwise do nothing."""
        if state.checkpoint_path:
            # NOTE(review): the checkpoint is written with redact=False;
            # confirm the state holds nothing that should be redacted.
            save_checkpoint(state, state.checkpoint_path, redact=False)

    def render_report(self, state: AgentState) -> str:
        """Render a markdown deployment report with totals and one row per IP."""
        statuses = [item.get("status") for item in state.ip_states.values()]
        success = statuses.count("SUCCESS")
        failed = statuses.count("FAILED")
        lines = [
            "## PAM 智能部署报告",
            "",
            f"- 执行策略: {state.execution_strategy}",
            f"- 机场: {state.params['AIRPORT_CODE']}",
            f"- 应用: {state.params['APP_NAME']}",
            f"- 模块: {state.params['MODULE_NAME']}",
            f"- 版本: {state.params['VERSION_NUMBER']}",
            f"- 在线工作站数: {len(state.online_ips)}",
            f"- 目标工作站数: {len(state.target_ips)}",
            f"- 成功: {success}",
            f"- 失败: {failed}",
            f"- 待确认: {state.pending_confirmation or '-'}",
            "",
            "| IP | 状态 | 失败阶段 | 回滚状态 | 日志 |",
            "| --- | --- | --- | --- | --- |",
        ]
        for ip, ip_state in state.ip_states.items():
            status = ip_state.get("status", "")
            # Empty cells are shown as "-" to keep the table readable.
            failed_stage = ip_state.get("failed_stage") or "-"
            rollback_status = ip_state.get("rollback_status") or "-"
            log_file = ip_state.get("log_file") or "-"
            lines.append(f"| {ip} | {status} | {failed_stage} | {rollback_status} | {log_file} |")
        return "\n".join(lines)