dark 14e297a488 feat: 落地 PAM 智能部署 Agent 骨架
- 新增 pam_deploy_graph 包,包含 Agent runtime、ActionRouter、脚本/MCP/fake runner
- 支持 hybrid_node_mcp 策略:PAM_HOME 走脚本 action,PAM_NODE 走 MCP
- 支持 script_only 离线策略,全部 action 走现有脚本 action
- 新增 LLM structured output 骨架和规则 fallback,支持意图识别、参数抽取、计划生成
- 新增 LangGraph StateGraph 工厂和 MCP client adapter
- 新增 CLI:preview、analyze、run-global、run-deploy
- 增加 fake 完整部署流程、单 IP 失败待回滚确认状态和报告输出
- 增加单元测试覆盖路由、parser、runner、Skill 加载、LLM 输出、MCP adapter 和 LangGraph 图
- 更新 README,记录当前代码骨架、进度、使用方式和下一步计划
2026-05-29 15:53:47 +08:00

342 lines
14 KiB
Python

"""PAM deploy Agent runtime.
This is intentionally runnable without langgraph installed. The same nodes can
be wired into LangGraph later via pam_deploy_graph.graph.
"""
from __future__ import annotations
import time
from pathlib import Path
from typing import Any
from .action_router import ActionRouter, build_action_backends
from .config_writer import write_config
from .constants import DEFAULT_PARAMS, GLOBAL_ACTION_SEQUENCE, IP_ACTION_SEQUENCE, REQUIRED_PARAMS
from .fake_runner import FakeActionRunner
from .llm import RuleBasedLlmClient, validate_deploy_plan, validate_intent_result
from .mcp_runner import McpActionRunner
from .models import AgentState, ExecutionStrategy, LlmDeployPlan, LlmIntentResult, LlmParamResult
from .script_runner import ScriptActionRunner, select_script_entry
from .skill_policy import load_skill_policy
class PamDeployAgent:
    """Runtime that drives a PAM deployment through routed actions.

    The agent wires together a skill policy, the script/MCP/fake action
    runners, an ``ActionRouter`` and an LLM client. It exposes helpers to
    analyze a request, preview a run, execute the global and per-IP action
    sequences, and render a Markdown report from the resulting state.
    """

    def __init__(
        self,
        *,
        skill_path: str | Path = "doc_scripts/PAM_AUTO_DEPLY_SKILL.md",
        script_base_dir: str | Path = "doc_scripts",
        mcp_runner: McpActionRunner | None = None,
        fake_runner: FakeActionRunner | None = None,
        llm_client: RuleBasedLlmClient | None = None,
    ) -> None:
        """Build the runners and the router used by every flow.

        Args:
            skill_path: File handed to ``load_skill_policy``.
            script_base_dir: Directory handed to ``ScriptActionRunner``.
            mcp_runner: Optional MCP runner; passed to the router as-is
                (may be ``None``).
            fake_runner: Optional fake runner; a ``FakeActionRunner`` is
                created when omitted.
            llm_client: Optional LLM client; a ``RuleBasedLlmClient`` is
                created when omitted.
        """
        self.skill_policy = load_skill_policy(skill_path)
        self.script_base_dir = Path(script_base_dir)
        self.script_runner = ScriptActionRunner(self.script_base_dir)
        self.fake_runner = fake_runner or FakeActionRunner()
        self.mcp_runner = mcp_runner
        self.llm_client = llm_client or RuleBasedLlmClient()
        self.router = ActionRouter(
            script_runner=self.script_runner,
            mcp_runner=mcp_runner,
            fake_runner=self.fake_runner,
        )

    def understand_request(self, text: str) -> LlmIntentResult:
        """Ask the LLM client for the intent of *text* and validate it."""
        result = self.llm_client.understand_request(text)
        validate_intent_result(result)
        return result

    def extract_params(self, text: str, base_params: dict[str, Any] | None = None) -> LlmParamResult:
        """Delegate parameter extraction for *text* to the LLM client."""
        return self.llm_client.extract_params(text, base_params)

    def generate_plan(
        self,
        *,
        params: dict[str, Any],
        intent: str,
        strategy: ExecutionStrategy,
    ) -> LlmDeployPlan:
        """Ask the LLM client for a deploy plan and validate it."""
        plan = self.llm_client.generate_plan(params=params, intent=intent, strategy=strategy)
        validate_deploy_plan(plan)
        return plan

    def analyze_request(self, text: str, base_params: dict[str, Any] | None = None) -> dict[str, Any]:
        """Run intent recognition, parameter extraction and plan generation.

        Returns:
            A dict with the keys ``"intent"``, ``"params"`` and ``"plan"``.
        """
        intent = self.understand_request(text)
        params = self.extract_params(text, base_params)
        strategy = self._choose_strategy(intent.strategy_preference)
        plan = self.generate_plan(
            # Extracted values override the defaults.
            params={**DEFAULT_PARAMS, **params.extracted_params},
            intent=intent.intent,
            strategy=strategy,
        )
        return {
            "intent": intent,
            "params": params,
            "plan": plan,
        }

    def normalize_params(self, params: dict[str, Any]) -> dict[str, Any]:
        """Overlay *params* on ``DEFAULT_PARAMS`` and check required keys.

        Raises:
            ValueError: If any key of ``REQUIRED_PARAMS`` is absent or falsy
                after merging.
        """
        normalized = {**DEFAULT_PARAMS, **params}
        missing = [key for key in REQUIRED_PARAMS if not normalized.get(key)]
        if missing:
            raise ValueError(f"Missing required params: {', '.join(missing)}")
        return normalized

    def _choose_strategy(self, preference: str) -> ExecutionStrategy:
        """Return *preference* if it is a known strategy, else the hybrid default."""
        if preference in ("hybrid_node_mcp", "script_only", "fake"):
            return preference  # type: ignore[return-value]
        return "hybrid_node_mcp"

    def create_state(
        self,
        *,
        params: dict[str, Any],
        execution_strategy: ExecutionStrategy = "hybrid_node_mcp",
        run_id: str | None = None,
        script_entry: str | None = None,
        config_path: str | None = None,
        trace_file_path: str | None = None,
        target_ips: list[str] | None = None,
    ) -> AgentState:
        """Create the ``AgentState`` for a run and write its config file.

        Omitted values are derived: ``run_id`` from the local time formatted
        as ``%Y%m%d_%H%M%S``, ``script_entry`` from ``select_script_entry()``,
        ``config_path`` as ``runtime/config_<run_id>.txt`` and
        ``trace_file_path`` as ``logs/api_trace_<run_id>.log``.

        Side effect: the normalized parameters are passed to ``write_config``
        together with the config path.

        Raises:
            ValueError: Propagated from ``normalize_params``.
        """
        normalized = self.normalize_params(params)
        actual_run_id = run_id or time.strftime("%Y%m%d_%H%M%S")
        actual_script_entry = script_entry or select_script_entry()
        runtime_dir = Path("runtime")
        actual_config_path = config_path or str(runtime_dir / f"config_{actual_run_id}.txt")
        actual_trace_path = trace_file_path or str(Path("logs") / f"api_trace_{actual_run_id}.log")
        write_config(normalized, actual_config_path)
        return AgentState(
            run_id=actual_run_id,
            params=normalized,
            execution_strategy=execution_strategy,
            action_backends=build_action_backends(execution_strategy),
            script_entry=actual_script_entry,
            script_base_dir=str(self.script_base_dir),
            config_path=actual_config_path,
            trace_file_path=actual_trace_path,
            target_ips=target_ips or [],
        )

    def preview(self, params: dict[str, Any], strategy: ExecutionStrategy = "hybrid_node_mcp") -> str:
        """Render a Markdown dry-run summary for *params* under *strategy*.

        The table lists one row per action of ``GLOBAL_ACTION_SEQUENCE`` with
        the backend returned by ``build_action_backends``.

        Raises:
            ValueError: Propagated from ``normalize_params``.
        """
        normalized = self.normalize_params(params)
        routes = build_action_backends(strategy)
        if strategy == "hybrid_node_mcp":
            home_backend = "脚本 action"
            node_backend = "MCP"
        elif strategy == "script_only":
            home_backend = "脚本 action"
            node_backend = "脚本 action"
        else:
            home_backend = "fake"
            node_backend = "fake"
        lines = [
            "## PAM 部署预演",
            "",
            f"- 执行策略: {strategy}",
            f"- PAM_HOME: {home_backend}",
            f"- PAM_NODE: {node_backend}",
            f"- 机场: {normalized['AIRPORT_CODE']}",
            f"- 应用: {normalized['APP_NAME']}",
            f"- 模块: {normalized['MODULE_NAME']}",
            f"- 版本: {normalized['VERSION_NUMBER']}",
            "",
            "| action | backend |",
            "| --- | --- |",
        ]
        lines.extend(f"| `{action}` | `{routes[action]}` |" for action in GLOBAL_ACTION_SEQUENCE)
        return "\n".join(lines)

    def run_global_flow(self, state: AgentState) -> AgentState:
        """Run every action of ``GLOBAL_ACTION_SEQUENCE`` in order.

        Each action appends an ``ACTION_DONE`` / ``ACTION_FAIL`` event. On
        success the returned values are folded into *state* and the action is
        recorded as completed.

        Raises:
            RuntimeError: On the first action whose result is not ``ok``;
                ``state.last_failed_step`` is set before raising.
        """
        for action in GLOBAL_ACTION_SEQUENCE:
            kwargs: dict[str, Any] = {}
            if action == "publish-version":
                kwargs["hash_code"] = state.hash_code
            result = self.router.run_action(state, action, **kwargs)
            # A failure event must never carry the success text "ok".
            fallback = "ok" if result.ok else f"{action} failed"
            state.events.append(
                {
                    "type": "ACTION_DONE" if result.ok else "ACTION_FAIL",
                    "stage": action,
                    "backend": result.backend,
                    "message": result.error_summary or fallback,
                }
            )
            if not result.ok:
                state.last_failed_step = action
                raise RuntimeError(f"{action} failed: {result.error_summary}")
            self._apply_result(state, action, result.values)
            state.completed_global_steps.append(action)
            state.last_success_step = action
        return state

    def run_deploy_flow(self, state: AgentState) -> AgentState:
        """Run the global flow followed by the per-IP flow."""
        self.run_global_flow(state)
        self.run_ip_flow(state)
        return state

    def run_ip_flow(self, state: AgentState) -> AgentState:
        """Run ``IP_ACTION_SEQUENCE`` for each target IP, stopping on failure.

        An action counts as failed when its result is not ``ok`` or when
        ``_business_failed`` says so. On the first failure the method records
        the failure, attempts a best-effort log download (unless the failing
        action is ``download-log`` itself), sets
        ``state.pending_confirmation`` to ``rollback-ip:<ip>`` and returns
        immediately; remaining IPs are not processed.
        """
        self._resolve_target_ips(state)
        for ip in state.target_ips:
            state.events.append({"type": "IP_START", "ip": ip, "message": "start"})
            ip_state = {
                "status": "RUNNING",
                "completed_steps": [],
                "failed_stage": "",
                "failure_reason": "",
                "rollback_status": "ROLLBACK_NOT_RUN",
                "rollback_stop_first": False,
                "log_file": "",
            }
            state.ip_states[ip] = ip_state
            for action in IP_ACTION_SEQUENCE:
                result = self.router.run_action(state, action, ip=ip)
                failed = (not result.ok) or self._business_failed(action, result.values)
                # A failure event must never fall back to the success text "ok".
                fallback = f"{action} failed" if failed else "ok"
                state.events.append(
                    {
                        "type": "ACTION_FAIL" if failed else "ACTION_DONE",
                        "stage": action,
                        "backend": result.backend,
                        "ip": ip,
                        "message": result.error_summary or result.values.get("MESSAGE", fallback),
                    }
                )
                if failed:
                    self._record_ip_failure(state, ip, action, result.error_summary or str(result.values))
                    if action != "download-log":
                        self._download_log_best_effort(state, ip)
                    state.pending_confirmation = f"rollback-ip:{ip}"
                    return state
                self._apply_ip_result(ip_state, action, result.values)
                ip_state["completed_steps"].append(action)
            ip_state["status"] = "SUCCESS"
            state.events.append({"type": "IP_DONE", "ip": ip, "message": "success"})
        return state

    def _apply_result(self, state: AgentState, action: str, values: dict[str, Any]) -> None:
        """Copy well-known keys of a global action result into *state*.

        ``HASH_CODE`` and ``NODE_URL`` are stored as strings whenever present.
        For ``get-online-ips`` the ``IP`` value (a single string or an
        iterable) becomes ``state.online_ips``; a missing, ``None`` or empty
        value yields an empty list. ``state.target_ips`` is only filled from
        the online list when it is still empty.
        """
        if "HASH_CODE" in values:
            state.hash_code = str(values["HASH_CODE"])
        if "NODE_URL" in values:
            state.node_url = str(values["NODE_URL"])
        if action == "get-online-ips":
            # `or []` guards against an explicit None / "" which would
            # otherwise crash list() or produce a bogus [""] host list.
            ips = values.get("IP") or []
            if isinstance(ips, str):
                ips = [ips]
            state.online_ips = list(ips)
            state.target_ips = state.target_ips or state.online_ips.copy()

    def _resolve_target_ips(self, state: AgentState) -> None:
        """Restrict ``state.target_ips`` to IPs that are currently online.

        With no requested targets, every online IP becomes a target. When
        some requested IPs are not online, a ``TARGET_SCOPE_CHANGED`` event
        listing them is appended.
        """
        if not state.target_ips:
            state.target_ips = state.online_ips.copy()
            return
        online = set(state.online_ips)
        requested = state.target_ips
        state.target_ips = [ip for ip in requested if ip in online]
        missing = [ip for ip in requested if ip not in online]
        if missing:
            state.events.append(
                {
                    "type": "TARGET_SCOPE_CHANGED",
                    "message": "some requested IPs are not online",
                    "missing_ips": missing,
                    # Snapshot, so later changes to state.target_ips do not
                    # rewrite what this event recorded.
                    "target_ips": list(state.target_ips),
                }
            )

    def _business_failed(self, action: str, values: dict[str, Any]) -> bool:
        """Tell whether a technically successful action failed logically.

        Only ``verify-ip`` is inspected: it fails when ``SUCCESS`` is present
        and is not one of ``true`` / ``1`` / ``yes`` (case-insensitive). A
        missing ``SUCCESS`` is not treated as a failure.
        """
        if action == "verify-ip":
            success = values.get("SUCCESS")
            if success is None:
                return False
            return str(success).lower() not in ("true", "1", "yes")
        return False

    def _apply_ip_result(self, ip_state: dict[str, Any], action: str, values: dict[str, Any]) -> None:
        """Store the ``LOG_FILE`` of a ``download-log`` result in *ip_state*."""
        if action == "download-log":
            ip_state["log_file"] = str(values.get("LOG_FILE", ""))

    def _record_ip_failure(self, state: AgentState, ip: str, action: str, reason: str) -> None:
        """Mark *ip* as failed and request a rollback confirmation.

        ``rollback_stop_first`` is set when the failing action is
        ``start-ip`` or ``verify-ip``. A ``CONFIRMATION_REQUIRED`` event is
        appended and ``state.last_failed_step`` is updated.
        """
        ip_state = state.ip_states[ip]
        stop_first = action in ("start-ip", "verify-ip")
        ip_state.update(
            {
                "status": "FAILED",
                "failed_stage": action,
                "failure_reason": reason,
                "rollback_status": "PENDING_AGENT_CONFIRMATION",
                "rollback_stop_first": stop_first,
            }
        )
        state.last_failed_step = action
        state.events.append(
            {
                "type": "CONFIRMATION_REQUIRED",
                "stage": "rollback-ip",
                "ip": ip,
                "stop_first": stop_first,
                "message": f"{action} failed; rollback confirmation required",
            }
        )

    def _download_log_best_effort(self, state: AgentState, ip: str) -> None:
        """Try ``download-log`` for *ip* and record the outcome as an event.

        A non-``ok`` result is only logged as ``ACTION_FAIL``; it does not
        change the IP status.
        """
        result = self.router.run_action(state, "download-log", ip=ip)
        ip_state = state.ip_states[ip]
        if result.ok:
            ip_state["log_file"] = str(result.values.get("LOG_FILE", ""))
            state.events.append(
                {
                    "type": "ACTION_DONE",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": "best effort log downloaded",
                }
            )
        else:
            state.events.append(
                {
                    "type": "ACTION_FAIL",
                    "stage": "download-log",
                    "backend": result.backend,
                    "ip": ip,
                    "message": result.error_summary or "best effort log download failed",
                }
            )

    def render_report(self, state: AgentState) -> str:
        """Render a Markdown report: run summary plus one table row per IP."""
        success = sum(1 for item in state.ip_states.values() if item.get("status") == "SUCCESS")
        failed = sum(1 for item in state.ip_states.values() if item.get("status") == "FAILED")
        lines = [
            "## PAM 智能部署报告",
            "",
            f"- 执行策略: {state.execution_strategy}",
            f"- 机场: {state.params['AIRPORT_CODE']}",
            f"- 应用: {state.params['APP_NAME']}",
            f"- 模块: {state.params['MODULE_NAME']}",
            f"- 版本: {state.params['VERSION_NUMBER']}",
            f"- 在线工作站数: {len(state.online_ips)}",
            f"- 目标工作站数: {len(state.target_ips)}",
            f"- 成功: {success}",
            f"- 失败: {failed}",
            f"- 待确认: {state.pending_confirmation or '-'}",
            "",
            "| IP | 状态 | 失败阶段 | 回滚状态 | 日志 |",
            "| --- | --- | --- | --- | --- |",
        ]
        for ip, ip_state in state.ip_states.items():
            lines.append(
                "| {ip} | {status} | {failed_stage} | {rollback_status} | {log_file} |".format(
                    ip=ip,
                    status=ip_state.get("status", ""),
                    failed_stage=ip_state.get("failed_stage") or "-",
                    rollback_status=ip_state.get("rollback_status") or "-",
                    log_file=ip_state.get("log_file") or "-",
                )
            )
        return "\n".join(lines)