agent_deply/pam_deploy_graph/interactive.py
dark badcce5d2d verify-ip 等逐 IP action 失败后不再进入自动回滚确认,改为保存 failed_stage 并暂停。
用户修复外部问题后输入 resume,会从失败 action 重新执行,而不是结束整个流程。
回滚从 workflow 中拆出,新增显式命令:
chat:rollback [IP]
CLI:rollback --checkpoint ... [--ip ...] [--stop-first|--no-stop-first]
旧 confirm approve/reject 只保留为旧 checkpoint 兼容入口,新流程不再推荐使用。
LangGraph workflow 已移除回滚确认 interrupt 节点,失败暂停和续跑走业务 checkpoint。
README、打包 README、run.sh --help、流程图、todo、提示词基线和测试都已同步。
2026-06-04 13:46:19 +08:00

1135 lines
47 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""PAM 部署 Agent 的常驻式交互 CLI 会话。"""
from __future__ import annotations
import time
import json
import shlex
import builtins
import logging
import os
import sys
from dataclasses import asdict
from pathlib import Path
from typing import Any, Callable
from .agent import PamDeployAgent
from .checkpoint_store import load_agent_state, redact_mapping
from .langgraph_runtime import LangGraphDeploymentRuntime, LangGraphRunResult
from .llm import build_llm_client
from .llm.rule_based import RuleBasedLlmClient
from .logging_utils import configure_logging, json_for_log, redact_for_log
from .mcp_factory import build_mcp_runner_from_config
from .models import AgentState, ExecutionStrategy
from .params_loader import load_params_file
InputFunc = Callable[[str], str]
OutputFunc = Callable[[str], None]
logger = logging.getLogger(__name__)
COMMAND_HELP = """可用命令:
help 显示帮助
preview 查看当前参数和执行策略
analyze <需求> 只做理解和计划,不执行
params 脱敏展示当前会话参数
events [数量] 查看最近 action 事件,默认 10 条
set KEY=VALUE 修改当前会话参数
llm config KEY=VALUE 配置真实 LLM支持 base_url/api_key/model/action_analysis_prompt_file
llm test [文本] 测试当前 LLM client 是否可正常调用
llm fallback 切回本地规则 fallback
llm action-analysis on|off 开关 action 审核详情写入 events
mcp config <路径> 加载 MCP client JSON 配置
run 创建部署任务并执行
status 查看当前运行状态
resume 从当前 checkpoint 续跑
rollback [IP] 显式回滚指定 IP不传 IP 时回滚当前失败 IP
list checkpoints 列出 checkpoint 目录下的 JSON 文件
load params <路径> 加载并热更新参数文件
load checkpoint <路径> 加载指定 checkpoint
checkpoint 显示 checkpoint 路径
exit 退出
也可以直接输入自然语言需求Agent 会先分析并更新会话参数;执行仍需输入 run。
执行中可按 Ctrl+C 中断,保存 checkpoint 后再用 resume 继续。
"""
class InteractiveCliSession:
"""维护一次交互式 CLI 会话的参数、状态和命令处理逻辑。"""
def __init__(
self,
*,
agent: PamDeployAgent,
params: dict[str, Any],
strategy: ExecutionStrategy = "hybrid_node_mcp",
checkpoint_path: str | None = None,
target_ips: list[str] | None = None,
input_func: InputFunc = input,
output_func: OutputFunc = print,
) -> None:
"""初始化会话上下文和输入输出函数。"""
self.agent = agent
self.params = dict(params)
self.strategy = strategy
self.checkpoint_path = checkpoint_path or _default_checkpoint_path()
self.target_ips = list(target_ips or [])
self.input = _build_prompt_input(input_func)
self.output = _build_output_func(output_func)
self.state: AgentState | None = None
self.last_analysis: dict[str, Any] | None = None
self.llm_config: dict[str, str] = {}
self.mcp_config_path: str = ""
self.graph_runtime: LangGraphDeploymentRuntime | None = None
self.agent.progress_callback = self._on_progress
self.log_path = configure_logging()
logger.info(
"chat 会话已初始化 strategy=%s checkpoint=%s target_ips=%s llm_client=%s log_path=%s params=%s",
self.strategy,
self.checkpoint_path,
self.target_ips,
type(self.agent.llm_client).__name__,
self.log_path,
json_for_log(self.params),
)
def run(self) -> None:
"""启动 REPL 循环,直到用户 exit 或输入流结束。"""
logger.info("chat REPL 启动 checkpoint=%s", self.checkpoint_path)
self.output("PAM 部署 Agent 交互式会话")
self.output("输入 help 查看命令,输入 exit 退出。")
self._load_existing_checkpoint_if_any()
while True:
try:
line = self.input("pam-deploy-agent> ")
except KeyboardInterrupt:
logger.info("chat 输入被用户中断")
self.output("已取消当前输入。输入 exit 退出,或继续输入命令。")
continue
except EOFError:
logger.info("chat 输入流结束")
self.output("bye")
return
if not self.handle_line(line):
return
def handle_line(self, line: str) -> bool:
"""处理用户输入的一行命令;返回 False 表示退出会话。"""
text = line.strip()
if not text:
return True
command, _, rest = text.partition(" ")
normalized = command.lower()
logger.info(
"chat 收到输入 command=%s text=%s",
normalized,
redact_for_log(text, max_text_len=500),
)
if normalized in ("exit", "quit", "q"):
logger.info("chat 会话退出")
self.output("bye")
return False
if normalized in ("help", "?"):
self.output(COMMAND_HELP.rstrip())
return True
if normalized == "preview":
self.output(self.agent.preview(self.params, self.strategy))
return True
if normalized == "params":
self._show_params()
return True
if normalized == "events":
self._show_events(rest.strip())
return True
if normalized == "analyze":
self._analyze(rest.strip())
return True
if normalized == "set":
self._set_param(rest.strip())
return True
if normalized == "llm":
self._configure_llm(rest.strip())
return True
if normalized == "mcp":
self._configure_mcp(rest.strip())
return True
if normalized in ("run", "deploy", "execute"):
self._run_deploy()
return True
if normalized == "resume":
self._resume()
return True
if normalized == "rollback":
self._rollback(rest.strip())
return True
if normalized == "status":
self._status()
return True
if normalized == "approve":
self._confirm(approved=True, note=rest.strip())
return True
if normalized == "reject":
self._confirm(approved=False, note=rest.strip())
return True
if normalized == "checkpoint":
self.output(f"checkpoint: {self.checkpoint_path}")
return True
if normalized == "list" and rest.strip().lower() == "checkpoints":
self._list_checkpoints()
return True
if normalized == "load" and rest.strip().lower().startswith("params"):
self._load_params(rest.strip()[len("params") :].strip())
return True
if normalized == "load" and rest.strip().lower().startswith("checkpoint"):
self._load_checkpoint(rest.strip()[len("checkpoint") :].strip())
return True
if _is_small_talk(text):
logger.info("chat 输入识别为寒暄,跳过结构化分析")
self.output("你好。可以输入 help 查看命令,或直接描述部署需求;执行前仍需输入 run 并确认。")
return True
if not _looks_like_deploy_request(text):
logger.info("chat 输入未命中部署需求粗筛,跳过结构化分析")
self.output("我没有识别到明确的部署需求。可以输入 help 查看命令,或用 analyze <需求> 明确触发需求分析。")
return True
self.output("正在分析需求...")
self._analyze(text)
return True
def _analyze(self, text: str) -> None:
"""分析自然语言需求,并更新会话中的参数、策略和目标 IP。"""
if not text:
self.output("请输入要分析的自然语言需求例如analyze 请用 MCP 预演部署 HET。")
return
try:
logger.info("chat 开始需求分析 text_len=%s base_params=%s", len(text), json_for_log(self.params))
result = self.agent.analyze_request(text, self.params)
except Exception as exc:
logger.exception("chat 需求分析失败 text=%s", redact_for_log(text, max_text_len=500))
self.output(f"需求分析失败: {exc}")
return
self.last_analysis = result
param_result = result["params"]
intent_result = result["intent"]
plan = result["plan"]
self.params = dict(param_result.extracted_params)
self.strategy = _choose_strategy(intent_result.strategy_preference, self.strategy)
user_ips = param_result.extracted_control.get("user_specified_ips")
if isinstance(user_ips, list):
self.target_ips = [str(item) for item in user_ips]
self._sync_params_to_state()
safe_payload = redact_mapping({key: asdict(value) for key, value in result.items()})
self.output("已生成结构化理解:")
self.output(f"- intent: {intent_result.intent}")
self.output(f"- strategy: {self.strategy}")
self.output(f"- summary: {plan.summary}")
if param_result.missing_required_params:
self.output("- missing: " + ", ".join(param_result.missing_required_params))
if self.target_ips:
self.output("- target_ips: " + ", ".join(self.target_ips))
self.output("执行请输 run查看完整 JSON 可用一次性 analyze 命令。")
self.output(_format_redacted_params(safe_payload["params"]["extracted_params"]))
logger.info(
"chat 需求分析完成 intent=%s strategy=%s target_ips=%s result=%s",
intent_result.intent,
self.strategy,
self.target_ips,
json_for_log(safe_payload),
)
def _set_param(self, assignment: str) -> None:
"""处理 `set KEY=VALUE` 命令,更新当前会话参数。"""
if "=" not in assignment:
self.output("格式set KEY=VALUE")
return
key, value = assignment.split("=", 1)
key = key.strip()
if not key:
self.output("参数名不能为空。")
return
self.params[key] = value.strip()
self._sync_params_to_state()
self.output(f"已设置 {key}")
logger.info("chat 参数已设置 key=%s params=%s", key, json_for_log(self.params))
def _show_params(self) -> None:
"""脱敏展示当前会话参数。"""
self.output(_format_redacted_params(redact_mapping(self.params)))
def _show_events(self, count_text: str) -> None:
"""展示最近若干条事件。"""
if self.state is None or not self.state.events:
self.output("当前没有事件。")
return
try:
count = int(count_text) if count_text else 10
except ValueError:
self.output("格式events [数量]")
return
events = self.state.events[-max(count, 1) :]
self.output(json.dumps(redact_mapping(events), ensure_ascii=False, indent=2, default=str))
def _configure_llm(self, text: str) -> None:
"""热加载 LLM 配置,或开关 action 后诊断。"""
if not text:
self.output("格式llm config base_url=... api_key=... model=... action_analysis_prompt_file=... | llm test [文本] | llm fallback | llm action-analysis on|off")
return
try:
parts = shlex.split(text)
except ValueError as exc:
logger.exception("chat LLM 命令解析失败 text=%s", redact_for_log(text, max_text_len=500))
self.output(f"LLM 命令解析失败: {exc}")
return
if parts[0] == "fallback":
self.agent.llm_client = RuleBasedLlmClient()
self.llm_config = {}
self.output("已切回本地规则 LLM fallback。")
logger.info("chat LLM 已切回 fallback")
return
if parts[0] == "test":
test_text = text.partition("test")[2].strip()
self._test_llm(test_text)
return
if parts[0] == "action-analysis":
if len(parts) < 2 or parts[1] not in ("on", "off"):
self.output("格式llm action-analysis on|off")
return
self.agent.action_analysis_enabled = parts[1] == "on"
self.output(f"action 审核详情写入 events 已{'开启' if self.agent.action_analysis_enabled else '关闭'}")
logger.info("chat action 审核事件写入开关=%s", self.agent.action_analysis_enabled)
return
if parts[0] != "config":
self.output("未知 llm 命令。")
return
updates = _parse_key_values(parts[1:])
self.llm_config.update(updates)
try:
logger.info("chat 开始加载 LLM 配置 updates=%s merged=%s", json_for_log(updates), json_for_log(self.llm_config))
self.agent.llm_client = build_llm_client(
base_url=self.llm_config.get("base_url"),
api_key=self.llm_config.get("api_key"),
model=self.llm_config.get("model"),
action_analysis_prompt_path=self.llm_config.get("action_analysis_prompt_file"),
)
except Exception as exc:
logger.exception("chat LLM 配置失败 config=%s", json_for_log(self.llm_config))
self.output(f"LLM 配置失败: {exc}")
return
safe = {**self.llm_config}
if safe.get("api_key"):
safe["api_key"] = "***"
self.output("LLM 配置已加载: " + json.dumps(safe, ensure_ascii=False))
logger.info("chat LLM 配置已加载 client=%s config=%s", type(self.agent.llm_client).__name__, json_for_log(self.llm_config))
def _test_llm(self, text: str) -> None:
"""使用当前 LLM client 做一次轻量调用,便于确认配置是否生效。"""
prompt = text or "请判断这是一次 LLM 连通性测试,并返回结构化意图。"
client_name = type(self.agent.llm_client).__name__
self.output(f"正在测试 LLM: {client_name}")
logger.info("chat LLM 测试开始 client=%s prompt=%s", client_name, redact_for_log(prompt, max_text_len=500))
try:
result = self.agent.understand_request(prompt)
except Exception as exc:
logger.exception("chat LLM 测试失败 client=%s", client_name)
self.output(f"LLM 测试失败: {exc}")
return
self.output("LLM 测试通过")
self.output(f"- client: {client_name}")
self.output(f"- intent: {result.intent}")
self.output(f"- strategy: {result.strategy_preference}")
self.output(f"- confidence: {result.confidence}")
if result.reasons:
self.output("- reasons: " + "; ".join(result.reasons))
logger.info("chat LLM 测试通过 client=%s result=%s", client_name, json_for_log(asdict(result)))
def _configure_mcp(self, text: str) -> None:
"""热加载 MCP client 配置。"""
command, _, path = text.partition(" ")
if command != "config" or not path.strip():
self.output("格式mcp config <mcp_client.json>")
return
path = path.strip().strip('"')
try:
logger.info("chat 开始加载 MCP 配置 path=%s", path)
runner = build_mcp_runner_from_config(path)
except Exception as exc:
logger.exception("chat MCP 配置失败 path=%s", path)
self.output(f"MCP 配置失败: {exc}")
return
self.agent.mcp_runner = runner
self.agent.router.mcp_runner = runner
self.mcp_config_path = path
self.output(f"MCP 配置已加载: {path}")
logger.info("chat MCP 配置已加载 path=%s runner=%s", path, type(runner).__name__)
def _list_checkpoints(self) -> None:
"""列出当前 checkpoint 目录下的 JSON 文件。"""
checkpoint_dir = Path(self.checkpoint_path).parent
logger.info("chat 查询 checkpoint 列表 dir=%s", checkpoint_dir)
if not checkpoint_dir.exists():
self.output(f"checkpoint 目录不存在: {checkpoint_dir}")
return
files = sorted(checkpoint_dir.glob("*.json"), key=lambda item: item.stat().st_mtime, reverse=True)
if not files:
self.output(f"checkpoint 目录没有 JSON 文件: {checkpoint_dir}")
return
lines = ["checkpoint 列表:"]
for file in files[:20]:
lines.append(f"- {file}")
self.output("\n".join(lines))
def _load_checkpoint(self, path_text: str) -> None:
"""加载指定 checkpoint 文件。"""
if not path_text:
self.output("格式load checkpoint <路径>")
return
checkpoint = Path(path_text)
logger.info("chat 开始加载 checkpoint path=%s", checkpoint)
if not checkpoint.exists():
self.output(f"checkpoint 不存在: {checkpoint}")
return
self.state = load_agent_state(checkpoint)
self.state.checkpoint_path = str(checkpoint)
self.checkpoint_path = str(checkpoint)
self.params = dict(self.state.params)
self.strategy = self.state.execution_strategy
self.target_ips = list(self.state.target_ips)
self.graph_runtime = None
self.output(f"已加载 checkpoint: {checkpoint}")
logger.info(
"chat checkpoint 已加载 path=%s run_id=%s strategy=%s paused=%s pending=%s",
checkpoint,
self.state.run_id,
self.strategy,
self.state.paused,
self.state.pending_confirmation,
)
if self.state.pending_confirmation:
self._print_confirmation()
self._print_pause_context()
def _load_params(self, path_text: str) -> None:
"""从参数文件热更新当前会话参数,并同步到已暂停 state。"""
if not path_text:
self.output("格式load params <路径>")
return
path = Path(path_text)
logger.info("chat 开始加载参数文件 path=%s", path)
if not path.exists():
self.output(f"参数文件不存在: {path}")
return
try:
updates = load_params_file(path)
except Exception as exc:
logger.exception("chat 参数文件加载失败 path=%s", path)
self.output(f"参数文件加载失败: {exc}")
return
self.params.update(updates)
try:
self.params = self.agent.normalize_params(self.params)
except ValueError as exc:
logger.exception("chat 参数热更新归一化失败 path=%s updates=%s", path, json_for_log(updates))
self.output(f"参数热更新失败: {exc}")
return
self._sync_params_to_state()
self.output(f"已加载参数文件: {path}")
self.output(_format_redacted_params(redact_mapping(self.params)))
logger.info("chat 参数文件已加载 path=%s updates=%s params=%s", path, json_for_log(updates), json_for_log(self.params))
def _run_deploy(self) -> None:
"""在用户确认后创建状态并执行完整部署流程。"""
logger.info("chat run 请求开始 strategy=%s checkpoint=%s target_ips=%s", self.strategy, self.checkpoint_path, self.target_ips)
if self.state and self.state.pending_confirmation:
logger.info("chat run 命中待确认事项 pending=%s", self.state.pending_confirmation)
self._print_confirmation()
return
if not self._prepare_params_for_run():
logger.info("chat run 参数准备失败")
return
problems = self._validate_run_prerequisites(self.params)
if problems:
logger.info("chat run 前置检查失败 problems=%s", problems)
self.output("执行前检查未通过:")
for problem in problems:
self.output(f"- {problem}")
self.output("请修正参数或配置后再输入 run。")
return
if not self._confirm_params_and_scope():
logger.info("chat run 用户取消参数或目标范围确认")
self.output("已取消执行。")
return
if not self._ask_yes_no("即将执行真实 action确认执行请输入 yes: "):
logger.info("chat run 用户取消最终执行确认")
self.output("已取消执行。")
return
self.state = self.agent.create_state(
params=self.params,
execution_strategy=self.strategy,
checkpoint_path=self.checkpoint_path,
target_ips=self.target_ips,
)
self.graph_runtime = None
logger.info(
"chat run state 已创建 run_id=%s strategy=%s checkpoint=%s config=%s target_ips=%s",
self.state.run_id,
self.state.execution_strategy,
self.state.checkpoint_path,
self.state.config_path,
self.state.target_ips,
)
self._execute_current_state()
def _confirm_params_and_scope(self) -> bool:
"""执行前确认参数和目标 IP 范围。"""
self.output(_format_redacted_params(redact_mapping(self.params)))
if not self._ask_yes_no("确认以上参数请输入 yes: "):
return False
if self.target_ips:
self.output("目标 IP: " + ", ".join(self.target_ips))
else:
self.output("目标 IP: 未指定,将在 get-online-ips 后使用全部在线 IP。")
return self._ask_yes_no("确认目标范围请输入 yes: ")
def _resume(self) -> None:
"""从内存状态或 checkpoint 文件继续执行部署流程。"""
if self.state is None:
checkpoint = Path(self.checkpoint_path)
if not checkpoint.exists():
self.output("当前没有可续跑的 checkpoint。")
logger.info("chat resume 未找到 checkpoint path=%s", checkpoint)
return
logger.info("chat resume 从 checkpoint 加载 path=%s", checkpoint)
self.state = load_agent_state(checkpoint)
self.state.checkpoint_path = self.state.checkpoint_path or str(checkpoint)
if self.state.paused:
logger.info("chat resume 清理暂停状态 run_id=%s reason=%s", self.state.run_id, self.state.pause_reason)
self.state = self.agent.resume_state(self.state)
if self.graph_runtime and self.graph_runtime.waiting_confirmation:
logger.info("chat resume 停在 LangGraph 确认点 pending=%s", self.state.pending_confirmation if self.state else "")
self._print_confirmation()
return
logger.info("chat resume 开始执行 run_id=%s checkpoint=%s", self.state.run_id if self.state else "", self.checkpoint_path)
self._execute_current_state()
def _execute_current_state(self) -> None:
"""执行当前 state并输出报告、确认提示和 checkpoint 路径。"""
if self.state is None:
self.output("当前没有运行状态。")
return
logger.info(
"chat 开始执行当前 state run_id=%s strategy=%s checkpoint=%s graph_runtime=%s waiting_confirmation=%s",
self.state.run_id,
self.state.execution_strategy,
self.state.checkpoint_path,
type(self.graph_runtime).__name__ if self.graph_runtime else "",
self.graph_runtime.waiting_confirmation if self.graph_runtime else False,
)
if self.graph_runtime is None or not self.graph_runtime.waiting_confirmation:
try:
self.graph_runtime = LangGraphDeploymentRuntime(agent=self.agent)
except RuntimeError as exc:
logger.exception("chat LangGraph runtime 不可用,降级本地执行 run_id=%s", self.state.run_id)
self.output(f"LangGraph 确认运行器不可用,降级为本地执行: {exc}")
self.graph_runtime = None
try:
self.state = self.agent.run_deploy_flow(self.state)
except KeyboardInterrupt:
self._handle_execution_interrupt()
return
except Exception as fallback_exc:
self._handle_execution_error(fallback_exc)
return
self._print_state_report_and_checkpoint()
logger.info("chat 本地执行完成 run_id=%s checkpoint=%s", self.state.run_id, self.state.checkpoint_path)
return
try:
result = self.graph_runtime.start(self.state)
except KeyboardInterrupt:
self._handle_execution_interrupt()
return
except Exception as exc:
self._handle_execution_error(exc)
return
self._apply_graph_result(result)
logger.info(
"chat LangGraph 执行返回 interrupted=%s pending=%s checkpoint=%s",
result.interrupted,
self.state.pending_confirmation if self.state else "",
self.state.checkpoint_path if self.state else self.checkpoint_path,
)
def _prepare_params_for_run(self) -> bool:
"""执行前归一化参数,确保确认值和实际写入脚本配置一致。"""
try:
self.params = self.agent.normalize_params(self.params)
except ValueError as exc:
logger.exception("chat 参数检查失败 params=%s", json_for_log(self.params))
self.output(f"参数检查失败: {exc}")
return False
logger.info("chat 参数检查通过 params=%s", json_for_log(self.params))
return True
def _validate_run_prerequisites(self, params: dict[str, Any]) -> list[str]:
"""在创建 state 前检查可提前发现的运行问题。"""
problems: list[str] = []
if self.strategy != "fake":
zip_path = str(params.get("ZIP_FILE_PATH", "")).strip()
if not _path_exists(zip_path):
problems.append(f"ZIP_FILE_PATH 不存在: {zip_path}")
if self.strategy in ("script_only", "hybrid_node_mcp"):
script_entry = self.agent.script_base_dir / "deploy.sh"
ps_entry = self.agent.script_base_dir / "deploy.ps1"
if not script_entry.exists() and not ps_entry.exists():
problems.append(f"脚本入口不存在: {script_entry}{ps_entry}")
if self.strategy == "hybrid_node_mcp" and self.agent.mcp_runner is None:
problems.append("当前策略需要 MCP runner请启动时传 --mcp-config 或在 chat 内执行 mcp config <路径>。")
return problems
def _handle_execution_error(self, exc: Exception) -> None:
"""输出 action 执行失败后的可恢复提示,不再误报 LangGraph 不可用。"""
self.output(f"执行已停止: {exc}")
logger.exception(
"chat 执行停止 run_id=%s checkpoint=%s",
self.state.run_id if self.state else "",
self.state.checkpoint_path if self.state else self.checkpoint_path,
)
if self.state is None:
return
if self.state.last_failed_step:
self.output(f"最后失败步骤: {self.state.last_failed_step}")
self._print_pause_context()
if self.state.pending_confirmation:
self._print_confirmation()
self.output(f"checkpoint: {self.state.checkpoint_path or self.checkpoint_path}")
self.output("请修正参数或外部环境后,使用 load checkpoint <路径> / resume 继续,或重新 run。")
def _handle_execution_interrupt(self) -> None:
"""处理执行中的用户中断,并保留断点。"""
if self.state is None:
self.output("执行已中断。")
logger.info("chat 执行中断时没有 state")
return
self.graph_runtime = None
logger.info("chat 执行被用户中断 run_id=%s checkpoint=%s", self.state.run_id, self.state.checkpoint_path)
self.state = self.agent.pause_state(
self.state,
reason="user_interrupted",
review_context={"type": "user_interrupt", "message": "用户手动中断执行"},
)
self.output("执行已由用户中断,当前 checkpoint 已保存。")
self._print_pause_context()
self.output(f"checkpoint: {self.state.checkpoint_path or self.checkpoint_path}")
def _apply_graph_result(self, result: LangGraphRunResult) -> None:
"""把 LangGraph 运行结果同步回 chat 会话并输出用户可见状态。"""
if result.state is not None:
self.state = result.state
if self.state is None:
self.output("当前没有运行状态。")
return
logger.info(
"chat 应用 LangGraph 结果 run_id=%s interrupted=%s pending=%s paused=%s",
self.state.run_id,
result.interrupted,
self.state.pending_confirmation,
self.state.paused,
)
self.output(result.report or self.agent.render_report(self.state))
if result.interrupted and result.confirmation:
self._print_confirmation_request(result.confirmation)
elif self.state.pending_confirmation:
self._print_confirmation()
self._print_pause_context()
self.output(f"checkpoint: {self.state.checkpoint_path or self.checkpoint_path}")
def _print_state_report_and_checkpoint(self) -> None:
"""输出本地执行路径的状态报告和 checkpoint。"""
if self.state is None:
return
self.output(self.agent.render_report(self.state))
self._print_pause_context()
if self.state.pending_confirmation:
self._print_confirmation()
self.output(f"checkpoint: {self.state.checkpoint_path or self.checkpoint_path}")
def _status(self) -> None:
"""输出当前运行状态;没有 state 时输出 checkpoint 路径。"""
if self.state is None:
self.output("当前还没有运行状态。")
self.output(f"checkpoint: {self.checkpoint_path}")
return
self.output(self.agent.render_report(self.state))
self._print_pause_context()
if self.state.pending_confirmation:
self._print_confirmation()
def _confirm(self, *, approved: bool, note: str = "") -> None:
"""处理 approve/reject 命令。"""
if self.state is None:
checkpoint = Path(self.checkpoint_path)
if checkpoint.exists():
logger.info("chat confirm 从 checkpoint 加载 path=%s", checkpoint)
self.state = load_agent_state(checkpoint)
self.state.checkpoint_path = self.state.checkpoint_path or str(checkpoint)
else:
self.output("当前没有待确认任务。")
logger.info("chat confirm 无 state 且 checkpoint 不存在 path=%s", checkpoint)
return
if not self.state.pending_confirmation:
self.output("当前没有待确认任务。")
logger.info("chat confirm 无待确认事项 run_id=%s", self.state.run_id)
return
logger.info(
"chat confirm 开始 run_id=%s approved=%s pending=%s note_len=%s",
self.state.run_id,
approved,
self.state.pending_confirmation,
len(note),
)
if self.graph_runtime and self.graph_runtime.waiting_confirmation:
try:
result = self.graph_runtime.resume(approved=approved, note=note)
except RuntimeError as exc:
logger.exception("chat LangGraph 确认恢复失败,降级本地确认 run_id=%s", self.state.run_id)
self.output(f"LangGraph 确认恢复失败,降级为本地确认: {exc}")
else:
self._apply_graph_result(result)
return
self.state = self.agent.confirm_pending(self.state, approved=approved, operator_note=note)
logger.info(
"chat confirm 完成 run_id=%s pending=%s paused=%s",
self.state.run_id,
self.state.pending_confirmation,
self.state.paused,
)
self.output(self.agent.render_report(self.state))
self._print_pause_context()
if self.state.pending_confirmation:
self._print_confirmation()
def _rollback(self, text: str) -> None:
"""显式执行单 IP 回滚;主 workflow 不再自动触发回滚。"""
if self.state is None:
checkpoint = Path(self.checkpoint_path)
if checkpoint.exists():
logger.info("chat rollback 从 checkpoint 加载 path=%s", checkpoint)
self.state = load_agent_state(checkpoint)
self.state.checkpoint_path = self.state.checkpoint_path or str(checkpoint)
else:
self.output("当前没有可回滚的运行状态。")
logger.info("chat rollback 无 state 且 checkpoint 不存在 path=%s", checkpoint)
return
try:
ip, stop_first, note = _parse_rollback_args(text)
except ValueError as exc:
self.output(f"rollback 参数错误: {exc}")
return
ip = ip or _find_current_failed_ip(self.state)
if not ip:
self.output("未找到当前失败 IP请使用 rollback <IP> 指定。")
logger.info("chat rollback 未找到可回滚 IP run_id=%s", self.state.run_id)
return
logger.info(
"chat rollback 开始 run_id=%s ip=%s stop_first=%s note_len=%s",
self.state.run_id,
ip,
stop_first,
len(note),
)
self.graph_runtime = None
try:
self.state = self.agent.rollback_ip(
self.state,
ip,
stop_first=stop_first,
operator_note=note,
)
except Exception as exc:
logger.exception("chat rollback 执行失败 run_id=%s ip=%s", self.state.run_id, ip)
self.output(f"rollback 执行失败: {exc}")
self._print_pause_context()
return
self.output(self.agent.render_report(self.state))
self._print_pause_context()
self.output(f"checkpoint: {self.state.checkpoint_path or self.checkpoint_path}")
if not self.state.paused:
self.output("回滚已完成;如需继续主流程,输入 resume。")
logger.info(
"chat rollback 完成 run_id=%s ip=%s status=%s paused=%s",
self.state.run_id,
ip,
self.state.ip_states.get(ip, {}).get("rollback_status"),
self.state.paused,
)
def _sync_params_to_state(self) -> None:
"""若当前已有 state则把热更新参数同步到 checkpoint/config。"""
if self.state is None:
return
try:
self.state = self.agent.update_state_params(self.state, self.params)
except ValueError as exc:
logger.exception("chat 参数同步到 state 失败 run_id=%s params=%s", self.state.run_id, json_for_log(self.params))
self.output(f"参数同步到当前任务失败: {exc}")
return
self.params = dict(self.state.params)
if self.target_ips:
self.state.target_ips = list(self.target_ips)
logger.info(
"chat 参数已同步到 state run_id=%s checkpoint=%s params=%s target_ips=%s",
self.state.run_id,
self.state.checkpoint_path,
json_for_log(self.params),
self.target_ips,
)
def _print_pause_context(self) -> None:
"""输出暂停原因和审核建议,避免黑盒暂停。"""
if self.state is None or not self.state.paused:
return
context = self.state.review_context or {}
reason = self.state.pause_reason or "unknown"
self.output(f"当前流程已暂停: {reason}")
if context.get("stage"):
self.output(f"- stage: {context.get('stage')}")
if context.get("ip"):
self.output(f"- ip: {context.get('ip')}")
if context.get("possible_reason"):
self.output(f"- reason: {context.get('possible_reason')}")
elif context.get("error_summary"):
self.output(f"- reason: {context.get('error_summary')}")
if context.get("suggested_action"):
self.output(f"- suggestion: {context.get('suggested_action')}")
if context.get("severity"):
self.output(f"- severity: {context.get('severity')}")
if context.get("notes"):
self.output("- notes: " + "; ".join(str(item) for item in context.get("notes", [])))
if reason == "user_interrupted":
self.output("输入 resume 可从当前 checkpoint 继续。")
elif reason == "llm_review_blocked":
self.output("请根据以上建议判断后续;如需继续,输入 resume。")
elif reason == "action_failed":
ip = context.get("ip")
rollback_hint = f"rollback {ip}" if ip else "rollback <IP>"
self.output(f"请修复失败原因后输入 resume 重试当前 action如需回滚输入 {rollback_hint}")
elif reason == "rollback_failed":
self.output("请检查回滚失败原因;修复后可再次输入 rollback 重试,或人工处理后再 resume。")
def _on_progress(self, payload: dict[str, Any]) -> None:
"""把 Agent action 进度转成 chat 可见输出。"""
event_type = str(payload.get("type", ""))
stage = str(payload.get("stage", ""))
backend = str(payload.get("backend", ""))
ip = str(payload.get("ip", ""))
message = str(payload.get("message", ""))
suffix_parts = []
if backend:
suffix_parts.append(f"backend={backend}")
if ip:
suffix_parts.append(f"ip={ip}")
suffix = f" [{', '.join(suffix_parts)}]" if suffix_parts else ""
if event_type == "ACTION_START":
self.output(f"开始执行 action: {stage}{suffix}")
elif event_type == "ACTION_DONE":
detail = f": {message}" if message and message != "ok" else ""
self.output(f"完成 action: {stage}{suffix}{detail}")
elif event_type == "ACTION_FAIL":
detail = f": {message}" if message else ""
self.output(f"失败 action: {stage}{suffix}{detail}")
elif event_type == "ACTION_REVIEW_START":
self.output(f"开始分析 action 结果: {stage}{suffix}")
elif event_type == "ACTION_REVIEW_DONE":
detail = f": {message}" if message else ""
self.output(f"分析完成: {stage}{suffix}{detail}")
elif event_type == "ACTION_REVIEW_FAIL":
detail = f": {message}" if message else ""
self.output(f"分析失败: {stage}{suffix}{detail}")
logger.info("chat progress event=%s", json_for_log(payload))
def _print_confirmation(self) -> None:
"""输出当前待人工确认事项。"""
if self.state is None:
return
request = self.agent.build_confirmation_request(self.state)
if not request:
return
self._print_confirmation_request(request)
def _print_confirmation_request(self, request: dict[str, Any]) -> None:
"""输出指定的人工确认请求。"""
self.output("需要人工确认:")
self.output(f"- type: {request.get('type')}")
if request.get("ip"):
self.output(f"- ip: {request['ip']}")
if request.get("failed_stage"):
self.output(f"- failed_stage: {request['failed_stage']}")
if request.get("failure_reason"):
self.output(f"- reason: {request['failure_reason']}")
self.output("输入 approve 执行回滚,或 reject [原因] 拒绝回滚。")
def _ask_yes_no(self, prompt: str) -> bool:
"""读取一次 yes/no 确认,只有 yes/y 视为确认。"""
try:
answer = self.input(prompt).strip().lower()
except EOFError:
return False
return answer in ("yes", "y")
def _load_existing_checkpoint_if_any(self) -> None:
"""会话启动时自动加载已存在的 checkpoint。"""
checkpoint = Path(self.checkpoint_path)
if not checkpoint.exists():
return
logger.info("chat 启动时自动加载已有 checkpoint path=%s", checkpoint)
self.state = load_agent_state(checkpoint)
self.state.checkpoint_path = self.state.checkpoint_path or str(checkpoint)
self.output(f"已加载 checkpoint: {checkpoint}")
if self.state.pending_confirmation:
self._print_confirmation()
self._print_pause_context()
def run_interactive_chat(
*,
agent: PamDeployAgent,
params: dict[str, Any],
strategy: ExecutionStrategy,
checkpoint_path: str | None = None,
target_ips: list[str] | None = None,
input_func: InputFunc = input,
output_func: OutputFunc = print,
) -> InteractiveCliSession:
"""创建并运行交互式 CLI 会话,返回会话对象便于测试。"""
session = InteractiveCliSession(
agent=agent,
params=params,
strategy=strategy,
checkpoint_path=checkpoint_path,
target_ips=target_ips,
input_func=input_func,
output_func=output_func,
)
session.run()
return session
def _default_checkpoint_path() -> str:
"""生成默认 chat checkpoint 路径。"""
return str(Path("runtime") / "checkpoints" / f"chat_{time.strftime('%Y%m%d_%H%M%S')}.json")
def _choose_strategy(preference: str, default: ExecutionStrategy) -> ExecutionStrategy:
"""根据 LLM 偏好更新执行策略,非法值保留默认策略。"""
if preference in ("hybrid_node_mcp", "script_only", "fake"):
return preference # type: ignore[return-value]
return default
def _format_redacted_params(params: dict[str, Any]) -> str:
"""把脱敏后的参数字典格式化为多行文本。"""
lines = ["当前参数:"]
for key in sorted(params):
lines.append(f"- {key}: {params[key]}")
return "\n".join(lines)
def _parse_key_values(parts: list[str]) -> dict[str, str]:
"""解析 KEY=VALUE 参数列表。"""
values: dict[str, str] = {}
for part in parts:
if "=" not in part:
continue
key, value = part.split("=", 1)
if key:
values[key] = value
return values
def _parse_rollback_args(text: str) -> tuple[str, bool | None, str]:
"""解析 chat rollback 命令参数,返回 IP、停机覆盖值和备注。"""
try:
parts = shlex.split(text)
except ValueError as exc:
raise ValueError(str(exc)) from exc
ip = ""
stop_first: bool | None = None
note_parts: list[str] = []
index = 0
while index < len(parts):
part = parts[index]
if part == "--stop-first":
stop_first = True
elif part == "--no-stop-first":
stop_first = False
elif part in ("--note", "-n"):
index += 1
if index >= len(parts):
raise ValueError("--note 需要提供备注内容")
note_parts.append(parts[index])
elif not ip:
ip = part
else:
note_parts.append(part)
index += 1
return ip, stop_first, " ".join(note_parts)
def _find_current_failed_ip(state: AgentState) -> str:
"""从当前 state 中找一个适合显式回滚的失败 IP。"""
context_ip = str((state.review_context or {}).get("ip", ""))
if context_ip and context_ip in state.ip_states:
return context_ip
for ip, ip_state in state.ip_states.items():
if ip_state.get("status") == "FAILED":
return ip
return ""
def _is_small_talk(text: str) -> bool:
"""识别不应触发 LLM/结构化分析的简单寒暄。"""
normalized = text.strip().lower()
return normalized in {
"你好",
"您好",
"hello",
"hi",
"hey",
"在吗",
"谢谢",
"thanks",
"thank you",
}
def _looks_like_deploy_request(text: str) -> bool:
"""粗筛自然语言部署需求,避免任意闲聊都触发耗时分析。"""
lowered = text.lower()
deploy_keywords = (
"部署",
"发布",
"升级",
"回滚",
"预演",
"执行",
"pam",
"mcp",
"node",
"版本",
"机场",
"deploy",
"release",
"upgrade",
"rollback",
"preview",
)
param_markers = (
"HOME_BASE_URL",
"CLIENT_ID",
"AIRPORT_CODE",
"APP_NAME",
"MODULE_NAME",
"VERSION_NUMBER",
"ZIP_FILE_PATH",
)
return any(keyword in lowered for keyword in deploy_keywords) or any(marker in text for marker in param_markers)
def _path_exists(path: str) -> bool:
"""检查本地路径是否存在,兼容打包到 Linux 后的绝对路径。"""
if not path:
return False
return Path(path).expanduser().exists()
def _build_prompt_input(input_func: InputFunc) -> InputFunc:
"""如果安装了 prompt_toolkit则启用历史记录和命令补全。"""
if input_func is not builtins.input:
return input_func
if getattr(sys, "frozen", False):
return input_func
try:
from prompt_toolkit import PromptSession
from prompt_toolkit.completion import WordCompleter
from prompt_toolkit.history import FileHistory
except ImportError:
return input_func
commands = [
"help",
"preview",
"analyze",
"params",
"events",
"set",
"llm config",
"llm test",
"llm fallback",
"llm action-analysis on",
"llm action-analysis off",
"mcp config",
"run",
"status",
"resume",
"rollback",
"rollback --stop-first",
"list checkpoints",
"load params",
"load checkpoint",
"checkpoint",
"exit",
]
history = None
try:
history_path = Path("runtime") / "chat_history.txt"
history_path.parent.mkdir(parents=True, exist_ok=True)
history = FileHistory(str(history_path))
except OSError:
history = None
session = PromptSession(
history=history,
completer=WordCompleter(commands, ignore_case=True, sentence=True),
)
return session.prompt
def _build_output_func(output_func: OutputFunc) -> OutputFunc:
"""如果安装了 rich则使用 rich 输出;否则保持原输出函数。"""
if output_func is not builtins.print:
return output_func
try:
from rich.console import Console
from rich.markdown import Markdown
except ImportError:
return output_func
console = Console()
def rich_print(value: str) -> None:
text = str(value)
stripped = text.lstrip()
if stripped.startswith("{") or stripped.startswith("["):
try:
console.print_json(text)
return
except Exception:
pass
if text.startswith("## ") or "\n| ---" in text:
console.print(Markdown(text))
return
console.print(text)
return rich_print