dark 4250a7b221 LLM action 结果分析不再传 state_summary
调整了 agent.py 和 LLM client 协议/实现。
现在只传当前 action 的结构化结果和必要诊断日志,避免历史运行态影响判断。
提示词和文档也已同步说明。

verify-ip 增加健康检查重试
默认 VERIFY_INTERVAL_SEC=10、VERIFY_MAX_ATTEMPTS=12,约 2 分钟。
verify-ip 未通过但未达到最大次数时,会播报进度、保存 checkpoint,并继续从当前 verify-ip 重试,不会进入 download-log。
参数已加入 config.txt.example、脚本配置读取、README、打包 README、Skill 文档和流程图。
2026-06-04 16:57:16 +08:00

333 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""LLM 结构化输出的确定性规则 fallback。
该类不是对真实模型的替代,只用于本地开发和测试时提供稳定输出。
真实 LLM client 需要实现相同方法。
"""
from __future__ import annotations
import logging
import re
from dataclasses import asdict
from typing import Any
from pam_deploy_graph.constants import GLOBAL_ACTION_SEQUENCE, REQUIRED_PARAMS
from pam_deploy_graph.logging_utils import json_for_log, redact_for_log
from pam_deploy_graph.models import (
ActionResult,
ExecutionStrategy,
LlmActionAnalysis,
LlmDeployPlan,
LlmIntentResult,
LlmParamResult,
)
# Module-level logger used by the rule-based client in this file.
logger = logging.getLogger(__name__)

# (alternate spelling, canonical name) pairs accepted in KEY=VALUE user input.
_ALIAS_PAIRS = (
    ("home_base_url", "HOME_BASE_URL"),
    ("client_id", "CLIENT_ID"),
    ("client_secret", "CLIENT_SECRET"),
    ("airportCode", "AIRPORT_CODE"),
    ("applicationName", "APP_NAME"),
    ("moduleName", "MODULE_NAME"),
    ("versionNumber", "VERSION_NUMBER"),
    ("zipFilePath", "ZIP_FILE_PATH"),
    ("actionType", "ACTION_TYPE"),
    ("timeOut", "TIMEOUT"),
    ("logName", "LOG_NAME"),
)

# Lookup table: accepted spelling -> canonical parameter name.
# Every canonical name also maps to itself; lookups are case-sensitive.
KEY_ALIASES = {
    spelling: canonical
    for alias, canonical in _ALIAS_PAIRS
    for spelling in (alias, canonical)
}
class RuleBasedLlmClient:
    """Lightweight, rule-based fallback for the LLM client.

    Every method derives its result from keyword and regular-expression
    rules only, so the output is deterministic for a given input.
    """

    def understand_request(self, text: str) -> LlmIntentResult:
        """Classify the user's intent and execution preference by keyword.

        Args:
            text: Raw user request. Matching is done on the lower-cased text.

        Returns:
            An ``LlmIntentResult`` carrying the intent, the mode/strategy
            preference, a fixed confidence score and the matched reasons.
        """
        logger.info("规则 LLM 意图识别开始 text=%s", redact_for_log(text, max_text_len=800))
        lowered = text.lower()
        reasons: list[str] = []
        intent = "deploy"
        # The first matching keyword group wins; order encodes priority.
        if any(word in lowered for word in ("用法", "怎么用", "生成脚本", "给我脚本", "usage")):
            intent = "show_usage"
            reasons.append("用户在询问脚本用法或脚本生成")
        elif any(word in lowered for word in ("预演", "计划", "不执行", "不要动环境", "dry-run", "preview")):
            intent = "preview"
            reasons.append("用户要求只预演或不触碰环境")
        elif any(word in lowered for word in ("在线ip", "在线 ip", "查询ip", "查询 ip", "node", "工作站")):
            intent = "query_node_ips"
            reasons.append("用户要求查询 Node 或在线工作站")
        elif any(word in lowered for word in ("回滚", "rollback")):
            intent = "rollback"
            reasons.append("用户要求回滚")
        else:
            reasons.append("默认识别为部署请求")

        mode_preference = "未指定"
        strategy_preference = "未指定"
        if any(word in lowered for word in ("mcp", "在线执行", "直接在线")):
            mode_preference = "MCP"
            strategy_preference = "hybrid_node_mcp"
            # NOTE(review): this message reads as if a separator between
            # "MCP" and "PAM_HOME" was lost — confirm against the upstream file.
            reasons.append("用户倾向 MCPPAM_HOME 仍需脚本 action")
        # Checked second on purpose: a script keyword overrides an MCP keyword.
        if any(word in lowered for word in ("脚本", "离线", "script", "shell", "powershell")):
            mode_preference = "API脚本"
            strategy_preference = "script_only"
            reasons.append("用户倾向脚本或离线执行")
        # A preview without an explicit strategy defaults to the hybrid one.
        if intent == "preview" and strategy_preference == "未指定":
            strategy_preference = "hybrid_node_mcp"

        result = LlmIntentResult(
            intent=intent,  # type: ignore[arg-type]
            mode_preference=mode_preference,  # type: ignore[arg-type]
            strategy_preference=strategy_preference,  # type: ignore[arg-type]
            confidence=0.72 if intent != "deploy" else 0.6,
            reasons=reasons,
        )
        logger.info("规则 LLM 意图识别完成 result=%s", json_for_log(asdict(result)))
        return result

    def extract_params(self, text: str, base_params: dict[str, Any] | None = None) -> LlmParamResult:
        """Extract parameters from KEY=VALUE pairs, Chinese phrases and IPs.

        Args:
            text: Raw user request.
            base_params: Optional starting parameters. They are copied, never
                mutated, and are overridden by values found in ``text``.

        Returns:
            An ``LlmParamResult`` with the merged parameters, the control
            values (``user_specified_ips`` when any address is found), the
            required parameters that are still missing and the sensitive
            fields that are present.
        """
        logger.info(
            "规则 LLM 参数抽取开始 text=%s base_params=%s",
            redact_for_log(text, max_text_len=800),
            json_for_log(base_params or {}),
        )
        params = dict(base_params or {})
        # Chinese phrase matches are applied last and therefore take precedence.
        params.update(self._extract_key_values(text))
        params.update(self._extract_chinese_patterns(text))

        control: dict[str, Any] = {}
        ips = self._extract_ips(text)
        if ips:
            control["user_specified_ips"] = ips

        missing = [key for key in REQUIRED_PARAMS if not params.get(key)]
        sensitive = [key for key in ("CLIENT_SECRET", "CLIENT_ID") if params.get(key)]
        result = LlmParamResult(
            extracted_params=params,
            extracted_control=control,
            missing_required_params=missing,
            sensitive_fields_present=sensitive,
        )
        logger.info("规则 LLM 参数抽取完成 result=%s", json_for_log(asdict(result)))
        return result

    def generate_plan(
        self,
        *,
        params: dict[str, Any],
        intent: str,
        strategy: ExecutionStrategy,
    ) -> LlmDeployPlan:
        """Build a deterministic deployment plan with risk notes.

        Args:
            params: Deployment parameters; missing display fields render as "-".
            intent: Recognised intent; deploy, query_node_ips and rollback
                require confirmation.
            strategy: Execution strategy echoed into the plan.

        Returns:
            An ``LlmDeployPlan`` listing every action in
            ``GLOBAL_ACTION_SEQUENCE``.
        """
        logger.info("规则 LLM 计划生成开始 intent=%s strategy=%s params=%s", intent, strategy, json_for_log(params))
        if strategy == "hybrid_node_mcp":
            strategy_text = "PAM_HOME 使用脚本 actionPAM_NODE 使用 MCP"
        elif strategy == "script_only":
            strategy_text = "全部 action 使用脚本 action"
        else:
            strategy_text = "全部 action 使用 fake runner"

        summary = (
            f"计划处理 {params.get('AIRPORT_CODE', '-')}/"
            f"{params.get('APP_NAME', '-')}/"
            f"{params.get('MODULE_NAME', '-')}/"
            f"{params.get('VERSION_NUMBER', '-')},执行策略为 {strategy_text}"
        )
        risk_notes = [
            "真实部署前必须确认参数。",
            "发布版本、创建下载任务、升级和回滚属于高风险动作。",
            "回滚只能在用户确认后执行。",
        ]
        if strategy == "hybrid_node_mcp":
            risk_notes.append("PAM_HOME 当前没有 MCP 能力HOME 阶段仍会调用脚本 action。")

        result = LlmDeployPlan(
            summary=summary,
            risk_notes=risk_notes,
            planned_actions=list(GLOBAL_ACTION_SEQUENCE),
            requires_confirmation=intent in ("deploy", "query_node_ips", "rollback"),
            execution_strategy=strategy,
        )
        logger.info("规则 LLM 计划生成完成 result=%s", json_for_log(asdict(result)))
        return result

    def analyze_action_result(
        self,
        *,
        action: str,
        result: ActionResult,
    ) -> LlmActionAnalysis:
        """Review one action result with local rules.

        Only the given result is inspected. The rules run in a fixed order and
        later rules may escalate the verdict of earlier ones: generic failure,
        verify-ip health check, rollback-ip failure, progress polling, and
        finally the pending-confirmation marker.

        Args:
            action: Name of the action that produced ``result``.
            result: Structured action result; ``ok``, ``values`` and
                ``error_summary`` drive the verdict.

        Returns:
            An ``LlmActionAnalysis`` describing anomaly state, severity,
            reason, suggested next step and whether the flow may continue.
        """
        logger.info(
            "规则 LLM action 审核开始 action=%s result=%s",
            action,
            json_for_log(
                {
                    "backend": result.backend,
                    "ok": result.ok,
                    "exit_code": result.exit_code,
                    "tool_name": result.tool_name,
                    "values": result.values,
                    "error_summary": result.error_summary,
                },
                max_text_len=1000,
            ),
        )
        values = result.values
        notes: list[str] = []
        has_anomaly = not result.ok
        severity = "info"
        possible_reason = ""
        suggested_action = "继续观察。"
        requires_confirmation = False
        should_continue = True
        progress_complete: bool | None = None

        if not result.ok:
            severity = "medium"
            possible_reason = result.error_summary or "action 返回失败状态。"
            suggested_action = "查看 action 诊断日志、参数、网络和目标服务状态。"
            notes.append("硬规则检测到 action 执行失败。")
            should_continue = False

        if action == "verify-ip":
            success = values.get("SUCCESS")
            # A missing SUCCESS field is not treated as a failed health check.
            if success is not None and str(success).strip().lower() not in ("true", "1", "yes"):
                has_anomaly = True
                severity = "high"
                message = values.get("MESSAGE")
                # Coerce to str so a numeric MESSAGE cannot leak a non-string reason.
                possible_reason = str(message) if message else "工作站健康检查未通过。"
                suggested_action = "先下载日志并人工确认是否执行回滚。"
                requires_confirmation = True
                notes.append("verify-ip SUCCESS 非成功值。")
                should_continue = False

        if action == "rollback-ip" and not result.ok:
            severity = "high"
            suggested_action = "保持待确认状态,人工排查回滚失败原因后重试或转人工处理。"
            requires_confirmation = True
            notes.append("rollback-ip 失败需要人工处理。")
            should_continue = False

        if action in ("poll-download-progress", "poll-upgrade-progress"):
            progress_complete, progress_has_anomaly, progress_reason, progress_note = _analyze_progress_values(
                action, values
            )
            if progress_note:
                notes.append(progress_note)
            if progress_has_anomaly:
                has_anomaly = True
                severity = "high"
                possible_reason = progress_reason or possible_reason or "进度接口返回失败状态。"
                suggested_action = "停止后续 action检查下载/推送任务状态、PAM_HOME/PAM_NODE 日志和接口返回。"
                should_continue = False
            elif result.ok:
                # Only a successful poll may advertise "continue"/"keep polling".
                # A failed poll keeps the failure guidance set above, so the
                # suggestion never contradicts should_continue=False.
                if progress_complete:
                    suggested_action = "进度已完成,可以继续下一个 action。"
                else:
                    suggested_action = "进度未完成,继续查询进度。"

        pending = values.get("PENDING_AGENT_CONFIRMATION")
        if pending:
            has_anomaly = True
            severity = "high"
            possible_reason = str(pending)
            suggested_action = "暂停自动流程,等待人工确认。"
            requires_confirmation = True
            notes.append("action 返回待人工确认标记。")
            should_continue = False

        analysis = LlmActionAnalysis(
            action=action,
            has_anomaly=has_anomaly,
            severity=severity,  # type: ignore[arg-type]
            possible_reason=possible_reason,
            suggested_action=suggested_action,
            requires_confirmation=requires_confirmation,
            should_continue=should_continue,
            progress_complete=progress_complete,
            notes=notes,
        )
        logger.info("规则 LLM action 审核完成 analysis=%s", json_for_log(asdict(analysis)))
        return analysis

    @staticmethod
    def _extract_ips(text: str) -> list[str]:
        """Return the distinct dotted-quad IPv4 addresses found in ``text``.

        ``re.ASCII`` makes ``\\b`` treat CJK characters as non-word characters,
        so an address written directly next to Chinese text is still found.
        Candidates with an octet above 255 are dropped, and duplicates are
        removed while keeping first-seen order.
        """
        candidates = re.findall(r"\b(?:\d{1,3}\.){3}\d{1,3}\b", text, flags=re.ASCII)
        valid = [
            candidate
            for candidate in candidates
            if all(int(octet) <= 255 for octet in candidate.split("."))
        ]
        return list(dict.fromkeys(valid))

    def _extract_key_values(self, text: str) -> dict[str, str]:
        """Extract KEY=VALUE parameters whose key is listed in ``KEY_ALIASES``.

        A value ends at whitespace or at an ASCII or full-width comma or
        semicolon. Keys that are not in ``KEY_ALIASES`` are ignored.
        """
        params: dict[str, str] = {}
        # \uff0c / \uff1b are the full-width comma and semicolon.
        for match in re.finditer(r"([A-Za-z_][A-Za-z0-9_]*)\s*=\s*([^\s,;\uff0c\uff1b]+)", text):
            raw_key, value = match.groups()
            key = KEY_ALIASES.get(raw_key)
            if key:
                params[key] = value
        return params

    def _extract_chinese_patterns(self, text: str) -> dict[str, str]:
        """Extract deployment parameters from common Chinese phrasings.

        Each label may be followed by optional whitespace and an optional
        ASCII or full-width colon (\\uff1a) before the value. Only the first
        occurrence of each label is used.
        """
        patterns = {
            "AIRPORT_CODE": r"(?:机场|三字码)\s*[:\uff1a]?\s*([A-Z]{3})",
            "APP_NAME": r"(?:应用|应用名)\s*[:\uff1a]?\s*([A-Za-z0-9_.-]+)",
            "MODULE_NAME": r"(?:模块|模块名)\s*[:\uff1a]?\s*([A-Za-z0-9_.-]+)",
            "VERSION_NUMBER": r"(?:版本|版本号)\s*[:\uff1a]?\s*([A-Za-z0-9_.-]+)",
            # Accepts a Windows drive path or an absolute POSIX path.
            "ZIP_FILE_PATH": (
                r"(?:包|软件包|zip)\s*[:\uff1a]?\s*"
                r"([A-Za-z]:[\\/][^\s,;\uff0c\uff1b]+|/[^\s,;\uff0c\uff1b]+)"
            ),
        }
        params: dict[str, str] = {}
        for key, pattern in patterns.items():
            match = re.search(pattern, text)
            if match:
                params[key] = match.group(1)
        return params
def _analyze_progress_values(action: str, values: dict[str, Any]) -> tuple[bool, bool, str, str]:
    """Interpret the progress fields of a poll action.

    Args:
        action: Poll action name; ``FINISH`` is honoured only for
            "poll-upgrade-progress".
        values: Parsed result fields (STEP, STATUS, MSG, MESSAGE, SUCCESS,
            FINISH, CODE, RATE_OF_PROGRESS). Missing fields count as empty.

    Returns:
        ``(complete, has_anomaly, reason, note)``. ``reason`` is always a
        string and is empty when no anomaly was detected.
    """
    step = _lower_value(values.get("STEP"))
    status = _lower_value(values.get("STATUS"))
    msg = _lower_value(values.get("MSG"))
    message = _lower_value(values.get("MESSAGE"))
    success = _lower_value(values.get("SUCCESS"))
    finish = _lower_value(values.get("FINISH"))
    code = _lower_value(values.get("CODE"))
    rate = _lower_value(values.get("RATE_OF_PROGRESS"))

    # Accept "100", "100.0" and "100%" alike; non-numeric rates are not full.
    try:
        rate_is_full = float(rate.rstrip("%")) >= 100
    except ValueError:
        rate_is_full = False

    truthy = ("true", "1", "yes")
    complete = (
        step == "done"
        or status in ("completed", "complete", "done", "success", "succeeded")
        or success in truthy
        or (action == "poll-upgrade-progress" and finish in truthy)
        or (msg == "success" and rate_is_full and code in ("", "0"))
    )

    note = _progress_note(values)
    # A non-zero CODE is an anomaly even when other fields look complete.
    if code and code != "0":
        return complete, True, f"进度接口返回非 0 CODE: {code}", note

    combined = " ".join(item for item in (step, status, msg, message) if item)
    if re.search(r"fail|error", combined, flags=re.IGNORECASE):
        reason = values.get("MESSAGE") or values.get("MSG") or values.get("STEP") or "进度接口返回失败状态"
        # The raw field may be a number; the declared return type is str.
        return complete, True, str(reason), note
    return complete, False, "", note
def _progress_note(values: dict[str, Any]) -> str:
"""把进度核心字段整理成一条备注。"""
parts = []
for key in ("RATE_OF_PROGRESS", "STEP", "MSG", "STATUS", "SUCCESS", "CODE", "FINISH", "MESSAGE"):
value = values.get(key)
if value not in (None, ""):
parts.append(f"{key}={value}")
return "当前进度: " + ", ".join(parts) if parts else "进度接口未返回明确进度字段。"
def _lower_value(value: Any) -> str:
"""把字段值转成小写字符串。"""
return str(value).strip().lower() if value is not None else ""