dark 85afabcd94 增强 chat LLM 交互与单 action 执行能力
- 扩展 LLM client 协议,支持普通对话、日志分析和单 action 解析
- chat 非内置输入默认进入 LLM 普通对话,不再本地拦截问候
- 新增 ask、log analyze、action propose、action run 等交互命令
- 单 action 执行前强制人工确认,并复用现有 ActionRouter、审核、事件和 checkpoint
- 日志分析默认读取尾部内容并脱敏后再提交给 LLM
- 更新 README、发布包 README 和 run.sh help
- 补充 LLM 与 chat 交互相关测试
2026-06-05 11:49:13 +08:00

416 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""LLM 结构化输出的确定性规则 fallback。
该类不是对真实模型的替代,只用于本地开发和测试时提供稳定输出。
真实 LLM client 需要实现相同方法。
"""
from __future__ import annotations
import logging
import re
from dataclasses import asdict
from typing import Any
from pam_deploy_graph.constants import GLOBAL_ACTION_SEQUENCE, REQUIRED_PARAMS, SENSITIVE_KEYS
from pam_deploy_graph.logging_utils import json_for_log, redact_for_log
from pam_deploy_graph.models import (
ActionResult,
ExecutionStrategy,
LlmActionAnalysis,
LlmDeployPlan,
LlmIntentResult,
LlmParamResult,
LlmSingleActionProposal,
)
logger = logging.getLogger(__name__)

# Maps every accepted spelling of a parameter name to its canonical
# upper-case key. Each group lists the canonical key followed by the
# spellings that resolve to it.
KEY_ALIASES = {
    spelling: canonical
    for canonical, spellings in (
        ("HOME_BASE_URL", ("home_base_url", "HOME_BASE_URL")),
        ("CLIENT_ID", ("client_id", "CLIENT_ID")),
        ("CLIENT_SECRET", ("client_secret", "CLIENT_SECRET")),
        ("AIRPORT_CODE", ("airportCode", "AIRPORT_CODE")),
        ("APP_NAME", ("applicationName", "APP_NAME")),
        ("MODULE_NAME", ("moduleName", "MODULE_NAME")),
        ("VERSION_NUMBER", ("versionNumber", "VERSION_NUMBER")),
        (
            "PARENT_VERSION_NUMBER",
            ("parentVersionNumber", "PARENT_VERSION_NUMBER", "parent_version_number"),
        ),
        ("ZIP_FILE_PATH", ("zipFilePath", "ZIP_FILE_PATH")),
        ("ACTION_TYPE", ("actionType", "ACTION_TYPE")),
        ("TIMEOUT", ("timeOut", "TIMEOUT")),
        ("LOG_NAME", ("logName", "LOG_NAME")),
    )
    for spelling in spellings
}
class RuleBasedLlmClient:
    """Lightweight rule-based fallback for the LLM client interface.

    Every method derives its answer from keyword and regular-expression
    rules applied to its arguments; no model is called.
    """

    # Actions that propose_action labels as high risk.
    _HIGH_RISK_ACTIONS = (
        "publish-version",
        "create-download-task",
        "upgrade-ip",
        "start-ip",
        "stop-ip",
        "rollback-ip",
    )
    # Dotted-quad candidates; octet ranges are checked in _find_ipv4_addresses.
    _IPV4_CANDIDATE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")

    @classmethod
    def _find_ipv4_addresses(cls, text: str) -> list[str]:
        """Return dotted-quad strings in ``text`` whose octets are all <= 255."""
        return [
            candidate
            for candidate in cls._IPV4_CANDIDATE.findall(text)
            if all(int(octet) <= 255 for octet in candidate.split("."))
        ]

    def chat(self, text: str, context: dict[str, Any] | None = None) -> str:
        """Return a canned reply for free-form chat input.

        Args:
            text: The user's message.
            context: Optional conversation context; it is only logged.

        Returns:
            A short command overview when ``text`` contains a help keyword,
            otherwise a notice that only the rule fallback is active.
        """
        logger.info(
            "规则 LLM 普通对话 text=%s context=%s",
            redact_for_log(text, max_text_len=800),
            json_for_log(context or {}),
        )
        lowered = text.lower()
        if any(word in lowered for word in ("help", "帮助", "怎么用", "命令")):
            return (
                "当前是本地规则 LLM fallback。可用 `analyze <需求>` 分析部署需求,`run` 执行完整 workflow,"
                "`action propose <需求>` 解析单个 action,`action run ...` 确认后执行单个 action,"
                "`log analyze <路径>` 分析日志尾部。"
            )
        return (
            "当前未配置真实 LLM,已使用本地规则 fallback。普通闲聊只能给出有限说明,"
            "如需自然语言问答、日志深度分析或更准确的 action 解析,请配置真实 LLM。"
        )

    def analyze_log(self, log_text: str, question: str | None = None, source_path: str = "") -> str:
        """Summarise a log excerpt by scanning for failure keywords.

        Args:
            log_text: The log content to scan, one record per line.
            question: Optional user question; echoed into the summary.
            source_path: Where the log came from; used for display only.

        Returns:
            A multi-line summary listing the number of scanned lines and up
            to the last five lines that matched a failure keyword.
        """
        logger.info(
            "规则 LLM 日志分析 source=%s question=%s text_len=%s",
            source_path,
            redact_for_log(question or "", max_text_len=300),
            len(log_text),
        )
        lines = log_text.splitlines()
        problem_lines = [
            line
            for line in lines
            if re.search(
                r"error|exception|fail|traceback|timeout|refused|denied|失败|异常|错误|超时",
                line,
                flags=re.IGNORECASE,
            )
        ]
        summary = [
            f"日志来源: {source_path or '-'}",
            f"已分析尾部 {len(lines)} 行。",
        ]
        if question:
            summary.append(f"关注问题: {question}")
        if problem_lines:
            summary.append(f"发现 {len(problem_lines)} 行疑似异常,最近几条:")
            # Only the most recent matches are shown, each passed through the log redactor.
            summary.extend(f"- {redact_for_log(line, max_text_len=240)}" for line in problem_lines[-5:])
            summary.append("建议:优先检查以上异常附近的接口返回、网络连通性、认证信息和目标服务状态。")
        else:
            summary.append("未在日志尾部发现明显 ERROR/Exception/fail/timeout 关键字。")
            summary.append("建议:如问题仍存在,请扩大 `--tail` 或提供更具体的问题描述。")
        return "\n".join(summary)

    def propose_action(
        self,
        text: str,
        allowed_actions: list[str],
        params: dict[str, Any],
        state_summary: dict[str, Any] | None = None,
    ) -> LlmSingleActionProposal:
        """Build a single-action proposal when ``text`` names an allowed action.

        Args:
            text: The user's request.
            allowed_actions: Action names that may be proposed; the first one
                found (case-insensitively) inside ``text`` is selected.
            params: Accepted for interface compatibility; not read here.
            state_summary: Optional state snapshot; it is only logged.

        Returns:
            A proposal whose ``action`` is empty when no allowed action name
            occurs in ``text``. ``requires_confirmation`` is always True.
        """
        logger.info(
            "规则 LLM 单 action 解析开始 text=%s allowed=%s state=%s",
            redact_for_log(text, max_text_len=800),
            allowed_actions,
            json_for_log(state_summary or {}),
        )
        lowered = text.lower()
        action = next((candidate for candidate in allowed_actions if candidate.lower() in lowered), "")
        ips = self._find_ipv4_addresses(text)
        kwargs = _safe_action_kwargs(self._extract_key_values(text))
        risk = "high" if action in self._HIGH_RISK_ACTIONS else "medium"
        proposal = LlmSingleActionProposal(
            action=action,
            ip=ips[0] if ips else "",
            kwargs=kwargs,
            reason="规则 fallback 仅在输入中出现明确 action 名时生成建议。" if action else "未识别到明确 action 名。",
            risk_level=risk,  # type: ignore[arg-type]
            requires_confirmation=True,
        )
        logger.info("规则 LLM 单 action 解析完成 proposal=%s", json_for_log(asdict(proposal)))
        return proposal

    def understand_request(self, text: str) -> LlmIntentResult:
        """Classify the request intent and strategy preference by keyword.

        Intent keywords are checked in a fixed order (usage, preview, node
        query, rollback); the first group that matches wins and anything else
        is treated as a deploy request. Script keywords are checked after MCP
        keywords, so they take precedence when both occur.
        """
        logger.info("规则 LLM 意图识别开始 text=%s", redact_for_log(text, max_text_len=800))
        lowered = text.lower()
        reasons: list[str] = []
        intent = "deploy"
        if any(word in lowered for word in ("用法", "怎么用", "生成脚本", "给我脚本", "usage")):
            intent = "show_usage"
            reasons.append("用户在询问脚本用法或脚本生成")
        elif any(word in lowered for word in ("预演", "计划", "不执行", "不要动环境", "dry-run", "preview")):
            intent = "preview"
            reasons.append("用户要求只预演或不触碰环境")
        elif any(word in lowered for word in ("在线ip", "在线 ip", "查询ip", "查询 ip", "node", "工作站")):
            intent = "query_node_ips"
            reasons.append("用户要求查询 Node 或在线工作站")
        elif any(word in lowered for word in ("回滚", "rollback")):
            intent = "rollback"
            reasons.append("用户要求回滚")
        else:
            reasons.append("默认识别为部署请求")

        mode_preference = "未指定"
        strategy_preference = "未指定"
        if any(word in lowered for word in ("mcp", "在线执行", "直接在线")):
            mode_preference = "MCP"
            strategy_preference = "hybrid_node_mcp"
            reasons.append("用户倾向 MCP,PAM_HOME 仍需脚本 action")
        if any(word in lowered for word in ("脚本", "离线", "script", "shell", "powershell")):
            mode_preference = "API脚本"
            strategy_preference = "script_only"
            reasons.append("用户倾向脚本或离线执行")
        # A preview without an explicit preference defaults to the hybrid strategy.
        if intent == "preview" and strategy_preference == "未指定":
            strategy_preference = "hybrid_node_mcp"

        result = LlmIntentResult(
            intent=intent,  # type: ignore[arg-type]
            mode_preference=mode_preference,  # type: ignore[arg-type]
            strategy_preference=strategy_preference,  # type: ignore[arg-type]
            confidence=0.72 if intent != "deploy" else 0.6,
            reasons=reasons,
        )
        logger.info("规则 LLM 意图识别完成 result=%s", json_for_log(asdict(result)))
        return result

    def extract_params(self, text: str, base_params: dict[str, Any] | None = None) -> LlmParamResult:
        """Extract deployment parameters from key=value pairs, Chinese phrases and IPs.

        Args:
            text: The user's request.
            base_params: Existing parameters; copied, then overridden first by
                key=value pairs and then by Chinese phrase matches.

        Returns:
            The merged parameters, a control dict holding
            ``user_specified_ips`` when any IPv4 address is present, the
            ``REQUIRED_PARAMS`` keys that are still empty, and which of
            ``CLIENT_SECRET`` / ``CLIENT_ID`` carry a value.
        """
        logger.info(
            "规则 LLM 参数抽取开始 text=%s base_params=%s",
            redact_for_log(text, max_text_len=800),
            json_for_log(base_params or {}),
        )
        params = dict(base_params or {})
        params.update(self._extract_key_values(text))
        params.update(self._extract_chinese_patterns(text))
        control: dict[str, Any] = {}
        ips = self._find_ipv4_addresses(text)
        if ips:
            control["user_specified_ips"] = ips
        missing = [key for key in REQUIRED_PARAMS if not params.get(key)]
        sensitive = [key for key in ("CLIENT_SECRET", "CLIENT_ID") if params.get(key)]
        result = LlmParamResult(
            extracted_params=params,
            extracted_control=control,
            missing_required_params=missing,
            sensitive_fields_present=sensitive,
        )
        logger.info("规则 LLM 参数抽取完成 result=%s", json_for_log(asdict(result)))
        return result

    def generate_plan(
        self,
        *,
        params: dict[str, Any],
        intent: str,
        strategy: ExecutionStrategy,
    ) -> LlmDeployPlan:
        """Produce a deterministic deployment plan with fixed risk notes.

        Args:
            params: Deployment parameters; four of them are shown in the summary.
            intent: The recognised intent; deploy, node query and rollback
                intents mark the plan as requiring confirmation.
            strategy: The execution strategy, echoed into the plan.

        Returns:
            A plan whose actions are a copy of ``GLOBAL_ACTION_SEQUENCE``.
        """
        logger.info("规则 LLM 计划生成开始 intent=%s strategy=%s params=%s", intent, strategy, json_for_log(params))
        if strategy == "hybrid_node_mcp":
            strategy_text = "PAM_HOME 使用脚本 action,PAM_NODE 使用 MCP"
        elif strategy == "script_only":
            strategy_text = "全部 action 使用脚本 action"
        else:
            strategy_text = "全部 action 使用 fake runner"
        summary = (
            f"计划处理 {params.get('AIRPORT_CODE', '-')}/"
            f"{params.get('APP_NAME', '-')}/"
            f"{params.get('MODULE_NAME', '-')}/"
            f"{params.get('VERSION_NUMBER', '-')},执行策略为 {strategy_text}"
        )
        risk_notes = [
            "真实部署前必须确认参数。",
            "发布版本、创建下载任务、升级和回滚属于高风险动作。",
            "回滚只能在用户确认后执行。",
        ]
        if strategy == "hybrid_node_mcp":
            risk_notes.append("PAM_HOME 当前没有 MCP 能力,HOME 阶段仍会调用脚本 action。")
        result = LlmDeployPlan(
            summary=summary,
            risk_notes=risk_notes,
            planned_actions=list(GLOBAL_ACTION_SEQUENCE),
            requires_confirmation=intent in ("deploy", "query_node_ips", "rollback"),
            execution_strategy=strategy,
        )
        logger.info("规则 LLM 计划生成完成 result=%s", json_for_log(asdict(result)))
        return result

    def analyze_action_result(
        self,
        *,
        action: str,
        result: ActionResult,
    ) -> LlmActionAnalysis:
        """Review an action result with hard-coded rules.

        The checks run in a fixed order and later checks override the fields
        set by earlier ones: generic failure, ``verify-ip`` health flag,
        ``rollback-ip`` failure, progress polling, and finally the
        ``PENDING_AGENT_CONFIRMATION`` marker.

        Args:
            action: Name of the action that produced ``result``.
            result: The action result to review.

        Returns:
            The analysis; ``progress_complete`` stays ``None`` for actions
            other than the two progress-polling actions.
        """
        logger.info(
            "规则 LLM action 审核开始 action=%s result=%s",
            action,
            json_for_log(
                {
                    "backend": result.backend,
                    "ok": result.ok,
                    "exit_code": result.exit_code,
                    "tool_name": result.tool_name,
                    "values": result.values,
                    "error_summary": result.error_summary,
                },
                max_text_len=1000,
            ),
        )
        notes: list[str] = []
        has_anomaly = not result.ok
        severity = "info"
        possible_reason = ""
        suggested_action = "继续观察。"
        requires_confirmation = False
        should_continue = True
        progress_complete: bool | None = None

        if not result.ok:
            severity = "medium"
            possible_reason = result.error_summary or "action 返回失败状态。"
            suggested_action = "查看 action 诊断日志、参数、网络和目标服务状态。"
            notes.append("硬规则检测到 action 执行失败。")
            should_continue = False

        if action == "verify-ip":
            success = result.values.get("SUCCESS")
            # Normalised with the same helper the progress checks use, so
            # surrounding whitespace does not turn a success into a failure.
            if success is not None and _lower_value(success) not in ("true", "1", "yes"):
                has_anomaly = True
                severity = "high"
                possible_reason = str(result.values.get("MESSAGE") or "工作站健康检查未通过。")
                suggested_action = "先下载日志并人工确认是否执行回滚。"
                requires_confirmation = True
                notes.append("verify-ip SUCCESS 非成功值。")
                should_continue = False

        if action == "rollback-ip" and not result.ok:
            severity = "high"
            suggested_action = "保持待确认状态,人工排查回滚失败原因后重试或转人工处理。"
            requires_confirmation = True
            notes.append("rollback-ip 失败需要人工处理。")
            should_continue = False

        if action in ("poll-download-progress", "poll-upgrade-progress"):
            progress_complete, progress_has_anomaly, progress_reason, progress_note = _analyze_progress_values(
                action, result.values
            )
            if progress_note:
                notes.append(progress_note)
            if progress_has_anomaly:
                has_anomaly = True
                severity = "high"
                possible_reason = progress_reason or possible_reason or "进度接口返回失败状态。"
                suggested_action = "停止后续 action,检查下载/推送任务状态、PAM_HOME/PAM_NODE 日志和接口返回。"
                should_continue = False
            elif progress_complete:
                suggested_action = "进度已完成,可以继续下一个 action。"
            elif result.ok:
                suggested_action = "进度未完成,继续查询进度。"

        if result.values.get("PENDING_AGENT_CONFIRMATION"):
            has_anomaly = True
            severity = "high"
            possible_reason = str(result.values["PENDING_AGENT_CONFIRMATION"])
            suggested_action = "暂停自动流程,等待人工确认。"
            requires_confirmation = True
            notes.append("action 返回待人工确认标记。")
            should_continue = False

        analysis = LlmActionAnalysis(
            action=action,
            has_anomaly=has_anomaly,
            severity=severity,  # type: ignore[arg-type]
            possible_reason=possible_reason,
            suggested_action=suggested_action,
            requires_confirmation=requires_confirmation,
            should_continue=should_continue,
            progress_complete=progress_complete,
            notes=notes,
        )
        logger.info("规则 LLM action 审核完成 analysis=%s", json_for_log(asdict(analysis)))
        return analysis

    def _extract_key_values(self, text: str) -> dict[str, str]:
        """Extract ``KEY=VALUE`` pairs whose key is a known alias.

        A value ends at whitespace or at an ASCII or full-width comma or
        semicolon. Keys missing from ``KEY_ALIASES`` are ignored; when a
        canonical key occurs more than once the last value wins.
        """
        params: dict[str, str] = {}
        for match in re.finditer(r"([A-Za-z_][A-Za-z0-9_]*)\s*=\s*([^\s,;,;]+)", text):
            raw_key, value = match.groups()
            key = KEY_ALIASES.get(raw_key)
            if key:
                params[key] = value
        return params

    def _extract_chinese_patterns(self, text: str) -> dict[str, str]:
        """Extract deployment parameters from common Chinese phrasings.

        Each parameter has one pattern of the form "<label> [colon] <value>",
        where the colon may be ASCII or full-width. Only the first match of
        each pattern is used.

        Returns:
            Canonical parameter names mapped to the captured values; keys
            without a match are omitted.
        """
        patterns = {
            "AIRPORT_CODE": r"(?:机场|三字码)\s*[::]?\s*([A-Z]{3})",
            "APP_NAME": r"(?:应用|应用名)\s*[::]?\s*([A-Za-z0-9_.-]+)",
            "MODULE_NAME": r"(?:模块|模块名)\s*[::]?\s*([A-Za-z0-9_.-]+)",
            # The lookbehinds keep the parent-version labels (which all end
            # in the version label) from being read as the target version.
            "VERSION_NUMBER": r"(?<!继承)(?<!父)(?<!规则)(?:版本|版本号)\s*[::]?\s*([A-Za-z0-9_.-]+)",
            "PARENT_VERSION_NUMBER": r"(?:继承版本|父版本|规则版本|继承哪个版本的规则)\s*[::]?\s*([A-Za-z0-9_.-]+)",
            "ZIP_FILE_PATH": r"(?:包|软件包|zip)\s*[::]?\s*([A-Za-z]:[\\/][^\s,;,;]+|/[^\s,;,;]+)",
        }
        params: dict[str, str] = {}
        for key, pattern in patterns.items():
            match = re.search(pattern, text)
            if match:
                params[key] = match.group(1)
        return params
def _analyze_progress_values(action: str, values: dict[str, Any]) -> tuple[bool, bool, str, str]:
    """Classify the fields returned by a progress-polling action.

    Args:
        action: The polling action name; ``FINISH`` only counts as a
            completion signal for ``poll-upgrade-progress``.
        values: The fields returned by the progress call.

    Returns:
        ``(complete, has_anomaly, reason, note)``. ``reason`` is empty when
        no anomaly is found; ``note`` is the text built by ``_progress_note``.
    """
    step = _lower_value(values.get("STEP"))
    status = _lower_value(values.get("STATUS"))
    msg = _lower_value(values.get("MSG"))
    message = _lower_value(values.get("MESSAGE"))
    success = _lower_value(values.get("SUCCESS"))
    finish = _lower_value(values.get("FINISH"))
    code = _lower_value(values.get("CODE"))
    rate = _lower_value(values.get("RATE_OF_PROGRESS"))

    truthy = ("true", "1", "yes")
    complete = (
        step == "done"
        or status in ("completed", "complete", "done", "success", "succeeded")
        or success in truthy
        or (action == "poll-upgrade-progress" and finish in truthy)
        or (msg == "success" and rate == "100" and code in ("", "0"))
    )
    note = _progress_note(values)

    # A non-zero CODE is an anomaly even when another field signals completion.
    if code and code != "0":
        return complete, True, f"进度接口返回非 0 CODE: {code}", note

    # The fields are already lower-cased, so a plain search is sufficient.
    combined = " ".join(item for item in (step, status, msg, message) if item)
    if re.search(r"fail|error", combined):
        raw_reason = values.get("MESSAGE") or values.get("MSG") or values.get("STEP") or "进度接口返回失败状态"
        # Coerce to str so the reason matches the declared return type even
        # when the field holds a non-string value.
        return complete, True, str(raw_reason), note
    return complete, False, "", note
def _progress_note(values: dict[str, Any]) -> str:
"""把进度核心字段整理成一条备注。"""
parts = []
for key in ("RATE_OF_PROGRESS", "STEP", "MSG", "STATUS", "SUCCESS", "CODE", "FINISH", "MESSAGE"):
value = values.get(key)
if value not in (None, ""):
parts.append(f"{key}={value}")
return "当前进度: " + ", ".join(parts) if parts else "进度接口未返回明确进度字段。"
def _lower_value(value: Any) -> str:
"""把字段值转成小写字符串。"""
return str(value).strip().lower() if value is not None else ""
def _safe_action_kwargs(values: dict[str, str]) -> dict[str, str]:
    """Return a copy of ``values`` without the keys listed in ``SENSITIVE_KEYS``.

    Used so that sensitive fields are not carried into a single-action
    proposal's extra arguments.
    """
    safe: dict[str, str] = {}
    for name, item in values.items():
        if name in SENSITIVE_KEYS:
            continue
        safe[name] = item
    return safe