调整了 agent.py 和 LLM client 协议/实现。现在只传当前 action 的结构化结果和必要诊断日志,避免历史运行态影响判断。提示词和文档也已同步说明。verify-ip 增加健康检查重试:默认 VERIFY_INTERVAL_SEC=10、VERIFY_MAX_ATTEMPTS=12,约 2 分钟。verify-ip 未通过但未达到最大次数时,会播报进度、保存 checkpoint,并继续从当前 verify-ip 重试,不会进入 download-log。参数已加入 config.txt.example、脚本配置读取、README、打包 README、Skill 文档和流程图。
547 lines
19 KiB
Python
547 lines
19 KiB
Python
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from pam_deploy_graph.agent import PamDeployAgent
|
|
from pam_deploy_graph.checkpoint_store import load_agent_state
|
|
from pam_deploy_graph.constants import GLOBAL_ACTION_SEQUENCE
|
|
from pam_deploy_graph.fake_runner import FakeActionRunner
|
|
from pam_deploy_graph.models import LlmActionAnalysis
|
|
|
|
|
|
# Baseline deployment parameters reused by the tests in this module.
# Tests that need extra or different values build a copy via ``{**PARAMS, ...}``.
PARAMS = {
    # Service endpoint and credentials.
    "HOME_BASE_URL": "https://pam.home.example.com",
    "CLIENT_ID": "client",
    "CLIENT_SECRET": "secret",
    # Deployment target and package identity.
    "AIRPORT_CODE": "HET",
    "APP_NAME": "PAM",
    "MODULE_NAME": "Node",
    "VERSION_NUMBER": "2.0.5",
    "ZIP_FILE_PATH": "C:/pkg.zip",
    # verify-ip retry settings (interval 0, two attempts) so retry tests run quickly.
    "VERIFY_INTERVAL_SEC": 0,
    "VERIFY_MAX_ATTEMPTS": 2,
}
|
|
|
|
|
|
class BlockingReviewLlmClient:
    """LLM client stub whose review of every action is a blocking, high-severity verdict."""

    def analyze_action_result(self, *, action, result):
        """Return a blocking analysis for ``action``; ``result`` is not inspected."""
        verdict = {
            "action": action,
            "has_anomaly": True,
            "severity": "high",
            "possible_reason": "review blocked",
            "suggested_action": "stop and inspect",
            "requires_confirmation": True,
            "should_continue": False,
            "notes": ["blocked by test llm"],
        }
        return LlmActionAnalysis(**verdict)
|
|
|
|
|
|
class BlockingOnceReviewLlmClient:
    """LLM client stub that blocks one chosen action the first time it is reviewed."""

    def __init__(self, blocked_action: str = "get-token") -> None:
        """Remember which action to block and that no block has been issued yet."""
        self.blocked_action = blocked_action
        self.blocked = False

    def analyze_action_result(self, *, action, result):
        """Block the first review of ``blocked_action``; return a default analysis otherwise.

        ``result`` is not inspected.
        """
        # Any other action, or a repeat of the already-blocked one, gets the
        # default-constructed analysis.
        if action != self.blocked_action or self.blocked:
            return LlmActionAnalysis(action=action)

        # First review of the target action: record it and return a blocking verdict.
        self.blocked = True
        return LlmActionAnalysis(
            action=action,
            has_anomaly=True,
            severity="high",
            possible_reason="review blocked once",
            suggested_action="fix then retry current action",
            requires_confirmation=True,
            should_continue=False,
        )
|
|
|
|
|
|
class BrokenReviewLlmClient:
    """LLM client stub whose review call always fails."""

    def analyze_action_result(self, *, action, result):
        """Raise ``RuntimeError`` unconditionally; both arguments are ignored."""
        failure = RuntimeError("review transport failed")
        raise failure
|
|
|
|
|
|
class ProgressivePollRunner(FakeActionRunner):
    """Simulate download and upgrade (push) progress that completes only after several polls."""

    def __init__(self) -> None:
        """Queue the download progress values; per-IP upgrade queues are created lazily."""
        super().__init__()
        self.download_progress = ["10", "55", "100"]
        self.upgrade_progress: dict[str, list[str]] = {}

    def _fixture_for(self, action, kwargs):
        """Return the next queued progress payload for poll actions.

        Every other action is delegated to the parent implementation. Once a
        queue is exhausted the reported rate stays at "100".
        """
        if action == "poll-download-progress":
            pending = self.download_progress
            rate = pending.pop(0) if pending else "100"
            finished = rate == "100"
            return {
                "ACTION": action,
                "STEP": "DONE" if finished else "RUNNING",
                "RATE_OF_PROGRESS": rate,
                "MSG": "success" if finished else "running",
                "MESSAGE": f"download {rate}%",
            }

        if action == "poll-upgrade-progress":
            ip = kwargs.get("ip", "")
            # Each IP gets its own two-step queue the first time it is polled.
            pending = self.upgrade_progress.setdefault(str(ip), ["30", "100"])
            rate = pending.pop(0) if pending else "100"
            finished = rate == "100"
            return {
                "ACTION": action,
                "IP": ip,
                "STEP": "DONE" if finished else "RUNNING",
                "RATE_OF_PROGRESS": rate,
                "MSG": "success" if finished else "running",
                "MESSAGE": f"upgrade {rate}%",
            }

        return super()._fixture_for(action, kwargs)
|
|
|
|
|
|
class FlakyVerifyRunner(FakeActionRunner):
    """Simulate an application whose health check passes on the second attempt after start-up."""

    def __init__(self) -> None:
        """Start with zero recorded verify-ip calls for the tracked IP."""
        super().__init__()
        self.verify_calls = 0

    def _fixture_for(self, action, kwargs):
        """Fail the first verify-ip for 192.168.1.10; delegate everything else to the parent."""
        if action != "verify-ip" or kwargs.get("ip") != "192.168.1.10":
            return super()._fixture_for(action, kwargs)

        # Only verify-ip calls for the tracked IP are counted.
        self.verify_calls += 1
        if self.verify_calls != 1:
            return super()._fixture_for(action, kwargs)

        return {
            "ACTION": action,
            "IP": "192.168.1.10",
            "SUCCESS": "false",
            "MESSAGE": "application is starting",
        }
|
|
|
|
|
|
def test_run_deploy_flow_success(tmp_path: Path):
    """A default fake run ends with no pending confirmation and both IPs marked SUCCESS."""
    runner = FakeActionRunner()
    agent = PamDeployAgent(fake_runner=runner)
    config_file = str(tmp_path / "config.txt")
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=config_file,
    )

    agent.run_deploy_flow(state)

    assert state.pending_confirmation == ""
    assert set(state.ip_states) == {"192.168.1.10", "192.168.1.11"}
    for ip_state in state.ip_states.values():
        assert ip_state["status"] == "SUCCESS"
|
|
|
|
|
|
def test_progress_actions_repeat_until_llm_marks_complete(tmp_path: Path):
    """Poll actions are repeated until progress completes, emitting ACTION_PROGRESS events."""
    runner = ProgressivePollRunner()
    agent = PamDeployAgent(fake_runner=runner)
    params = {**PARAMS, "POLL_INTERVAL_SEC": 0}
    state = agent.create_state(
        params=params,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(tmp_path / "checkpoint.json"),
    )

    agent.run_deploy_flow(state)

    action_names = [call[0] for call in runner.calls]
    assert action_names.count("poll-download-progress") == 3
    assert action_names.count("poll-upgrade-progress") == 4
    assert "poll-download-progress" in state.completed_global_steps
    # Attempt counters are expected to be empty after a completed run.
    assert state.poll_attempts == {}
    for ip_state in state.ip_states.values():
        assert ip_state["status"] == "SUCCESS"

    progress_events = [event for event in state.events if event["type"] == "ACTION_PROGRESS"]
    assert any(
        event["stage"] == "poll-download-progress" and "RATE_OF_PROGRESS=10" in event["message"]
        for event in progress_events
    )
    assert any(
        event["stage"] == "poll-upgrade-progress" and event["ip"] == "192.168.1.10"
        for event in progress_events
    )
|
|
|
|
|
|
def test_progress_timeout_pauses_on_current_action(tmp_path: Path):
    """A download poll stuck at RUNNING pauses the flow with reason ``progress_timeout``."""
    # The download never advances beyond 20%, so the attempt limit below is reached.
    stuck_download = {
        "ACTION": "poll-download-progress",
        "STEP": "RUNNING",
        "RATE_OF_PROGRESS": "20",
        "MSG": "running",
        "MESSAGE": "download 20%",
    }
    runner = FakeActionRunner({"poll-download-progress": stuck_download})
    agent = PamDeployAgent(fake_runner=runner)
    params = {**PARAMS, "POLL_INTERVAL_SEC": 0, "DOWNLOAD_POLL_MAX_ATTEMPTS": 2}
    state = agent.create_state(
        params=params,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(tmp_path / "checkpoint.json"),
    )

    agent.run_deploy_flow(state)

    assert state.paused is True
    assert state.pause_reason == "progress_timeout"
    assert state.last_failed_step == "poll-download-progress"
    assert "poll-download-progress" not in state.completed_global_steps
    assert state.review_context["stage"] == "poll-download-progress"
    assert state.poll_attempts["global:poll-download-progress"] == 2
|
|
|
|
|
|
def test_verify_ip_retries_until_success_before_marking_failed(tmp_path: Path):
    """verify-ip is retried after a first failure and the IP still ends up SUCCESS."""

    def is_verify_progress(event):
        """True for an ACTION_PROGRESS event of verify-ip on 192.168.1.10."""
        return (
            event["type"] == "ACTION_PROGRESS"
            and event["stage"] == "verify-ip"
            and event["ip"] == "192.168.1.10"
        )

    runner = FlakyVerifyRunner()
    agent = PamDeployAgent(fake_runner=runner)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(tmp_path / "checkpoint.json"),
    )

    agent.run_deploy_flow(state)

    assert runner.verify_calls == 2
    assert state.paused is False
    assert state.poll_attempts == {}
    assert state.ip_states["192.168.1.10"]["status"] == "SUCCESS"
    assert any(is_verify_progress(event) for event in state.events)
|
|
|
|
|
|
def test_create_state_writes_absolute_script_config_path_and_normalized_zip(tmp_path: Path):
    """create_state stores absolute config/trace paths and writes the resolved ZIP path."""
    package_path = tmp_path / "pkg.zip"
    runtime_config = tmp_path / "runtime" / "config.txt"
    trace_log = tmp_path / "logs" / "trace.log"
    agent = PamDeployAgent(fake_runner=FakeActionRunner())

    state = agent.create_state(
        params={**PARAMS, "ZIP_FILE_PATH": str(package_path)},
        execution_strategy="fake",
        config_path=str(runtime_config),
        trace_file_path=str(trace_log),
    )

    written_config = Path(state.config_path)
    assert written_config.is_absolute()
    assert Path(state.trace_file_path).is_absolute()
    config_text = written_config.read_text(encoding="utf-8")
    expected_entry = f"ZIP_FILE_PATH={package_path.resolve()}"
    assert expected_entry in config_text
|
|
|
|
|
|
def test_global_action_requires_hash_code_from_upload_package(tmp_path: Path):
    """An upload-package result without HASH_CODE raises and leaves the step incomplete."""
    # The fixture deliberately carries only ACTION, i.e. no HASH_CODE field.
    incomplete_upload = {"ACTION": "upload-package"}
    runner = FakeActionRunner({"upload-package": incomplete_upload})
    agent = PamDeployAgent(fake_runner=runner)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(tmp_path / "checkpoint.json"),
    )

    with pytest.raises(RuntimeError, match="缺少必要字段: HASH_CODE"):
        agent.run_deploy_flow(state)

    assert state.last_failed_step == "upload-package"
    assert "upload-package" not in state.completed_global_steps
|
|
|
|
|
|
def test_run_deploy_flow_stops_on_verify_failure(tmp_path: Path):
    """A persistent verify-ip failure pauses the flow on that IP after two attempts."""
    failing_verify = {
        "ACTION": "verify-ip",
        "IP": "192.168.1.10",
        "SUCCESS": "false",
        "MESSAGE": "health check failed",
    }
    runner = FakeActionRunner({"verify-ip:192.168.1.10": failing_verify})
    agent = PamDeployAgent(fake_runner=runner)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
    )

    agent.run_deploy_flow(state)

    # PARAMS sets VERIFY_MAX_ATTEMPTS to 2; exactly two verify-ip calls are expected.
    verify_call_count = sum(
        1
        for call in runner.calls
        if call[0] == "verify-ip" and call[1].get("ip") == "192.168.1.10"
    )
    assert verify_call_count == 2
    assert state.pending_confirmation == ""
    assert state.paused is True
    assert state.pause_reason == "action_failed"

    failed_ip = state.ip_states["192.168.1.10"]
    assert failed_ip["status"] == "FAILED"
    assert failed_ip["failed_stage"] == "verify-ip"
    assert failed_ip["rollback_status"] == "ROLLBACK_NOT_RUN"
    # The second IP must not have been started.
    assert "192.168.1.11" not in state.ip_states
    assert any(event["type"] == "ACTION_RETRY_REQUIRED" for event in state.events)
    assert all(call[0] != "download-log" for call in runner.calls)
|
|
|
|
|
|
def test_resume_retries_failed_ip_action_without_rollback(tmp_path: Path):
    """After a verify-ip failure, resume re-runs the flow to success without any rollback."""
    failing_verify = {
        "ACTION": "verify-ip",
        "IP": "192.168.1.10",
        "SUCCESS": "false",
        "MESSAGE": "health check failed",
    }
    runner = FakeActionRunner({"verify-ip:192.168.1.10": failing_verify})
    agent = PamDeployAgent(fake_runner=runner)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
    )

    # First pass runs with the failing fixture; the fixtures are then cleared
    # before the flow is resumed and run again.
    agent.run_deploy_flow(state)
    runner.fixtures = {}
    agent.resume_state(state)
    agent.run_deploy_flow(state)

    assert state.pending_confirmation == ""
    assert state.paused is False
    assert state.last_failed_step == ""

    retried_ip = state.ip_states["192.168.1.10"]
    assert retried_ip["status"] == "SUCCESS"
    assert retried_ip["rollback_status"] == "ROLLBACK_NOT_RUN"
    assert state.ip_states["192.168.1.11"]["status"] == "SUCCESS"
    assert all(call[0] != "rollback-ip" for call in runner.calls)
|
|
|
|
|
|
def test_action_analysis_event_is_recorded_when_enabled(tmp_path: Path):
    """With action analysis enabled, a failed verify-ip yields a high-severity ACTION_ANALYSIS event."""
    failing_verify = {
        "ACTION": "verify-ip",
        "IP": "192.168.1.10",
        "SUCCESS": "false",
        "MESSAGE": "health check failed",
    }
    runner = FakeActionRunner({"verify-ip:192.168.1.10": failing_verify})
    agent = PamDeployAgent(fake_runner=runner, action_analysis_enabled=True)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
    )

    agent.run_deploy_flow(state)

    verify_analyses = [
        event
        for event in state.events
        if event["type"] == "ACTION_ANALYSIS" and event["stage"] == "verify-ip"
    ]
    # Only the first verify-ip analysis is inspected.
    first_analysis = verify_analyses[0]
    assert first_analysis["has_anomaly"] is True
    assert first_analysis["severity"] == "high"
    assert first_analysis["requires_confirmation"] is True
|
|
|
|
|
|
def test_successful_action_can_be_blocked_by_llm_review(tmp_path: Path):
    """A blocking LLM verdict pauses the flow at get-token before any global step completes."""
    reviewer = BlockingReviewLlmClient()
    agent = PamDeployAgent(fake_runner=FakeActionRunner(), llm_client=reviewer)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(tmp_path / "checkpoint.json"),
    )

    agent.run_deploy_flow(state)

    assert state.paused is True
    assert state.pause_reason == "llm_review_blocked"
    assert state.last_failed_step == "get-token"
    assert state.completed_global_steps == []

    review = state.review_context
    assert review["stage"] == "get-token"
    assert review["suggested_action"] == "stop and inspect"
|
|
|
|
|
|
def test_resume_retries_llm_blocked_global_action(tmp_path: Path):
    """After a one-time LLM block on get-token, resume re-runs get-token and continues."""
    runner = FakeActionRunner()
    reviewer = BlockingOnceReviewLlmClient()
    agent = PamDeployAgent(fake_runner=runner, llm_client=reviewer)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(tmp_path / "checkpoint.json"),
    )

    # First run, then resume and run again.
    agent.run_deploy_flow(state)
    agent.resume_state(state)
    agent.run_deploy_flow(state)

    action_names = [call[0] for call in runner.calls]
    # get-token is called exactly twice, and both calls come first.
    assert action_names[:2] == ["get-token", "get-token"]
    assert action_names.count("get-token") == 2
    assert state.paused is False
    assert state.completed_global_steps[0] == "get-token"
|
|
|
|
|
|
def test_action_review_failure_pauses_flow(tmp_path: Path):
    """An exception raised by the LLM review pauses the flow and records ACTION_ANALYSIS_FAIL."""
    reviewer = BrokenReviewLlmClient()
    agent = PamDeployAgent(fake_runner=FakeActionRunner(), llm_client=reviewer)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(tmp_path / "checkpoint.json"),
    )

    agent.run_deploy_flow(state)

    assert state.paused is True
    assert state.pause_reason == "llm_review_blocked"

    review = state.review_context
    assert review["stage"] == "get-token"
    assert "LLM 审核失败" in review["possible_reason"]
    assert state.completed_global_steps == []
    assert any(event["type"] == "ACTION_ANALYSIS_FAIL" for event in state.events)
|
|
|
|
|
|
def test_explicit_rollback_runs_rollback_and_resume_continues(tmp_path: Path):
    """An explicit rollback of the failed IP succeeds and the next run completes the other IP."""
    failing_verify = {
        "ACTION": "verify-ip",
        "IP": "192.168.1.10",
        "SUCCESS": "false",
        "MESSAGE": "health check failed",
    }
    runner = FakeActionRunner({"verify-ip:192.168.1.10": failing_verify})
    agent = PamDeployAgent(fake_runner=runner)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
    )

    # Run with the failing fixture, roll back the failed IP, then run again.
    agent.run_deploy_flow(state)
    agent.rollback_ip(state, "192.168.1.10")
    agent.run_deploy_flow(state)

    assert state.pending_confirmation == ""
    assert state.ip_states["192.168.1.10"]["rollback_status"] == "ROLLBACK_DONE"
    assert state.ip_states["192.168.1.11"]["status"] == "SUCCESS"
    assert any(call[0] == "rollback-ip" for call in runner.calls)
|
|
|
|
|
|
def test_failed_explicit_rollback_pauses_without_confirmation(tmp_path: Path):
    """A failing rollback pauses with ``rollback_failed`` and leaves no pending confirmation."""
    failing_verify = {
        "ACTION": "verify-ip",
        "IP": "192.168.1.10",
        "SUCCESS": "false",
        "MESSAGE": "health check failed",
    }
    # NOTE(review): "_fail" presumably tells FakeActionRunner to report the
    # action as failed — confirm against fake_runner.
    failing_rollback = {
        "_fail": True,
        "ACTION": "rollback-ip",
        "IP": "192.168.1.10",
        "MESSAGE": "rollback failed",
    }
    runner = FakeActionRunner(
        {
            "verify-ip:192.168.1.10": failing_verify,
            "rollback-ip:192.168.1.10": failing_rollback,
        }
    )
    agent = PamDeployAgent(fake_runner=runner)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
    )

    agent.run_deploy_flow(state)
    agent.rollback_ip(state, "192.168.1.10")

    assert state.pending_confirmation == ""
    assert state.paused is True
    assert state.pause_reason == "rollback_failed"
    assert state.ip_states["192.168.1.10"]["rollback_status"] == "ROLLBACK_FAILED"
|
|
|
|
|
|
def test_checkpoint_resume_skips_completed_global_and_success_ip(tmp_path: Path):
    """Completed global steps and an already-successful IP are not executed again."""
    checkpoint = tmp_path / "checkpoint.json"
    runner = FakeActionRunner()
    agent = PamDeployAgent(fake_runner=runner)
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        config_path=str(tmp_path / "config.txt"),
        checkpoint_path=str(checkpoint),
    )

    # Pre-populate the state: every global step done, first IP fully deployed.
    both_ips = ["192.168.1.10", "192.168.1.11"]
    state.completed_global_steps = list(GLOBAL_ACTION_SEQUENCE)
    state.online_ips = list(both_ips)
    state.target_ips = list(both_ips)
    finished_ip_state = {
        "status": "SUCCESS",
        "completed_steps": [
            "upgrade-ip",
            "poll-upgrade-progress",
            "start-ip",
            "verify-ip",
            "download-log",
        ],
        "failed_stage": "",
        "failure_reason": "",
        "rollback_status": "ROLLBACK_NOT_RUN",
        "rollback_stop_first": False,
        "log_file": "logs/fake.zip",
    }
    state.ip_states["192.168.1.10"] = finished_ip_state

    agent.run_deploy_flow(state)
    loaded = load_agent_state(checkpoint)

    action_names = [call[0] for call in runner.calls]
    assert "get-token" not in action_names
    # No call of any kind may target the already-successful IP.
    assert all(call[1].get("ip") != "192.168.1.10" for call in runner.calls)
    assert loaded.ip_states["192.168.1.11"]["status"] == "SUCCESS"
|
|
|
|
|
|
def test_update_state_params_rewrites_config_and_checkpoint(tmp_path: Path):
    """update_state_params updates the state, the checkpoint and the config file."""
    initial_package = tmp_path / "pkg-a.zip"
    updated_package = tmp_path / "pkg-b.zip"
    checkpoint = tmp_path / "checkpoint.json"
    config_path = tmp_path / "config.txt"

    agent = PamDeployAgent(fake_runner=FakeActionRunner())
    state = agent.create_state(
        params={**PARAMS, "ZIP_FILE_PATH": str(initial_package)},
        execution_strategy="fake",
        config_path=str(config_path),
        checkpoint_path=str(checkpoint),
    )

    changes = {
        "APP_NAME": "PAM-NEW",
        "ZIP_FILE_PATH": str(updated_package),
    }
    agent.update_state_params(state, changes)
    loaded = load_agent_state(checkpoint)
    config_text = config_path.read_text(encoding="utf-8")

    # In-memory state carries the new values, with the ZIP path resolved.
    assert state.params["APP_NAME"] == "PAM-NEW"
    assert state.params["ZIP_FILE_PATH"] == str(updated_package.resolve())
    # The checkpoint on disk carries the same values.
    assert loaded.params["APP_NAME"] == "PAM-NEW"
    assert loaded.params["ZIP_FILE_PATH"] == str(updated_package.resolve())
    # The config file was rewritten with both entries.
    assert "APP_NAME=PAM-NEW" in config_text
    assert f"ZIP_FILE_PATH={updated_package.resolve()}" in config_text
|
|
|
|
|
|
def test_resume_state_clears_pause_fields(tmp_path: Path):
    """resume_state clears the pause fields on the returned state and in the checkpoint."""
    checkpoint = tmp_path / "checkpoint.json"
    agent = PamDeployAgent(fake_runner=FakeActionRunner())
    state = agent.create_state(
        params=PARAMS,
        execution_strategy="fake",
        checkpoint_path=str(checkpoint),
    )

    pause_context = {"stage": "get-token"}
    agent.pause_state(state, reason="manual_test", review_context=pause_context)
    resumed = agent.resume_state(state)
    loaded = load_agent_state(checkpoint)

    # State object returned by resume_state.
    assert resumed.paused is False
    assert resumed.pause_reason == ""
    assert resumed.review_context == {}
    # State reloaded from the checkpoint file.
    assert loaded.paused is False
    assert loaded.pause_reason == ""
|