增强过滤
This commit is contained in:
parent
87c48a74a5
commit
1b4c07fec6
@ -109,7 +109,8 @@ CHAT_PROMPT = """你是 PAM 部署 Agent 的交互助手。
|
|||||||
- 如果用户想执行完整部署,提示使用 `analyze <需求>` 先分析,确认后再输入 `run`。
|
- 如果用户想执行完整部署,提示使用 `analyze <需求>` 先分析,确认后再输入 `run`。
|
||||||
- 如果用户想单独执行 action,提示使用 `action propose <需求>` 或 `action run ...`,执行前仍需要人工确认。
|
- 如果用户想单独执行 action,提示使用 `action propose <需求>` 或 `action run ...`,执行前仍需要人工确认。
|
||||||
- 不要输出密钥、token、Authorization、CLIENT_SECRET 或 api_key。
|
- 不要输出密钥、token、Authorization、CLIENT_SECRET 或 api_key。
|
||||||
- 不要输出 `<think>`、`</think>`、推理过程、内部思考或隐藏分析内容。
|
- 不要输出 `<think>`、`</think>`、`Thinking Process`、`Reasoning Process`、`Chain of Thought`、推理过程、内部思考或隐藏分析内容。
|
||||||
|
- 只输出可以直接展示给用户的最终回答。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
LOG_ANALYSIS_PROMPT = """分析 PAM Agent 或部署脚本日志。
|
LOG_ANALYSIS_PROMPT = """分析 PAM Agent 或部署脚本日志。
|
||||||
@ -119,7 +120,8 @@ LOG_ANALYSIS_PROMPT = """分析 PAM Agent 或部署脚本日志。
|
|||||||
- 不要输出密钥、token、Authorization、CLIENT_SECRET 或 api_key。
|
- 不要输出密钥、token、Authorization、CLIENT_SECRET 或 api_key。
|
||||||
- 输入通常是日志尾部摘要,不代表完整文件。
|
- 输入通常是日志尾部摘要,不代表完整文件。
|
||||||
- 不要因为日志来自 stderr 就直接判定失败,要结合 ERROR、Exception、fail、状态码和上下文判断。
|
- 不要因为日志来自 stderr 就直接判定失败,要结合 ERROR、Exception、fail、状态码和上下文判断。
|
||||||
- 不要输出 `<think>`、`</think>`、推理过程、内部思考或隐藏分析内容。
|
- 不要输出 `<think>`、`</think>`、`Thinking Process`、`Reasoning Process`、`Chain of Thought`、推理过程、内部思考或隐藏分析内容。
|
||||||
|
- 只输出可以直接展示给用户的最终分析结果。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SINGLE_ACTION_PROMPT = """把用户自然语言解析成一次 PAM action 调用建议。
|
SINGLE_ACTION_PROMPT = """把用户自然语言解析成一次 PAM action 调用建议。
|
||||||
|
|||||||
@ -3,20 +3,44 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from collections.abc import Iterable, Iterator
|
from collections.abc import Iterable, Iterator
|
||||||
|
import re
|
||||||
|
|
||||||
OPEN_THINK_TAG = "<think>"
|
OPEN_THINK_TAG = "<think>"
|
||||||
CLOSE_THINK_TAG = "</think>"
|
CLOSE_THINK_TAG = "</think>"
|
||||||
|
# Heading that opens an explicit reasoning section at the very start of the
# text, e.g. "Thinking Process:", "## **Reasoning Process**" or "思考过程:".
# Optional markdown decoration (#, >, -, *) and bold markers are tolerated.
# The heading must be followed by an ASCII colon, a full-width colon (U+FF1A,
# the usual punctuation after the Chinese headings) or the end of the text.
REASONING_START_RE = re.compile(
    r"^\s*(?:[#>\-*]+\s*)*(?:\*\*)?"
    r"(?:thinking process|thought process|reasoning process|chain of thought|internal reasoning|inner monologue|"
    r"思考过程|推理过程|内部思考)"
    r"(?:\*\*)?\s*(?:[:\uff1a]|\s*$)",
    flags=re.IGNORECASE,
)

# Line-anchored marker that introduces the user-visible answer, e.g.
# "Final Answer:" or "最终回答:". MULTILINE lets it match at the start of any
# line; the trailing \s* swallows whitespace between the colon and the answer.
# Both the ASCII and the full-width colon (U+FF1A) are accepted.
FINAL_ANSWER_RE = re.compile(
    r"^\s*(?:[#>\-*]+\s*)*(?:\*\*)?"
    r"(?:final answer|final response|answer|response|最终答案|最终回答|正式回答|回答|回复|结论)"
    r"(?:\*\*)?\s*[:\uff1a]\s*",
    flags=re.IGNORECASE | re.MULTILINE,
)

# Phrases that, when found anywhere in a line, mark that line as part of a
# reasoning section (used with .search(), so no anchoring).
REASONING_LINE_RE = re.compile(
    r"(thinking process|thought process|reasoning process|chain of thought|internal reasoning|inner monologue|"
    r"analyze the request|determine the response|drafting the response|refining the response|"
    r"user question|input json|role:|constraints:|requirements:|"
    r"do not output|do not automatically|hidden analysis|forbidden tags|"
    r"i need to|i should|i must|i will|i can|must ensure|should briefly|ensure no|keep it concise|"
    r"思考过程|推理过程|内部思考|分析请求|确定回答|起草回答|优化回答|隐藏分析)",
    flags=re.IGNORECASE,
)

# Upper bound on the normalized prefix length that may still be held back
# while deciding whether a stream opens with a reasoning heading.
MAX_REASONING_PREFIX_HOLD = 80
|
||||||
|
|
||||||
|
|
||||||
def strip_thinking_text(text: str) -> str:
    """Remove think tags and explicit reasoning sections from plain LLM text.

    The whole text is pushed through a fresh ``ThinkingTextStreamFilter``
    (one ``feed`` followed by ``finish``); the visible pieces are joined and
    surrounding whitespace is stripped.
    """
    stream_filter = ThinkingTextStreamFilter()
    # List literals evaluate left to right, so feed() runs before finish().
    pieces = [stream_filter.feed(text), stream_filter.finish()]
    return "".join(pieces).strip()
|
||||||
|
|
||||||
|
|
||||||
def filter_thinking_chunks(chunks: Iterable[str]) -> Iterator[str]:
|
def filter_thinking_chunks(chunks: Iterable[str]) -> Iterator[str]:
|
||||||
"""按流式分片移除 `<think>...</think>`,避免跨分片泄露思考内容。"""
|
"""按流式分片移除思考内容,避免跨分片泄露。"""
|
||||||
filter_ = ThinkingTextStreamFilter()
|
filter_ = ThinkingTextStreamFilter()
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
visible = filter_.feed(str(chunk))
|
visible = filter_.feed(str(chunk))
|
||||||
@ -28,12 +52,13 @@ def filter_thinking_chunks(chunks: Iterable[str]) -> Iterator[str]:
|
|||||||
|
|
||||||
|
|
||||||
class ThinkingTextStreamFilter:
|
class ThinkingTextStreamFilter:
|
||||||
"""支持跨 chunk 识别 think 标签的流式过滤器。"""
|
"""支持跨 chunk 识别 think 标签和显式思考段的流式过滤器。"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
"""初始化可见/隐藏状态和待判定缓冲区。"""
|
"""初始化可见/隐藏状态和待判定缓冲区。"""
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
self._inside_think = False
|
self._inside_think = False
|
||||||
|
self._reasoning_filter = ExplicitReasoningStreamFilter()
|
||||||
|
|
||||||
def feed(self, chunk: str) -> str:
|
def feed(self, chunk: str) -> str:
|
||||||
"""输入一个文本分片,返回当前可安全展示的可见文本。"""
|
"""输入一个文本分片,返回当前可安全展示的可见文本。"""
|
||||||
@ -73,21 +98,147 @@ class ThinkingTextStreamFilter:
|
|||||||
output.append(self._pending)
|
output.append(self._pending)
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
break
|
break
|
||||||
return "".join(output)
|
return self._reasoning_filter.feed("".join(output))
|
||||||
|
|
||||||
def finish(self) -> str:
|
def finish(self) -> str:
|
||||||
"""结束流式过滤,丢弃未闭合 think 内容和未完成标签。"""
|
"""结束流式过滤,丢弃未闭合 think 内容和未完成标签。"""
|
||||||
if self._inside_think:
|
if self._inside_think:
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
self._inside_think = False
|
self._inside_think = False
|
||||||
return ""
|
return self._reasoning_filter.finish()
|
||||||
lowered = self._pending.lower()
|
lowered = self._pending.lower()
|
||||||
if lowered in _tag_prefixes():
|
if lowered in _tag_prefixes():
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
return ""
|
return self._reasoning_filter.finish()
|
||||||
tail = self._pending
|
tail = self._pending
|
||||||
self._pending = ""
|
self._pending = ""
|
||||||
return tail
|
return self._reasoning_filter.feed(tail) + self._reasoning_filter.finish()
|
||||||
|
|
||||||
|
|
||||||
|
class ExplicitReasoningStreamFilter:
    """Filter explicit reasoning sections such as ``Thinking Process:`` from a stream.

    The filter is a small state machine driven by ``self._mode``:

    * ``"undecided"`` - the start of the stream is still being buffered until
      it either matches a reasoning heading or clearly cannot become one.
    * ``"suppress"`` - a reasoning heading was seen; everything is held back
      until a final-answer marker shows up (or the stream ends).
    * ``"pass"`` - chunks are forwarded unchanged.
    """

    def __init__(self) -> None:
        """Start with an empty buffer in the ``undecided`` mode."""
        self._buffer = ""
        self._mode = "undecided"

    def feed(self, chunk: str) -> str:
        """Consume one chunk (think tags already removed) and return visible text.

        Returns an empty string while the chunk has to be held back.
        """
        if not chunk:
            return ""
        if self._mode == "pass":
            return chunk

        self._buffer += chunk

        if self._mode == "undecided":
            if _starts_with_reasoning_marker(self._buffer):
                self._mode = "suppress"
            elif _could_be_reasoning_marker_prefix(self._buffer):
                # Still ambiguous: keep buffering until more text arrives.
                return ""
            else:
                # Definitely not a reasoning heading: release what was held.
                self._mode = "pass"
                held, self._buffer = self._buffer, ""
                return held

        # Suppress mode: stay silent until a final-answer marker is buffered.
        answer = self._answer_after_final_marker()
        if answer is None:
            return ""
        self._buffer = ""
        self._mode = "pass"
        return answer

    def finish(self) -> str:
        """Flush the filter at end of stream.

        A buffer that is being suppressed, or that starts with a reasoning
        heading, is cleaned with ``_strip_leading_reasoning_section``; any
        other held buffer is returned unchanged. The filter ends in ``pass``
        mode with an empty buffer.
        """
        if not self._buffer:
            return ""
        if self._mode == "suppress" or _starts_with_reasoning_marker(self._buffer):
            visible = _strip_leading_reasoning_section(self._buffer)
        else:
            visible = self._buffer
        self._buffer = ""
        self._mode = "pass"
        return visible

    def _answer_after_final_marker(self) -> str | None:
        """Return the buffered text after the final-answer marker, or ``None``.

        ``_extract_after_final_answer_marker`` strips both ends of the answer.
        Mid-stream that would drop whitespace sitting at the chunk boundary
        (``"Hello "`` followed by ``"world"`` would render as ``"Helloworld"``),
        so the buffer's trailing whitespace is appended again here.
        """
        answer = _extract_after_final_answer_marker(self._buffer)
        if not answer:
            # None (no marker yet) or "" (marker seen, nothing after it).
            return answer
        # The answer is a non-empty suffix of the buffer, so the buffer's
        # trailing whitespace is exactly what strip() removed from its end.
        trailing_whitespace = self._buffer[len(self._buffer.rstrip()):]
        return answer + trailing_whitespace
|
||||||
|
|
||||||
|
|
||||||
|
def _starts_with_reasoning_marker(text: str) -> bool:
    """Tell whether ``text`` begins with an explicit reasoning heading.

    The check is ``REASONING_START_RE`` matched at the start of ``text``.
    """
    # Match objects are always truthy, so bool() mirrors an ``is not None`` test.
    return bool(REASONING_START_RE.match(text))
|
||||||
|
|
||||||
|
|
||||||
|
def _could_be_reasoning_marker_prefix(text: str) -> bool:
    """Tell whether the buffered stream start may still grow into a reasoning heading.

    The text is normalized with ``_normalize_marker_prefix``. An empty result
    (only whitespace or markdown decoration so far) is still undecided and
    yields ``True``; otherwise the normalized text must be shorter than
    ``MAX_REASONING_PREFIX_HOLD`` and be a prefix of a known heading.
    """
    known_markers = (
        "thinking process",
        "thought process",
        "reasoning process",
        "chain of thought",
        "internal reasoning",
        "inner monologue",
        "思考过程",
        "推理过程",
        "内部思考",
    )
    prefix = _normalize_marker_prefix(text)
    if prefix == "":
        return True
    if len(prefix) >= MAX_REASONING_PREFIX_HOLD:
        return False
    for marker in known_markers:
        if marker.startswith(prefix):
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_marker_prefix(text: str) -> str:
|
||||||
|
"""把流式开头清理成便于判断的 marker 前缀。"""
|
||||||
|
stripped = text.lstrip()
|
||||||
|
stripped = re.sub(r"^(?:[#>\-*]+\s*)+", "", stripped)
|
||||||
|
stripped = stripped.strip("*").strip()
|
||||||
|
return stripped.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_after_final_answer_marker(text: str) -> str | None:
    """Return the stripped text after the last final-answer marker.

    Returns ``None`` when ``FINAL_ANSWER_RE`` does not match anywhere.
    """
    last_marker = None
    # Walk every occurrence and keep only the last one.
    for last_marker in FINAL_ANSWER_RE.finditer(text):
        pass
    if last_marker is None:
        return None
    return text[last_marker.end():].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_leading_reasoning_section(text: str) -> str:
    """Drop a leading explicit reasoning section and keep the answer after it.

    If a final-answer marker exists, the text after it is returned. Otherwise,
    when the first non-empty line is a reasoning heading, everything up to and
    including the last line that looks like reasoning is removed. Text without
    such a heading is returned stripped but otherwise intact.
    """
    answer = _extract_after_final_answer_marker(text)
    if answer is not None:
        return answer

    lines = text.splitlines()
    start = _first_non_empty_line_index(lines)
    if start is None or not _starts_with_reasoning_marker(lines[start]):
        return text.strip()

    # Indexes only grow, so the maximum is the last reasoning-like line;
    # fall back to the heading line itself when nothing else qualifies.
    cutoff = max(
        (index for index in range(start, len(lines)) if _looks_like_reasoning_line(lines[index])),
        default=start,
    )
    remaining = lines[cutoff + 1:]
    return "\n".join(remaining).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_empty_line_index(lines: list[str]) -> int | None:
|
||||||
|
"""返回首个非空行下标。"""
|
||||||
|
for index, line in enumerate(lines):
|
||||||
|
if line.strip():
|
||||||
|
return index
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_reasoning_line(line: str) -> bool:
    """Recognize a line that belongs to an explicit reasoning section.

    A non-blank line qualifies when it starts with a reasoning heading,
    contains one of the ``REASONING_LINE_RE`` phrases, or is a numbered bold
    step heading mentioning request/response/answer/constraints (or the
    Chinese equivalents in the pattern).
    """
    content = line.strip()
    if not content:
        return False
    if _starts_with_reasoning_marker(content) or REASONING_LINE_RE.search(content):
        return True
    numbered_heading = re.match(
        r"^\s*\d+\.\s*\*\*[^*]+(?:request|response|answer|constraints|过程|回答)[^*]*\*\*",
        content,
        flags=re.IGNORECASE,
    )
    return numbered_heading is not None
|
||||||
|
|
||||||
|
|
||||||
def _longest_suffix_prefix(text: str, targets: list[str]) -> int:
|
def _longest_suffix_prefix(text: str, targets: list[str]) -> int:
|
||||||
|
|||||||
@ -27,3 +27,57 @@ def test_filter_thinking_chunks_drops_unclosed_think_tail():
|
|||||||
visible = list(filter_thinking_chunks(chunks))
|
visible = list(filter_thinking_chunks(chunks))
|
||||||
|
|
||||||
assert "".join(visible) == "回答"
|
assert "".join(visible) == "回答"
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_thinking_text_removes_explicit_thinking_process_without_tags():
    """An untagged ``Thinking Process:`` section is removed; the answer after it stays."""
    raw_output = """Thinking Process:
1. **Analyze the Request:**
* Input: JSON object containing context and user_text ("你是谁?").
* Role: PAM Deployment Agent Interaction Assistant.
* Constraints:
* Do NOT automatically trigger deployment, rollback, upgrade, script execution, or MCP calls.
* Do NOT output secrets.
2. **Determine the Response:**
* The user is asking about my identity.
* I need to introduce myself briefly.
3. **Drafting the Response:**
* Greeting/Identity: 我是 PAM 部署 Agent 的交互助手。
* Function: 我可以回答普通问题、解释命令和部署流程。
4. **Refining the Response:**
* Keep it concise and friendly.

我是 PAM 部署 Agent 的交互助手。
我可以回答普通问题、解释当前 Agent 的命令和部署流程。
"""

    cleaned = strip_thinking_text(raw_output)

    # None of the reasoning headings may leak into the visible text.
    for heading in (
        "Thinking Process",
        "Analyze the Request",
        "Determine the Response",
        "Drafting the Response",
    ):
        assert heading not in cleaned
    assert "我是 PAM 部署 Agent 的交互助手。" in cleaned
    assert cleaned.startswith("我是 PAM 部署 Agent")
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_thinking_text_keeps_content_after_final_answer_marker():
    """Only the text following ``Final Answer:`` survives the filter."""
    raw_output = """Reasoning Process:
I should not expose this.
Final Answer: 可以,我只展示最终回答。
"""
    expected = "可以,我只展示最终回答。"

    assert strip_thinking_text(raw_output) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_thinking_chunks_suppresses_explicit_reasoning_until_finish():
    """A reasoning heading split across chunks is still hidden until the final answer."""
    pieces = [
        "Think",
        "ing Process:\n",
        "I should hide this reasoning.\n",
        "Final Answer: ",
        "这是最终回答。",
    ]

    streamed = "".join(filter_thinking_chunks(pieces))

    assert streamed == "这是最终回答。"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user