"""Agent 运行日志配置和脱敏工具。""" from __future__ import annotations import json import logging import os import re from dataclasses import asdict, is_dataclass from pathlib import Path from typing import Any from .constants import SENSITIVE_KEYS DEFAULT_LOG_FILE = Path("logs") / "pam_deploy_agent.log" LOG_FILE_ENV = "PAM_AGENT_LOG_FILE" LOG_LEVEL_ENV = "PAM_AGENT_LOG_LEVEL" _HANDLER_MARKER = "_pam_deploy_agent_handler" _SENSITIVE_NAME_PARTS = ("secret", "token", "authorization", "api_key", "apikey", "password") _ASSIGNMENT_PATTERN = re.compile( r"(?i)\b(client_secret|mcp_client_secret|api_key|pam_llm_api_key|token|access_token|authorization|password)\b" r"\s*([:=])\s*([^\s,;]+)" ) _AUTH_BEARER_ASSIGNMENT_PATTERN = re.compile(r"(?i)\b(authorization)\b\s*([:=])\s*bearer\s+[^\s,;]+") _BEARER_PATTERN = re.compile(r"(?i)(bearer\s+)[A-Za-z0-9._~+\-/=]+") def configure_logging( log_file: str | Path | None = None, level: str | int | None = None, ) -> Path: """配置 Agent 文件日志;重复调用不会重复添加 handler。""" actual_path = Path(log_file or os.getenv(LOG_FILE_ENV) or DEFAULT_LOG_FILE) actual_path.parent.mkdir(parents=True, exist_ok=True) actual_level = _resolve_level(level or os.getenv(LOG_LEVEL_ENV) or "INFO") package_logger = logging.getLogger("pam_deploy_graph") package_logger.setLevel(actual_level) package_logger.propagate = False marker = str(actual_path.resolve()) for handler in package_logger.handlers: if getattr(handler, _HANDLER_MARKER, "") == marker: handler.setLevel(actual_level) return actual_path handler = logging.FileHandler(actual_path, encoding="utf-8") setattr(handler, _HANDLER_MARKER, marker) handler.setLevel(actual_level) handler.setFormatter( logging.Formatter( fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) ) package_logger.addHandler(handler) package_logger.info("日志已初始化 path=%s level=%s", actual_path, logging.getLevelName(actual_level)) return actual_path def redact_for_log(value: Any, *, max_text_len: int = 1200) -> Any: """递归脱敏并截断日志对象,避免把密钥、token 或完整长文本写入日志。""" if is_dataclass(value) and not isinstance(value, type): return redact_for_log(asdict(value), max_text_len=max_text_len) if isinstance(value, dict): redacted: dict[str, Any] = {} for key, item in value.items(): text_key = str(key) if _is_sensitive_key(text_key): redacted[text_key] = "***" else: redacted[text_key] = redact_for_log(item, max_text_len=max_text_len) return redacted if isinstance(value, (list, tuple, set)): return [redact_for_log(item, max_text_len=max_text_len) for item in value] if isinstance(value, str): return _truncate(_redact_string(value), max_text_len) if value is None or isinstance(value, (bool, int, float)): return value return _truncate(_redact_string(str(value)), max_text_len) def json_for_log(value: Any, *, max_text_len: int = 1200) -> str: """把对象脱敏后序列化成适合单行日志的 JSON 文本。""" redacted = redact_for_log(value, max_text_len=max_text_len) return json.dumps(redacted, ensure_ascii=False, default=str, sort_keys=True) def _resolve_level(value: str | int) -> int: """解析日志级别字符串,非法值降级为 INFO。""" if isinstance(value, int): return value resolved = getattr(logging, str(value).upper(), logging.INFO) return resolved if isinstance(resolved, int) else logging.INFO def _is_sensitive_key(key: str) -> bool: """判断字段名是否应脱敏。""" if key in SENSITIVE_KEYS: return True normalized = key.lower().replace("-", "_") return any(part in normalized for part in _SENSITIVE_NAME_PARTS) def _truncate(value: str, limit: int) -> str: """截断过长字符串。""" if len(value) <= limit: return value return value[:limit] + "...[已截断]" def _redact_string(value: str) -> str: """脱敏字符串中的常见 KEY=VALUE 和 Bearer token 片段。""" value = _AUTH_BEARER_ASSIGNMENT_PATTERN.sub(lambda match: f"{match.group(1)}{match.group(2)}***", value) value = _ASSIGNMENT_PATTERN.sub(lambda match: f"{match.group(1)}{match.group(2)}***", value) return _BEARER_PATTERN.sub(lambda match: f"{match.group(1)}***", value)