文件预览

desensitization.yaml

查看 AIDR-XClaw-Security-Sentinel 技能包中的文件内容。

文件内容

configs/desensitization.yaml

# configs/desensitization.yaml
# AIDR-XClaw-Security-Sentinel SKill — 数据脱敏规则配置
# 所有规则均为纯正则表达式 + 替换字符串,不含任何执行逻辑
# 应用顺序:按 priority 从小到大(priority 越小越高优)顺序匹配

# Rule-set metadata. Both values are quoted so they stay strings
# (an unquoted date would be parsed as a timestamp by many loaders).
version: '1.0.0'
last_updated: '2026-03-26'

rules:
  # ─── Identity identifiers ───────────────────────────────────
  # Mainland China resident ID number: 17 digits followed by a digit or X/x,
  # delimited by word boundaries.
  - id: 'id_card'
    priority: 1
    pattern: '\b\d{17}[\dXx]\b'
    replacement: '[ID_CARD_MASKED]'
    description: '中国大陆身份证号(18位)'
    # Each example is one quoted string. The previous form
    # ("input" → "output") placed text after a closed double-quoted scalar,
    # which is a YAML syntax error and prevented the file from loading.
    examples:
      - '"110101199001011234" → "[ID_CARD_MASKED]"'
      - '"11010119900101123X" → "[ID_CARD_MASKED]"'

  # Mainland China mobile number: 11 digits, first digit 1, second digit 3-9,
  # delimited by word boundaries.
  - id: 'phone_number'
    priority: 1
    pattern: '\b1[3-9]\d{9}\b'
    replacement: '[PHONE_MASKED]'
    description: '中国大陆手机号(11位,130-199开头)'
    # The example is a single quoted string. The previous form
    # ("input" → "output") placed text after a closed double-quoted scalar,
    # which is a YAML syntax error.
    examples:
      - '"13812345678" → "[PHONE_MASKED]"'

  # Bank card number: any standalone run of 16-19 digits. It overlaps with the
  # 18-digit ID-card rule, which is why it carries a lower precedence
  # (priority 2).
  - id: 'bank_card'
    priority: 2
    pattern: '\b\d{16,19}\b'
    replacement: '[BANK_CARD_MASKED]'
    description: '银行卡号(16-19位,辅助验证,优先级较低)'

  # ─── Authentication credentials ─────────────────────────────
  # Generic "api_key = <value>" style declarations (case-insensitive).
  # Group 1 captures the key name, group 2 the value (16+ word/hyphen chars),
  # with optional surrounding quotes.
  # NOTE(review): `notes` states that only the key value is replaced and the
  # prefix is kept, but `replacement` contains no backreference to group 1, so
  # the whole match (prefix included) is replaced. Confirm the intended
  # behavior against the engine's backreference syntax.
  - id: 'api_key_generic'
    priority: 1
    pattern: '(?i)(api[_-]?key|apikey)\s*[:=]\s*[''"]?([\w\-]{16,})[''"]?'
    replacement: '[API_KEY_MASKED]'
    description: '通用 API Key 声明'
    notes: '只替换 key 本身,保留前缀文字'

  # Password / secret / token assignments (case-insensitive): a keyword,
  # then ':' or '=', then an optionally quoted value of 8+ characters drawn
  # from word characters, '-' and the listed punctuation.
  - id: 'secret_declaration'
    priority: 1
    pattern: '(?i)(password|passwd|secret|token|auth[_-]?token|access[_-]?token)\s*[:=]\s*[''"]?[\w\-!@#$%^&*()]{8,}[''"]?'
    replacement: '[SECRET_MASKED]'
    description: '密码/密钥/Token 声明'

  # "Bearer <token>" (case-insensitive), where the token is 20+ characters of
  # word characters, '-' or '.'. The replacement re-emits a literal "Bearer "
  # prefix ahead of the mask.
  - id: 'bearer_token'
    priority: 1
    pattern: '(?i)bearer\s+[\w\-\.]{20,}'
    replacement: 'Bearer [BEARER_TOKEN_MASKED]'
    description: 'Bearer Token'

  # ─── Paths and addresses ────────────────────────────────────
  # "/root/" plus the first path component (1-64 characters, stopping at '/',
  # whitespace, quotes, '<', '>' or '|').
  - id: 'path_root'
    priority: 2
    pattern: '/root/[^/\s"''<>|]{1,64}'
    replacement: '[PATH_MASKED]'
    description: 'root 用户目录下的路径'

  # "/home/<name>/<entry>": a first component of 1-32 characters followed by
  # a second component of 1-64 characters, using the same excluded-character
  # set as the /root rule.
  - id: 'path_home'
    priority: 2
    pattern: '/home/[^/\s"''<>|]{1,32}/[^/\s"''<>|]{1,64}'
    replacement: '[PATH_MASKED]'
    description: '普通用户 home 目录下的路径'

  # Names of sensitive configuration locations (case-insensitive): ".env"
  # (optionally followed by "_file" / "_path"), ".aws", ".ssh", ".gnupg",
  # ".kube" and ".docker".
  - id: 'path_config'
    priority: 1
    pattern: '(?i)\.env(?:\b|_file|_path)?|\.aws|\.ssh|\.gnupg|\.kube|\.docker'
    replacement: '[CONFIG_PATH_MASKED]'
    description: '敏感配置目录标识'

  # Private IPv4 ranges: 10.0.0.0/8, 172.16.0.0/12 (second octet 16-31) and
  # 192.168.0.0/16, delimited by word boundaries.
  # Fix: the third alternative previously read "192\168\." (the escaped dot
  # between the first two octets was missing), so the regex engine saw "\168"
  # as an octal escape / backreference and 192.168.x.x was never matched.
  - id: 'internal_ip'
    priority: 1
    pattern: '\b(10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})\b'
    replacement: '[INTERNAL_IP_MASKED]'
    description: '内网 IP 地址'

  # Cloud instance-metadata addresses: the link-local 169.254.169.254 and the
  # "metadata.google" / "metadata.azure" / "metadata.openstack" host prefixes.
  - id: 'metadata_endpoint'
    priority: 1
    pattern: '169\.254\.169\.254|metadata\.google|metadata\.azure|metadata\.openstack'
    replacement: '[METADATA_ENDPOINT_MASKED]'
    description: '云元数据服务地址(SSRF 常见目标)'

  # Loopback / wildcard hosts (case-insensitive): localhost, 127.0.0.1 and
  # 0.0.0.0, optionally followed by ':' or '/', or by ".80" / ".443" / ".8080".
  # NOTE(review): the dotted-number suffix looks like it was meant to be a
  # port (":80" etc.) — confirm the intent.
  - id: 'ssrf_common_targets'
    priority: 2
    pattern: '(?i)(localhost|127\.0\.0\.1|0\.0\.0\.0)(?:[:/]|\.(?:80|443|8080))?'
    replacement: '[SSRF_TARGET_MASKED]'
    description: '常见 SSRF 攻击目标'

  # ─── Credential exfiltration ────────────────────────────────
  # Request-capture / webhook inspection services, matched by domain name.
  - id: 'exfil_webhook'
    priority: 1
    pattern: 'webhook\.site|requestbin\.com|requestbin\.net|hookbin\.com|beeceptor\.com'
    replacement: '[EXFIL_ENDPOINT_MASKED]'
    description: '凭据外传常用 webhook 端点'

  # ngrok tunnel domains.
  # The first two alternatives are kept exactly as before for backward
  # compatibility. The third adds the ngrok-managed domains that were not
  # covered: ngrok.app, ngrok.dev, ngrok-free.app and ngrok-free.dev.
  # NOTE(review): "ngrok.free.ngrok.io" looks like a garbled "ngrok-free.app";
  # it can be dropped once confirmed unused.
  - id: 'exfil_ngrok'
    priority: 1
    pattern: 'ngrok\.io|ngrok\.free\.ngrok\.io|ngrok(?:-free)?\.(?:app|dev)'
    replacement: '[EXFIL_ENDPOINT_MASKED]'
    description: '内网穿透工具(常用于凭据外传)'

  # Workflow-automation endpoints matched by domain: pipedream.net and
  # hooks.slack.com.
  - id: 'exfil_pipedream'
    priority: 1
    pattern: 'pipedream\.net|hooks\.slack\.com'
    replacement: '[EXFIL_ENDPOINT_MASKED]'
    description: '工作流自动化平台(潜在数据外传通道)'

  # An exfiltration keyword (exfil, exfiltrat, data_leak, send_data, ...),
  # an optional "to" / "at" / "on", then a hostname ending in .com, .io, .net
  # or .org. Case-insensitive.
  - id: 'exfil_data_endpoints'
    priority: 2
    pattern: '(?i)(exfil|exfiltrat|data[_-]?leak|send[_-]?data)\s*(?:to|at|on)?\s*[a-z0-9\-\.]+\.(com|io|net|org)'
    replacement: '[DATA_EXFIL_MASKED]'
    description: '潜在数据外泄端点描述'

  # ─── Prompt-injection related ───────────────────────────────
  # Literal upper-case SYSTEM / ADMIN / ROOT markers in square or angle
  # brackets. The pattern carries no (?i) flag.
  - id: 'fake_system_marker'
    priority: 1
    pattern: '\[SYSTEM\]|\[ADMIN\]|\[ROOT\]|<SYSTEM>|<ADMIN>|<ROOT>'
    replacement: '[FAKE_SYSTEM_MARKER_MASKED]'
    description: '伪造的系统标记(Prompt Injection 常用)'

  # An encoding keyword (base64, base32, hex, encode, decode) followed by
  # '(', ':' or '=' and an optionally quoted run of 20+ base64-alphabet
  # characters. Case-insensitive.
  - id: 'encoded_payload'
    priority: 2
    pattern: '(?i)(base64|base32|hex|encode|decode)\s*[(:=]\s*[''"]?[A-Za-z0-9+/=]{20,}[''"]?'
    replacement: '[ENCODED_PAYLOAD_MASKED]'
    description: '编码载荷(可能包含恶意指令)'

  # Directory-traversal markers: "../", "..\", their percent-encoded forms,
  # "/etc/passwd", and the Windows paths "c:\windows" / "c:\boot".
  # Fixes:
  #   * The Windows alternatives previously decoded to the regex "c:\windows"
  #     and "c:\boot", where "\w" is a word-character class and "\b" a word
  #     boundary, so a literal backslash was never matched. The backslash is
  #     now escaped ("\\" in the regex).
  #   * (?i) added: percent-encoding hex digits (%2F / %2f) and Windows paths
  #     (C:\Windows) are case-insensitive.
  - id: 'path_traversal'
    priority: 1
    pattern: '(?i)(?:\.\./|\.\.\\|\.\.%2f|\.\.%5c|/etc/passwd|c:\\windows|c:\\boot)'
    replacement: '[PATH_TRAVERSAL_MASKED]'
    description: '目录遍历攻击模式'

  # ─── Sensitive file paths ───────────────────────────────────
  # A sensitive directory name (.ssh, .aws, .kube, .docker, .gnupg, .git)
  # followed by '/' and the rest of the path up to a quote or whitespace.
  # NOTE(review): path_config (same priority, listed earlier) also matches the
  # bare .ssh/.aws/.kube/.docker/.gnupg names — confirm how the two rules are
  # meant to interact.
  - id: 'sensitive_paths'
    priority: 1
    pattern: '(?:\.ssh|\.aws|\.kube|\.docker|\.gnupg|\.git)/[^"''\s]*'
    replacement: '[SENSITIVE_PATH_MASKED]'
    description: '敏感目录路径'

  # Credential file names and extensions: SSH private-key base names, .pem,
  # .key, credentials.json, secrets.yaml / secrets.yml and .env.
  # Fix: the SSH alternative was "id_[r]?sa", which only covers id_rsa (and
  # the non-standard id_sa). It now also covers id_dsa, id_ecdsa and
  # id_ed25519; "r?sa" is retained so everything matched before still matches.
  - id: 'credential_file_patterns'
    priority: 1
    pattern: '(?:id_(?:r?sa|dsa|ecdsa|ed25519)|\.pem|\.key|credentials\.json|secrets\.ya?ml|\.env)'
    replacement: '[CREDENTIAL_FILE_MASKED]'
    description: '敏感凭据文件模式'

# ─── 应用规则 ─────────────────────────────────────────────────
# 1. 按 priority 从小到大(高优到低优)顺序匹配
# 2. 同一 priority 内按配置顺序匹配
# 3. 每个位置只匹配一次,不递归替换 mask 字符串本身
# 4. 不区分大小写(启用 /i 修饰符的规则除外)