文件预览

resolve.py

查看 Last30days 技能包中的文件内容。

文件内容

scripts/lib/resolve.py

"""Auto-resolve subreddits, X handles, and current-events context for a topic."""

from __future__ import annotations

import re
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone

from . import dates, grounding


def _log(msg: str) -> None:
    """Write a ``[Resolve]``-tagged diagnostic line to standard error."""
    line = f"[Resolve] {msg}"
    print(line, file=sys.stderr)


def _has_backend(config: dict) -> bool:
    """Report whether ``config`` carries a truthy ``AISA_API_KEY`` entry.

    A missing key, ``None``, or an empty string all count as "no backend".
    """
    api_key = config.get("AISA_API_KEY")
    return bool(api_key)


def _extract_subreddits(items: list[dict]) -> list[str]:
    """Parse subreddit names from search result titles, snippets, and URLs.

    Args:
        items: Search result dicts; the ``title``, ``snippet`` and ``url``
            values are scanned when present.

    Returns:
        Subreddit names (without the ``r/`` prefix) in first-seen order,
        de-duplicated case-insensitively while keeping the casing of the
        first occurrence.
    """
    # The lookarounds require ``r/`` to start a token and the name to end on
    # a non-name character. Without them "github.com/user/repo" yields "repo"
    # (from "...user/repo") and an over-long name is silently cut to 21 chars.
    pattern = re.compile(
        r"(?<![A-Za-z0-9_])r/([A-Za-z0-9_]{2,21})(?![A-Za-z0-9_])"
    )
    seen: set[str] = set()
    results: list[str] = []
    for item in items:
        text = f"{item.get('title', '')} {item.get('snippet', '')} {item.get('url', '')}"
        for match in pattern.findall(text):
            lower = match.lower()
            if lower in seen:
                continue
            seen.add(lower)
            results.append(match)
    return results


def _extract_x_handle(items: list[dict]) -> str:
    """Extract the most likely X/Twitter handle from search results.

    Each ``@handle`` mention in a title or snippet scores 1 point and each
    twitter.com / x.com profile URL scores 3; the best-scoring handle wins,
    with ties going to the handle that was seen first.

    Args:
        items: Search result dicts; ``title``, ``snippet`` and ``url`` are
            read when present.

    Returns:
        The winning handle, lowercased and without the leading ``@``, or an
        empty string when nothing usable was found.
    """
    # The lookbehind skips e-mail addresses ("press@example.com" is not a
    # handle); the lookahead refuses to truncate over-long names to 15 chars.
    mention_pattern = re.compile(
        r"(?<![A-Za-z0-9_.+-])@([A-Za-z0-9_]{1,15})(?![A-Za-z0-9_])"
    )
    # The host must start at a label boundary, otherwise "x.com" also matches
    # inside unrelated domains such as "netflix.com" or "wix.com".
    url_pattern = re.compile(
        r"(?<![A-Za-z0-9-])(?:twitter\.com|x\.com)/([A-Za-z0-9_]{1,15})(?:/|$|\?)"
    )
    counts: dict[str, int] = {}
    for item in items:
        text = f"{item.get('title', '')} {item.get('snippet', '')}"
        # A present-but-None url would make findall() raise TypeError.
        url = item.get("url") or ""
        for match in mention_pattern.findall(text):
            lower = match.lower()
            counts[lower] = counts.get(lower, 0) + 1
        for match in url_pattern.findall(url):
            lower = match.lower()
            # URL matches are stronger signals
            counts[lower] = counts.get(lower, 0) + 3
    # Filter out generic handles (site sections rather than accounts)
    skip = {"twitter", "x", "search", "hashtag", "intent", "share", "i", "home", "explore", "settings"}
    counts = {k: v for k, v in counts.items() if k not in skip}
    if not counts:
        return ""
    return max(counts, key=counts.get)


def _extract_github_user(items: list[dict]) -> str:
    """Extract the most likely GitHub username from search results.

    A ``github.com/<name>`` occurrence in a result URL scores 3 points and
    one in a title or snippet scores 1; the best-scoring name wins, with ties
    going to the name that was seen first.

    Args:
        items: Search result dicts; ``title``, ``snippet`` and ``url`` are
            read when present.

    Returns:
        The winning name, lowercased, or an empty string when none was found.
    """
    # The name may be followed by any non-name character, not only "/", "?"
    # or end-of-string, so prose mentions such as "(github.com/torvalds)"
    # count. The lookbehind keeps look-alike hosts ("notgithub.com") out.
    user_pattern = re.compile(
        r"(?<![A-Za-z0-9-])github\.com/([A-Za-z0-9_-]{1,39})(?![A-Za-z0-9_-])"
    )
    counts: dict[str, int] = {}
    for item in items:
        # A present-but-None url would make findall() raise TypeError.
        url = item.get("url") or ""
        text = f"{item.get('title', '')} {item.get('snippet', '')}"
        for match in user_pattern.findall(url):
            lower = match.lower()
            # URL matches are stronger signals than prose mentions
            counts[lower] = counts.get(lower, 0) + 3
        for match in user_pattern.findall(text):
            lower = match.lower()
            counts[lower] = counts.get(lower, 0) + 1
    # Filter out org/repo-like names and generic pages
    skip = {"topics", "explore", "settings", "orgs", "search", "features", "about", "pricing", "enterprise"}
    counts = {k: v for k, v in counts.items() if k not in skip}
    if not counts:
        return ""
    return max(counts, key=counts.get)


def _extract_github_repos(items: list[dict]) -> list[str]:
    """Extract ``owner/repo`` strings from search results.

    Args:
        items: Search result dicts; ``url``, ``title`` and ``snippet`` are
            scanned when present (URL first, then title + snippet).

    Returns:
        Up to five ``owner/repo`` strings in first-seen order, de-duplicated
        case-insensitively. Owners that are generic GitHub site sections
        (see ``skip_owners``) are ignored.
    """
    # The lookbehind keeps look-alike hosts such as "notgithub.com" out.
    repo_pattern = re.compile(
        r"(?<![A-Za-z0-9-])github\.com/([A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+)"
    )
    skip_owners = {"topics", "explore", "settings", "orgs", "search", "features", "about", "pricing", "enterprise"}
    seen: set[str] = set()
    repos: list[str] = []
    for item in items:
        # A present-but-None url would make findall() raise TypeError.
        url = item.get("url") or ""
        text = f"{item.get('title', '')} {item.get('snippet', '')}"
        for source in (url, text):
            for match in repo_pattern.findall(source):
                owner, _, name = match.partition("/")
                if owner.lower() in skip_owners:
                    continue
                # "." is allowed inside repo names, so the regex also swallows
                # a sentence-ending period and the ".git" clone suffix. Strip
                # both so "acme/widget.", "acme/widget.git" and "acme/widget"
                # collapse into one entry.
                name = name.rstrip(".")
                if name.lower().endswith(".git"):
                    name = name[: -len(".git")]
                if not name:
                    continue
                repo = f"{owner}/{name}"
                key = repo.lower()
                if key not in seen:
                    seen.add(key)
                    repos.append(repo)
    return repos[:5]  # cap at 5 repos


def _build_context_summary(items: list[dict]) -> str:
    """Condense the top news hits into a short current-events blurb.

    Only the first three results are inspected. The first two non-blank
    snippets among them are joined with a space, and the result is clipped
    to 300 characters (ending in "...") when longer. Returns "" when none of
    the inspected results has a non-blank snippet.
    """
    stripped = [entry.get("snippet", "").strip() for entry in items[:3]]
    usable = [s for s in stripped if s]
    if not usable:
        return ""
    summary = " ".join(usable[:2])
    # Keep the blurb concise: 297 characters of text plus the ellipsis.
    return summary if len(summary) <= 300 else summary[:297] + "..."


def auto_resolve(topic: str, config: dict) -> dict:
    """Discover subreddits, X handle, GitHub identity, and news context for a topic.

    Runs one web search per signal (subreddit, news, X handle, GitHub) via
    ``grounding.web_search`` on a small thread pool, then parses the hits
    with this module's extraction helpers. A search that raises is logged
    and treated as having returned no results.

    Args:
        topic: The research topic.
        config: Dict with AISA runtime config.

    Returns:
        Dict with keys: subreddits, x_handle, github_user, github_repos,
        context, searches_run (the number of searches that completed without
        raising). The same keys, with empty values and ``searches_run == 0``,
        are returned when no grounding backend is available, so callers see
        one result shape on every path.
    """
    if not _has_backend(config):
        _log("No grounding backend available, skipping resolve")
        return {
            "subreddits": [],
            "x_handle": "",
            "github_user": "",
            "github_repos": [],
            "context": "",
            "searches_run": 0,
        }

    # NOTE(review): presumably the trailing 30-day window - confirm against
    # dates.get_date_range.
    from_date, to_date = dates.get_date_range(30)
    date_range = (from_date, to_date)
    now = datetime.now(timezone.utc)
    current_month = now.strftime("%B")
    current_year = now.strftime("%Y")

    queries = {
        "subreddit": f"{topic} subreddit reddit",
        "news": f"{topic} news {current_month} {current_year}",
        "x_handle": f"{topic} X twitter handle",
        "github": f"{topic} github profile site:github.com",
    }

    results: dict[str, list[dict]] = {}
    searches_run = 0

    def _search(query: str) -> list[dict]:
        # web_search returns an (items, artifact) pair; only items are used.
        items, _artifact = grounding.web_search(query, date_range, config)
        return items

    # The pool is capped at 3 workers, so with four queries one search waits
    # for a free worker.
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(_search, query): label
            for label, query in queries.items()
        }
        for future in as_completed(futures):
            label = futures[future]
            try:
                results[label] = future.result()
            except Exception as exc:
                # Best effort: one failed search must not sink the others.
                _log(f"Search failed for {label}: {exc}")
                results[label] = []
            else:
                searches_run += 1

    subreddits = _extract_subreddits(results.get("subreddit", []))
    x_handle = _extract_x_handle(results.get("x_handle", []))
    github_user = _extract_github_user(results.get("github", []))
    github_repos = _extract_github_repos(results.get("github", []))
    context = _build_context_summary(results.get("news", []))

    _log(f"Resolved {len(subreddits)} subreddits, x_handle={x_handle!r}, github_user={github_user!r}, github_repos={github_repos!r}, context_len={len(context)}")

    return {
        "subreddits": subreddits,
        "x_handle": x_handle,
        "github_user": github_user,
        "github_repos": github_repos,
        "context": context,
        "searches_run": searches_run,
    }