文件内容
scripts/lib/resolve.py
"""Auto-resolve subreddits, X handles, and current-events context for a topic."""
from __future__ import annotations
import re
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
from . import dates, grounding
def _log(msg: str) -> None:
print(f"[Resolve] {msg}", file=sys.stderr)
def _has_backend(config: dict) -> bool:
"""Check if the AISA grounding backend is available."""
return bool(config.get("AISA_API_KEY"))
def _extract_subreddits(items: list[dict]) -> list[str]:
"""Parse subreddit names from search result titles and snippets."""
pattern = re.compile(r"r/([A-Za-z0-9_]{2,21})")
seen: set[str] = set()
results: list[str] = []
for item in items:
text = f"{item.get('title', '')} {item.get('snippet', '')} {item.get('url', '')}"
for match in pattern.findall(text):
lower = match.lower()
if lower not in seen:
seen.add(lower)
results.append(match)
return results
def _extract_x_handle(items: list[dict]) -> str:
"""Extract the most likely X/Twitter handle from search results."""
pattern = re.compile(r"@([A-Za-z0-9_]{1,15})")
url_pattern = re.compile(r"(?:twitter\.com|x\.com)/([A-Za-z0-9_]{1,15})(?:/|$|\?)")
counts: dict[str, int] = {}
for item in items:
text = f"{item.get('title', '')} {item.get('snippet', '')}"
url = item.get("url", "")
for match in pattern.findall(text):
lower = match.lower()
counts[lower] = counts.get(lower, 0) + 1
for match in url_pattern.findall(url):
lower = match.lower()
# URL matches are stronger signals
counts[lower] = counts.get(lower, 0) + 3
# Filter out generic handles
skip = {"twitter", "x", "search", "hashtag", "intent", "share", "i", "home", "explore", "settings"}
counts = {k: v for k, v in counts.items() if k not in skip}
if not counts:
return ""
return max(counts, key=counts.get)
def _extract_github_user(items: list[dict]) -> str:
"""Extract GitHub username from search results."""
url_pattern = re.compile(r"github\.com/([A-Za-z0-9_-]{1,39})(?:/|$|\?)")
counts: dict[str, int] = {}
for item in items:
url = item.get("url", "")
text = f"{item.get('title', '')} {item.get('snippet', '')}"
for match in url_pattern.findall(url):
lower = match.lower()
counts[lower] = counts.get(lower, 0) + 3
for match in url_pattern.findall(text):
lower = match.lower()
counts[lower] = counts.get(lower, 0) + 1
# Filter out org/repo-like names and generic pages
skip = {"topics", "explore", "settings", "orgs", "search", "features", "about", "pricing", "enterprise"}
counts = {k: v for k, v in counts.items() if k not in skip}
if not counts:
return ""
return max(counts, key=counts.get)
def _extract_github_repos(items: list[dict]) -> list[str]:
"""Extract owner/repo strings from search results."""
repo_pattern = re.compile(r"github\.com/([A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+)")
skip_owners = {"topics", "explore", "settings", "orgs", "search", "features", "about", "pricing", "enterprise"}
seen: set[str] = set()
repos: list[str] = []
for item in items:
url = item.get("url", "")
text = f"{item.get('title', '')} {item.get('snippet', '')}"
for source in [url, text]:
for match in repo_pattern.findall(source):
owner = match.split("/")[0].lower()
if owner in skip_owners:
continue
lower = match.lower()
if lower not in seen:
seen.add(lower)
repos.append(match)
return repos[:5] # cap at 5 repos
def _build_context_summary(items: list[dict]) -> str:
"""Build a 1-2 sentence current events summary from news search results."""
snippets: list[str] = []
for item in items[:3]:
snippet = item.get("snippet", "").strip()
if snippet:
snippets.append(snippet)
if not snippets:
return ""
# Take the first two meaningful snippets and truncate to keep it concise
combined = " ".join(snippets[:2])
if len(combined) > 300:
combined = combined[:297] + "..."
return combined
def auto_resolve(topic: str, config: dict) -> dict:
"""Discover subreddits, X handles, and current events context for a topic.
Args:
topic: The research topic.
config: Dict with AISA runtime config.
Returns:
Dict with keys: subreddits, x_handle, context, searches_run.
Returns empty result if no grounding backend is available.
"""
empty = {"subreddits": [], "x_handle": "", "context": "", "searches_run": 0}
if not _has_backend(config):
_log("No grounding backend available, skipping resolve")
return empty
from_date, to_date = dates.get_date_range(30)
date_range = (from_date, to_date)
now = datetime.now(timezone.utc)
current_month = now.strftime("%B")
current_year = now.strftime("%Y")
queries = {
"subreddit": f"{topic} subreddit reddit",
"news": f"{topic} news {current_month} {current_year}",
"x_handle": f"{topic} X twitter handle",
"github": f"{topic} github profile site:github.com",
}
results: dict[str, list[dict]] = {}
searches_run = 0
def _search(label: str, query: str) -> tuple[str, list[dict]]:
items, _artifact = grounding.web_search(query, date_range, config)
return label, items
with ThreadPoolExecutor(max_workers=3) as executor:
futures = {
executor.submit(_search, label, q): label
for label, q in queries.items()
}
for future in as_completed(futures):
label = futures[future]
try:
_label, items = future.result()
results[label] = items
searches_run += 1
except Exception as exc:
_log(f"Search failed for {label}: {exc}")
results[label] = []
subreddits = _extract_subreddits(results.get("subreddit", []))
x_handle = _extract_x_handle(results.get("x_handle", []))
github_user = _extract_github_user(results.get("github", []))
github_repos = _extract_github_repos(results.get("github", []))
context = _build_context_summary(results.get("news", []))
_log(f"Resolved {len(subreddits)} subreddits, x_handle={x_handle!r}, github_user={github_user!r}, github_repos={github_repos!r}, context_len={len(context)}")
return {
"subreddits": subreddits,
"x_handle": x_handle,
"github_user": github_user,
"github_repos": github_repos,
"context": context,
"searches_run": searches_run,
}