文件预览

daemon.js

查看 Price Win 技能包中的文件内容。

文件内容

bin/daemon.js

#!/usr/bin/env node
// ----------------------------------------------------------------------------
// daemon.js
//
// Long-running process that owns the Patchright Chromium browser and serves
// each agent action over a localhost HTTP endpoint. Critical: Patchright's
// stealth patches (CDP-level fingerprint masking) only apply when commands
// flow through the same Node process that called `chromium.launch()`. If we
// instead detached the browser and re-attached via `connectOverCDP` from a
// fresh process (the v0.2-rc.1 design), Booking/Agoda see plain headless
// Chrome and degrade the response. Hence: one daemon, many CLI clients.
//
// Lifecycle:
//   - bin/browse.js spawns this with `detached: true` + `unref()` on launch.
//   - State (port, pid, createdAt) is written to
//     ~/.cache/opentravel-deal-finder/state.json once the server is listening.
//   - SIGTERM (sent by `browse close`), SIGINT, or a request to the
//     `shutdown` endpoint → clean shutdown.
// ----------------------------------------------------------------------------

import http from 'node:http';
import net from 'node:net';
import { chromium } from 'patchright';
import fs from 'node:fs/promises';
import path from 'node:path';
import os from 'node:os';
import { takeSnapshot } from '../lib/snapshot.js';
import { extractWithSelectors, isExtractionHealthy } from '../lib/dom-extract.js';
import { saveState, clearState } from '../lib/browser-state.js';

// Per-user cache directory for this tool; main() creates it at startup.
const CACHE_DIR = path.join(os.homedir(), '.cache', 'opentravel-deal-finder');

// Process-wide browser handles. They start undefined and are filled in
// lazily by ensurePage(); `page` is also re-pointed by the tab-switching
// endpoints in handle().
let browser;
let context;
let page;
// In-memory map of ref → snapshot entry, replaced wholesale by each snapshot
// and consumed by click/type/fill/press so the agent doesn't fail when
// React/Vue re-renders strip our data-browse-ref attribute. Readers in this
// file treat an entry as an object with `selector` (stable CSS selector) and
// `signature` fields; the exact shape comes from takeSnapshot() — TODO confirm
// against lib/snapshot.js.
let lastSnapshotRefs = {};

/**
 * Reduce an agent-supplied ref to the key used by the snapshot ref map.
 *
 * Some LLMs decorate refs with prefixes ("@e16", "ref-12", "#27"); the first
 * run of digits is taken as the key. Input that contains no digit at all is
 * returned in its string form, and null/undefined become the empty string.
 *
 * @param {unknown} ref - Ref exactly as received from the client.
 * @returns {string} The first digit run, or the stringified input.
 */
function normalizeRef(ref) {
  const raw = String(ref ?? '');
  const digits = /\d+/.exec(raw);
  return digits === null ? raw : digits[0];
}

/**
 * Look up the snapshot entry recorded for a ref.
 *
 * The ref is normalized first, then looked up in `lastSnapshotRefs`. When the
 * map has no object entry for it, a fallback entry is synthesized that
 * targets the element by its `data-browse-ref` attribute and carries no
 * signature.
 *
 * @param {unknown} ref - Ref as received from the client (any decoration).
 * @returns {{selector: string, signature: (object|null)}} The stored entry,
 *   or the attribute-selector fallback.
 */
function refToEntry(ref) {
  const key = normalizeRef(ref);
  // Own-property lookup only: refs come from the agent, and a key such as
  // "__proto__" must not resolve to something inherited from Object.prototype.
  const entry = Object.prototype.hasOwnProperty.call(lastSnapshotRefs, key)
    ? lastSnapshotRefs[key]
    : undefined;
  if (entry && typeof entry === 'object') return entry;
  // Escape backslashes and double quotes so the fallback is always a
  // syntactically valid attribute selector, whatever the ref contained.
  const escapedKey = key.replace(/["\\]/g, '\\$&');
  return { selector: `[data-browse-ref="${escapedKey}"]`, signature: null };
}

/**
 * Convenience accessor: the CSS selector part of the entry for a ref.
 *
 * @param {unknown} ref - Ref as received from the client.
 * @returns {string} The `selector` field of whatever refToEntry() returns.
 */
function refToSelector(ref) {
  const { selector } = refToEntry(ref);
  return selector;
}

/**
 * Resolve a ref to the live DOM element by trying, in order:
 *   1) the stable CSS selector saved at snapshot time (must match exactly one
 *      element),
 *   2) the data-browse-ref attribute (may have been stripped by React; must
 *      match exactly one element),
 *   3) a signature re-scan: every element of the signature's tag is scored on
 *      testid (+5), aria-label (+4), href (+4), placeholder (+3) and
 *      normalized text (+2); the first highest-scoring element wins if it
 *      reaches at least 2 points.
 *
 * When step 3 picks a winner, the ref attribute is written back onto it and
 * removed from any other element still carrying the same value, so the
 * returned selector identifies the winner only.
 *
 * @param {object} page - Page-like object exposing `evaluate(fn, arg)`.
 * @param {unknown} ref - Ref as received from the client.
 * @returns {Promise<string>} A CSS selector for the resolved element.
 * @throws {Error} When none of the three strategies finds the element.
 */
async function resolveRef(page, ref) {
  const refKey = normalizeRef(ref);
  const entry = refToEntry(refKey);
  const sel = await page.evaluate(
    ({ ref, entry }) => {
      // Invalid selectors count as "no match" instead of throwing.
      const tryCount = (s) => {
        try { return document.querySelectorAll(s).length; } catch { return 0; }
      };
      if (entry.selector && tryCount(entry.selector) === 1) return entry.selector;
      const byRef = '[data-browse-ref="' + ref + '"]';
      if (tryCount(byRef) === 1) return byRef;
      const sig = entry.signature;
      if (!sig) return null;
      // Re-scan the DOM for an element matching the signature.
      const candidates = Array.from(document.querySelectorAll(sig.tag || '*'));
      const norm = (s) => (s || '').replace(/\s+/g, ' ').trim().toLowerCase().slice(0, 160);
      const targetText = norm(sig.text);
      let best = null;
      let bestScore = -1;
      for (const el of candidates) {
        let score = 0;
        if (sig.testid && (el.getAttribute('data-testid') === sig.testid || el.getAttribute('data-selenium') === sig.testid)) score += 5;
        if (sig.ariaLabel && el.getAttribute('aria-label') === sig.ariaLabel) score += 4;
        if (sig.placeholder && el.getAttribute('placeholder') === sig.placeholder) score += 3;
        if (sig.href && el.getAttribute('href') === sig.href) score += 4;
        if (targetText && norm(el.innerText || el.value || '') === targetText) score += 2;
        // Strict ">" keeps the first element on ties.
        if (score > bestScore) { bestScore = score; best = el; }
      }
      // A bare text match (2 points) is the weakest evidence we accept.
      if (!best || bestScore < 2) return null;
      // Stale duplicates (e.g. cloned nodes) may still carry this ref; strip
      // it from them, otherwise the selector returned below would match
      // several elements and callers could act on the wrong one.
      try {
        for (const stale of document.querySelectorAll(byRef)) {
          if (stale !== best) stale.removeAttribute('data-browse-ref');
        }
      } catch {
        // The ref cannot be expressed as a selector; nothing to clean up.
      }
      // Tag the winner with a fresh data-browse-ref so callers have a stable
      // handle for follow-up operations.
      best.setAttribute('data-browse-ref', String(ref));
      return byRef;
    },
    { ref: refKey, entry },
  );
  if (!sel) throw new Error(`could not resolve ref ${ref} after re-scan`);
  return sel;
}

/**
 * Return a usable page, lazily creating whatever is missing: the Chromium
 * browser, the browser context, and finally the page itself.
 *
 * The module-level `browser`, `context` and `page` handles are reused across
 * calls. If the browser has disconnected (crash, external kill), all three
 * handles are discarded and a fresh browser is launched instead of failing
 * every subsequent command.
 *
 * @returns {Promise<object>} The active page.
 */
async function ensurePage() {
  // A dead Chromium leaves `browser`/`context` truthy but unusable; drop every
  // handle derived from it so the steps below start from scratch.
  if (browser && !browser.isConnected()) {
    browser = undefined;
    context = undefined;
    page = undefined;
  }
  if (!browser) {
    browser = await chromium.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--password-store=basic',
        '--use-mock-keychain',
        '--disable-blink-features=AutomationControlled',
      ],
    });
  }
  if (!context) {
    context = await browser.newContext({
      locale: 'vi-VN',
      viewport: { width: 1440, height: 900 },
      userAgent:
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    });
  }
  if (!page || page.isClosed()) {
    // Prefer a tab that is already open in the context over opening a new one.
    const existing = context.pages()[0];
    page = existing || (await context.newPage());
  }
  return page;
}

/**
 * Ask the OS for a currently unused TCP port on the loopback interface.
 *
 * A throwaway server is bound to port 0 on 127.0.0.1, the port the OS picked
 * is read back, and the server is closed again before the port is reported.
 *
 * @returns {Promise<number>} The port number the probe server was given.
 *   Rejects with the server's 'error' event payload if binding fails.
 */
async function findFreePort() {
  return new Promise((resolve, reject) => {
    const probe = net.createServer();
    const onListening = () => {
      const { port } = probe.address();
      // Report the port only after the probe has released it.
      probe.close(() => resolve(port));
    };
    probe.on('error', reject);
    probe.listen(0, '127.0.0.1', onListening);
  });
}

// --- handlers ---------------------------------------------------------------

/**
 * Read the complete request body and parse it as JSON.
 *
 * Chunks are collected as bytes and decoded once at the end, so a multi-byte
 * UTF-8 character that straddles two chunks is decoded correctly.
 *
 * @param {AsyncIterable<Buffer|string>} req - Incoming request stream.
 * @returns {Promise<any>} The parsed JSON value, or `{}` for an empty body.
 * @throws {SyntaxError} When the body is not valid JSON.
 */
async function readJsonBody(req) {
  const chunks = [];
  for await (const chunk of req) {
    chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
  }
  const body = Buffer.concat(chunks).toString('utf8');
  return body ? JSON.parse(body) : {};
}

/**
 * Execute one agent command.
 *
 * The endpoint name is the request path without its leading slash; the
 * arguments are the JSON request body (an empty body means no arguments).
 *
 * @param {object} req - Incoming HTTP request (needs `url` and must be
 *   async-iterable over its body chunks).
 * @returns {Promise<object>} The endpoint's JSON-serializable result.
 * @throws {Error} For an unknown endpoint, an unparsable body, or any failure
 *   raised while executing the command.
 */
async function handle(req) {
  // Named `endpoint` so the imported `path` module is not shadowed.
  const endpoint = new URL(req.url, 'http://x').pathname.slice(1);
  const args = await readJsonBody(req);

  switch (endpoint) {
    case 'ping':
      return { status: 'ok' };

    case 'goto': {
      const p = await ensurePage();
      // Tight ceilings so every command stays under the 30s shell timeout.
      await p.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 20_000 });
      // Network idle is a bonus, not a requirement: ignore the timeout.
      await p.waitForLoadState('networkidle', { timeout: 3_000 }).catch(() => {});
      return { status: 'loaded', url: p.url(), title: await p.title() };
    }

    case 'snapshot': {
      const p = await ensurePage();
      // We deliberately do NOT scroll here — scrolling closes open
      // autocomplete dropdowns (Agoda's symptom). The agent should call
      // `scroll` explicitly when it needs to surface lazy-loaded content.
      let timer;
      const timedOut = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('snapshot timed out (page too heavy)')), 22_000);
      });
      let snap;
      try {
        snap = await Promise.race([takeSnapshot(p), timedOut]);
      } finally {
        // Don't leave a 22s timer pending after the snapshot has settled.
        clearTimeout(timer);
      }
      lastSnapshotRefs = snap.refs;
      return { text: snap.text, elementCount: Object.keys(snap.refs).length };
    }

    case 'list-pages': {
      // Debug helper: list all tabs in the current context.
      if (!context) return { pages: [] };
      const pages = context.pages();
      const info = [];
      for (const pg of pages) {
        info.push({ url: pg.url(), title: await pg.title().catch(() => '?') });
      }
      return { pages: info, count: pages.length, activeIndex: pages.indexOf(page) };
    }

    case 'switch-to-newest-tab': {
      // After a click that opens target=_blank, switch focus to the new tab.
      if (!context) throw new Error('no context');
      const pages = context.pages();
      if (pages.length < 2) return { status: 'no-other-tab', currentUrl: page?.url() };
      page = pages[pages.length - 1];
      await page.waitForLoadState('domcontentloaded', { timeout: 15_000 }).catch(() => {});
      return { status: 'switched', url: page.url(), title: await page.title() };
    }

    case 'switch-to-tab-matching': {
      // Focus the first tab whose URL matches `urlIncludes`. Useful when
      // a click might open results in a new tab (Agoda) but might also
      // navigate the existing tab (Booking) — the agent just says
      // "find me the /search? tab" and we handle both shapes.
      if (!context) throw new Error('no context');
      const pages = context.pages();
      const needle = String(args.urlIncludes || '');
      const avoid = String(args.urlAvoids || '');
      const match = pages.find((pg) => {
        const u = pg.url();
        if (needle && !u.includes(needle)) return false;
        if (avoid && u.includes(avoid)) return false;
        return true;
      });
      if (!match) return { status: 'no-match', urlIncludes: needle, urlAvoids: avoid, currentUrl: page?.url(), tabCount: pages.length };
      page = match;
      await page.waitForLoadState('domcontentloaded', { timeout: 15_000 }).catch(() => {});
      return { status: 'switched', url: page.url(), title: await page.title() };
    }

    case 'close-tabs-matching': {
      // Close every tab whose URL contains `urlIncludes`. Won't touch the
      // currently active page even if it matches (so we never accidentally
      // close the results tab we just switched to). An empty `urlIncludes`
      // closes nothing.
      if (!context) throw new Error('no context');
      const pages = context.pages();
      const needle = String(args.urlIncludes || '');
      let closed = 0;
      for (const pg of pages) {
        if (pg === page) continue;
        if (needle && pg.url().includes(needle)) {
          await pg.close().catch(() => {});
          closed += 1;
        }
      }
      return { status: 'closed-tabs', count: closed, remainingTabs: context.pages().length };
    }

    case 'query-all': {
      // Debug helper: return innerText + tag + visible flag for the first
      // 20 matches of the selector.
      const p = await ensurePage();
      const out = await p.evaluate((sel) => {
        const els = Array.from(document.querySelectorAll(sel));
        return els.slice(0, 20).map((el) => {
          const rect = el.getBoundingClientRect();
          const style = getComputedStyle(el);
          return {
            tag: el.tagName,
            text: (el.innerText || '').replace(/\s+/g, ' ').trim().slice(0, 80),
            testid: el.getAttribute('data-testid') || el.getAttribute('data-selenium') || null,
            visible: rect.width > 0 && rect.height > 0 && style.display !== 'none' && style.visibility !== 'hidden',
            opacity: style.opacity,
            display: style.display,
            ariaLabel: el.getAttribute('aria-label') || null,
          };
        });
      }, args.selector);
      return { selector: args.selector, count: out.length, matches: out };
    }

    case 'keyboard-press': {
      // Page-level key press (no element ref needed). Useful for closing
      // overlays via Escape, navigating with Tab, submitting with Enter.
      const p = await ensurePage();
      const key = String(args.key || 'Escape');
      await p.keyboard.press(key);
      // Report the key that was actually pressed, including the default.
      return { status: 'pressed', key };
    }

    case 'scroll': {
      // Explicit scroll command. Used when the agent wants to surface
      // lazy-loaded content (search results pagination, infinite scroll).
      // Do NOT call before snapshot when a dropdown is open — scrolling
      // closes them.
      const p = await ensurePage();
      const yTo = typeof args.to === 'number' ? args.to : 3000;
      // A zero, negative or NaN step would never reach `yTo` and spin
      // forever inside the page, so anything non-positive uses the default.
      const step = typeof args.step === 'number' && args.step > 0 ? args.step : 600;
      const delayMs = typeof args.delayMs === 'number' ? args.delayMs : 200;
      await p.evaluate(
        async ({ yTo, step, delayMs }) => {
          for (let y = 0; y <= yTo; y += step) {
            window.scrollTo(0, y);
            await new Promise((r) => setTimeout(r, delayMs));
          }
        },
        { yTo, step, delayMs },
      );
      return { status: 'scrolled', to: yTo };
    }

    case 'click': {
      const p = await ensurePage();
      const sel = await resolveRef(p, args.ref);
      const locator = p.locator(sel).first();
      // Normal click first. If an overlay intercepts pointer events
      // (common on Booking's autocomplete + tooltips), fall back to a
      // direct DOM .click() via evaluate, which bypasses the overlay.
      let mode = 'pointer';
      try {
        await locator.click({ timeout: 4_000 });
      } catch {
        mode = 'dispatch';
        await p.evaluate((s) => {
          const el = document.querySelector(s);
          if (!el) throw new Error('element not found');
          el.click();
        }, sel);
      }
      await p.waitForLoadState('domcontentloaded', { timeout: 5_000 }).catch(() => {});
      return { status: 'clicked', ref: args.ref, mode, url: p.url() };
    }

    case 'fill': {
      // Fast path: set the input value directly. Works for Booking and most
      // sites whose autocomplete listens to the `input` event. Use `type`
      // (below) when a site only fires its autocomplete on real keystrokes.
      const p = await ensurePage();
      const sel = await resolveRef(p, args.ref);
      await p.locator(sel).first().fill(String(args.text ?? ''), { timeout: 10_000 });
      return { status: 'filled', ref: args.ref };
    }

    case 'type': {
      // Slow path: focus + clear + send keystrokes one at a time. Use this
      // for SPAs whose autocomplete only fires on actual keydown events
      // (Agoda is the canonical example). Bypasses overlay-intercepts by
      // calling focus() via JS instead of relying on a pointer click.
      const p = await ensurePage();
      const sel = await resolveRef(p, args.ref);
      await p.evaluate((s) => {
        const el = document.querySelector(s);
        if (!el) throw new Error('element not found');
        el.focus();
        if ('value' in el) el.value = '';
        el.dispatchEvent(new Event('input', { bubbles: true }));
      }, sel);
      // Now type via keyboard at page level — works as long as the input
      // is focused, regardless of any overlay.
      await p.keyboard.type(String(args.text ?? ''), { delay: 80 });
      return { status: 'typed', ref: args.ref };
    }

    case 'press': {
      const p = await ensurePage();
      const sel = await resolveRef(p, args.ref);
      await p.locator(sel).first().press(args.key, { timeout: 10_000 });
      return { status: 'pressed', ref: args.ref, key: args.key };
    }

    case 'wait-for': {
      const p = await ensurePage();
      // Wait until at least N elements match the given selector. A timeout
      // is reported as a status, not as an error.
      const { selector, minCount = 1, timeoutMs = 15_000 } = args;
      const ok = await p
        .waitForFunction(
          ({ s, n }) => document.querySelectorAll(s).length >= n,
          { s: selector, n: minCount },
          { timeout: timeoutMs },
        )
        .then(() => true)
        .catch(() => false);
      return { status: ok ? 'matched' : 'timeout', selector, minCount };
    }

    case 'try-selectors': {
      // Dry run of an extraction: health verdict, stats and a 3-record sample.
      const p = await ensurePage();
      const result = await extractWithSelectors(p, args.selectors);
      return {
        healthy: isExtractionHealthy(result),
        sampleCount: result.records.length,
        stats: result.stats,
        sample: result.records.slice(0, 3),
      };
    }

    case 'extract-all': {
      const p = await ensurePage();
      const result = await extractWithSelectors(p, args.selectors);
      return result;
    }

    case 'multi-extract-urls': {
      // Parallel cache-warm extraction. Spins up one ephemeral browser
      // context per (url, selectors) entry and runs them concurrently,
      // so refreshing 2-3 OTAs at once takes ~5s instead of ~15s.
      if (!browser) throw new Error('daemon not launched');
      const requests = Array.isArray(args.requests) ? args.requests : [];
      const results = await Promise.all(
        requests.map(async (req) => {
          let ctx;
          try {
            // Context and page creation live inside the try so a failure in
            // either is reported for this entry only and the context is
            // still closed.
            ctx = await browser.newContext({
              locale: 'vi-VN',
              viewport: { width: 1440, height: 900 },
            });
            const pg = await ctx.newPage();
            await pg.goto(req.url, { waitUntil: 'domcontentloaded', timeout: 20_000 });
            // Best-effort wait for the card selector to render — gives lazy
            // SPA pages a chance to paint without blocking the extraction.
            if (req.selectors?.card) {
              await pg.waitForSelector(req.selectors.card, { timeout: 8_000 }).catch(() => {});
            }
            const r = await extractWithSelectors(pg, req.selectors);
            return { site: req.site, healthy: isExtractionHealthy(r), records: r.records, stats: r.stats };
          } catch (e) {
            return { site: req?.site, error: e.message };
          } finally {
            await ctx?.close().catch(() => {});
          }
        }),
      );
      return { status: 'multi-extract-done', count: results.length, results };
    }

    case 'current-url': {
      const p = await ensurePage();
      return { url: p.url() };
    }

    case 'inspect-ref': {
      // Debug helper: show what is stored for a ref and whether it resolves.
      const entry = refToEntry(args.ref);
      const p = await ensurePage();
      let resolved = null;
      try { resolved = await resolveRef(p, args.ref); } catch (e) { resolved = `(failed: ${e.message})`; }
      // -1 signals that the saved selector is not valid CSS.
      const match = await p.evaluate((s) => {
        try { return document.querySelectorAll(s).length; } catch { return -1; }
      }, entry.selector || '');
      return { ref: args.ref, savedSelector: entry.selector, signature: entry.signature, resolvedSelector: resolved, savedSelectorMatchCount: match };
    }


    case 'shutdown': {
      // Trigger graceful close after replying. Cleanup is best-effort: the
      // process exits regardless of whether either step fails.
      setImmediate(async () => {
        try { await browser?.close(); } catch {}
        try { await clearState(); } catch {}
        process.exit(0);
      });
      return { status: 'shutting-down' };
    }

    default:
      throw new Error(`unknown endpoint: ${endpoint}`);
  }
}

// --- server boot ------------------------------------------------------------

/**
 * Boot the daemon: create the cache directory, pre-launch the browser, start
 * the localhost HTTP server and publish its port/pid through saveState().
 *
 * The server binds to port 0 so the OS assigns a free port atomically; the
 * bound port is read back from the server instead of being probed first,
 * which removes the window in which another process could take the port.
 *
 * The process exits on SIGTERM/SIGINT (code 0), and with code 1 when the
 * server emits an error or the state cannot be written.
 *
 * @returns {Promise<void>} Resolves once the server has been asked to listen;
 *   rejects if the cache directory or the browser cannot be set up.
 */
async function main() {
  await fs.mkdir(CACHE_DIR, { recursive: true });

  // Pre-warm browser so first /goto is fast.
  await ensurePage();

  // Single exit path shared by signals and fatal server errors. Cleanup is
  // deliberately best-effort: we exit even if closing the browser or
  // clearing the state fails.
  let shuttingDown = false;
  const shutdown = async (exitCode) => {
    if (shuttingDown) return;
    shuttingDown = true;
    try { await browser?.close(); } catch {}
    try { await clearState(); } catch {}
    process.exit(exitCode);
  };
  // Wrapped in arrows because signal listeners receive the signal name as
  // their first argument, which must not be mistaken for an exit code.
  process.on('SIGTERM', () => { void shutdown(0); });
  process.on('SIGINT', () => { void shutdown(0); });

  const server = http.createServer(async (req, res) => {
    try {
      const result = await handle(req);
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(result));
    } catch (e) {
      // Every command failure is reported to the client as a JSON 500.
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ error: e.message || String(e) }));
    }
  });

  // Without a listener, a server 'error' event would surface as an uncaught
  // exception and skip the browser/state cleanup.
  server.on('error', (e) => {
    process.stderr.write(`[daemon] server error: ${e.stack || e.message}\n`);
    void shutdown(1);
  });

  server.listen(0, '127.0.0.1', async () => {
    const { port } = server.address();
    try {
      await saveState({ port, pid: process.pid, createdAt: new Date().toISOString() });
    } catch (e) {
      // Clients locate the daemon through the saved state; without it the
      // daemon is unreachable, so stop instead of lingering.
      process.stderr.write(`[daemon] could not save state: ${e.stack || e.message}\n`);
      await shutdown(1);
      return;
    }
    process.stderr.write(`[daemon] ready on port ${port} (pid ${process.pid})\n`);
  });
}

// Entry point: any startup failure is logged to stderr (stack trace when
// available) and the process exits with a non-zero status.
main().catch((err) => {
  const detail = err.stack || err.message;
  process.stderr.write(`[daemon] fatal: ${detail}\n`);
  process.exit(1);
});