实战教程

动态验证码加载:检测延迟加载的验证码

页面首次加载时,并非所有验证码都已存在。许多网站会在点击按钮、表单获得焦点、滚动页面或计时器到期之后才延迟渲染验证码。如果您的自动化脚本立即抓取页面源代码,此时验证码尚未出现。本指南介绍如何检测并等待动态加载的验证码。


常见的延迟加载触发器

触发器 示例 如何激活
按钮点击 点击“提交”后才将 reCAPTCHA 添加到表单 先点击按钮
表单焦点 输入框获得焦点时出现验证码 聚焦 email/password 字段
滚动位置 验证码所在区域进入可视范围时加载 滚动到表单
定时器 3 秒后加载验证码 等待延迟
JavaScript 条件 AJAX 响应后加载验证码 触发先决条件请求

方法一:MutationObserver

观察 DOM 中添加的验证码元素:

Puppeteer

const puppeteer = require('puppeteer');

const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto('https://staging.example.com/qa-login');

// Start the in-page watcher but do NOT await it yet. The promise only settles
// once a CAPTCHA element exists (or after 30 seconds), so awaiting it here
// would block the script before it can run the action that makes the CAPTCHA
// load.
const captchaInfoPromise = page.evaluate(() => {
  const CAPTCHA_SELECTOR = '.g-recaptcha, .cf-turnstile, .h-captcha';

  // Summarize a CAPTCHA container as a plain, serializable object.
  const describe = (el) => ({
    type: el.className,
    sitekey: el.getAttribute('data-sitekey'),
  });

  return new Promise((resolve) => {
    // The CAPTCHA may already be in the DOM; an observer would never fire for it.
    const existing = document.querySelector(CAPTCHA_SELECTOR);
    if (existing) {
      resolve(describe(existing));
      return;
    }

    // Watch for newly added elements that are, or contain, a CAPTCHA container.
    const observer = new MutationObserver((mutations) => {
      for (const mutation of mutations) {
        for (const node of mutation.addedNodes) {
          // Only element nodes (nodeType 1) can match a selector.
          if (node.nodeType !== 1) continue;

          const captcha = node.matches?.(CAPTCHA_SELECTOR)
            ? node
            : node.querySelector?.(CAPTCHA_SELECTOR);

          if (captcha) {
            observer.disconnect();
            // Cancel the pending timeout so no stray timer is left in the page.
            clearTimeout(timeoutId);
            resolve(describe(captcha));
            return;
          }
        }
      }
    });

    // Give up after 30 seconds and report "nothing found" as null.
    const timeoutId = setTimeout(() => {
      observer.disconnect();
      resolve(null);
    }, 30000);

    observer.observe(document.body, { childList: true, subtree: true });
  });
});

// Run the trigger action (click, focus, scroll, ...) at this point, while the
// observer is active and before the result is awaited.

// Resolves to { type, sitekey }, or null if nothing appeared within 30 seconds.
const captchaInfo = await captchaInfoPromise;

console.log('Detected CAPTCHA:', captchaInfo);

触发加载

// Option A: clicking the submit button makes the page inject the CAPTCHA
await page.click('#submit-btn');

// Option B: giving focus to the e-mail input is the trigger
await page.focus('#email');

// Option C: bring the sign-up form into the viewport
await page.evaluate(() => document.querySelector('#signup-form').scrollIntoView());

方法二:等待脚本注入

验证码依赖其 JavaScript 库才能工作。可以等待该库加载完成:

// Block (up to 30 s) until the reCAPTCHA library exposes its render API
const recaptchaApiReady = () => {
  const apiDefined = typeof window.grecaptcha !== 'undefined';
  return apiDefined && typeof window.grecaptcha.render === 'function';
};
await page.waitForFunction(recaptchaApiReady, { timeout: 30000 });

// Read the site key from the widget container (undefined if there is none)
const sitekey = await page.evaluate(
  () => document.querySelector('.g-recaptcha')?.getAttribute('data-sitekey'),
);

对于 Cloudflare Turnstile

// Block (up to 30 s) until the Turnstile library has defined its global
const turnstileApiReady = () => typeof window.turnstile !== 'undefined';
await page.waitForFunction(turnstileApiReady, { timeout: 30000 });

// Read the site key from the widget container (undefined if there is none)
const sitekey = await page.evaluate(
  () => document.querySelector('.cf-turnstile')?.getAttribute('data-sitekey'),
);

方法3:拦截渲染调用

在验证码渲染之前挂钩(hook)CAPTCHA 库的渲染函数:

// Inject before page scripts run
await page.evaluateOnNewDocument(() => {
  // Stays null until grecaptcha.render() is called; then holds
  // { type, sitekey, callback, container }.
  window.__captchaDetected = null;

  // API objects whose `render` property is already hooked, so re-assigning the
  // same object to window.grecaptcha does not wrap it twice.
  const hookedApis = new WeakSet();

  // Return a function that records the render parameters, then delegates to
  // the real render. Non-function values are passed through untouched.
  const wrapRender = (origRender) => {
    if (typeof origRender !== 'function') return origRender;

    return function (...args) {
      const [container, params] = args;
      try {
        const containerEl = typeof container === 'string'
          ? document.getElementById(container)
          : container;
        const callback = params?.callback;

        window.__captchaDetected = {
          type: 'recaptcha',
          // `params` may be omitted; fall back to the container's attribute.
          sitekey: params?.sitekey ?? containerEl?.getAttribute?.('data-sitekey'),
          // The callback can be a function or the name of a global function.
          callback: typeof callback === 'string' ? callback : callback?.name,
          container: typeof container === 'string' ? container : container?.id,
        };
      } catch {
        // Recording is best-effort: never let the hook break the page's own call.
      }
      return origRender.apply(this, args);
    };
  };

  // Turn `api.render` into an accessor so the wrapper is applied whether
  // render already exists or is only attached to the object later.
  const hookRender = (api) => {
    const isObjectLike = api !== null
      && (typeof api === 'object' || typeof api === 'function');
    if (!isObjectLike || hookedApis.has(api)) return;

    let currentRender = wrapRender(api.render);
    try {
      Object.defineProperty(api, 'render', {
        configurable: true,
        enumerable: true,
        get() { return currentRender; },
        set(fn) { currentRender = wrapRender(fn); },
      });
      hookedApis.add(api);
    } catch {
      // Frozen or non-configurable object: leave it alone rather than break it.
    }
  };

  // Hook grecaptcha.render: intercept every assignment to window.grecaptcha
  let grecaptchaApi;
  Object.defineProperty(window, 'grecaptcha', {
    set(val) {
      grecaptchaApi = val;
      hookRender(val);
    },
    get() { return grecaptchaApi; },
  });
});

await page.goto('https://example.com/signup');

// Perform the interaction that makes the page render its CAPTCHA
await page.click('#show-form');

// Poll (up to 30 s) until the render hook has stored its record
const hookHasFired = () => window.__captchaDetected !== null;
await page.waitForFunction(hookHasFired, { timeout: 30000 });

// Copy the recorded details out of the page
const detected = await page.evaluate(() => window.__captchaDetected);
console.log('Detected:', detected);
// Example output:
// { type: 'recaptcha', sitekey: '6Le-wvkS...', callback: 'onCaptcha', container: 'recaptcha-box' }

Python(Selenium):等待延迟加载的验证码

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://staging.example.com/qa-login")

# Trigger the CAPTCHA loading
submit = driver.find_element(By.ID, "submit-btn")
submit.click()

# Wait up to 30 seconds for any supported CAPTCHA container to be added to the DOM
try:
    captcha_el = WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((
            By.CSS_SELECTOR,
            ".g-recaptcha, .cf-turnstile, .h-captcha"
        ))
    )
except TimeoutException:
    # Only a wait timeout means "no CAPTCHA"; any other error (lost session,
    # bad selector, ...) propagates instead of being reported as a timeout.
    print("No CAPTCHA appeared within 30 seconds")
else:
    sitekey = captcha_el.get_attribute("data-sitekey")
    captcha_class = captcha_el.get_attribute("class")

    # The selector matches exactly three classes, so anything that is not
    # reCAPTCHA or Turnstile must be hCaptcha.
    if "g-recaptcha" in captcha_class:
        captcha_type = "recaptcha"
    elif "cf-turnstile" in captcha_class:
        captcha_type = "turnstile"
    else:
        captcha_type = "hcaptcha"

    print(f"Type: {captcha_type}, Sitekey: {sitekey}")

等待 iframe (reCAPTCHA)

# The container div can exist before the CAPTCHA script has run, so wait for
# the widget's iframe itself rather than only the div.
captcha_iframe_locator = (
    By.CSS_SELECTOR,
    "iframe[src*='recaptcha'], iframe[src*='challenges.cloudflare.com']",
)
iframe_present = EC.presence_of_element_located(captcha_iframe_locator)
WebDriverWait(driver, 30).until(iframe_present)
print("CAPTCHA iframe loaded")

完整的检测+求解流程

import requests
import time

def detect_and_solve(driver, api_key, trigger_action=None):
    """Detect a lazy-loaded CAPTCHA, solve it, and inject the token.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        api_key: CaptchaAI API key sent with every solver request.
        trigger_action: Optional callable taking ``driver``; called first to
            make the page load its CAPTCHA (click, focus, scroll, ...).

    Returns:
        The solved token string, which is also written into the page's
        response field when that field exists.

    Raises:
        selenium.common.exceptions.TimeoutException: No CAPTCHA container
            appeared within 30 seconds.
        RuntimeError: The container has no site key, or the solver reported
            an error.
        TimeoutError: The solver returned no result after 24 polls (~2 minutes).
        requests.RequestException: An HTTP request to the solver failed.
    """

    # 1. Trigger the CAPTCHA
    if trigger_action:
        trigger_action(driver)

    # 2. Wait for it to appear
    captcha_el = WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((
            By.CSS_SELECTOR,
            ".g-recaptcha, .cf-turnstile, .h-captcha"
        ))
    )

    sitekey = captcha_el.get_attribute("data-sitekey")
    page_url = driver.current_url
    captcha_class = captcha_el.get_attribute("class")

    # Fail early: a task without a site key cannot be solved.
    if not sitekey:
        raise RuntimeError("CAPTCHA element found but it has no data-sitekey attribute")

    # 3. Determine type and method
    if "g-recaptcha" in captcha_class:
        method, key_param, token_field = "userrecaptcha", "googlekey", "g-recaptcha-response"
    elif "cf-turnstile" in captcha_class:
        method, key_param, token_field = "turnstile", "sitekey", "cf-turnstile-response"
    else:
        method, key_param, token_field = "hcaptcha", "sitekey", "h-captcha-response"

    # 4. Solve with CaptchaAI
    submit_resp = requests.post("https://ocr.captchaai.com/in.php", data={
        "key": api_key, "method": method,
        key_param: sitekey, "pageurl": page_url, "json": "1",
    }, timeout=30)
    submit_resp.raise_for_status()
    resp = submit_resp.json()

    # On failure "request" holds an error code, not a task id; do not poll with it.
    if resp.get("status") != 1:
        raise RuntimeError(f"CaptchaAI rejected the task: {resp.get('request')}")

    task_id = resp["request"]
    token = None
    for _ in range(24):
        time.sleep(5)
        poll_resp = requests.get("https://ocr.captchaai.com/res.php", params={
            "key": api_key, "action": "get", "id": task_id, "json": "1"
        }, timeout=30)
        poll_resp.raise_for_status()
        result = poll_resp.json()
        if result.get("status") == 1:
            token = result["request"]
            break
        # Any non-success answer other than a "not ready" code is treated as a
        # permanent error, so stop instead of polling until the loop runs out.
        if "NOT_READY" not in str(result.get("request", "")):
            raise RuntimeError(f"CaptchaAI failed to solve the task: {result.get('request')}")

    # Without this check an exhausted loop would leave `token` unset below.
    if token is None:
        raise TimeoutError("CaptchaAI did not return a solution after 24 polls")

    # 5. Inject
    driver.execute_script(f"""
        const el = document.querySelector('textarea[name="{token_field}"], input[name="{token_field}"]');
        if (el) el.value = arguments[0];
    """, token)

    return token

故障排除

问题 原因 处理方式
验证码始终不出现 触发动作错误 检查页面,找出触发验证码的操作
站点密钥为空 元素存在但脚本尚未运行 等待验证码 iframe,而不仅仅是 div
观察者未捕获到验证码 验证码已经存在 在设置观察者之前先检查现有元素
超时 验证码仅针对真实用户加载 使用具有真实特征的完整浏览器

常见问题

我如何知道验证码是否是延迟加载的?

查看页面源代码(Ctrl+U)。如果验证码 div 或脚本不存在,但在您与页面交互时出现,则它是延迟加载的。

这适用于无头浏览器吗?

可以,但有注意事项。有些网站只对非无头浏览器加载验证码。请在 Puppeteer 中使用 headless: 'new',或使用 QA 插件。


使用 CaptchaAI 解决任何验证码

获取您的 API 密钥:验证码网站


相关指南

该文章已禁用评论。