使用 Selenium 和 CaptchaAI 自动提交包含验证码挑战的表单。
挑战
受验证码保护的 Web 表单会阻止自动化。无论您是测试联系表单、提交应用程序还是运行 QA 工作流程,您都需要先解决验证码问题,然后表单才会接受您的提交。
建筑学
┌────────────┐ ┌──────────────┐ ┌────────────┐ ┌──────────────┐
│ Load Form │────▶│ Fill Fields │────▶│ Detect & │────▶│ Submit Form │
│ (Selenium) │ │ │ │ Solve │ │ │
│ │ │ │ │ CAPTCHA │ │ │
└────────────┘ └──────────────┘ └────────────┘ └──────────────┘
核心组件
验证码求解器
import time
import requests
class FormCaptchaSolver:
BASE = "https://ocr.captchaai.com"
def __init__(self, api_key):
self.api_key = api_key
def solve(self, params, initial_wait=10):
params["key"] = self.api_key
params["json"] = 1
resp = requests.post(f"{self.BASE}/in.php", data=params).json()
if resp["status"] != 1:
raise Exception(f"Submit error: {resp['request']}")
task_id = resp["request"]
time.sleep(initial_wait)
for _ in range(60):
result = requests.get(
f"{self.BASE}/res.php",
params={"key": self.api_key, "action": "get", "id": task_id, "json": 1},
).json()
if result["request"] == "CAPCHA_NOT_READY":
time.sleep(5)
continue
if result["status"] == 1:
return result["request"]
raise Exception(f"Solve error: {result['request']}")
raise TimeoutError("CAPTCHA solve timed out")
验证码检测器
import re
from selenium.webdriver.common.by import By
class CaptchaDetector:
def __init__(self, driver):
self.driver = driver
def detect(self):
"""Detect CAPTCHA type on current page."""
html = self.driver.page_source
# Turnstile
turnstile = self.driver.find_elements(By.CSS_SELECTOR, ".cf-turnstile, [data-sitekey]")
for el in turnstile:
if "cf-turnstile" in (el.get_attribute("class") or ""):
return "turnstile", el.get_attribute("data-sitekey")
# reCAPTCHA
recaptcha = self.driver.find_elements(By.CSS_SELECTOR, "[data-sitekey]")
if recaptcha:
sitekey = recaptcha[0].get_attribute("data-sitekey")
if "recaptcha" in html.lower():
return "recaptcha_v2", sitekey
# Image CAPTCHA
img = self.driver.find_elements(By.CSS_SELECTOR, "img[src*='captcha'], img.captcha")
if img:
return "image", img[0].get_attribute("src")
return "none", None
表单自动生成器
import base64
import requests as req
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
class FormAutomator:
def __init__(self, api_key):
self.solver = FormCaptchaSolver(api_key)
self.driver = webdriver.Chrome()
self.detector = CaptchaDetector(self.driver)
def fill_field(self, selector, value):
field = WebDriverWait(self.driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, selector))
)
field.clear()
field.send_keys(value)
def select_option(self, selector, value):
from selenium.webdriver.support.ui import Select
select = Select(self.driver.find_element(By.CSS_SELECTOR, selector))
select.select_by_value(value)
def solve_captcha(self):
captcha_type, data = self.detector.detect()
page_url = self.driver.current_url
if captcha_type == "recaptcha_v2":
token = self.solver.solve({
"method": "userrecaptcha",
"googlekey": data,
"pageurl": page_url,
})
self.driver.execute_script(
f'document.querySelector("[name=g-recaptcha-response]").value = "{token}";'
)
return True
if captcha_type == "turnstile":
token = self.solver.solve({
"method": "turnstile",
"sitekey": data,
"pageurl": page_url,
})
self.driver.execute_script(
f'document.querySelector("[name=cf-turnstile-response]").value = "{token}";'
)
return True
if captcha_type == "image":
img_data = req.get(data).content
img_b64 = base64.b64encode(img_data).decode()
text = self.solver.solve({"method": "base64", "body": img_b64})
captcha_input = self.driver.find_element(
By.CSS_SELECTOR, "input[name*='captcha']"
)
captcha_input.clear()
captcha_input.send_keys(text)
return True
return False # No CAPTCHA detected
def submit_form(self, url, fields, submit_selector="button[type='submit']"):
"""
fields: list of (selector, value) tuples
"""
self.driver.get(url)
for selector, value in fields:
self.fill_field(selector, value)
self.solve_captcha()
submit = self.driver.find_element(By.CSS_SELECTOR, submit_selector)
submit.click()
return self.driver.current_url
def close(self):
self.driver.quit()
完整示例:联系表
automator = FormAutomator("YOUR_API_KEY")
try:
result_url = automator.submit_form(
url="https://example.com/contact",
fields=[
("#name", "John Doe"),
("#email", "john@example.com"),
("#subject", "Sales inquiry"),
("#message", "I'd like to learn more about your services."),
],
submit_selector="#submit-btn",
)
print(f"Form submitted. Redirected to: {result_url}")
finally:
automator.close()
处理多种表单类型
登录表格
result = automator.submit_form(
url="https://staging.example.com/qa-login",
fields=[
("#username", "testuser"),
("#password", "testpass123"),
],
submit_selector="#login-btn",
)
登记表
result = automator.submit_form(
url="https://example.com/register",
fields=[
("#first-name", "Jane"),
("#last-name", "Smith"),
("#email", "jane@example.com"),
("#password", "SecurePass!123"),
("#confirm-password", "SecurePass!123"),
],
submit_selector="#register-btn",
)
使用验证码搜索表单
result = automator.submit_form(
url="https://example.com/search",
fields=[
("#query", "python developer"),
("#location", "San Francisco"),
],
submit_selector="#search-btn",
)
故障排除
| 问题 | 原因 | 处理方式 |
|---|---|---|
| 令牌被拒绝 | 提交前令牌已过期 | 最后解决验证码,立即提交 |
| 未找到字段 | 动态页面加载 | 添加显式等待 |
| 检测到错误的验证码类型 | 多个验证码元素 | 检查检测顺序 |
| 提交后表单重新加载 | 服务器端验证失败 | 检查所有必填字段 |
| reCAPTCHA 回调未触发 | 需要调用回调函数 | 注入后使用grecaptcha.execute() |
常问问题
我可以在没有浏览器的情况下提交表单吗?
对于reCAPTCHA和Turnstile,您可以在没有浏览器的情况下解决CAPTCHA并通过HTTP POST提交。但如果表单使用 JavaScript 验证,则需要浏览器。
如何处理具有多个验证码的表单?
某些表单仅在验证失败后才显示验证码。每次尝试提交后再次运行 solve_captcha()。
AJAX 表单怎么样?
对于 AJAX 提交,拦截 XHR 请求并将 CAPTCHA 令牌包含在请求负载中,而不是填充隐藏字段。
相关指南
自动化任何表格——使用 CaptchaAI 解决验证码.