在解决 Cloudflare Turnstile 挑战之前,您需要在页面上检测它并提取站点密钥。 Turnstile 可以通过 HTML 属性、JavaScript API 调用嵌入,或在页面渲染后动态加载。本指南涵盖了每种检测方法——从简单的 HTML 解析到运行时 JavaScript 分析。
Turnstile 实现方法
站点以三种方式嵌入 Turnstile,每种方式都需要不同的检测方法:
| 方法 | 它是如何运作的 | 检测难度 |
|---|---|---|
| HTML 隐式 | 页面源代码中的 <div class="cf-turnstile" data-sitekey="..."> | 简单(静态 HTML) |
| JavaScript 显式 | 在脚本中调用 turnstile.render() | 中等(解析 JS) |
| 动态加载 | 用户操作或 XHR 后加载的小部件 | 困难(需要执行 JS) |
方法一:静态HTML检测
最简单的 Turnstile 集成使用 cf-turnstile 类和 data-sitekey 属性:
import re
import requests
def detect_turnstile_html(url):
    """Detect a Turnstile widget in the static HTML served at ``url``.

    The page is fetched once with browser-like headers and inspected as
    text; no JavaScript is executed.  The HTTP status code is not checked,
    so the body of an error response is inspected like any other page.

    Args:
        url: Address of the page to inspect.

    Returns:
        A dict with the keys ``turnstile_found``, ``sitekey``, ``mode``,
        ``theme``, ``action`` and ``script_loaded``.  ``sitekey``, ``mode``,
        ``theme`` and ``action`` stay ``None`` unless the ``cf-turnstile``
        marker occurs in the HTML.

    Raises:
        requests.RequestException: If the request itself fails.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 Chrome/120.0.0.0",
        "Accept": "text/html,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }
    response = requests.get(url, headers=headers, timeout=15)
    html = response.text

    def has_attribute(name, value):
        # Accept either quote style and optional whitespace around "=", in
        # line with the sitekey/theme/action patterns below.
        pattern = r"%s\s*=\s*[\"']%s[\"']" % (re.escape(name), re.escape(value))
        return re.search(pattern, html) is not None

    result = {
        "turnstile_found": False,
        "sitekey": None,
        "mode": None,
        "theme": None,
        "action": None,
        "script_loaded": False,
    }

    # Check for Turnstile script
    if "challenges.cloudflare.com/turnstile" in html:
        result["script_loaded"] = True

    # Without the widget container there is nothing else to extract.
    if "cf-turnstile" not in html:
        return result
    result["turnstile_found"] = True

    # Extract sitekey
    sitekey_match = re.search(
        r'data-sitekey=["\']([0-9x][A-Za-z0-9_-]+)["\']', html
    )
    if sitekey_match:
        result["sitekey"] = sitekey_match.group(1)

    # Extract mode
    if has_attribute("data-size", "invisible"):
        result["mode"] = "invisible"
    elif has_attribute("data-appearance", "interaction-only"):
        result["mode"] = "non-interactive"
    else:
        result["mode"] = "managed"

    # Extract theme
    theme_match = re.search(r'data-theme=["\'](\w+)["\']', html)
    if theme_match:
        result["theme"] = theme_match.group(1)

    # Extract action
    action_match = re.search(r'data-action=["\']([^"\']+)["\']', html)
    if action_match:
        result["action"] = action_match.group(1)

    return result
# Usage: fetch the page once, then print the extracted widget details.
info = detect_turnstile_html("https://staging.example.com/qa-login")
# Only report the key and mode when a widget was detected.
if info["turnstile_found"]:
    print(f"Sitekey: {info['sitekey']}")
    print(f"Mode: {info['mode']}")
方法二:JavaScript API检测
有些网站使用 turnstile.render() 而不是 HTML 属性:
import re
# turnstile.render(<first argument>, {   -- the match ends on the opening brace
_TURNSTILE_RENDER_CALL = re.compile(r"turnstile\.render\s*\([^,]+,\s*\{")


def _matching_brace_body(text, open_index):
    """Return the text between the ``{`` at ``open_index`` and its matching ``}``.

    Braces inside quoted or template strings are ignored.  Returns ``None``
    when no matching closing brace is found.
    """
    depth = 0
    quote = None
    index = open_index
    length = len(text)
    while index < length:
        char = text[index]
        if quote is not None:
            if char == "\\":
                index += 2  # skip the escaped character
                continue
            if char == quote:
                quote = None
        elif char in "'\"`":
            quote = char
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[open_index + 1:index]
        index += 1
    return None


def _config_option(config_text, key, value_pattern):
    """Return group 1 of ``value_pattern`` for the option ``key``, or ``None``.

    The key may be bare or quoted.  The look-behind keeps ``callback`` from
    matching inside ``error-callback`` and ``action`` inside ``transaction``.
    """
    match = re.search(
        r"(?<![\w-])['\"]?" + re.escape(key) + r"['\"]?\s*:\s*" + value_pattern,
        config_text,
    )
    return match.group(1) if match else None


def detect_turnstile_js_api(html):
    """Detect Turnstile from an explicit ``turnstile.render(...)`` call.

    The first render call whose second argument is a non-empty object
    literal is used.  The literal is delimited by brace matching, so nested
    objects and inline callback bodies do not cut the configuration short.

    Args:
        html: Page source (or script text) to search.

    Returns:
        ``{"found": False, "method": None}`` when no such call exists.
        Otherwise a dict with ``found`` (``True``), ``method``
        (``"javascript_api"``) and ``sitekey``, ``callback``, ``action``,
        ``appearance``; each of the last four is ``None`` when the option is
        absent or not written as a literal.
    """
    quoted_value = r"['\"]([^'\"]+)['\"]"

    for call in _TURNSTILE_RENDER_CALL.finditer(html):
        open_index = call.end() - 1
        config_text = _matching_brace_body(html, open_index)
        if config_text is None:
            # Unbalanced braces or quotes: fall back to the text up to the
            # first closing brace.
            close_index = html.find("}", open_index)
            if close_index == -1:
                continue
            config_text = html[open_index + 1:close_index]
        if not config_text:
            continue

        return {
            "found": True,
            "method": "javascript_api",
            "sitekey": _config_option(
                config_text, "sitekey", r"['\"]([0-9x][A-Za-z0-9_-]+)['\"]"
            ),
            # Yields the handler name, or "function" for an inline function.
            "callback": _config_option(config_text, "callback", r"(\w+)"),
            "action": _config_option(config_text, "action", quoted_value),
            "appearance": _config_option(config_text, "appearance", quoted_value),
        }

    return {"found": False, "method": None}
方法三:动态加载检测(Selenium/Puppeteer)
当 Turnstile 在页面交互后动态加载时:
Python(Selenium)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
def detect_turnstile_dynamic(url, widget_wait=5):
    """Detect a dynamically loaded Turnstile widget using Selenium.

    A Chrome session is started, the page is loaded, and the rendered DOM is
    checked for a Cloudflare challenge iframe, a container carrying
    ``data-sitekey``, a hidden response field, and a ``sitekey: '...'``
    literal in the page source.  The browser is always closed on exit.

    Args:
        url: Page to load.
        widget_wait: Extra seconds to wait, after the document is complete,
            for a widget element to appear.  ``0`` skips this wait.

    Returns:
        A dict with the keys ``turnstile_found``, ``sitekey``,
        ``iframe_present`` and ``response_field``.

    Raises:
        selenium.common.exceptions.TimeoutException: If the document does
            not reach ``readyState == "complete"`` within 10 seconds.
    """
    from selenium.common.exceptions import TimeoutException

    iframe_selector = "iframe[src*='challenges.cloudflare.com']"
    container_selector = ".cf-turnstile, [data-sitekey]"

    options = webdriver.ChromeOptions()
    options.add_argument("--no-sandbox")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)

        # Wait for page to fully load
        WebDriverWait(driver, 10).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )

        # A widget injected by script can appear after the document is
        # complete.  Give it a bounded grace period; a page without a widget
        # simply times out here and is reported as "not found" below.
        if widget_wait:
            try:
                WebDriverWait(driver, widget_wait).until(
                    lambda d: d.find_elements(
                        By.CSS_SELECTOR,
                        iframe_selector + ", " + container_selector,
                    )
                )
            except TimeoutException:
                pass

        result = {
            "turnstile_found": False,
            "sitekey": None,
            "iframe_present": False,
            "response_field": False,
        }

        # Check for Turnstile iframe
        if driver.find_elements(By.CSS_SELECTOR, iframe_selector):
            result["turnstile_found"] = True
            result["iframe_present"] = True

        # Check for cf-turnstile container; keep the first one that carries
        # a sitekey, in document order.
        for container in driver.find_elements(By.CSS_SELECTOR, container_selector):
            sitekey = container.get_attribute("data-sitekey")
            if sitekey:
                result["turnstile_found"] = True
                result["sitekey"] = sitekey
                break

        # Check for hidden response field
        response_fields = driver.find_elements(
            By.CSS_SELECTOR,
            "[name='cf-turnstile-response'], [name='g-recaptcha-response']",
        )
        if response_fields:
            result["response_field"] = True

        # Check page source for JS API render; used only when no DOM
        # attribute supplied a sitekey.
        if not result["sitekey"]:
            js_match = re.search(
                r"sitekey\s*:\s*['\"]([0-9x][A-Za-z0-9_-]+)['\"]",
                driver.page_source,
            )
            if js_match:
                result["sitekey"] = js_match.group(1)
                result["turnstile_found"] = True

        return result
    finally:
        driver.quit()
Node.js(Puppeteer)
const puppeteer = require("puppeteer");
/**
 * Detect a dynamically loaded Turnstile widget with Puppeteer.
 *
 * Loads the page, records any response whose URL contains the Turnstile
 * script path, then inspects the rendered DOM for a sitekey container, a
 * Cloudflare challenge iframe and the hidden response field.
 *
 * @param {string} url Page to load.
 * @returns {Promise<{turnstileFound: boolean, sitekey: ?string,
 *   iframePresent: boolean, responseField: boolean, scriptUrl: ?string}>}
 */
async function detectTurnstileDynamic(url) {
  const browser = await puppeteer.launch({
    headless: "new",
    args: ["--no-sandbox"],
  });

  // try/finally so the browser process is closed even when navigation or
  // evaluation throws.
  try {
    const page = await browser.newPage();
    const result = {
      turnstileFound: false,
      sitekey: null,
      iframePresent: false,
      responseField: false,
      scriptUrl: null,
    };

    // Monitor network for Turnstile script
    page.on("response", (response) => {
      if (response.url().includes("challenges.cloudflare.com/turnstile")) {
        result.scriptUrl = response.url();
      }
    });

    await page.goto(url, { waitUntil: "networkidle2" });

    // Check for Turnstile container (first matching element only)
    const sitekey = await page.evaluate(() => {
      const el = document.querySelector(".cf-turnstile, [data-sitekey]");
      return el ? el.getAttribute("data-sitekey") : null;
    });
    if (sitekey) {
      result.turnstileFound = true;
      result.sitekey = sitekey;
    }

    // Check for Turnstile iframe
    const iframes = await page.$$("iframe[src*='challenges.cloudflare.com']");
    if (iframes.length > 0) {
      result.turnstileFound = true;
      result.iframePresent = true;
    }

    // Check for response field
    const responseField = await page.$("[name='cf-turnstile-response']");
    result.responseField = !!responseField;

    return result;
  } finally {
    await browser.close();
  }
}
// Usage: print the detection result; report a failure instead of leaving
// the promise rejection unhandled.
detectTurnstileDynamic("https://staging.example.com/qa-login")
  .then(console.log)
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
综合检测类
import re
import requests
class TurnstileDetector:
    """Detect Cloudflare Turnstile across all implementation methods.

    Works on a page's HTML text only (no JavaScript execution).  Pass
    ``html`` to analyse markup you already have; otherwise the page at
    ``url`` is fetched once in the constructor.
    """

    # Substring of the Turnstile script URL.
    TURNSTILE_SCRIPT = "challenges.cloudflare.com/turnstile"

    # Tried in order; the first pattern that matches supplies the sitekey.
    SITEKEY_PATTERNS = [
        r'data-sitekey=["\']([0-9x][A-Za-z0-9_-]+)["\']',
        r"sitekey\s*:\s*['\"]([0-9x][A-Za-z0-9_-]+)['\"]",
        r"siteKey\s*[=:]\s*['\"]([0-9x][A-Za-z0-9_-]+)['\"]",
        r"TURNSTILE_SITE_KEY\s*[=:]\s*['\"]([0-9x][A-Za-z0-9_-]+)['\"]",
    ]

    # data-size="invisible" or size: 'invisible', with either quote style.
    _INVISIBLE_RE = re.compile(
        r"(?:data-size\s*=|(?<![\w-])['\"]?size['\"]?\s*:)\s*['\"]invisible['\"]"
    )
    _INTERACTION_ONLY_RE = re.compile(
        r"data-appearance\s*=\s*['\"]interaction-only['\"]"
    )
    # Other CAPTCHA widgets also use a data-sitekey attribute.  Turnstile
    # keys begin with a digit followed by "x" ("0x4AAAA...", and the
    # documented test keys "1x...", "2x...", "3x...").
    _TURNSTILE_KEY_PREFIX_RE = re.compile(r"[0-9]x")

    def __init__(self, url, html=None):
        """Store ``url`` and ``html``; fetch the page only when ``html`` is ``None``.

        An explicitly supplied empty string is analysed as an empty page
        rather than triggering a request.
        """
        self.url = url
        self.html = html
        if self.html is None:
            self._fetch()

    def _fetch(self):
        """Download ``self.url`` with browser-like headers into ``self.html``.

        The HTTP status code is not checked.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 Chrome/120.0.0.0",
            "Accept": "text/html,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
        }
        response = requests.get(self.url, headers=headers, timeout=15)
        self.html = response.text

    def detect(self):
        """Run all detection methods and return results as one dict."""
        return {
            "url": self.url,
            "turnstile_present": self.has_turnstile(),
            "sitekey": self.extract_sitekey(),
            "mode": self.detect_mode(),
            "implementation": self.detect_implementation(),
            "script_loaded": self.has_script(),
            "response_field": self.has_response_field(),
            "action": self.extract_action(),
            "theme": self.extract_theme(),
        }

    def has_turnstile(self):
        """Return ``True`` when the page shows Turnstile-specific evidence.

        Evidence is the Turnstile script URL, the ``cf-turnstile`` marker, a
        ``turnstile.render`` call, or an extracted sitekey that has the
        Turnstile key prefix.  A sitekey alone is not enough, because other
        CAPTCHA widgets use the same attribute name.
        """
        if (
            self.has_script()
            or "cf-turnstile" in self.html
            or "turnstile.render" in self.html
        ):
            return True
        sitekey = self.extract_sitekey()
        return (
            sitekey is not None
            and self._TURNSTILE_KEY_PREFIX_RE.match(sitekey) is not None
        )

    def has_script(self):
        """Return whether the Turnstile script URL occurs in the HTML."""
        return self.TURNSTILE_SCRIPT in self.html

    def has_response_field(self):
        """Return whether ``cf-turnstile-response`` occurs in the HTML."""
        return "cf-turnstile-response" in self.html

    def extract_sitekey(self):
        """Return the first sitekey matched by ``SITEKEY_PATTERNS``, else ``None``."""
        for pattern in self.SITEKEY_PATTERNS:
            match = re.search(pattern, self.html)
            if match:
                return match.group(1)
        return None

    def detect_mode(self):
        """Classify the widget as invisible, non-interactive, managed or unknown."""
        if self._INVISIBLE_RE.search(self.html):
            return "invisible"
        if self._INTERACTION_ONLY_RE.search(self.html):
            return "non-interactive"
        if "cf-turnstile" in self.html:
            return "managed"
        return "unknown"

    def detect_implementation(self):
        """Return how the widget is embedded.

        One of ``"html_implicit"``, ``"javascript_explicit"``,
        ``"dynamic_loading"`` or ``"unknown"``; checked in that order.
        """
        has_marker = "cf-turnstile" in self.html
        if has_marker and "data-sitekey=" in self.html:
            return "html_implicit"
        if "turnstile.render" in self.html:
            return "javascript_explicit"
        if self.has_script() and not has_marker:
            return "dynamic_loading"
        return "unknown"

    def extract_action(self):
        """Return the widget action from ``data-action`` or a JS ``action:`` option."""
        match = re.search(r'data-action=["\']([^"\']+)["\']', self.html)
        if match:
            return match.group(1)
        # The look-behind rejects longer identifiers such as "transaction:".
        match = re.search(
            r"(?<![\w-])['\"]?action['\"]?\s*:\s*['\"]([^'\"]+)['\"]", self.html
        )
        return match.group(1) if match else None

    def extract_theme(self):
        """Return the ``data-theme`` value, or ``"auto"`` when none is set."""
        match = re.search(r'data-theme=["\'](\w+)["\']', self.html)
        return match.group(1) if match else "auto"
# Usage: no html is passed, so the constructor fetches the page itself.
detector = TurnstileDetector("https://staging.example.com/qa-login")
info = detector.detect()
# Print the details only when Turnstile was detected on the page.
if info["turnstile_present"]:
    print(f"Sitekey: {info['sitekey']}")
    print(f"Mode: {info['mode']}")
    print(f"Implementation: {info['implementation']}")
检测后解决
一旦检测到,用 CaptchaAI 解决:
import requests
import time
API_KEY = "YOUR_API_KEY"


def solve_detected_turnstile(detection_result):
    """Solve Turnstile using detection results.

    Submits the sitekey and page URL to CaptchaAI, then polls every 5
    seconds (at most 60 times) until a token is available.

    Args:
        detection_result: Dict with the keys ``turnstile_present``,
            ``sitekey`` and ``url``; an optional ``action`` is forwarded
            when it is set.

    Returns:
        The solved token, taken from the ``request`` field of the response.

    Raises:
        ValueError: If no Turnstile or no sitekey was detected.
        RuntimeError: If the service rejects the task or reports a terminal
            error while polling.
        TimeoutError: If no token is ready after the last poll.
        requests.RequestException: If an HTTP call fails or times out.
    """
    if not detection_result["turnstile_present"]:
        raise ValueError("No Turnstile detected")
    if not detection_result["sitekey"]:
        raise ValueError("Sitekey not found — may need browser-based extraction")

    params = {
        "key": API_KEY,
        "method": "turnstile",
        "sitekey": detection_result["sitekey"],
        "pageurl": detection_result["url"],
        "json": 1,
    }
    # Include action if present
    action = detection_result.get("action")
    if action:
        params["action"] = action

    submit = requests.post(
        "https://ocr.captchaai.com/in.php", data=params, timeout=30
    ).json()
    # On failure "request" carries an error code, not a task id; polling
    # with it would only waste the whole timeout.
    if submit.get("status") != 1:
        raise RuntimeError(f"CaptchaAI rejected the task: {submit.get('request')}")
    task_id = submit["request"]

    for _ in range(60):
        time.sleep(5)
        result = requests.get(
            "https://ocr.captchaai.com/res.php",
            params={
                "key": API_KEY,
                "action": "get",
                "id": task_id,
                "json": 1,
            },
            timeout=30,
        ).json()
        if result.get("status") == 1:
            return result["request"]
        # Any answer other than "not ready yet" is a terminal error.
        if result.get("request") != "CAPCHA_NOT_READY":
            raise RuntimeError(f"CaptchaAI solve failed: {result.get('request')}")

    raise TimeoutError("Turnstile solve timed out")
# Full workflow: detect the widget, then pass the detection dict to the solver.
detector = TurnstileDetector("https://example.com/signup")
info = detector.detect()
if info["turnstile_present"]:
    token = solve_detected_turnstile(info)
    # Print only the first 50 characters of the token.
    print(f"Token: {token[:50]}...")
边缘情况
| 场景 | 挑战 | 解决方案 |
|---|---|---|
| 外部 JS 文件中的 Sitekey | 不在页面 HTML 中 | 解析链接的 JavaScript 文件以获取 sitekey 模式 |
| 来自 API 响应的 Sitekey | XHR 调用后加载 | 监控 JSON 响应中 sitekey 的网络请求 |
| 多个Turnstile小部件 | 同一页面上不同的sitekey | 将 sitekey 与您提交的特定表单相匹配 |
| 影子 DOM 中的Turnstile | 无法通过常规选择器访问 | 在浏览器上下文中使用 shadowRoot.querySelector |
| 服务器端呈现的 sitekey | 嵌入模板变量 | 检查配置对象的 <script> 标签 |
| 需要登录后才出现的 Turnstile | 在公共页面上不可见 | 先认证,后检测 |
故障排除
| 症状 | 原因 | 处理方式 |
|---|---|---|
| 找到脚本标签但没有站点密钥 | JS API 使用其他来源的配置进行渲染 | 检查所有链接的 JS 文件和 XHR 响应 |
| 提取了错误的站点密钥 | 页面上有多个验证码小部件 | 将站点密钥与周围的表单元素相匹配 |
| 检测有效但解决失败 | 服务器端验证需要 action 参数 | 在解决请求中包含 data-action 值 |
| 小部件不在初始 HTML 中 | 用户交互后动态加载 | 使用Selenium/Puppeteer进行整页渲染 |
| cf-turnstile-response 字段为空 | 小部件尚未完成 | 等待小部件完成加载 |
常见问题
Turnstile 站点密钥可以更改吗?
是的。站点运营商可以随时轮换站点密钥。始终从页面中提取新鲜的站点密钥,而不是对其进行硬编码。
我需要 action 参数吗?
仅当站点在服务器端对其进行验证时。如果 HTML 中存在 data-action,请将其包含在您的解决请求中以获得最佳结果。
如果找不到站点密钥怎么办?
sitekey 可能位于外部 JavaScript 文件、API 响应中或动态生成。使用浏览器 DevTools(网络选项卡)找到它,或者在全页渲染后使用 Selenium/Puppeteer 来提取它。
检测方法是否影响求解?
不会。无论小部件如何实现,CaptchaAI 的 Turnstile 解算器的工作原理都是相同的。您只需要站点密钥和页面 URL。
总结
检测 Cloudflare Turnstile 需要检查 Turnstile 脚本标记、cf-turnstile 容器、data-sitekey 属性和 turnstile.render() 调用。使用静态 HTML 解析进行简单集成,使用 Selenium/Puppeteer 进行动态加载小部件。一旦检测到,即可使用提取的站点密钥,通过 CaptchaAI 的 Turnstile 求解器进行求解 — 所有模式均以 100% 的成功率进行相同的处理。