如果 CaptchaAI 对图像验证码返回了错误答案,问题几乎总是出在提交的图像上,而不是识别服务本身。下面介绍如何诊断并修复。
错误答案的常见原因
| 原因 | 频率 | 处理方式 |
|---|---|---|
| 图像裁剪不正确 | 很常见 | 捕获完整的验证码元素 |
| 低分辨率/过度压缩 | 常见 | 发送更高质量的图像 |
| 图像编码错误 | 常见 | 验证 base64 编码 |
| 缺少语言/类型提示 | 偶尔 | 添加 language 或 textinstructions 参数 |
| 图像陈旧/已过期 | 偶尔 | 求解前重新捕获图像 |
修复 1:提交前验证图像质量
import base64
from io import BytesIO
from PIL import Image
def validate_captcha_image(image_path):
    """Check image quality before submitting to CaptchaAI.

    Three heuristics are applied: minimum resolution, proportion of
    near-white pixels, and the size of the image when encoded as PNG.

    Args:
        image_path: Path of the CAPTCHA image to inspect (anything
            ``PIL.Image.open`` accepts).

    Returns:
        A list of human-readable issue descriptions. The list is empty
        when none of the checks fired.
    """
    issues = []

    # Use the image as a context manager so the underlying file handle is
    # released when the checks are done.
    with Image.open(image_path) as img:
        width, height = img.size

        # Minimum resolution
        if width < 50 or height < 20:
            issues.append(f"Too small: {width}x{height}px (min 50x20)")

        # Check if mostly blank. Converting to RGB first makes every pixel a
        # 3-tuple regardless of the source mode; comparing raw pixels of an
        # RGBA/LA/CMYK image against an int raises TypeError, and palette
        # images would otherwise be judged by palette index, not colour.
        pixels = img.convert("RGB").getdata()
        total = len(pixels)
        white_count = sum(
            1 for r, g, b in pixels if r > 250 and g > 250 and b > 250
        )
        blank_ratio = white_count / total if total else 0.0
        if blank_ratio > 0.95:
            issues.append(f"Image appears blank ({blank_ratio:.0%} white)")

        # File size check. The size measured here is that of the image
        # re-encoded as PNG in memory, not the size of the file on disk.
        img_bytes = BytesIO()
        img.save(img_bytes, format="PNG")
        size_kb = img_bytes.tell() / 1024

    if size_kb < 1:
        issues.append(f"File too small ({size_kb:.1f} KB) — may be empty")
    if size_kb > 600:
        issues.append(f"File too large ({size_kb:.0f} KB) — submit under 600 KB")
    return issues
# Run the quality checks on a sample file and report the outcome.
issues = validate_captcha_image("captcha.png")
if not issues:
    print("Image quality OK")
# Looping over an empty list prints nothing, so no else-branch is needed.
for issue in issues:
    print(f"WARNING: {issue}")
修复 2:正确的 Base64 编码
import base64
def encode_captcha(image_path):
    """Properly encode a CAPTCHA image to base64.

    Args:
        image_path: Path of the image file whose raw bytes are encoded.

    Returns:
        The file contents as an ASCII base64 string.

    Raises:
        OSError: If the file cannot be opened or read.
        AssertionError: If decoding the result does not reproduce the
            original bytes.
    """
    with open(image_path, "rb") as f:
        raw = f.read()
    encoded = base64.b64encode(raw).decode("ascii")

    # Verify round-trip. The error is raised explicitly instead of through an
    # ``assert`` statement because asserts are removed under ``python -O``,
    # which would silently disable this check. Exception type and message are
    # kept the same for existing callers.
    if base64.b64decode(encoded) != raw:
        raise AssertionError("Base64 encoding corrupted the image")
    return encoded
# WRONG — the bytes being encoded are the characters of the file *name*,
# so the result is the base64 of "captcha.png", not of any image.
bad = base64.b64encode("captcha.png".encode()).decode()

# CORRECT — open the file in binary mode and encode what it contains.
with open("captcha.png", "rb") as f:
    good = base64.b64encode(
        f.read()
    ).decode()
修复 3:图像预处理
from PIL import Image, ImageFilter, ImageEnhance
from io import BytesIO
import base64
def preprocess_captcha(image_path):
    """Improve image quality for better OCR accuracy.

    The image is flattened to RGB, upscaled to a width of 200 px when it is
    narrower than that, contrast-enhanced (factor 1.5), sharpened, and
    re-encoded as PNG.

    Args:
        image_path: Path of the CAPTCHA image to preprocess (anything
            ``PIL.Image.open`` accepts).

    Returns:
        The processed PNG as a base64 string.
    """
    min_width = 200

    # Context manager releases the file handle; ``convert`` below produces a
    # fully loaded copy, so ``img`` stays usable after the file is closed.
    with Image.open(image_path) as source:
        has_alpha = source.mode in ("RGBA", "LA") or (
            source.mode == "P" and "transparency" in source.info
        )
        if has_alpha:
            # A plain convert("RGB") just drops the alpha channel, which
            # typically leaves transparent regions black and can hide dark
            # text. Flatten onto a white background instead.
            rgba = source.convert("RGBA")
            img = Image.new("RGB", rgba.size, (255, 255, 255))
            img.paste(rgba, mask=rgba.getchannel("A"))
        else:
            img = source.convert("RGB")

    # Upscale small images
    width, height = img.size
    if width < min_width:
        scale = min_width / width
        # Use the target width directly: int(width * scale) can come out as
        # 199 because of float rounding. Keep the height at least 1 px so a
        # very wide, flat image cannot produce an invalid size.
        img = img.resize(
            (min_width, max(1, int(height * scale))),
            Image.LANCZOS,
        )

    # Increase contrast
    img = ImageEnhance.Contrast(img).enhance(1.5)

    # Sharpen
    img = img.filter(ImageFilter.SHARPEN)

    # Convert to PNG bytes
    buffer = BytesIO()
    img.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode()
修复 4:添加类型和语言提示
import requests
def solve_image(api_key, image_base64, **hints):
    """Submit image CAPTCHA with quality hints.

    Args:
        api_key: CaptchaAI API key, sent as the ``key`` form field.
        image_base64: Base64-encoded image, sent as the ``body`` form field.
        **hints: Optional accuracy hints. Only ``language``,
            ``textinstructions``, ``numeric``, ``min_len`` and ``max_len``
            are forwarded; any other keyword is ignored.

    Returns:
        The decoded JSON body of the response from ``in.php``.
    """
    payload = {
        "key": api_key,
        "method": "base64",
        "body": image_base64,
        "json": 1,
    }

    # Recognised hints, copied into the form in this fixed order.
    #   language: 0=default, 1=Cyrillic, 2=Latin
    #   numeric:  1=digits only, 2=letters only
    forwarded = ("language", "textinstructions", "numeric", "min_len", "max_len")
    for hint_name in forwarded:
        if hint_name in hints:
            payload[hint_name] = hints[hint_name]

    response = requests.post(
        "https://ocr.captchaai.com/in.php",
        data=payload,
        timeout=30,
    )
    return response.json()
# The examples below need the base64 image to submit; without this line
# ``encoded_image`` is undefined and the first call raises NameError.
encoded_image = encode_captcha("captcha.png")

# Example: Digits-only CAPTCHA, 4-6 characters
result = solve_image(
    "YOUR_API_KEY",
    encoded_image,
    numeric=1,
    min_len=4,
    max_len=6,
)

# Example: Case-sensitive text
# (this rebinds ``result``; the two calls are independent illustrations)
result = solve_image(
    "YOUR_API_KEY",
    encoded_image,
    textinstructions="Case-sensitive, enter exactly as shown",
)
修复 5:捕获完整的验证码元素
from selenium import webdriver
from selenium.webdriver.common.by import By
import base64
def capture_captcha_element(driver, selector):
    """Screenshot only the CAPTCHA element, not the full page.

    Args:
        driver: WebDriver used to locate the element.
        selector: CSS selector identifying the CAPTCHA element.

    Returns:
        The element screenshot (PNG) as a base64 string.

    Raises:
        ValueError: If the screenshot is shorter than 500 bytes, which is
            treated as a sign that the element was not visible.
    """
    # An element-level screenshot avoids cropping a full-page capture.
    shot = driver.find_element(By.CSS_SELECTOR, selector).screenshot_as_png

    # Reject suspiciously tiny captures before encoding anything.
    if not len(shot) >= 500:
        raise ValueError("Screenshot too small — element may not be visible")

    encoded = base64.b64encode(shot)
    return encoded.decode()
# Usage: start a Chrome WebDriver session, load the page, and grab the
# CAPTCHA element matched by the CSS selector as a base64 PNG string.
# NOTE(review): nothing in this snippet closes the browser session; call
# driver.quit() once the driver is no longer needed.
driver = webdriver.Chrome()
driver.get("https://example.com")
image_b64 = capture_captcha_element(driver, "img#captchaImage")
修复 6:处理动态/轮换验证码
import time
def solve_with_fresh_image(driver, api_key, captcha_selector):
    """Capture and solve CAPTCHA immediately to avoid expiry.

    The element is screenshotted, submitted to ``in.php`` and the result is
    polled from ``res.php``: one initial 5 s wait, then up to 12 polls spaced
    3 s apart.

    Args:
        driver: WebDriver used to locate the CAPTCHA element.
        api_key: CaptchaAI API key.
        captcha_selector: CSS selector identifying the CAPTCHA element.

    Returns:
        The ``request`` field of the first poll response whose ``status``
        is 1.

    Raises:
        RuntimeError: If the submission is rejected, or a poll response
            carries anything other than ``CAPCHA_NOT_READY``.
        TimeoutError: If no answer is available after the last poll.
    """
    # Wait for CAPTCHA to load fully
    time.sleep(2)

    # Capture fresh
    element = driver.find_element(By.CSS_SELECTOR, captcha_selector)
    png_bytes = element.screenshot_as_png
    body = base64.b64encode(png_bytes).decode()

    # Submit immediately
    resp = requests.post("https://ocr.captchaai.com/in.php", data={
        "key": api_key,
        "method": "base64",
        "body": body,
        "json": 1,
    }, timeout=30)
    result = resp.json()
    if result.get("status") != 1:
        raise RuntimeError(result.get("request"))
    task_id = result["request"]

    # Poll — image CAPTCHAs solve fast
    max_polls = 12
    time.sleep(5)
    for attempt in range(max_polls):
        resp = requests.get("https://ocr.captchaai.com/res.php", params={
            "key": api_key, "action": "get",
            "id": task_id, "json": 1,
        }, timeout=15)
        data = resp.json()
        if data.get("status") == 1:
            return data["request"]

        # Read with .get() so a reply lacking "request" is reported as a
        # RuntimeError, like the submit step, rather than a bare KeyError.
        reply = data.get("request")
        if reply != "CAPCHA_NOT_READY":
            raise RuntimeError(reply)

        # Only wait when another poll follows; sleeping after the final
        # attempt would just delay the timeout error.
        if attempt < max_polls - 1:
            time.sleep(3)
    raise TimeoutError("Image solve timeout")
故障排除清单
| 症状 | 诊断 | 处理方式 |
|---|---|---|
| 答案是乱码 | Base64 编码错误 | 验证往返编码 |
| 答案很接近但错误 | 图像质量低 | 预处理:放大、锐化、增强对比度 |
| 答案字符数错误 | 缺少长度提示 | 添加 min_len/max_len 参数 |
| 答案混有字母和数字 | 缺少类型提示 | 添加 numeric=1 或 numeric=2 |
| 返回空答案 | 图像空白或已损坏 | 提交前验证图像 |
| 答案正确但网站拒绝 | 区分大小写 | 通过 textinstructions 说明需区分大小写 |
常见问题
CaptchaAI 图像验证码的准确度如何?
只要图像提交正确,CaptchaAI 能以高准确率识别 27,500 多种验证码类型。大多数失败是由图像质量差或参数不正确造成的。
我应该在提交之前对图像进行预处理吗?
仅在原始图像质量较低时才需要。CaptchaAI 无需预处理即可很好地处理标准验证码图像;对于特别小的图像,放大并提高对比度有助于处理这类边缘情况。
我可以报告错误答案吗?
可以。使用 reportbad 端点并附上任务 ID 来报告不正确的答案。这有助于提高准确率,相应费用也可能退还到您的账户。
相关指南
准确识别图像验证码——试试 CaptchaAI。