aiohttp 为 Python 提供非阻塞的 HTTP 请求能力。将其与 CaptchaAI 结合使用,可以同时求解多个验证码,而不会阻塞事件循环。
要求
| 要求 | 细节 |
|---|---|
| Python | 3.8+ |
| aiohttp | 3.8+ |
| CaptchaAI API 密钥 | 在这里买一个 |
pip install aiohttp
异步 CaptchaAI 客户端
import aiohttp
import asyncio
class AsyncCaptchaAI:
    """Minimal asynchronous client for the CaptchaAI HTTP API.

    The client does not own an HTTP session: every method receives one as
    an argument, so a single session can be shared by many concurrent
    solves. The session only needs a ``get(url, params=...)`` method that
    returns an async context manager whose response exposes an awaitable
    ``text()`` (for example ``aiohttp.ClientSession``).
    """

    def __init__(self, api_key: str):
        """Store the API key and the base URL used for all requests."""
        self.api_key = api_key
        self.base_url = "https://ocr.captchaai.com"

    async def submit(self, session, params):
        """Submit a CAPTCHA task and return the task ID.

        Args:
            session: HTTP session used to send the request.
            params: Task parameters (e.g. ``method``, ``pageurl``). The
                dict is not modified; the API key is added to a copy.

        Returns:
            The task ID taken from the ``OK|<id>`` response.

        Raises:
            RuntimeError: If the response does not start with ``OK|``.
        """
        # Build a new dict instead of writing into the caller's one, so the
        # API key never leaks into data the caller may reuse or log.
        query = {**params, "key": self.api_key}
        async with session.get(
            f"{self.base_url}/in.php", params=query
        ) as resp:
            # Strip so a trailing newline cannot defeat the prefix check.
            text = (await resp.text()).strip()
        if not text.startswith("OK|"):
            raise RuntimeError(f"Submit failed: {text}")
        return text.split("|")[1]

    async def poll(self, session, task_id, timeout=300, poll_interval=5):
        """Poll for the result of a task until it is ready or time runs out.

        Args:
            session: HTTP session used to send the requests.
            task_id: ID returned by :meth:`submit`.
            timeout: Maximum number of seconds to keep polling.
            poll_interval: Seconds to wait before each poll request.

        Returns:
            Everything after the first ``|`` of the ``OK|<result>`` response.

        Raises:
            RuntimeError: If the API answers with anything other than
                ``CAPCHA_NOT_READY`` or ``OK|...``.
            TimeoutError: If no result arrived within ``timeout`` seconds.
        """
        params = {
            "key": self.api_key,
            "action": "get",
            "id": task_id,
        }
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while True:
            remaining = deadline - loop.time()
            if remaining <= 0:
                break
            # Never sleep past the deadline, so the timeout is honoured
            # even when it is not a multiple of the poll interval.
            await asyncio.sleep(min(poll_interval, remaining))
            async with session.get(
                f"{self.base_url}/res.php", params=params
            ) as resp:
                text = (await resp.text()).strip()
            if text == "CAPCHA_NOT_READY":
                continue
            if text.startswith("OK|"):
                # maxsplit=1 keeps any "|" characters inside the result.
                return text.split("|", 1)[1]
            raise RuntimeError(f"Solve failed: {text}")
        raise TimeoutError(f"Task {task_id} timed out after {timeout}s")

    async def solve(self, session, params, timeout=300, poll_interval=5):
        """Submit a task and poll for its result in one call.

        Args:
            session: HTTP session used for all requests.
            params: Task parameters passed to :meth:`submit`.
            timeout: Maximum number of seconds to wait for the result.
            poll_interval: Seconds to wait before each poll request.

        Returns:
            The solved result returned by :meth:`poll`.
        """
        task_id = await self.submit(session, params)
        return await self.poll(session, task_id, timeout, poll_interval)

    async def get_balance(self, session) -> float:
        """Return the account balance as a float.

        Raises:
            ValueError: If the response is not a number (for example an
                API error code).
        """
        params = {"key": self.api_key, "action": "getbalance"}
        async with session.get(
            f"{self.base_url}/res.php", params=params
        ) as resp:
            text = (await resp.text()).strip()
        try:
            return float(text)
        except ValueError:
            # Same exception type as a bare float() failure, clearer message.
            raise ValueError(f"Balance check failed: {text}") from None
解决单个验证码
import asyncio
import os
async def main():
    """Print the account balance, then solve one reCAPTCHA v2 challenge.

    The API key is read from the ``CAPTCHAAI_API_KEY`` environment variable.
    """
    api_key = os.environ["CAPTCHAAI_API_KEY"]
    solver = AsyncCaptchaAI(api_key)
    recaptcha_task = {
        "method": "userrecaptcha",
        "googlekey": "6Le-wvkS...",
        "pageurl": "https://example.com",
    }

    async with aiohttp.ClientSession() as session:
        # Balance first, so a funding problem shows up before solving.
        balance = await solver.get_balance(session)
        print(f"Balance: ${balance:.2f}")

        # Only the first 50 characters of the token are printed.
        token = await solver.solve(session, recaptcha_task)
        print(f"Token: {token[:50]}...")


asyncio.run(main())
同时解决多个验证码
async def solve_batch(urls, site_key):
    """Solve one reCAPTCHA v2 per URL concurrently and report each outcome.

    All solves share one HTTP session and are awaited together. A failed
    solve does not abort the batch: its exception is placed in the result
    list at the position of its URL.

    Args:
        urls: Page URLs, one CAPTCHA task per URL.
        site_key: reCAPTCHA site key sent as ``googlekey`` for every task.

    Returns:
        A list aligned with ``urls`` holding either the solved token or the
        exception raised for that URL.
    """
    solver = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])

    def task_params(page_url):
        # Request parameters for a single reCAPTCHA v2 task.
        return {
            "method": "userrecaptcha",
            "googlekey": site_key,
            "pageurl": page_url,
        }

    async with aiohttp.ClientSession() as session:
        pending = [
            solver.solve(session, task_params(page_url)) for page_url in urls
        ]
        # return_exceptions=True turns failures into list entries.
        results = await asyncio.gather(*pending, return_exceptions=True)

        for url, result in zip(urls, results):
            if not isinstance(result, Exception):
                print(f"SOLVED {url}: {len(result)} chars")
                continue
            print(f"FAILED {url}: {result}")
    return results
# Five sample pages (page1 .. page5), solved concurrently in one batch.
urls = [f"https://example.com/page{number}" for number in range(1, 6)]

asyncio.run(solve_batch(urls, "6Le-wvkS..."))
使用验证码处理进行抓取
async def scrape_with_captcha(url, site_key):
    """Fetch a page, solving and submitting its reCAPTCHA if one is present.

    Args:
        url: Page to fetch; also used as the POST target for the token.
        site_key: reCAPTCHA site key sent as ``googlekey``.

    Returns:
        The HTML of the first response when it does not contain the
        ``g-recaptcha`` marker, otherwise the body of the response to the
        POST that carries the solved token.
    """
    solver = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as page:
            html = await page.text()

        # The marker string is the only CAPTCHA detection performed.
        captcha_present = "g-recaptcha" in html
        if captcha_present:
            token = await solver.solve(session, {
                "method": "userrecaptcha",
                "googlekey": site_key,
                "pageurl": url,
            })
            form = {"g-recaptcha-response": token}
            async with session.post(url, data=form) as reply:
                return await reply.text()

        # No CAPTCHA on the page: the fetched content is the result.
        return html
用于速率控制的信号量
限制并发求解的数量,以避免压垮 API:
async def solve_with_limit(urls, site_key, max_concurrent=10):
    """Solve one reCAPTCHA v2 per URL with a cap on concurrent solves.

    Args:
        urls: Page URLs, one CAPTCHA task per URL.
        site_key: reCAPTCHA site key sent as ``googlekey`` for every task.
        max_concurrent: Maximum number of solves in flight at once; must
            be at least 1.

    Returns:
        A list aligned with ``urls`` holding either the solved token or the
        exception raised for that URL.

    Raises:
        ValueError: If ``max_concurrent`` is less than 1.
    """
    # A semaphore created with 0 permits would make every task wait
    # forever, so reject that up front instead of hanging.
    if max_concurrent < 1:
        raise ValueError(
            f"max_concurrent must be at least 1, got {max_concurrent}"
        )

    solver = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])
    semaphore = asyncio.Semaphore(max_concurrent)

    async def solve_one(session, url):
        # Holding the semaphore for the whole solve bounds the number of
        # submit/poll cycles running at the same time.
        async with semaphore:
            return await solver.solve(session, {
                "method": "userrecaptcha",
                "googlekey": site_key,
                "pageurl": url,
            })

    async with aiohttp.ClientSession() as session:
        tasks = [solve_one(session, url) for url in urls]
        # return_exceptions=True keeps one failure from aborting the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    solved = sum(1 for r in results if not isinstance(r, Exception))
    print(f"Solved {solved}/{len(urls)} CAPTCHAs")
    return results
Cloudflare Turnstile 示例
async def solve_turnstile(url, sitekey):
    """Solve a single Turnstile challenge and return its token.

    Args:
        url: Page URL sent as ``pageurl``.
        sitekey: Turnstile site key sent as ``sitekey``.

    Returns:
        The token returned by the solver.
    """
    turnstile_task = {
        "method": "turnstile",
        "sitekey": sitekey,
        "pageurl": url,
    }
    solver = AsyncCaptchaAI(os.environ["CAPTCHAAI_API_KEY"])
    async with aiohttp.ClientSession() as session:
        return await solver.solve(session, turnstile_task)
故障排除
| 错误 | 原因 | 处理方式 |
|---|---|---|
| ClientConnectorError | 网络问题 | 检查网络连接 |
| Submit failed: ERROR_ZERO_BALANCE | 账户余额不足 | 为账户充值 |
| TimeoutError | 求解速度慢 | 增大 timeout 参数 |
| RuntimeError: Event loop is closed | 在 Jupyter 中使用 asyncio.run | 使用 nest_asyncio |
常见问题
为什么使用 aiohttp 而不是 httpx?
aiohttp 是 Python 中最成熟的异步 HTTP 库,对于高并发工作负载具有最佳性能。httpx 同样可用——请参阅我们的 httpx 集成指南。
我可以运行多少个并发求解?
CaptchaAI 可处理 100 个以上的并发请求。请根据您的需求和账户余额,使用信号量来控制并发量。
我可以在多次求解中重复使用同一个会话吗?
是的,你应该这样做。 aiohttp 会话维护连接池,使后续请求更快。
相关指南
- HTTPX + CaptchaAI 集成
- 并行验证码求解
- Scrapy + CaptchaAI 集成