当您的组织每天要求解数千个验证码时,您需要为每个请求保留记录。审计日志可以回答以下问题:是谁触发了这次求解?针对的是哪个网站?花了多少钱?什么时候发生的?本指南展示了如何为 CaptchaAI 操作实现全面的审计日志记录。
记录什么
每个验证码解决应该记录:
| 字段 | 用途 | 示例 |
|---|---|---|
| timestamp | 发出请求的时间 | 2026-04-04T14:30:00Z |
| request_id | 本次求解的唯一标识符 | uuid4() |
| captcha_type | 使用的验证码方法 | userrecaptcha |
| target_site | 待求解验证码所在页面的 URL | https://staging.example.com/qa-login |
| task_id | CaptchaAI 任务 ID | 73829451 |
| status | 结果 | solved、failed、timeout |
| solve_time_ms | 从提交到返回结果的耗时 | 18432 |
| error_code | 失败时的错误代码 | ERROR_CAPTCHA_UNSOLVABLE |
| initiator | 触发求解的人或系统 | scraper-job-42 |
| cost_estimate | 预计费用 | 0.003 |
不要记录: API 密钥、CAPTCHA 令牌(它们是临时的)或来自目标站点的个人身份信息。
Python实现
# audit_solver.py
import os
import uuid
import time
import json
import logging
from datetime import datetime, timezone
import requests
# API key is read from the environment; the placeholder is only a fallback.
API_KEY = os.environ.get("CAPTCHAAI_KEY", "YOUR_API_KEY")

# Configure audit logger — separate from application logs
audit_logger = logging.getLogger("captcha_audit")
audit_logger.setLevel(logging.INFO)
# Without this, every audit record would also bubble up to the root logger's
# handlers and end up mixed into the application logs.
audit_logger.propagate = False

# File handler with rotation
from logging.handlers import RotatingFileHandler

handler = RotatingFileHandler(
    "captcha_audit.jsonl",
    maxBytes=50_000_000,  # 50 MB per file
    backupCount=10,  # keep at most 10 rotated files
)
# Records are already serialized JSON, so emit the message text verbatim.
handler.setFormatter(logging.Formatter("%(message)s"))
audit_logger.addHandler(handler)
def log_audit(record):
    """Emit *record* as one JSON line on the audit logger.

    Values that ``json`` cannot encode natively are converted with ``str``.
    """
    serialized = json.dumps(record, default=str)
    audit_logger.info(serialized)
def solve_with_audit(sitekey, pageurl, captcha_type="userrecaptcha",
                     initiator="unknown"):
    """Solve a CAPTCHA through CaptchaAI and write an audit record.

    Submits the task to ``in.php``, waits 15 seconds, then polls ``res.php``
    up to 25 times at 5-second intervals. Every exit path (solved, submit
    failure, API error, timeout, exception) writes one audit record.

    Args:
        sitekey: Site key, sent as the ``googlekey`` request parameter.
        pageurl: URL of the page carrying the CAPTCHA; logged as
            ``target_site``.
        captcha_type: Value sent as the API ``method`` parameter.
        initiator: Free-form label for who or what triggered the solve.

    Returns:
        The value of the ``request`` field of the successful poll response
        (the solved token), or ``None`` when submission is rejected, the API
        reports an error, or polling runs out of attempts.

    Raises:
        Exception: Any error raised by the HTTP calls or JSON decoding is
            logged with status ``"error"`` and re-raised unchanged.
    """
    # Bound every HTTP call so a stalled connection surfaces as a logged
    # error instead of hanging the caller without any audit record.
    http_timeout = 30
    request_id = str(uuid.uuid4())
    # Monotonic clock: elapsed time stays correct across system clock changes.
    started = time.monotonic()

    def elapsed_ms():
        return int((time.monotonic() - started) * 1000)

    audit_record = {
        "request_id": request_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "captcha_type": captcha_type,
        "target_site": pageurl,
        "initiator": initiator,
        "status": "submitted",
    }

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as session:
            # Submit
            resp = session.get(
                "https://ocr.captchaai.com/in.php",
                params={
                    "key": API_KEY,
                    "method": captcha_type,
                    "googlekey": sitekey,
                    "pageurl": pageurl,
                    "json": "1",
                },
                timeout=http_timeout,
            )
            result = resp.json()

            if result.get("status") != 1:
                audit_record.update({
                    "status": "submit_failed",
                    "error_code": result.get("request"),
                    "solve_time_ms": elapsed_ms(),
                })
                log_audit(audit_record)
                return None

            task_id = result["request"]
            audit_record["task_id"] = task_id

            # Poll
            time.sleep(15)
            for _ in range(25):
                poll = session.get(
                    "https://ocr.captchaai.com/res.php",
                    params={
                        "key": API_KEY,
                        "action": "get",
                        "id": task_id,
                        "json": "1",
                    },
                    timeout=http_timeout,
                )
                poll_result = poll.json()

                if poll_result.get("status") == 1:
                    audit_record.update({
                        "status": "solved",
                        "solve_time_ms": elapsed_ms(),
                        "cost_estimate": 0.003,  # Adjust per your rate
                    })
                    log_audit(audit_record)
                    return poll_result["request"]

                # Anything other than "not ready yet" is a terminal API error.
                if poll_result.get("request") != "CAPCHA_NOT_READY":
                    audit_record.update({
                        "status": "failed",
                        "error_code": poll_result.get("request"),
                        "solve_time_ms": elapsed_ms(),
                    })
                    log_audit(audit_record)
                    return None

                time.sleep(5)

            audit_record.update({
                "status": "timeout",
                "solve_time_ms": elapsed_ms(),
            })
            log_audit(audit_record)
            return None

    except Exception as e:
        # HTTP client errors can embed the request URL, whose query string
        # carries the API key; scrub it so the key never reaches the log.
        message = str(e)
        if API_KEY:
            message = message.replace(API_KEY, "[REDACTED]")
        audit_record.update({
            "status": "error",
            "error_code": message[:200],
            "solve_time_ms": elapsed_ms(),
        })
        log_audit(audit_record)
        raise
# Usage: solve the reCAPTCHA on Google's demo page and label the audit record
# with the job that asked for it. `token` receives whatever
# solve_with_audit returns (the solved token, or None).
token = solve_with_audit(
    sitekey="6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-",
    pageurl="https://www.google.com/recaptcha/api2/demo",
    initiator="price-scraper-v2",
)
审计日志输出(JSONL格式)
{"request_id":"a1b2c3d4-...","timestamp":"2026-04-04T14:30:00+00:00","captcha_type":"userrecaptcha","target_site":"https://www.google.com/recaptcha/api2/demo","initiator":"price-scraper-v2","status":"solved","task_id":"73829451","solve_time_ms":18432,"cost_estimate":0.003}
JavaScript 实现
// audit_solver.js
const fs = require('fs');
const { v4: uuidv4 } = require('uuid');
const axios = require('axios');
// API key comes from the environment; the placeholder is only a fallback.
const API_KEY = process.env.CAPTCHAAI_KEY || 'YOUR_API_KEY';
// Path of the audit log file that logAudit appends to.
const AUDIT_FILE = 'captcha_audit.jsonl';
/**
 * Append `record` to the audit file as a single JSON line.
 * The write is synchronous.
 */
function logAudit(record) {
  const line = `${JSON.stringify(record)}\n`;
  fs.appendFileSync(AUDIT_FILE, line);
}
/**
 * Solve a reCAPTCHA through CaptchaAI and write an audit record.
 *
 * Submits the task to in.php, waits 15 seconds, then polls res.php up to
 * 25 times at 5-second intervals. Every exit path (solved, submit failure,
 * API error, timeout, thrown error) writes one audit record.
 *
 * @param {string} sitekey   Site key, sent as the `googlekey` parameter.
 * @param {string} pageurl   URL of the page carrying the CAPTCHA.
 * @param {string} [initiator='unknown'] Label for who or what triggered the solve.
 * @returns {Promise<string|null>} The `request` field of the successful poll
 *   response (the solved token), or null on submit failure, API error or timeout.
 * @throws Re-throws any error from the HTTP calls after logging it with
 *   status 'error'.
 */
async function solveWithAudit(sitekey, pageurl, initiator = 'unknown') {
  // axios has no timeout by default; bound each call so a stalled connection
  // becomes a logged error instead of a request that never settles.
  const HTTP_TIMEOUT_MS = 30000;
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const requestId = uuidv4();
  const start = Date.now();
  const record = {
    request_id: requestId,
    timestamp: new Date().toISOString(),
    captcha_type: 'userrecaptcha',
    target_site: pageurl,
    initiator,
    status: 'submitted',
  };

  try {
    const submit = await axios.get('https://ocr.captchaai.com/in.php', {
      params: {
        key: API_KEY,
        method: 'userrecaptcha',
        googlekey: sitekey,
        pageurl,
        json: '1',
      },
      timeout: HTTP_TIMEOUT_MS,
    });

    if (submit.data.status !== 1) {
      record.status = 'submit_failed';
      record.error_code = submit.data.request;
      record.solve_time_ms = Date.now() - start;
      logAudit(record);
      return null;
    }

    const taskId = submit.data.request;
    record.task_id = taskId;

    await sleep(15000);
    for (let attempt = 0; attempt < 25; attempt++) {
      const poll = await axios.get('https://ocr.captchaai.com/res.php', {
        params: { key: API_KEY, action: 'get', id: taskId, json: '1' },
        timeout: HTTP_TIMEOUT_MS,
      });

      if (poll.data.status === 1) {
        record.status = 'solved';
        record.solve_time_ms = Date.now() - start;
        record.cost_estimate = 0.003;
        logAudit(record);
        return poll.data.request;
      }

      // Anything other than "not ready yet" is a terminal API error.
      if (poll.data.request !== 'CAPCHA_NOT_READY') {
        record.status = 'failed';
        record.error_code = poll.data.request;
        record.solve_time_ms = Date.now() - start;
        logAudit(record);
        return null;
      }

      await sleep(5000);
    }

    record.status = 'timeout';
    record.solve_time_ms = Date.now() - start;
    logAudit(record);
    return null;
  } catch (e) {
    // A thrown value is not guaranteed to be an Error with a string message;
    // fall back to its string form so logging cannot mask the original error.
    let message = String(e instanceof Error ? e.message : e);
    // Defensive: never let the API key reach the audit log via an error text.
    if (API_KEY) {
      message = message.split(API_KEY).join('[REDACTED]');
    }
    record.status = 'error';
    record.error_code = message.slice(0, 200);
    record.solve_time_ms = Date.now() - start;
    logAudit(record);
    throw e;
  }
}
查询审计日志
每日总结
import json
from collections import Counter
from datetime import date
def daily_summary(log_file, target_date=None):
    """Print a one-day summary of a JSONL audit log.

    Prints the date, the number of matching records, a per-status count, the
    summed ``cost_estimate`` and, when any solve times are present, the
    (upper) median ``solve_time_ms``. Returns ``None``.

    Args:
        log_file: Path to the audit log, one JSON object per line.
        target_date: Day to summarize, as a ``YYYY-MM-DD`` string or a
            ``datetime.date``/``datetime.datetime``. Defaults to the current
            UTC date. NOTE(review): this assumes the log's timestamps are
            written in UTC — confirm against the writer.

    Raises:
        OSError: If ``log_file`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    from datetime import datetime, timezone
    from statistics import median_high

    if not target_date:
        # Match the day in UTC rather than local time; with a local default,
        # UTC-stamped records around midnight would land in the wrong day.
        target = datetime.now(timezone.utc).date().isoformat()
    elif isinstance(target_date, datetime):
        target = target_date.date().isoformat()
    elif isinstance(target_date, date):
        target = target_date.isoformat()
    else:
        target = target_date

    statuses = Counter()
    total_cost = 0
    solve_times = []

    with open(log_file, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline or manual edit).
                continue
            record = json.loads(line)
            if not str(record.get("timestamp", "")).startswith(target):
                continue
            statuses[record.get("status", "unknown")] += 1
            total_cost += record.get("cost_estimate") or 0
            solve_time = record.get("solve_time_ms")
            if solve_time is not None:
                solve_times.append(solve_time)

    print(f"Date: {target}")
    print(f"Total requests: {sum(statuses.values())}")
    print(f"Statuses: {dict(statuses)}")
    print(f"Estimated cost: ${total_cost:.2f}")
    if solve_times:
        # median_high picks the upper middle element for even-sized samples.
        print(f"Median solve time: {median_high(solve_times)}ms")
# Print the summary for the default audit file, using the default date.
daily_summary("captcha_audit.jsonl")
保留和储存
| 规模 | 每日日志大小 | 每月存储量 | 推荐方案 |
|---|---|---|---|
| 100 次求解/天 | ~30 KB | ~1 MB | 本地文件 |
| 1,000 次求解/天 | ~300 KB | ~10 MB | 本地文件 + 日志轮转 |
| 10,000 次求解/天 | ~3 MB | ~100 MB | 发送到日志聚合器 |
| 100,000 次求解/天 | ~30 MB | ~1 GB | 集中式日志记录(ELK、Datadog) |
故障排除
| 问题 | 原因 | 处理方式 |
|---|---|---|
| 日志文件变得太大 | 未配置日志轮转 | 使用 RotatingFileHandler 或 logrotate |
| 缺少审计记录 | 在写入日志之前发生异常 | 在 finally 块中写入日志 |
| 高负载下写入速度慢 | 同步文件 I/O | 使用异步文件写入或缓冲写入 |
| 时间戳不一致 | 系统时钟漂移 | 使用 NTP;以 UTC 记录时间 |
常问问题
我应该在审核跟踪中记录 CAPTCHA 令牌吗?
不会。令牌是临时的(在 60-300 秒内过期)并且没有审核价值。记录它们会增加存储空间,但没有任何好处。
我可以使用审核日志进行帐单对账吗?
是的。将您的审核日志总计与 CaptchaAI 的使用仪表板进行比较,以验证计费准确性。
我应该设置什么保留期限?
90 天是操作审核日志的标准。对于合规性驱动的日志记录,请检查您所在行业的要求(SOC 2、GDPR、HIPAA)。
相关文章
- Dynamodb 无服务器验证码解决跟踪
下一步
为每一次验证码求解建立可追溯性 — 获取您的 CaptchaAI API 密钥。
相关指南:
- 使用情况仪表板监控
- 结构化日志记录
- 余额检查和自动充值