你无法修复你看不到的东西。Datadog 可让您实时了解验证码求解管道的运行状况——求解成功率、延迟百分位数、错误分类统计,以及在管道中断之前触发的异常告警。
要跟踪的关键指标
| 指标 | 类型 | 为什么它很重要 |
|---|---|---|
| captcha.solve.count | 计数器 | 提交的任务总数 |
| captcha.solve.success | 计数器 | 成功的求解 |
| captcha.solve.error | 计数器 | 失败的求解(按错误类型) |
| captcha.solve.latency | 直方图 | 从提交到求解完成的时间 |
| captcha.queue.depth | 仪表(Gauge) | 队列中待处理的任务 |
| captcha.balance | 仪表(Gauge) | 剩余 API 余额 |
| captcha.worker.active | 仪表(Gauge) | 活跃的工作进程 |
Python——DogStatsD 集成
import os
import time
import functools
import requests
from datadog import initialize, statsd
# Point the DogStatsD client at the agent; host and port fall back to
# localhost:8125 when the environment does not provide them.
_agent_host = os.environ.get("DD_AGENT_HOST", "localhost")
_agent_port = int(os.environ.get("DD_DOGSTATSD_PORT", "8125"))
initialize(statsd_host=_agent_host, statsd_port=_agent_port)

# Required setting: a missing variable raises KeyError at import time.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]

# One Session object shared by the CaptchaAI calls in this module.
session = requests.Session()
def track_captcha_metrics(captcha_type="recaptcha_v2"):
    """Build a decorator that reports DogStatsD metrics for a solve function.

    Each call to the wrapped function emits:

    * ``captcha.solve.count`` -- once, before the function runs;
    * ``captcha.solve.success`` and ``captcha.solve.latency`` (elapsed
      seconds) -- when the returned mapping contains a ``"solution"`` key;
    * ``captcha.solve.error`` -- otherwise, with an extra ``error:<reason>``
      tag. The reason is the result's ``"error"`` value (``"unknown"`` when
      it is absent or the result is not a mapping), or the exception class
      name when the function raises.

    Every metric carries a ``captcha_type:<captcha_type>`` tag.

    Args:
        captcha_type: Value used for the ``captcha_type`` tag.

    Returns:
        A decorator. The wrapper it produces returns the wrapped function's
        result unchanged and re-raises any exception the function raised.
    """
    from collections.abc import Mapping

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            tags = [f"captcha_type:{captcha_type}"]
            statsd.increment("captcha.solve.count", tags=tags)
            # perf_counter is monotonic, so a wall-clock adjustment during a
            # long solve cannot produce a negative or inflated latency.
            start = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            except Exception as exc:
                statsd.increment(
                    "captcha.solve.error",
                    tags=tags + [f"error:{type(exc).__name__}"],
                )
                raise
            elapsed = time.perf_counter() - start

            # Instrumentation must stay transparent: a result that is not a
            # mapping (e.g. None) is counted as an error, not turned into a
            # TypeError for the caller.
            outcome = result if isinstance(result, Mapping) else {}
            if "solution" in outcome:
                statsd.increment("captcha.solve.success", tags=tags)
                statsd.histogram("captcha.solve.latency", elapsed, tags=tags)
            else:
                error = outcome.get("error", "unknown")
                statsd.increment(
                    "captcha.solve.error",
                    tags=tags + [f"error:{error}"],
                )
            return result
        return wrapper
    return decorator
@track_captcha_metrics(captcha_type="recaptcha_v2")
def solve_recaptcha(sitekey, pageurl, max_polls=60, poll_interval=5,
                    request_timeout=30):
    """Submit a reCAPTCHA task to CaptchaAI and poll until it is resolved.

    The task is posted to ``in.php``; the returned id is then polled on
    ``res.php``, sleeping ``poll_interval`` seconds before each poll.

    Args:
        sitekey: Sent as the ``googlekey`` form field.
        pageurl: Sent as the ``pageurl`` form field.
        max_polls: Maximum number of polls before giving up.
        poll_interval: Seconds to sleep before each poll.
        request_timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        ``{"solution": <request field of the final response>}`` on success,
        ``{"error": <request field>}`` when the API rejects the task or
        reports anything other than ``CAPCHA_NOT_READY`` while polling, or
        ``{"error": "TIMEOUT"}`` once ``max_polls`` polls are exhausted.

    Raises:
        Whatever ``session.post`` / ``session.get`` / ``.json()`` raise;
        network and decoding errors are not caught here.
    """
    resp = session.post(
        "https://ocr.captchaai.com/in.php",
        data={
            "key": API_KEY,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": pageurl,
            "json": 1,
        },
        # requests has no default timeout; without one a stalled connection
        # would block this worker indefinitely.
        timeout=request_timeout,
    )
    data = resp.json()
    if data.get("status") != 1:
        return {"error": data.get("request")}

    captcha_id = data["request"]
    for _ in range(max_polls):
        time.sleep(poll_interval)
        result = session.get(
            "https://ocr.captchaai.com/res.php",
            params={
                "key": API_KEY, "action": "get", "id": captcha_id, "json": 1
            },
            timeout=request_timeout,
        ).json()
        if result.get("status") == 1:
            return {"solution": result["request"]}
        # Any answer other than "not ready yet" is final.
        if result.get("request") != "CAPCHA_NOT_READY":
            return {"error": result.get("request")}
    return {"error": "TIMEOUT"}
def report_balance(request_timeout=30):
    """Fetch the CaptchaAI balance and publish it as ``captcha.balance``.

    Args:
        request_timeout: Timeout in seconds for the HTTP request.

    Returns:
        The balance as a float when the API answers with ``status == 1``;
        ``None`` otherwise (no gauge is sent in that case).

    Raises:
        Whatever ``session.get`` / ``.json()`` / ``float()`` raise; those
        errors are not caught here.
    """
    resp = session.get(
        "https://ocr.captchaai.com/res.php",
        params={"key": API_KEY, "action": "getbalance", "json": 1},
        # requests has no default timeout; without one a stalled connection
        # would block the reporter indefinitely.
        timeout=request_timeout,
    )
    data = resp.json()
    if data.get("status") != 1:
        return None
    balance = float(data["request"])
    statsd.gauge("captcha.balance", balance)
    return balance
def report_queue_depth(depth):
    """Publish ``depth`` as the ``captcha.queue.depth`` gauge."""
    metric_name = "captcha.queue.depth"
    statsd.gauge(metric_name, depth)
def report_worker_count(active, total):
    """Publish the active and total worker counts as two gauges."""
    readings = (
        ("captcha.worker.active", active),
        ("captcha.worker.total", total),
    )
    for metric_name, value in readings:
        statsd.gauge(metric_name, value)
JavaScript——Datadog 集成
const { StatsD } = require("hot-shots");
const axios = require("axios");
// CaptchaAI API key, read from the environment (undefined when unset).
const API_KEY = process.env.CAPTCHAAI_API_KEY;

// DogStatsD client settings; each falls back to a local default when the
// corresponding environment variable is unset or empty.
const agentHost = process.env.DD_AGENT_HOST || "localhost";
const agentPort = parseInt(process.env.DD_DOGSTATSD_PORT || "8125", 10);
const environment = process.env.NODE_ENV || "development";

// Metric names passed to this client are prefixed with "captcha.".
const dogstatsd = new StatsD({
  host: agentHost,
  port: agentPort,
  prefix: "captcha.",
  globalTags: [`env:${environment}`],
});
/**
 * Solve a CAPTCHA through solveCaptcha() and report the outcome to DogStatsD.
 *
 * Emits "solve.count" before the attempt, then either "solve.success" plus
 * "solve.latency" (elapsed seconds) when the result has a truthy `solution`,
 * or "solve.error" with an extra `error:<reason>` tag otherwise. Every
 * metric is tagged `captcha_type:<captchaType>`.
 *
 * @param {string} sitekey - Forwarded to solveCaptcha().
 * @param {string} pageurl - Forwarded to solveCaptcha().
 * @param {string} [captchaType="recaptcha_v2"] - Value of the captcha_type tag.
 * @returns {Promise<object>} The object returned by solveCaptcha(), unchanged.
 * @throws Re-throws whatever solveCaptcha() throws, after counting the error.
 */
async function solveCaptchaWithMetrics(sitekey, pageurl, captchaType = "recaptcha_v2") {
  const tags = [`captcha_type:${captchaType}`];
  dogstatsd.increment("solve.count", 1, tags);
  const startTime = Date.now();

  try {
    const result = await solveCaptcha(sitekey, pageurl);
    const elapsed = (Date.now() - startTime) / 1000;

    if (result.solution) {
      dogstatsd.increment("solve.success", 1, tags);
      dogstatsd.histogram("solve.latency", elapsed, tags);
    } else {
      // Fall back to "unknown" so the tag never reads "error:undefined".
      const reason = result.error || "unknown";
      dogstatsd.increment("solve.error", 1, [...tags, `error:${reason}`]);
    }
    return result;
  } catch (err) {
    // Tag with the error class name, not err.message: messages are free text
    // (spaces, hosts, ports), which makes malformed and unbounded tag values.
    const errorName = (err && err.name) || "Error";
    dogstatsd.increment("solve.error", 1, [...tags, `error:${errorName}`]);
    throw err;
  }
}
/**
 * Submit a reCAPTCHA task to CaptchaAI and poll until it is resolved.
 *
 * The task is posted to in.php; the returned id is then polled on res.php,
 * waiting `pollIntervalMs` before each poll.
 *
 * @param {string} sitekey - Sent as the `googlekey` parameter.
 * @param {string} pageurl - Sent as the `pageurl` parameter.
 * @param {object} [options]
 * @param {number} [options.maxPolls=60] - Maximum number of polls.
 * @param {number} [options.pollIntervalMs=5000] - Delay before each poll.
 * @param {number} [options.requestTimeoutMs=30000] - Timeout per HTTP request.
 * @returns {Promise<{solution: *}|{error: *}>} `{ solution }` holding the
 *   `request` field of the final response, `{ error }` holding the `request`
 *   field of a rejecting response, or `{ error: "TIMEOUT" }` once the polls
 *   are exhausted.
 * @throws Whatever axios throws (network failure, timeout, non-2xx status).
 */
async function solveCaptcha(
  sitekey,
  pageurl,
  { maxPolls = 60, pollIntervalMs = 5000, requestTimeoutMs = 30000 } = {}
) {
  const submitResp = await axios.post("https://ocr.captchaai.com/in.php", null, {
    params: {
      key: API_KEY,
      method: "userrecaptcha",
      googlekey: sitekey,
      pageurl: pageurl,
      json: 1,
    },
    // axios has no timeout by default; without one a stalled connection
    // would keep this call pending forever.
    timeout: requestTimeoutMs,
  });
  if (submitResp.data.status !== 1) {
    return { error: submitResp.data.request };
  }

  const captchaId = submitResp.data.request;
  for (let i = 0; i < maxPolls; i++) {
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    const pollResp = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: captchaId, json: 1 },
      timeout: requestTimeoutMs,
    });
    if (pollResp.data.status === 1) {
      return { solution: pollResp.data.request };
    }
    // Any answer other than "not ready yet" is final.
    if (pollResp.data.request !== "CAPCHA_NOT_READY") {
      return { error: pollResp.data.request };
    }
  }
  return { error: "TIMEOUT" };
}
/**
 * Fetch the CaptchaAI balance and publish it as the "balance" gauge.
 *
 * Best-effort: request failures are logged to stderr and not re-thrown.
 *
 * @returns {Promise<number|null>} The balance when the API answers with
 *   `status === 1` and a numeric value; `null` in every other case (no gauge
 *   is sent then).
 */
async function reportBalance() {
  try {
    const resp = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "getbalance", json: 1 },
      // axios has no timeout by default; a stalled request would otherwise
      // stay pending while later scheduled reports pile up behind it.
      timeout: 30000,
    });
    if (resp.data.status === 1) {
      const balance = parseFloat(resp.data.request);
      if (!Number.isNaN(balance)) {
        dogstatsd.gauge("balance", balance);
        return balance;
      }
      // A NaN gauge would corrupt the series and never trip a "< N" alert.
      console.error("Balance check failed:", `unparsable balance ${resp.data.request}`);
    }
  } catch (err) {
    console.error("Balance check failed:", err.message);
  }
  return null;
}
// Publish the balance gauge once per minute for as long as the process runs.
const BALANCE_REPORT_INTERVAL_MS = 60 * 1000;
setInterval(reportBalance, BALANCE_REPORT_INTERVAL_MS);

// Public API of this module.
module.exports = { solveCaptchaWithMetrics, reportBalance };
Datadog 仪表板 JSON
将此 JSON 模板导入 Datadog 以创建验证码监控仪表板:
{
  "title": "CaptchaAI Pipeline",
  "layout_type": "ordered",
  "widgets": [
    {
      "definition": {
        "type": "timeseries",
        "title": "Solve Rate (Success vs Error)",
        "requests": [
          {"q": "sum:captcha.solve.success{*}.as_count()"},
          {"q": "sum:captcha.solve.error{*}.as_count()"}
        ]
      }
    },
    {
      "definition": {
        "type": "timeseries",
        "title": "Solve Latency (median, p95, max)",
        "requests": [
          {"q": "avg:captcha.solve.latency.median{*}"},
          {"q": "avg:captcha.solve.latency.95percentile{*}"},
          {"q": "max:captcha.solve.latency.max{*}"}
        ]
      }
    },
    {
      "definition": {
        "type": "query_value",
        "title": "API Balance",
        "requests": [{"q": "avg:captcha.balance{*}"}]
      }
    },
    {
      "definition": {
        "type": "timeseries",
        "title": "Queue Depth",
        "requests": [{"q": "avg:captcha.queue.depth{*}"}]
      }
    }
  ]
}
警报定义
| 告警 | 条件 | 严重性 |
|---|---|---|
| 余额低 | captcha.balance < 10 | 警告 |
| 余额严重不足 | captcha.balance < 2 | 严重 |
| 错误率高 | 5 分钟内错误率 > 10% | 警告 |
| 延迟峰值 | p95 延迟 > 120 秒(持续 10 分钟) | 警告 |
| 队列积压 | 队列深度 > 100 且持续增长 5 分钟 | 警告 |
| 工作进程宕机 | captcha.worker.active == 0 | 严重 |
# Datadog monitor definition (API create)
# Fires when the 5-minute average of captcha.balance drops below 10.
- type: metric alert
  name: "CaptchaAI Low Balance"
  query: "avg(last_5m):avg:captcha.balance{*} < 10"
  message: "CaptchaAI balance is low: {{value}}. Top up to avoid solve failures."
  tags:
    - team:scraping
    - service:captcha
故障排除
| 问题 | 原因 | 处理方式 |
|---|---|---|
| 指标未出现 | DogStatsD 代理未运行 | 核实 DD_AGENT_HOST;用 docker ps 检查代理容器是否在运行 |
| 延迟直方图为空 | 没有记录到成功的求解 | 检查成功路径上是否调用了 statsd.histogram() |
| 标签缺失 | 标签格式错误 | 使用 key:value 格式;标签中不要包含空格 |
| 重复指标 | 多个报告器同时运行 | 确保每个部署只运行一个余额报告器 |
常问问题
我需要为每个工作进程配备一个 Datadog 代理吗?
每台主机运行一个 DogStatsD 代理。该主机上的所有工作进程都会将指标发送到本地代理,再由代理转发到 Datadog 的接收端。
Datadog 自定义指标的费用是多少?
Datadog 按自定义指标时间序列收费。上述 7 个指标以及一些标签组合通常保持在免费套餐限制内。检查 Datadog 的定价以了解确切的成本。
我可以使用 Datadog APM 跟踪而不是自定义指标吗?
是的。用 ddtrace 包装您的求解函数以获得自动跟踪。不过,自定义指标使您可以更好地控制聚合和警报。
相关文章
下一步
让您的 CAPTCHA 管道具备可观测性——先获取 CaptchaAI API 密钥,然后接入 Datadog。
相关指南: