WHOIS 查询门户通过 reCAPTCHA v2、图像验证码和速率限制来保护域名注册数据。无论您是检查域名可用性、验证所有权还是监控到期日期,通常在几次查询之后就会出现验证码。下文介绍如何处理这些验证码。
WHOIS 门户上的验证码模式
| 门户类型 | 验证码 | 触发阈值 |
|---|---|---|
| ICANN WHOIS | reCAPTCHA v2 | 每个会话 3-5 个查询 |
| 注册商查找页面 | reCAPTCHA v2/v3 | 每分钟 5-10 次查询 |
| 区域互联网注册管理机构 RIR(APNIC、RIPE) | 图片验证码 | 10-20 次查询 |
| 域名拍卖 WHOIS | Cloudflare Turnstile | 快速域名检查 |
| 批量 WHOIS 工具 | 自定义验证码 | 免费套餐限制后 |
带验证码求解的 WHOIS 查询 (Python)
import requests
import time
import re
class WhoisLookup:
    """Query web-based WHOIS portals, solving reCAPTCHA v2 challenges on demand.

    A single ``requests.Session`` is reused for every portal request so that
    cookies set by the portal persist between the initial GET and the
    follow-up POST that carries the CAPTCHA token.
    """

    # CaptchaAI endpoints: submit a task, then poll for its result.
    _SUBMIT_URL = "https://ocr.captchaai.com/in.php"
    _RESULT_URL = "https://ocr.captchaai.com/res.php"

    # Polling budget for a solve: 60 attempts, 3 seconds apart.
    _POLL_ATTEMPTS = 60
    _POLL_INTERVAL = 3

    # Accept both double- and single-quoted attribute values.
    _SITE_KEY_RE = re.compile(r"""data-sitekey=["']([^"']+)["']""")

    # "[ \t]*(\S.*)" keeps the value on the label's own line: with "\s*" an
    # empty field would swallow the newline and capture the *next* line.
    _FIELD_PATTERNS = {
        field: re.compile(pattern, re.IGNORECASE)
        for field, pattern in {
            "registrar": r"Registrar:[ \t]*(\S.*)",
            "created": r"Creat(?:ed|ion) Date:[ \t]*(\S.*)",
            "expires": r"(?:Expir(?:y|ation)|Registry Expiry) Date:[ \t]*(\S.*)",
            "updated": r"Updated Date:[ \t]*(\S.*)",
            "status": r"(?:Domain )?Status:[ \t]*(\S.*)",
            "nameservers": r"Name Server:[ \t]*(\S.*)",
            "registrant": r"Registrant (?:Name|Organization):[ \t]*(\S.*)",
        }.items()
    }

    def __init__(self, api_key, timeout=30):
        """Create a lookup client.

        Args:
            api_key: CaptchaAI API key sent with every solve request.
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.api_key = api_key
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        })

    def lookup(self, domain, whois_url):
        """Look up WHOIS data for a domain, solving CAPTCHAs as needed.

        Args:
            domain: Domain name to query.
            whois_url: Portal endpoint that accepts a ``domain`` parameter.

        Returns:
            A dict of parsed WHOIS fields, or ``{"raw": ...}`` when no WHOIS
            block could be located in the response.

        Raises:
            ValueError: A reCAPTCHA was detected but no site key was found.
            RuntimeError: The solving service reported an error.
            TimeoutError: The CAPTCHA was not solved within the polling budget.
        """
        response = self.session.get(
            whois_url, params={"domain": domain}, timeout=self.timeout
        )

        if self._has_recaptcha(response.text):
            site_key = self._extract_site_key(response.text)
            token = self._solve_recaptcha(site_key, whois_url)
            response = self.session.post(
                whois_url,
                data={"domain": domain, "g-recaptcha-response": token},
                timeout=self.timeout,
            )

        return self._parse_whois(response.text)

    def bulk_lookup(self, domains, whois_url, delay=3):
        """Look up WHOIS for multiple domains.

        Failures are recorded per domain as ``{"error": message}`` so one bad
        lookup does not abort the batch.

        Args:
            domains: Iterable of domain names.
            whois_url: Portal endpoint passed to :meth:`lookup`.
            delay: Seconds to pause between consecutive lookups.

        Returns:
            Dict mapping each domain to its parsed data or error record.
        """
        results = {}
        for index, domain in enumerate(domains):
            # Pause only *between* lookups; sleeping after the last one
            # would just delay the caller.
            if index:
                time.sleep(delay)
            try:
                results[domain] = self.lookup(domain, whois_url)
            except Exception as exc:  # batch boundary: record and continue
                results[domain] = {"error": str(exc)}
        return results

    def check_availability(self, domains, whois_url):
        """Check which domains are available for registration.

        Returns:
            Dict with ``available`` and ``taken`` lists, plus ``errors`` for
            domains whose lookup failed and whose state is therefore unknown.
        """
        results = self.bulk_lookup(domains, whois_url)
        return self._classify_availability(results)

    @staticmethod
    def _classify_availability(results):
        """Sort lookup results into available / taken / errors buckets."""
        buckets = {"available": [], "taken": [], "errors": []}
        for domain, data in results.items():
            if "error" in data:
                # A failed lookup proves nothing about availability.
                buckets["errors"].append(domain)
                continue
            status = data.get("status")
            if isinstance(status, str) and status.strip().lower() == "available":
                buckets["available"].append(domain)
            else:
                buckets["taken"].append(domain)
        return buckets

    def _has_recaptcha(self, html):
        """Return True when the page markup mentions reCAPTCHA."""
        return "recaptcha" in html.lower()

    def _extract_site_key(self, html):
        """Return the reCAPTCHA site key from a ``data-sitekey`` attribute.

        Raises:
            ValueError: No ``data-sitekey`` attribute is present.
        """
        match = self._SITE_KEY_RE.search(html)
        if match:
            return match.group(1)
        raise ValueError("reCAPTCHA site key not found")

    def _solve_recaptcha(self, site_key, page_url):
        """Submit a reCAPTCHA v2 task to CaptchaAI and poll for the token.

        Raises:
            RuntimeError: The service rejected the task or reported an error
                while solving.
            TimeoutError: No token arrived within the polling budget.
        """
        submitted = requests.post(
            self._SUBMIT_URL,
            data={
                "key": self.api_key,
                "method": "userrecaptcha",
                "googlekey": site_key,
                "pageurl": page_url,
                "json": 1,
            },
            timeout=self.timeout,
        ).json()

        # On failure "request" holds an error code, not a task id; polling
        # with it would only burn the whole polling budget.
        if submitted.get("status") != 1:
            raise RuntimeError(
                f"CaptchaAI rejected the task: {submitted.get('request')}"
            )
        task_id = submitted["request"]

        for _ in range(self._POLL_ATTEMPTS):
            time.sleep(self._POLL_INTERVAL)
            data = requests.get(
                self._RESULT_URL,
                params={
                    "key": self.api_key,
                    "action": "get",
                    "id": task_id,
                    "json": 1,
                },
                timeout=self.timeout,
            ).json()

            if data.get("status") == 1:
                return data["request"]

            # Any answer other than an error code is treated as "not ready".
            answer = str(data.get("request", ""))
            if answer.startswith("ERROR"):
                raise RuntimeError(f"CaptchaAI failed to solve the task: {answer}")

        raise TimeoutError("reCAPTCHA solve timed out")

    def _parse_whois(self, html):
        """Extract WHOIS fields from a portal's HTML response.

        Returns:
            Parsed fields when a WHOIS container is found, otherwise
            ``{"raw": text}`` with the first 2000 characters of page text.
        """
        # Imported lazily so the class can be used without bs4 installed
        # until HTML parsing is actually needed.
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, "html.parser")

        # Look for WHOIS data in pre-formatted blocks or tables
        raw_whois = soup.select_one("pre, .whois-data, #whois-result")
        if raw_whois:
            return self._extract_fields(raw_whois.get_text())

        return {"raw": soup.get_text()[:2000]}

    def _extract_fields(self, text):
        """Pull known WHOIS fields out of plain text.

        Returns:
            Dict of field name to value. A field that occurs once maps to a
            string; a field that occurs several times maps to a list of
            strings. Values are stripped of surrounding whitespace.
        """
        fields = {}
        for field, pattern in self._FIELD_PATTERNS.items():
            # Strip every value: WHOIS output often uses CRLF line endings.
            values = [value.strip() for value in pattern.findall(text)]
            if values:
                fields[field] = values if len(values) > 1 else values[0]
        return fields
# Usage: every call below goes to the same example portal endpoint.
LOOKUP_URL = "https://whois.example.com/lookup"
whois = WhoisLookup("YOUR_API_KEY")

# Single lookup: print two of the parsed fields (None when absent).
result = whois.lookup("example.com", LOOKUP_URL)
print(f"Registrar: {result.get('registrar')}")
print(f"Expires: {result.get('expires')}")

# Bulk availability check over a few candidate names.
domains = [
    "startup-name.com",
    "my-project.io",
    "cool-app.dev",
]
availability = whois.check_availability(domains, LOOKUP_URL)
print(f"Available: {availability['available']}")
域名监控 (JavaScript)
/**
 * Tracks a watch list of domains and reports those close to expiry,
 * solving reCAPTCHA v2 challenges through CaptchaAI when a portal shows one.
 */
class DomainMonitor {
  /**
   * @param {string} apiKey CaptchaAI API key sent with every solve request.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
    this.watchList = new Map();
  }

  /**
   * Add (or replace) a domain on the watch list.
   * @param {string} domain Domain name to monitor.
   * @param {string} whoisUrl Portal endpoint accepting a `domain` parameter.
   */
  addDomain(domain, whoisUrl) {
    this.watchList.set(domain, { url: whoisUrl, history: [] });
  }

  /**
   * Look up every watched domain and collect those expiring within 30 days.
   * Each lookup is appended to the domain's history; failures are logged
   * and do not stop the remaining checks.
   * @returns {Promise<Array<{domain: string, daysLeft: number, expires: string}>>}
   */
  async checkExpirations() {
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const expiring = [];

    for (const [domain, config] of this.watchList) {
      try {
        const data = await this.lookup(domain, config.url);
        config.history.push({ ...data, checkedAt: new Date().toISOString() });

        if (!data.expires) continue;

        const expiresAt = new Date(data.expires);
        if (Number.isNaN(expiresAt.getTime())) {
          // An unparseable date would make daysLeft NaN and the domain
          // would be skipped silently; surface it instead.
          console.error(`Unparseable expiry date for ${domain}: ${data.expires}`);
          continue;
        }

        const daysLeft = Math.ceil((expiresAt - new Date()) / MS_PER_DAY);
        if (daysLeft <= 30) {
          expiring.push({ domain, daysLeft, expires: data.expires });
        }
      } catch (error) {
        console.error(`Failed to check ${domain}: ${error.message}`);
      }
    }

    return expiring;
  }

  /**
   * Fetch and parse WHOIS data for one domain, solving a reCAPTCHA if the
   * portal responds with one.
   * @param {string} domain
   * @param {string} whoisUrl
   * @returns {Promise<object>} Parsed WHOIS fields (see parseWhois).
   */
  async lookup(domain, whoisUrl) {
    // Encode the domain and respect a query string already present in the URL.
    const separator = whoisUrl.includes('?') ? '&' : '?';
    const response = await fetch(
      `${whoisUrl}${separator}domain=${encodeURIComponent(domain)}`
    );
    const html = await response.text();

    if (html.includes('g-recaptcha')) {
      return this.solveAndLookup(domain, whoisUrl, html);
    }

    return this.parseWhois(html);
  }

  /**
   * Solve the reCAPTCHA found in `html` via CaptchaAI, then resubmit the
   * lookup with the resulting token.
   * @throws {Error} When no site key is found, the service reports an
   *   error, or no token arrives within 60 polls spaced 3 seconds apart.
   */
  async solveAndLookup(domain, whoisUrl, html) {
    const match = html.match(/data-sitekey="([^"]+)"/);
    if (!match) throw new Error('No reCAPTCHA site key found');

    const submitResp = await fetch('https://ocr.captchaai.com/in.php', {
      method: 'POST',
      body: new URLSearchParams({
        key: this.apiKey,
        method: 'userrecaptcha',
        googlekey: match[1],
        pageurl: whoisUrl,
        json: '1'
      })
    });
    const submitted = await submitResp.json();

    // On failure `request` holds an error code, not a task id; polling with
    // it would only burn the whole polling budget.
    if (submitted.status !== 1) {
      throw new Error(`CaptchaAI rejected the task: ${submitted.request}`);
    }
    const taskId = submitted.request;

    const pollQuery = new URLSearchParams({
      key: this.apiKey,
      action: 'get',
      id: taskId,
      json: '1'
    });

    for (let i = 0; i < 60; i++) {
      await new Promise(r => setTimeout(r, 3000));

      const result = await fetch(`https://ocr.captchaai.com/res.php?${pollQuery}`);
      const data = await result.json();

      if (data.status === 1) {
        const response = await fetch(whoisUrl, {
          method: 'POST',
          body: new URLSearchParams({
            domain,
            'g-recaptcha-response': data.request
          })
        });
        return this.parseWhois(await response.text());
      }

      // Any answer other than an error code is treated as "not ready".
      if (String(data.request).startsWith('ERROR')) {
        throw new Error(`CaptchaAI failed to solve the task: ${data.request}`);
      }
    }

    throw new Error('reCAPTCHA solve timed out');
  }

  /**
   * Extract the first occurrence of each known WHOIS field from raw HTML/text.
   * @param {string} html
   * @returns {{registrar: ?string, created: ?string, expires: ?string, status: ?string}}
   *   Each value is the trimmed field text, or null when not found.
   */
  parseWhois(html) {
    const extract = (pattern) => {
      const match = html.match(pattern);
      return match ? match[1].trim() : null;
    };

    // "[ \t]*([^\s<][^\n<]*)" keeps the value on the label's own line: with
    // "\s*" an empty field would capture the following line instead.
    return {
      registrar: extract(/Registrar:[ \t]*([^\s<][^\n<]*)/i),
      created: extract(/Creat(?:ed|ion) Date:[ \t]*([^\s<][^\n<]*)/i),
      expires: extract(/(?:Expir(?:y|ation)|Registry Expiry) Date:[ \t]*([^\s<][^\n<]*)/i),
      status: extract(/(?:Domain )?Status:[ \t]*([^\s<][^\n<]*)/i)
    };
  }
}
// Usage: register the watched domains against one portal, then report
// every domain that checkExpirations() flags.
const WHOIS_URL = 'https://whois.example.com/lookup';
const monitor = new DomainMonitor('YOUR_API_KEY');

for (const name of ['example.com', 'mysite.io']) {
  monitor.addDomain(name, WHOIS_URL);
}

const expiring = await monitor.checkExpirations();
for (const d of expiring) {
  console.log(`${d.domain} expires in ${d.daysLeft} days`);
}
WHOIS 查询优化
| 策略 | 好处 |
|---|---|
| 在本地缓存结果 | 避免重复查找同一域 |
| 使用 3 至 5 秒的延迟 | 降低验证码触发率 |
| 在 WHOIS 门户之间轮换 | 跨提供商分配负载 |
| 会话保持 | 维持验证码清除状态 |
故障排除
| 问题 | 原因 | 处理方式 |
|---|---|---|
| 3 次查询后的验证码 | 门户速率限制 | 增加延迟,使用代理 |
| WHOIS 返回“无匹配结果” | 隐私保护/RDAP 数据脱敏 | 尝试其他 WHOIS 门户 |
| reCAPTCHA 令牌被拒绝 | 提交前令牌已过期 | 在 2 分钟内完成求解并提交令牌 |
| IP 被封锁 | 超出每日查询限制 | 轮换使用自有服务器基础设施 |
常见问题
我每天可以自动执行多少次 WHOIS 查询?
大多数基于 Web 的 WHOIS 门户在触发严格的速率限制之前,允许每个 IP 每天进行 50-200 次查询。借助代理轮换并由 CaptchaAI 处理验证码,您可以扩展到数千次查询。
我应该使用 WHOIS 协议(端口 43)而不是门户网站吗?
端口 43 WHOIS 没有验证码,但有严格的速率限制,并且由于 GDPR 数据脱敏,返回的数据有限。门户网站通常会在通过验证码后显示更多数据。
我可以自动监控域名到期日期吗?
是的。您可以安排每日或每周对所关注的域名进行 WHOIS 查询,CaptchaAI 会处理检查期间出现的任何验证码。
相关文章
- 如何使用Api解决Recaptcha V2回调
- Recaptcha V2 Turnstile同一站点处理
- Recaptcha V2回调机制
下一步
自动化域名查询——获取您的 CaptchaAI API 密钥,开始处理 WHOIS 门户验证码。