DevOps 与扩展

谷歌云功能 + CaptchaAI 集成

Google Cloud Functions 提供无服务器验证码解决方案,具有自动扩展、按使用付费计费和紧密的 GCP 生态系统集成功能。


HTTP 触发函数

# main.py
import json
import time
import urllib.request
import urllib.parse
import functions_framework


@functions_framework.http
def solve_captcha(request):
    """HTTP Cloud Function for CAPTCHA solving."""
    # Parse request
    request_json = request.get_json(silent=True)
    if not request_json:
        return json.dumps({"error": "JSON body required"}), 400

    method = request_json.get("method", "userrecaptcha")
    params = request_json.get("params", {})

    # Get API key from Secret Manager
    api_key = _get_secret("captchaai-key")

    try:
        token = _solve(api_key, method, params)
        return json.dumps({"token": token})
    except Exception as e:
        return json.dumps({"error": str(e)}), 500


def _get_secret(secret_id):
    """Get secret from GCP Secret Manager."""
    from google.cloud import secretmanager
    client = secretmanager.SecretManagerServiceClient()
    name = f"projects/{_get_project_id()}/secrets/{secret_id}/versions/latest"
    response = client.access_secret_version(request={"name": name})
    return response.payload.data.decode("UTF-8")


def _get_project_id():
    """Get current GCP project ID."""
    import urllib.request
    req = urllib.request.Request(
        "http://metadata.google.internal/computeMetadata/v1/project/project-id",
        headers={"Metadata-Flavor": "Google"},
    )
    with urllib.request.urlopen(req) as resp:
        return resp.read().decode()


def _solve(api_key, method, params, timeout=90):
    """Solve CAPTCHA via CaptchaAI API."""
    # Submit
    submit_data = urllib.parse.urlencode({
        "key": api_key,
        "method": method,
        "json": 1,
        **params,
    }).encode()

    req = urllib.request.Request(
        "https://ocr.captchaai.com/in.php",
        data=submit_data,
    )
    with urllib.request.urlopen(req, timeout=30) as resp:
        result = json.loads(resp.read())

    if result.get("status") != 1:
        raise RuntimeError(f"Submit error: {result.get('request')}")

    task_id = result["request"]

    # Poll
    start = time.time()
    while time.time() - start < timeout:
        time.sleep(5)
        poll_url = (
            f"https://ocr.captchaai.com/res.php"
            f"?key={api_key}&action=get&id={task_id}&json=1"
        )
        with urllib.request.urlopen(poll_url, timeout=15) as resp:
            data = json.loads(resp.read())

        if data["request"] != "CAPCHA_NOT_READY":
            if data.get("status") == 1:
                return data["request"]
            raise RuntimeError(f"Solve error: {data['request']}")

    raise TimeoutError("Solve timeout")

要求

# requirements.txt
functions-framework==3.*
google-cloud-secret-manager==2.*

部署

# Create secret
echo -n "YOUR_API_KEY" | gcloud secrets create captchaai-key --data-file=-

# Deploy function
gcloud functions deploy solve-captcha \
  --gen2 \
  --runtime=python311 \
  --region=us-central1 \
  --source=. \
  --entry-point=solve_captcha \
  --trigger-http \
  --allow-unauthenticated \
  --timeout=120s \
  --memory=256MB \
  --max-instances=100

# Test
curl -X POST https://us-central1-PROJECT.cloudfunctions.net/solve-captcha \
  -H "Content-Type: application/json" \
  -d '{
    "method": "userrecaptcha",
    "params": {
      "googlekey": "SITE_KEY",
      "pageurl": "https://example.com"
    }
  }'

Pub/Sub-Triggered 批量处理

处理来自 Pub/Sub 主题的 CAPTCHA 任务:

import base64
import json
import functions_framework
from google.cloud import pubsub_v1


@functions_framework.cloud_event
def process_captcha_task(cloud_event):
    """Process CAPTCHA task from Pub/Sub message."""
    data = base64.b64decode(cloud_event.data["message"]["data"])
    task = json.loads(data)

    api_key = _get_secret("captchaai-key")

    try:
        token = _solve(api_key, task["method"], task["params"])
        # Publish result
        publisher = pubsub_v1.PublisherClient()
        topic = f"projects/{_get_project_id()}/topics/captcha-results"
        publisher.publish(topic, json.dumps({
            "task_id": task["id"],
            "status": "success",
            "token": token,
        }).encode())

    except Exception as e:
        print(f"Task {task.get('id')} failed: {e}")

为 Pub/Sub 部署:

gcloud functions deploy process-captcha-task \
  --gen2 \
  --runtime=python311 \
  --trigger-topic=captcha-tasks \
  --timeout=120s \
  --memory=256MB

向 Pub/Sub 提交任务

from google.cloud import pubsub_v1
import json

publisher = pubsub_v1.PublisherClient()
topic = "projects/YOUR_PROJECT/topics/captcha-tasks"

# Submit batch
urls = ["https://site1.com", "https://site2.com", "https://site3.com"]
for i, url in enumerate(urls):
    task = {
        "id": f"task-{i}",
        "method": "userrecaptcha",
        "params": {"googlekey": "SITE_KEY", "pageurl": url},
    }
    publisher.publish(topic, json.dumps(task).encode())
    print(f"Published task-{i}")

成本比较

因素 云功能 永远在线的虚拟机
100 解决/day ~$0.01/day ~$1.00/day
1,000 解决/day ~$0.10/day ~$1.00/day
10,000 解决/day ~$1.00/day ~$1.00/day
闲置成本 $0 全部虚拟机成本
冷启动 〜300毫秒 没有任何

故障排除

问题 原因 处理方式
函数超时 超时时间太短 设置 --timeout=120s
因秘密而拒绝许可 缺少 IAM 角色 授予secretmanager.secretAccessor
冷启动延迟高 依赖性大 使用 urllib 代替 requests
Pub/Sub 消息重试 函数返回错误 对于不可重试的错误返回成功

常问问题

Gen1 还是 Gen2 云功能?

使用第二代。它支持更长的超时(最长 60 分钟)、更多的内存和并发性——所有这些对于验证码解决都很有用。

我如何处理身份验证?

对于内部使用,需要使用 --no-allow-unauthenticated 进行身份验证。对于外部 API,请使用 API Gateway,并将 API 密钥放在函数前面。

我可以让函数保持温暖吗?

使用 Cloud Scheduler 每 5 分钟 ping 一次该功能。或者设置 --min-instances=1 以保持一个实例温暖(成本约为 7/month 美元)。


相关指南


GCP 上的无服务器 –获取您的 CaptchaAI 密钥今天。

该文章已禁用评论。