fix: feed provider health into circuit breaker

This commit is contained in:
2026-06-09 12:02:29 +08:00
parent f9da506017
commit ca84754bd2
3 changed files with 27 additions and 7 deletions
+1
View File
@@ -25,6 +25,7 @@ PG_POOL_MAX=10
TASK_PROVIDER_POLL_MAX_CONCURRENCY=8 TASK_PROVIDER_POLL_MAX_CONCURRENCY=8
TASK_PROVIDER_POLL_SLOT_TTL_MS=30000 TASK_PROVIDER_POLL_SLOT_TTL_MS=30000
TASK_PROVIDER_POLL_REQUEST_TIMEOUT_MS=25000 TASK_PROVIDER_POLL_REQUEST_TIMEOUT_MS=25000
GRSAI_IMAGE_SUBMIT_TIMEOUT_MS=30000
# CORS (comma separated allowed origins, * for all) # CORS (comma separated allowed origins, * for all)
CORS_ORIGINS=* CORS_ORIGINS=*
+18 -5
View File
@@ -7,6 +7,7 @@
*/ */
const { pool } = require("./db"); const { pool } = require("./db");
const { recordProviderSuccess, recordProviderFailure, getAllBreakerStats } = require("./providerCircuitBreaker");
const CHECK_INTERVAL_MS = 5 * 60 * 1000; const CHECK_INTERVAL_MS = 5 * 60 * 1000;
const DASHSCOPE_TEST_MODEL = "qwen-max"; const DASHSCOPE_TEST_MODEL = "qwen-max";
@@ -21,6 +22,15 @@ const providerHealthCache = {
grsai: { status: "unknown", lastCheck: null, lastError: null, details: null }, grsai: { status: "unknown", lastCheck: null, lastError: null, details: null },
}; };
function recordProbeOutcome(provider, result, latencyMs) {
if (!provider) return;
if (result?.ok) {
recordProviderSuccess(provider, latencyMs);
} else {
recordProviderFailure(provider);
}
}
async function getDashScopeKey() { async function getDashScopeKey() {
const { rows } = await pool.query( const { rows } = await pool.query(
"SELECT id, api_key FROM api_keys WHERE provider LIKE '%dashscope%' AND enabled = 1 ORDER BY id LIMIT 1" "SELECT id, api_key FROM api_keys WHERE provider LIKE '%dashscope%' AND enabled = 1 ORDER BY id LIMIT 1"
@@ -120,8 +130,10 @@ async function runHealthCheck() {
// ── DashScope ── // ── DashScope ──
const dashKey = await getDashScopeKey(); const dashKey = await getDashScopeKey();
if (dashKey) { if (dashKey) {
const startedAt = Date.now();
try { try {
const result = await probeDashScope(dashKey); const result = await probeDashScope(dashKey);
recordProbeOutcome("dashscope", result, Date.now() - startedAt);
const prev = providerHealthCache.dashscope.status; const prev = providerHealthCache.dashscope.status;
providerHealthCache.dashscope = { providerHealthCache.dashscope = {
status: result.status, status: result.status,
@@ -144,6 +156,7 @@ async function runHealthCheck() {
} }
} }
} catch (err) { } catch (err) {
recordProviderFailure("dashscope");
providerHealthCache.dashscope = { providerHealthCache.dashscope = {
status: "timeout", status: "timeout",
lastCheck: new Date().toISOString(), lastCheck: new Date().toISOString(),
@@ -164,8 +177,10 @@ async function runHealthCheck() {
// ── GrsAI ── // ── GrsAI ──
const grsaiKey = await getGrsaiKey(); const grsaiKey = await getGrsaiKey();
if (grsaiKey) { if (grsaiKey) {
const startedAt = Date.now();
try { try {
const result = await probeGrsai(grsaiKey); const result = await probeGrsai(grsaiKey);
recordProbeOutcome("grsai", result, Date.now() - startedAt);
const prev = providerHealthCache.grsai.status; const prev = providerHealthCache.grsai.status;
providerHealthCache.grsai = { providerHealthCache.grsai = {
status: result.status, status: result.status,
@@ -186,6 +201,7 @@ async function runHealthCheck() {
} }
} }
} catch (err) { } catch (err) {
recordProviderFailure("grsai");
providerHealthCache.grsai = { providerHealthCache.grsai = {
status: "timeout", status: "timeout",
lastCheck: new Date().toISOString(), lastCheck: new Date().toISOString(),
@@ -204,10 +220,7 @@ async function runHealthCheck() {
} }
// ── Circuit breaker summary ── // ── Circuit breaker summary ──
try { providerHealthCache.circuitBreaker = getAllBreakerStats();
const cb = require("./providerCircuitBreaker");
providerHealthCache.circuitBreaker = cb.getProviderStatusMap ? cb.getProviderStatusMap() : null;
} catch {}
// ── Admin low-balance alert ── // ── Admin low-balance alert ──
try { try {
@@ -256,4 +269,4 @@ module.exports = {
stopProviderHealthMonitor, stopProviderHealthMonitor,
getProviderHealthCache, getProviderHealthCache,
runHealthCheck, runHealthCheck,
}; };
+8 -2
View File
@@ -4,7 +4,7 @@ const crypto = require("node:crypto");
const { requireAuth, keyManager, preauthorizeCall, pool, withTransaction, deductImageGenerationCredits } = require("./context"); const { requireAuth, keyManager, preauthorizeCall, pool, withTransaction, deductImageGenerationCredits } = require("./context");
const { putObject, isOssConfigured } = require("../ossClient"); const { putObject, isOssConfigured } = require("../ossClient");
const { buildImageProviderDebug, resolveImageProviderCandidates, resolveVideoProvider, resolveTextProvider, getPostUrl } = require("../aiProviderRouter"); const { buildImageProviderDebug, resolveImageProviderCandidates, resolveVideoProvider, resolveTextProvider, getPostUrl } = require("../aiProviderRouter");
const { shouldSkipProvider, recordProviderSuccess, recordProviderFailure } = require("../providerCircuitBreaker"); const { shouldSkipProvider, recordProviderSuccess, recordProviderFailure, getAdaptiveTimeout } = require("../providerCircuitBreaker");
const { const {
isEnterpriseVideoBillingUser, isEnterpriseVideoBillingUser,
markEnterpriseVideoCreditsAccepted, markEnterpriseVideoCreditsAccepted,
@@ -60,6 +60,7 @@ function toViapiAccessibleUrl(url) {
const SUPER_RESOLVE_POLL_INTERVAL_MS = 3000; const SUPER_RESOLVE_POLL_INTERVAL_MS = 3000;
const SUPER_RESOLVE_MAX_POLL_ATTEMPTS = 200; const SUPER_RESOLVE_MAX_POLL_ATTEMPTS = 200;
const IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS = 90_000; const IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS = 90_000;
const GRSAI_IMAGE_SUBMIT_TIMEOUT_MS = Number(process.env.GRSAI_IMAGE_SUBMIT_TIMEOUT_MS || 30_000);
const GEMINI_IMAGE_SUBMIT_TIMEOUT_MS = 180_000; const GEMINI_IMAGE_SUBMIT_TIMEOUT_MS = 180_000;
const DASHSCOPE_VIDEO_STYLE_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis"; const DASHSCOPE_VIDEO_STYLE_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis";
const DASHSCOPE_IMAGE_EDIT_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2image/image-synthesis"; const DASHSCOPE_IMAGE_EDIT_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2image/image-synthesis";
@@ -2035,7 +2036,12 @@ async function submitImageToProvider(taskDbId, providerConfig, slotResult, param
const { headers, body } = buildImageRequest(providerConfig, params, slotResult.apiKey); const { headers, body } = buildImageRequest(providerConfig, params, slotResult.apiKey);
await updateTaskInDb(taskDbId, { status: "running", progress: 10 }); await updateTaskInDb(taskDbId, { status: "running", progress: 10 });
const submitTimeout = providerConfig.transport === "gemini-image" ? GEMINI_IMAGE_SUBMIT_TIMEOUT_MS : IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS; const defaultSubmitTimeout = providerConfig.transport === "gemini-image"
? GEMINI_IMAGE_SUBMIT_TIMEOUT_MS
: providerConfig.transport === "grsai-image"
? GRSAI_IMAGE_SUBMIT_TIMEOUT_MS
: IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS;
const submitTimeout = getAdaptiveTimeout(providerConfig.provider, defaultSubmitTimeout);
const response = await fetchWithTimeout(url, { method: "POST", headers, body: JSON.stringify(body) }, submitTimeout); const response = await fetchWithTimeout(url, { method: "POST", headers, body: JSON.stringify(body) }, submitTimeout);
if (!response.ok) { if (!response.ok) {
const errText = await response.text().catch(() => "provider error"); const errText = await response.text().catch(() => "provider error");