fix: feed provider health into circuit breaker
This commit is contained in:
@@ -25,6 +25,7 @@ PG_POOL_MAX=10
|
||||
TASK_PROVIDER_POLL_MAX_CONCURRENCY=8
|
||||
TASK_PROVIDER_POLL_SLOT_TTL_MS=30000
|
||||
TASK_PROVIDER_POLL_REQUEST_TIMEOUT_MS=25000
|
||||
GRSAI_IMAGE_SUBMIT_TIMEOUT_MS=30000
|
||||
|
||||
# CORS (comma separated allowed origins, * for all)
|
||||
CORS_ORIGINS=*
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
|
||||
const { pool } = require("./db");
|
||||
const { recordProviderSuccess, recordProviderFailure, getAllBreakerStats } = require("./providerCircuitBreaker");
|
||||
|
||||
const CHECK_INTERVAL_MS = 5 * 60 * 1000;
|
||||
const DASHSCOPE_TEST_MODEL = "qwen-max";
|
||||
@@ -21,6 +22,15 @@ const providerHealthCache = {
|
||||
grsai: { status: "unknown", lastCheck: null, lastError: null, details: null },
|
||||
};
|
||||
|
||||
function recordProbeOutcome(provider, result, latencyMs) {
|
||||
if (!provider) return;
|
||||
if (result?.ok) {
|
||||
recordProviderSuccess(provider, latencyMs);
|
||||
} else {
|
||||
recordProviderFailure(provider);
|
||||
}
|
||||
}
|
||||
|
||||
async function getDashScopeKey() {
|
||||
const { rows } = await pool.query(
|
||||
"SELECT id, api_key FROM api_keys WHERE provider LIKE '%dashscope%' AND enabled = 1 ORDER BY id LIMIT 1"
|
||||
@@ -120,8 +130,10 @@ async function runHealthCheck() {
|
||||
// ── DashScope ──
|
||||
const dashKey = await getDashScopeKey();
|
||||
if (dashKey) {
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
const result = await probeDashScope(dashKey);
|
||||
recordProbeOutcome("dashscope", result, Date.now() - startedAt);
|
||||
const prev = providerHealthCache.dashscope.status;
|
||||
providerHealthCache.dashscope = {
|
||||
status: result.status,
|
||||
@@ -144,6 +156,7 @@ async function runHealthCheck() {
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
recordProviderFailure("dashscope");
|
||||
providerHealthCache.dashscope = {
|
||||
status: "timeout",
|
||||
lastCheck: new Date().toISOString(),
|
||||
@@ -164,8 +177,10 @@ async function runHealthCheck() {
|
||||
// ── GrsAI ──
|
||||
const grsaiKey = await getGrsaiKey();
|
||||
if (grsaiKey) {
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
const result = await probeGrsai(grsaiKey);
|
||||
recordProbeOutcome("grsai", result, Date.now() - startedAt);
|
||||
const prev = providerHealthCache.grsai.status;
|
||||
providerHealthCache.grsai = {
|
||||
status: result.status,
|
||||
@@ -186,6 +201,7 @@ async function runHealthCheck() {
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
recordProviderFailure("grsai");
|
||||
providerHealthCache.grsai = {
|
||||
status: "timeout",
|
||||
lastCheck: new Date().toISOString(),
|
||||
@@ -204,10 +220,7 @@ async function runHealthCheck() {
|
||||
}
|
||||
|
||||
// ── Circuit breaker summary ──
|
||||
try {
|
||||
const cb = require("./providerCircuitBreaker");
|
||||
providerHealthCache.circuitBreaker = cb.getProviderStatusMap ? cb.getProviderStatusMap() : null;
|
||||
} catch {}
|
||||
providerHealthCache.circuitBreaker = getAllBreakerStats();
|
||||
|
||||
// ── Admin low-balance alert ──
|
||||
try {
|
||||
|
||||
+8
-2
@@ -4,7 +4,7 @@ const crypto = require("node:crypto");
|
||||
const { requireAuth, keyManager, preauthorizeCall, pool, withTransaction, deductImageGenerationCredits } = require("./context");
|
||||
const { putObject, isOssConfigured } = require("../ossClient");
|
||||
const { buildImageProviderDebug, resolveImageProviderCandidates, resolveVideoProvider, resolveTextProvider, getPostUrl } = require("../aiProviderRouter");
|
||||
const { shouldSkipProvider, recordProviderSuccess, recordProviderFailure } = require("../providerCircuitBreaker");
|
||||
const { shouldSkipProvider, recordProviderSuccess, recordProviderFailure, getAdaptiveTimeout } = require("../providerCircuitBreaker");
|
||||
const {
|
||||
isEnterpriseVideoBillingUser,
|
||||
markEnterpriseVideoCreditsAccepted,
|
||||
@@ -60,6 +60,7 @@ function toViapiAccessibleUrl(url) {
|
||||
const SUPER_RESOLVE_POLL_INTERVAL_MS = 3000;
|
||||
const SUPER_RESOLVE_MAX_POLL_ATTEMPTS = 200;
|
||||
const IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS = 90_000;
|
||||
const GRSAI_IMAGE_SUBMIT_TIMEOUT_MS = Number(process.env.GRSAI_IMAGE_SUBMIT_TIMEOUT_MS || 30_000);
|
||||
const GEMINI_IMAGE_SUBMIT_TIMEOUT_MS = 180_000;
|
||||
const DASHSCOPE_VIDEO_STYLE_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis";
|
||||
const DASHSCOPE_IMAGE_EDIT_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2image/image-synthesis";
|
||||
@@ -2035,7 +2036,12 @@ async function submitImageToProvider(taskDbId, providerConfig, slotResult, param
|
||||
const { headers, body } = buildImageRequest(providerConfig, params, slotResult.apiKey);
|
||||
|
||||
await updateTaskInDb(taskDbId, { status: "running", progress: 10 });
|
||||
const submitTimeout = providerConfig.transport === "gemini-image" ? GEMINI_IMAGE_SUBMIT_TIMEOUT_MS : IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS;
|
||||
const defaultSubmitTimeout = providerConfig.transport === "gemini-image"
|
||||
? GEMINI_IMAGE_SUBMIT_TIMEOUT_MS
|
||||
: providerConfig.transport === "grsai-image"
|
||||
? GRSAI_IMAGE_SUBMIT_TIMEOUT_MS
|
||||
: IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS;
|
||||
const submitTimeout = getAdaptiveTimeout(providerConfig.provider, defaultSubmitTimeout);
|
||||
const response = await fetchWithTimeout(url, { method: "POST", headers, body: JSON.stringify(body) }, submitTimeout);
|
||||
if (!response.ok) {
|
||||
const errText = await response.text().catch(() => "provider error");
|
||||
|
||||
Reference in New Issue
Block a user