fix: feed provider health into circuit breaker
This commit is contained in:
@@ -25,6 +25,7 @@ PG_POOL_MAX=10
|
|||||||
TASK_PROVIDER_POLL_MAX_CONCURRENCY=8
|
TASK_PROVIDER_POLL_MAX_CONCURRENCY=8
|
||||||
TASK_PROVIDER_POLL_SLOT_TTL_MS=30000
|
TASK_PROVIDER_POLL_SLOT_TTL_MS=30000
|
||||||
TASK_PROVIDER_POLL_REQUEST_TIMEOUT_MS=25000
|
TASK_PROVIDER_POLL_REQUEST_TIMEOUT_MS=25000
|
||||||
|
GRSAI_IMAGE_SUBMIT_TIMEOUT_MS=30000
|
||||||
|
|
||||||
# CORS (comma separated allowed origins, * for all)
|
# CORS (comma separated allowed origins, * for all)
|
||||||
CORS_ORIGINS=*
|
CORS_ORIGINS=*
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
const { pool } = require("./db");
|
const { pool } = require("./db");
|
||||||
|
const { recordProviderSuccess, recordProviderFailure, getAllBreakerStats } = require("./providerCircuitBreaker");
|
||||||
|
|
||||||
const CHECK_INTERVAL_MS = 5 * 60 * 1000;
|
const CHECK_INTERVAL_MS = 5 * 60 * 1000;
|
||||||
const DASHSCOPE_TEST_MODEL = "qwen-max";
|
const DASHSCOPE_TEST_MODEL = "qwen-max";
|
||||||
@@ -21,6 +22,15 @@ const providerHealthCache = {
|
|||||||
grsai: { status: "unknown", lastCheck: null, lastError: null, details: null },
|
grsai: { status: "unknown", lastCheck: null, lastError: null, details: null },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
function recordProbeOutcome(provider, result, latencyMs) {
|
||||||
|
if (!provider) return;
|
||||||
|
if (result?.ok) {
|
||||||
|
recordProviderSuccess(provider, latencyMs);
|
||||||
|
} else {
|
||||||
|
recordProviderFailure(provider);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function getDashScopeKey() {
|
async function getDashScopeKey() {
|
||||||
const { rows } = await pool.query(
|
const { rows } = await pool.query(
|
||||||
"SELECT id, api_key FROM api_keys WHERE provider LIKE '%dashscope%' AND enabled = 1 ORDER BY id LIMIT 1"
|
"SELECT id, api_key FROM api_keys WHERE provider LIKE '%dashscope%' AND enabled = 1 ORDER BY id LIMIT 1"
|
||||||
@@ -120,8 +130,10 @@ async function runHealthCheck() {
|
|||||||
// ── DashScope ──
|
// ── DashScope ──
|
||||||
const dashKey = await getDashScopeKey();
|
const dashKey = await getDashScopeKey();
|
||||||
if (dashKey) {
|
if (dashKey) {
|
||||||
|
const startedAt = Date.now();
|
||||||
try {
|
try {
|
||||||
const result = await probeDashScope(dashKey);
|
const result = await probeDashScope(dashKey);
|
||||||
|
recordProbeOutcome("dashscope", result, Date.now() - startedAt);
|
||||||
const prev = providerHealthCache.dashscope.status;
|
const prev = providerHealthCache.dashscope.status;
|
||||||
providerHealthCache.dashscope = {
|
providerHealthCache.dashscope = {
|
||||||
status: result.status,
|
status: result.status,
|
||||||
@@ -144,6 +156,7 @@ async function runHealthCheck() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
recordProviderFailure("dashscope");
|
||||||
providerHealthCache.dashscope = {
|
providerHealthCache.dashscope = {
|
||||||
status: "timeout",
|
status: "timeout",
|
||||||
lastCheck: new Date().toISOString(),
|
lastCheck: new Date().toISOString(),
|
||||||
@@ -164,8 +177,10 @@ async function runHealthCheck() {
|
|||||||
// ── GrsAI ──
|
// ── GrsAI ──
|
||||||
const grsaiKey = await getGrsaiKey();
|
const grsaiKey = await getGrsaiKey();
|
||||||
if (grsaiKey) {
|
if (grsaiKey) {
|
||||||
|
const startedAt = Date.now();
|
||||||
try {
|
try {
|
||||||
const result = await probeGrsai(grsaiKey);
|
const result = await probeGrsai(grsaiKey);
|
||||||
|
recordProbeOutcome("grsai", result, Date.now() - startedAt);
|
||||||
const prev = providerHealthCache.grsai.status;
|
const prev = providerHealthCache.grsai.status;
|
||||||
providerHealthCache.grsai = {
|
providerHealthCache.grsai = {
|
||||||
status: result.status,
|
status: result.status,
|
||||||
@@ -186,6 +201,7 @@ async function runHealthCheck() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
recordProviderFailure("grsai");
|
||||||
providerHealthCache.grsai = {
|
providerHealthCache.grsai = {
|
||||||
status: "timeout",
|
status: "timeout",
|
||||||
lastCheck: new Date().toISOString(),
|
lastCheck: new Date().toISOString(),
|
||||||
@@ -204,10 +220,7 @@ async function runHealthCheck() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ── Circuit breaker summary ──
|
// ── Circuit breaker summary ──
|
||||||
try {
|
providerHealthCache.circuitBreaker = getAllBreakerStats();
|
||||||
const cb = require("./providerCircuitBreaker");
|
|
||||||
providerHealthCache.circuitBreaker = cb.getProviderStatusMap ? cb.getProviderStatusMap() : null;
|
|
||||||
} catch {}
|
|
||||||
|
|
||||||
// ── Admin low-balance alert ──
|
// ── Admin low-balance alert ──
|
||||||
try {
|
try {
|
||||||
@@ -256,4 +269,4 @@ module.exports = {
|
|||||||
stopProviderHealthMonitor,
|
stopProviderHealthMonitor,
|
||||||
getProviderHealthCache,
|
getProviderHealthCache,
|
||||||
runHealthCheck,
|
runHealthCheck,
|
||||||
};
|
};
|
||||||
|
|||||||
+8
-2
@@ -4,7 +4,7 @@ const crypto = require("node:crypto");
|
|||||||
const { requireAuth, keyManager, preauthorizeCall, pool, withTransaction, deductImageGenerationCredits } = require("./context");
|
const { requireAuth, keyManager, preauthorizeCall, pool, withTransaction, deductImageGenerationCredits } = require("./context");
|
||||||
const { putObject, isOssConfigured } = require("../ossClient");
|
const { putObject, isOssConfigured } = require("../ossClient");
|
||||||
const { buildImageProviderDebug, resolveImageProviderCandidates, resolveVideoProvider, resolveTextProvider, getPostUrl } = require("../aiProviderRouter");
|
const { buildImageProviderDebug, resolveImageProviderCandidates, resolveVideoProvider, resolveTextProvider, getPostUrl } = require("../aiProviderRouter");
|
||||||
const { shouldSkipProvider, recordProviderSuccess, recordProviderFailure } = require("../providerCircuitBreaker");
|
const { shouldSkipProvider, recordProviderSuccess, recordProviderFailure, getAdaptiveTimeout } = require("../providerCircuitBreaker");
|
||||||
const {
|
const {
|
||||||
isEnterpriseVideoBillingUser,
|
isEnterpriseVideoBillingUser,
|
||||||
markEnterpriseVideoCreditsAccepted,
|
markEnterpriseVideoCreditsAccepted,
|
||||||
@@ -60,6 +60,7 @@ function toViapiAccessibleUrl(url) {
|
|||||||
const SUPER_RESOLVE_POLL_INTERVAL_MS = 3000;
|
const SUPER_RESOLVE_POLL_INTERVAL_MS = 3000;
|
||||||
const SUPER_RESOLVE_MAX_POLL_ATTEMPTS = 200;
|
const SUPER_RESOLVE_MAX_POLL_ATTEMPTS = 200;
|
||||||
const IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS = 90_000;
|
const IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS = 90_000;
|
||||||
|
const GRSAI_IMAGE_SUBMIT_TIMEOUT_MS = Number(process.env.GRSAI_IMAGE_SUBMIT_TIMEOUT_MS || 30_000);
|
||||||
const GEMINI_IMAGE_SUBMIT_TIMEOUT_MS = 180_000;
|
const GEMINI_IMAGE_SUBMIT_TIMEOUT_MS = 180_000;
|
||||||
const DASHSCOPE_VIDEO_STYLE_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis";
|
const DASHSCOPE_VIDEO_STYLE_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis";
|
||||||
const DASHSCOPE_IMAGE_EDIT_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2image/image-synthesis";
|
const DASHSCOPE_IMAGE_EDIT_ENDPOINT = "https://dashscope.aliyuncs.com/api/v1/services/aigc/image2image/image-synthesis";
|
||||||
@@ -2035,7 +2036,12 @@ async function submitImageToProvider(taskDbId, providerConfig, slotResult, param
|
|||||||
const { headers, body } = buildImageRequest(providerConfig, params, slotResult.apiKey);
|
const { headers, body } = buildImageRequest(providerConfig, params, slotResult.apiKey);
|
||||||
|
|
||||||
await updateTaskInDb(taskDbId, { status: "running", progress: 10 });
|
await updateTaskInDb(taskDbId, { status: "running", progress: 10 });
|
||||||
const submitTimeout = providerConfig.transport === "gemini-image" ? GEMINI_IMAGE_SUBMIT_TIMEOUT_MS : IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS;
|
const defaultSubmitTimeout = providerConfig.transport === "gemini-image"
|
||||||
|
? GEMINI_IMAGE_SUBMIT_TIMEOUT_MS
|
||||||
|
: providerConfig.transport === "grsai-image"
|
||||||
|
? GRSAI_IMAGE_SUBMIT_TIMEOUT_MS
|
||||||
|
: IMAGE_PROVIDER_SUBMIT_TIMEOUT_MS;
|
||||||
|
const submitTimeout = getAdaptiveTimeout(providerConfig.provider, defaultSubmitTimeout);
|
||||||
const response = await fetchWithTimeout(url, { method: "POST", headers, body: JSON.stringify(body) }, submitTimeout);
|
const response = await fetchWithTimeout(url, { method: "POST", headers, body: JSON.stringify(body) }, submitTimeout);
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
const errText = await response.text().catch(() => "provider error");
|
const errText = await response.text().catch(() => "provider error");
|
||||||
|
|||||||
Reference in New Issue
Block a user