/** * Provider Health Monitor — periodic health checks for DashScope and other providers. * * - Every 5 minutes, probes DashScope with a lightweight text call * - If Arrearage or auth failure detected, logs error + inserts admin notification * - Tracks provider health status in-memory for the /api/admin/providers/status endpoint */ const { pool } = require("./db"); const { recordProviderSuccess, recordProviderFailure, getAllBreakerStats } = require("./providerCircuitBreaker"); const CHECK_INTERVAL_MS = 5 * 60 * 1000; const DASHSCOPE_TEST_MODEL = "qwen-max"; const DASHSCOPE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"; const LOW_BALANCE_THRESHOLD = 500; // cents — alert when balance below this let timerId = null; // In-memory health cache for the status endpoint const providerHealthCache = { dashscope: { status: "unknown", lastCheck: null, lastError: null, details: null }, grsai: { status: "unknown", lastCheck: null, lastError: null, details: null }, }; function recordProbeOutcome(provider, result, latencyMs) { if (!provider) return; if (result?.ok) { recordProviderSuccess(provider, latencyMs); } else { recordProviderFailure(provider); } } async function getDashScopeKey() { const { rows } = await pool.query( "SELECT id, api_key FROM api_keys WHERE provider LIKE '%dashscope%' AND enabled = 1 ORDER BY id LIMIT 1" ); if (!rows.length) return null; return rows[0].api_key; } async function getGrsaiKey() { const { rows } = await pool.query( "SELECT id, api_key FROM api_keys WHERE provider = 'grsai' AND enabled = 1 ORDER BY id LIMIT 1" ); if (!rows.length) return null; return rows[0].api_key; } async function probeDashScope(apiKey) { const body = { model: DASHSCOPE_TEST_MODEL, messages: [{ role: "user", content: "ping" }], stream: false, max_tokens: 4, enable_thinking: false, }; const res = await fetch(DASHSCOPE_URL, { method: "POST", headers: { "Content-Type": "application/json", Authorization: "Bearer " + apiKey }, body: JSON.stringify(body), signal: AbortSignal.timeout(30000), }); const text = await res.text(); if (res.status === 400 || res.status === 403) { let json = {}; try { json = JSON.parse(text); } catch {} const errorCode = json.error?.code || ""; if (errorCode === "Arrearage") { return { ok: false, status: "arrears", error: "DashScope 账户欠费,所有 qwen 模型不可用", code: errorCode }; } if (errorCode === "AccessDenied" || res.status === 403) { return { ok: false, status: "denied", error: "DashScope 访问被拒绝", code: errorCode }; } return { ok: false, status: "error", error: `DashScope 返回 HTTP ${res.status}: ${errorCode}`, code: errorCode }; } if (!res.ok) { return { ok: false, status: "error", error: `DashScope 返回 HTTP ${res.status}`, code: "http_error" }; } return { ok: true, status: "healthy", error: null }; } async function probeGrsai(apiKey) { // GrsAI uses the same OpenAI-compatible endpoint const GRSAI_BASE = "https://grsai.dakka.com.cn"; const url = `${GRSAI_BASE}/v1/chat/completions`; const body = { model: "gemini-3.1-pro", messages: [{ role: "user", content: "ping" }], stream: false, max_tokens: 4, }; const res = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json", Authorization: "Bearer " + apiKey }, body: JSON.stringify(body), signal: AbortSignal.timeout(30000), }); if (!res.ok) { const errText = await res.text().catch(() => ""); return { ok: false, status: "error", error: `GrsAI 返回 HTTP ${res.status}: ${errText.slice(0, 200)}` }; } return { ok: true, status: "healthy", error: null }; } async function notifyAdmin(title, description) { // Find admin users to notify const { rows: admins } = await pool.query( "SELECT id FROM users WHERE role = 'admin' AND enabled = 1" ); if (!admins.length) { console.error("[providerHealthMonitor] No admin users found for notification"); return; } for (const admin of admins) { await pool.query( `INSERT INTO web_notifications (user_id, type, title, description, metadata_json) VALUES ($1, 'provider_health', $2, $3, '{}')`, [admin.id, title, description] ); } } async function runHealthCheck() { // ── DashScope ── const dashKey = await getDashScopeKey(); if (dashKey) { const startedAt = Date.now(); try { const result = await probeDashScope(dashKey); recordProbeOutcome("dashscope", result, Date.now() - startedAt); const prev = providerHealthCache.dashscope.status; providerHealthCache.dashscope = { status: result.status, lastCheck: new Date().toISOString(), lastError: result.error, details: result, }; if (!result.ok) { console.error(`[providerHealthMonitor] DashScope unhealthy: ${result.error}`); // Only notify on state change (healthy → unhealthy) if (prev === "healthy" || prev === "unknown") { await notifyAdmin("DashScope 服务异常", result.error); } } else { // Recovery notification if (prev !== "healthy" && prev !== "unknown") { console.log("[providerHealthMonitor] DashScope recovered"); await notifyAdmin("DashScope 服务恢复正常", "DashScope 已恢复正常可用状态"); } } } catch (err) { recordProviderFailure("dashscope"); providerHealthCache.dashscope = { status: "timeout", lastCheck: new Date().toISOString(), lastError: err.message, details: null, }; console.error("[providerHealthMonitor] DashScope probe failed:", err.message); } } else { providerHealthCache.dashscope = { status: "no_key", lastCheck: new Date().toISOString(), lastError: "No DashScope API key found in database", details: null, }; } // ── GrsAI ── const grsaiKey = await getGrsaiKey(); if (grsaiKey) { const startedAt = Date.now(); try { const result = await probeGrsai(grsaiKey); recordProbeOutcome("grsai", result, Date.now() - startedAt); const prev = providerHealthCache.grsai.status; providerHealthCache.grsai = { status: result.status, lastCheck: new Date().toISOString(), lastError: result.error, details: result, }; if (!result.ok) { console.error(`[providerHealthMonitor] GrsAI unhealthy: ${result.error}`); if (prev === "healthy" || prev === "unknown") { await notifyAdmin("GrsAI 服务异常", result.error); } } else { if (prev !== "healthy" && prev !== "unknown") { console.log("[providerHealthMonitor] GrsAI recovered"); await notifyAdmin("GrsAI 服务恢复正常", "GrsAI 已恢复正常可用状态"); } } } catch (err) { recordProviderFailure("grsai"); providerHealthCache.grsai = { status: "timeout", lastCheck: new Date().toISOString(), lastError: err.message, details: null, }; console.error("[providerHealthMonitor] GrsAI probe failed:", err.message); } } else { providerHealthCache.grsai = { status: "no_key", lastCheck: new Date().toISOString(), lastError: "No GrsAI API key found in database", details: null, }; } // ── Circuit breaker summary ── providerHealthCache.circuitBreaker = getAllBreakerStats(); // ── Admin low-balance alert ── try { const { rows } = await pool.query( "SELECT id, username, balance_cents FROM users WHERE role = 'admin' AND enabled = 1 AND balance_cents < $1", [LOW_BALANCE_THRESHOLD] ); for (const user of rows) { console.warn(`[providerHealthMonitor] Admin ${user.username} balance low: ${user.balance_cents} cents`); } } catch {} } function startProviderHealthMonitor() { if (timerId) return; runHealthCheck().catch((err) => { console.error("[providerHealthMonitor] initial run failed:", err.message); }); timerId = setInterval(() => { runHealthCheck().catch((err) => { console.error("[providerHealthMonitor] periodic run failed:", err.message); }); }, CHECK_INTERVAL_MS); if (timerId.unref) timerId.unref(); console.log(`[providerHealthMonitor] started (interval=${CHECK_INTERVAL_MS}ms)`); } function stopProviderHealthMonitor() { if (timerId) { clearInterval(timerId); timerId = null; console.log("[providerHealthMonitor] stopped"); } } function getProviderHealthCache() { return providerHealthCache; } module.exports = { startProviderHealthMonitor, stopProviderHealthMonitor, getProviderHealthCache, runHealthCheck, };