fix: harden provider polling recovery

This commit is contained in:
2026-06-09 11:32:53 +08:00
parent 1166811ee4
commit f9da506017
9 changed files with 539 additions and 69 deletions
+50 -25
View File
@@ -3,8 +3,17 @@ const express = require('express')
const rateLimit = require('express-rate-limit')
const cors = require('cors')
const helmet = require('helmet')
const { startSettlementWorker } = require('./settlementWorker')
const { startProviderHealthMonitor } = require('./providerHealthMonitor')
const { startSettlementWorker, stopSettlementWorker } = require('./settlementWorker')
const { startProviderHealthMonitor, stopProviderHealthMonitor } = require('./providerHealthMonitor')
const {
startStaleTaskCleanup,
startTaskEventListener,
startPollerRecovery,
stopStaleTaskCleanup,
stopTaskEventListener,
stopPollerRecovery,
stopAllPollers,
} = require('./aiTaskWorker')
const { ensureDatabase } = require('./dbSetup')
const { assertRuntimeSecurityConfig } = require('./securityConfig')
const { loadPriceCache } = require('./pricing')
@@ -17,6 +26,7 @@ const PORT = Number(process.env.PORT) || 3600
const HOST = process.env.HOST || '0.0.0.0'
const IS_PRODUCTION = process.env.NODE_ENV === 'production'
let server = null
let staleLeaseCleanupTimer = null
// CORS: in production, require explicit allowlist; in dev, allow all with credentials
function buildCorsOptions() {
@@ -133,18 +143,18 @@ async function main() {
// Periodic stale lease cleanup (every 5 min)
const { cleanStaleLeases } = require('./keyManager')
setInterval(() => {
staleLeaseCleanupTimer = setInterval(() => {
cleanStaleLeases().then((cleaned) => {
if (cleaned > 0) console.log(`[cleanup] Released ${cleaned} stale lease(s)`)
}).catch((err) => {
console.error('[cleanup] error:', err)
})
}, 5 * 60 * 1000)
if (staleLeaseCleanupTimer.unref) staleLeaseCleanupTimer.unref()
startSettlementWorker()
startProviderHealthMonitor()
const { startStaleTaskCleanup, startTaskEventListener, startPollerRecovery } = require('./aiTaskWorker')
await startTaskEventListener()
startPollerRecovery()
startStaleTaskCleanup()
@@ -175,32 +185,47 @@ process.on('uncaughtException', (err) => {
// ── Graceful shutdown ───────────────────────────────────────────────────
let shuttingDown = false
function gracefulShutdown(signal) {
/**
 * Stops the background components this module started, as the first phase
 * of shutting the process down.
 *
 * In order, it:
 *  1. clears the stale-lease cleanup interval (if one is set) and resets
 *     its handle to `null`;
 *  2. calls the stop functions of the settlement worker, the provider
 *     health monitor, poller recovery and stale-task cleanup (their return
 *     values are not awaited);
 *  3. calls the stop functions of the task event listener and of all
 *     pollers, then waits until both have settled.
 *
 * Step 3 is best-effort: a failure in one stopper neither prevents the
 * other from being called nor rejects this function. Each failure is logged
 * so that an unclean stop is visible in the shutdown output.
 *
 * @returns {Promise<void>} Settles once both awaited stoppers have settled.
 */
async function shutdownRuntimeState() {
  if (staleLeaseCleanupTimer) {
    clearInterval(staleLeaseCleanupTimer)
    staleLeaseCleanupTimer = null
  }

  stopSettlementWorker()
  stopProviderHealthMonitor()
  stopPollerRecovery()
  stopStaleTaskCleanup()

  // Label/stopper pairs; the label is only used when reporting a failure.
  const asyncStoppers = [
    ['task event listener', stopTaskEventListener],
    ['pollers', stopAllPollers],
  ]

  // Calling each stopper inside an async wrapper turns a synchronous throw
  // into a rejection, so one broken stopper cannot skip the next one.
  const outcomes = await Promise.allSettled(
    asyncStoppers.map(async ([, stop]) => stop())
  )

  outcomes.forEach((outcome, index) => {
    if (outcome.status === 'rejected') {
      const [label] = asyncStoppers[index]
      console.error(`[shutdown] Failed to stop ${label}:`, outcome.reason)
    }
  })
}
/**
 * Asks the module-level `server` to stop and reports when it has done so.
 *
 * When `server` is unset or is not listening there is nothing to close, and
 * an already-resolved promise is returned. Otherwise `server.close()` is
 * called and the returned promise resolves once its callback fires, after
 * a shutdown message has been logged. Arguments passed to that callback
 * are not inspected.
 *
 * @returns {Promise<void>} Resolves with no value once the server has closed.
 */
function closeServer() {
  const isListening = Boolean(server) && server.listening
  if (!isListening) {
    return Promise.resolve()
  }

  return new Promise((done) => {
    const onClosed = () => {
      console.log('[shutdown] Server closed, cleaning up...')
      done()
    }
    server.close(onClosed)
  })
}
async function gracefulShutdown(signal) {
if (shuttingDown) return
shuttingDown = true
console.log('[shutdown] Received ' + signal + ', draining connections...')
if (server && server.listening) {
server.close(() => {
console.log('[shutdown] Server closed, cleaning up...')
const { stopProviderHealthMonitor } = require('./providerHealthMonitor')
stopProviderHealthMonitor()
const { stopTaskEventListener, stopPollerRecovery } = require('./aiTaskWorker')
stopPollerRecovery()
void stopTaskEventListener()
const { pool } = require('./db')
pool.end().then(() => {
console.log('[shutdown] Database pool closed')
process.exit(0)
}).catch(() => process.exit(0))
})
setTimeout(() => {
console.error('[shutdown] Forced exit after timeout')
process.exit(1)
}, 15000).unref()
// Force exit after timeout
setTimeout(() => {
console.error('[shutdown] Forced exit after timeout')
process.exit(1)
}, 15000).unref()
} else {
try {
await shutdownRuntimeState()
await closeServer()
const { pool } = require('./db')
await pool.end()
console.log('[shutdown] Database pool closed')
process.exit(0)
} catch (err) {
console.error('[shutdown] error:', err)
process.exit(0)
}
}