fix: harden provider polling recovery
This commit is contained in:
+50
-25
@@ -3,8 +3,17 @@ const express = require('express')
|
||||
const rateLimit = require('express-rate-limit')
|
||||
const cors = require('cors')
|
||||
const helmet = require('helmet')
|
||||
const { startSettlementWorker } = require('./settlementWorker')
|
||||
const { startProviderHealthMonitor } = require('./providerHealthMonitor')
|
||||
const { startSettlementWorker, stopSettlementWorker } = require('./settlementWorker')
|
||||
const { startProviderHealthMonitor, stopProviderHealthMonitor } = require('./providerHealthMonitor')
|
||||
const {
|
||||
startStaleTaskCleanup,
|
||||
startTaskEventListener,
|
||||
startPollerRecovery,
|
||||
stopStaleTaskCleanup,
|
||||
stopTaskEventListener,
|
||||
stopPollerRecovery,
|
||||
stopAllPollers,
|
||||
} = require('./aiTaskWorker')
|
||||
const { ensureDatabase } = require('./dbSetup')
|
||||
const { assertRuntimeSecurityConfig } = require('./securityConfig')
|
||||
const { loadPriceCache } = require('./pricing')
|
||||
@@ -17,6 +26,7 @@ const PORT = Number(process.env.PORT) || 3600
|
||||
const HOST = process.env.HOST || '0.0.0.0'
|
||||
const IS_PRODUCTION = process.env.NODE_ENV === 'production'
|
||||
let server = null
|
||||
let staleLeaseCleanupTimer = null
|
||||
|
||||
// CORS: in production, require explicit allowlist; in dev, allow all with credentials
|
||||
function buildCorsOptions() {
|
||||
@@ -133,18 +143,18 @@ async function main() {
|
||||
|
||||
// Periodic stale lease cleanup (every 5 min)
|
||||
const { cleanStaleLeases } = require('./keyManager')
|
||||
setInterval(() => {
|
||||
staleLeaseCleanupTimer = setInterval(() => {
|
||||
cleanStaleLeases().then((cleaned) => {
|
||||
if (cleaned > 0) console.log(`[cleanup] Released ${cleaned} stale lease(s)`)
|
||||
}).catch((err) => {
|
||||
console.error('[cleanup] error:', err)
|
||||
})
|
||||
}, 5 * 60 * 1000)
|
||||
if (staleLeaseCleanupTimer.unref) staleLeaseCleanupTimer.unref()
|
||||
|
||||
startSettlementWorker()
|
||||
startProviderHealthMonitor()
|
||||
|
||||
const { startStaleTaskCleanup, startTaskEventListener, startPollerRecovery } = require('./aiTaskWorker')
|
||||
await startTaskEventListener()
|
||||
startPollerRecovery()
|
||||
startStaleTaskCleanup()
|
||||
@@ -175,32 +185,47 @@ process.on('uncaughtException', (err) => {
|
||||
// ── Graceful shutdown ───────────────────────────────────────────────────
|
||||
let shuttingDown = false
|
||||
|
||||
function gracefulShutdown(signal) {
|
||||
/**
 * Tears down the background machinery owned by this process.
 *
 * Steps, in order:
 *   1. Cancel the stale-lease cleanup interval if one is registered and
 *      reset its handle to null.
 *   2. Call the settlement worker, provider health monitor, poller recovery
 *      and stale task cleanup stop hooks (their return values are not awaited).
 *   3. Kick off stopping the task event listener and all pollers, then wait
 *      for both with Promise.allSettled, so a rejection from either one does
 *      not reject this function.
 *
 * @returns {Promise<void>}
 */
async function shutdownRuntimeState() {
  const leaseTimer = staleLeaseCleanupTimer
  if (leaseTimer) {
    staleLeaseCleanupTimer = null
    clearInterval(leaseTimer)
  }

  // Order matters only in that it mirrors the explicit call sequence.
  const stopHooks = [
    stopSettlementWorker,
    stopProviderHealthMonitor,
    stopPollerRecovery,
    stopStaleTaskCleanup,
  ]
  for (const stopHook of stopHooks) {
    stopHook()
  }

  // Both stops are started before waiting on either of them.
  const pendingStops = [stopTaskEventListener(), stopAllPollers()]
  await Promise.allSettled(pendingStops)
}
|
||||
|
||||
/**
 * Closes the module-level HTTP server, if it is running.
 *
 * When `server` is set and listening, returns a promise that resolves after
 * the `server.close` callback fires (any argument passed to that callback is
 * ignored). Otherwise returns an already-resolved promise.
 *
 * @returns {Promise<void>}
 */
function closeServer() {
  if (server && server.listening) {
    return new Promise((done) => {
      server.close(() => {
        console.log('[shutdown] Server closed, cleaning up...')
        done()
      })
    })
  }

  // Nothing to close: the server was never started or has already stopped.
  return Promise.resolve()
}
|
||||
|
||||
async function gracefulShutdown(signal) {
|
||||
if (shuttingDown) return
|
||||
shuttingDown = true
|
||||
console.log('[shutdown] Received ' + signal + ', draining connections...')
|
||||
|
||||
if (server && server.listening) {
|
||||
server.close(() => {
|
||||
console.log('[shutdown] Server closed, cleaning up...')
|
||||
const { stopProviderHealthMonitor } = require('./providerHealthMonitor')
|
||||
stopProviderHealthMonitor()
|
||||
const { stopTaskEventListener, stopPollerRecovery } = require('./aiTaskWorker')
|
||||
stopPollerRecovery()
|
||||
void stopTaskEventListener()
|
||||
const { pool } = require('./db')
|
||||
pool.end().then(() => {
|
||||
console.log('[shutdown] Database pool closed')
|
||||
process.exit(0)
|
||||
}).catch(() => process.exit(0))
|
||||
})
|
||||
setTimeout(() => {
|
||||
console.error('[shutdown] Forced exit after timeout')
|
||||
process.exit(1)
|
||||
}, 15000).unref()
|
||||
|
||||
// Force exit after timeout
|
||||
setTimeout(() => {
|
||||
console.error('[shutdown] Forced exit after timeout')
|
||||
process.exit(1)
|
||||
}, 15000).unref()
|
||||
} else {
|
||||
try {
|
||||
await shutdownRuntimeState()
|
||||
await closeServer()
|
||||
const { pool } = require('./db')
|
||||
await pool.end()
|
||||
console.log('[shutdown] Database pool closed')
|
||||
process.exit(0)
|
||||
} catch (err) {
|
||||
console.error('[shutdown] error:', err)
|
||||
process.exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user