Files
agent-ui/server/services/whisper.ts
josedario87 5be0fb91ab fix: Improve Whisper server startup with async polling and reduce logs
- Make server startup async to avoid Bun's 10s timeout
- Add frontend polling to detect when server is ready
- Use PowerShell Get-NetTCPConnection for reliable port detection
- Add starting state to prevent multiple simultaneous starts
- Reduce verbose logging, keep only essential info
- Add dev-dist and nul to gitignore
2026-02-14 01:03:02 -06:00

248 lines
6.2 KiB
TypeScript

/**
* Whisper Service - Manages the Python Whisper server process
* Provides GPU-accelerated speech-to-text as an alternative to Web Speech API
*/
import { join } from 'path'
import { Subprocess } from 'bun'
// Local TCP port used for the Whisper server: probed by checkPort, cleared by
// killProcessOnPort, and reported to callers via getWhisperState / getWhisperPort.
const WHISPER_PORT = 4104
// Path of the Python entry point: whisper_server.py, one directory above this module's
// directory (import.meta.dir). Passed to `python -u` when the server is spawned.
const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
/**
 * In-memory bookkeeping for the Whisper server process managed by this module.
 */
interface WhisperState {
// Set and cleared together with `running` everywhere in this module
enabled: boolean
// True once the Whisper port has been observed listening; cleared on stop or when the port closes
running: boolean
starting: boolean // Prevents multiple simultaneous start attempts
// Handle returned by Bun.spawn for the Python process, or null when no process is tracked
process: Subprocess | null
// Model name; in this file it is only logged on start and reported by getWhisperState
// NOTE(review): not passed to the Python script here - presumably configured there; confirm
model: string
// Device name; in this file it is only reported by getWhisperState
device: string
}
// Module-level singleton holding the current server status. It starts fully "off"
// (not enabled, not running, not starting, no tracked process) and is mutated in
// place by the start/stop/toggle/status functions below.
const state: WhisperState = {
enabled: false,
running: false,
starting: false,
process: null,
model: 'large-v3',
device: 'cuda'
}
/**
 * Force-stop whatever process currently owns the given local TCP port.
 *
 * Runs a PowerShell pipeline that looks up the connections on the port with
 * `Get-NetTCPConnection` and passes each owning process id to `Stop-Process -Force`.
 * This is strictly best effort: PowerShell errors are silenced, the command's
 * output is discarded, and any exception raised while spawning is swallowed so
 * that a failed cleanup never blocks a server start.
 *
 * @param port - Local TCP port whose owning process(es) should be terminated.
 * @returns A promise that resolves after the PowerShell command has exited and a
 *          fixed one-second grace period has passed, or right away if spawning failed.
 */
async function killProcessOnPort(port: number): Promise<void> {
  const pipeline =
    `Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ` +
    'ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }'

  try {
    // -NoProfile (as checkPort already does) skips user profile scripts, which
    // would otherwise slow down or interfere with this short-lived command.
    const killer = Bun.spawn(['powershell', '-NoProfile', '-Command', pipeline], {
      stdout: 'ignore',
      stderr: 'ignore'
    })
    await killer.exited

    // Wait a moment for the port to be released
    await new Promise(resolve => setTimeout(resolve, 1000))
  } catch {
    // Deliberately ignored: cleanup is best effort
  }
}
/**
 * Start the Whisper Python server and wait until its port is listening.
 *
 * Sequence:
 *  1. Bail out if a start is already in progress (returns `false`) or if a
 *     tracked server is already running (returns `true`).
 *  2. Kill a still-alive process left over from an earlier attempt that timed
 *     out, then kill whatever else owns the Whisper port.
 *  3. Spawn `python -u whisper_server.py` with inherited stdout/stderr so the
 *     Python logs appear directly in this console.
 *  4. Probe after 2 seconds, then every 3 seconds for up to 40 more probes
 *     (the "120s" in the timeout message), checking each time that the process
 *     is still alive and whether the port is listening.
 *
 * On timeout the process is left running and tracked in `state.process`, so a
 * slow model load can still be picked up later by `getWhisperState`; step 2
 * guarantees it is not orphaned if start is called again.
 *
 * `state.starting` is always cleared on exit, whichever path is taken.
 *
 * @returns `true` when the server is (or already was) ready; `false` when a
 *          start is already in progress, the process exited, the wait timed
 *          out, or an error was thrown.
 */
export async function startWhisperServer(): Promise<boolean> {
  // Prevent multiple simultaneous start attempts
  if (state.starting) {
    return false
  }
  if (state.running && state.process) {
    return true
  }

  state.starting = true
  try {
    console.log(`[Whisper] Starting (${state.model})...`)

    // A previous attempt that timed out leaves its process alive and tracked.
    // Kill it before spawning a new one, otherwise the handle is overwritten
    // and two Python processes end up loading the model at the same time.
    const stale = state.process
    if (stale && stale.exitCode === null) {
      stale.kill()
    }
    state.process = null

    // Kill any existing process on the port
    await killProcessOnPort(WHISPER_PORT)

    // stdout/stderr are inherited to show logs directly in the console;
    // -u and PYTHONUNBUFFERED disable Python output buffering for real-time logs
    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
      cwd: join(import.meta.dir, '..'),
      stdout: 'inherit',
      stderr: 'inherit',
      env: { ...process.env, PYTHONUNBUFFERED: '1' }
    })
    state.process = proc

    const followUpProbes = 40
    for (let attempt = 0; attempt <= followUpProbes; attempt++) {
      // Short first wait for fast starts, then a slower cadence while the model loads
      const waitMs = attempt === 0 ? 2000 : 3000
      await new Promise(resolve => setTimeout(resolve, waitMs))

      // Check if process is still running
      if (proc.exitCode !== null) {
        if (attempt === 0) {
          console.error('[Whisper] Process exited with code:', proc.exitCode)
        } else {
          console.error('[Whisper] Process died')
        }
        state.process = null
        return false
      }

      // Check if the server port is ready
      if (await checkPort(WHISPER_PORT)) {
        console.log('[Whisper] Ready')
        state.running = true
        state.enabled = true
        return true
      }
    }

    console.error('[Whisper] Timeout (120s)')
    return false
  } catch (err: unknown) {
    console.error('[Whisper] Error:', err instanceof Error ? err.message : err)
    state.process = null
    return false
  } finally {
    // Single exit point for the in-progress flag so no path can leave it stuck
    state.starting = false
  }
}
/**
 * Report whether something is listening on the given local TCP port.
 *
 * Spawns PowerShell, asks `Get-NetTCPConnection` for connections on the port in
 * the `Listen` state, and has the script print the marker `LISTENING` when at
 * least one exists. Any failure while spawning or reading the output is
 * treated as "not listening".
 *
 * @param port - Local TCP port to probe.
 * @returns `true` only when the command's trimmed output is exactly `LISTENING`.
 */
async function checkPort(port: number): Promise<boolean> {
  const script = `$c = Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue; if ($c) { Write-Output 'LISTENING' }`

  try {
    const probe = Bun.spawn(['powershell', '-NoProfile', '-Command', script], {
      stdout: 'pipe',
      stderr: 'ignore'
    })

    // Drain stdout fully before waiting for the process to exit
    const printed = await new Response(probe.stdout).text()
    await probe.exited

    const marker = printed.trim()
    return marker === 'LISTENING'
  } catch {
    return false
  }
}
/**
 * Stop the Whisper server.
 *
 * Two situations are handled:
 *  - A process spawned by this module is tracked in `state.process`: it is
 *    killed and the state is reset.
 *  - No process is tracked but the state says the server is on. This happens
 *    when `getWhisperState` found the port listening and marked the server as
 *    running without owning a handle. The flags are reset and the owner of the
 *    port is killed in the background; without this the server could be
 *    reported as stopped while it keeps running and can never be switched off.
 *
 * @returns `true` when the server is stopped, a stop was initiated, or nothing
 *          needed stopping; `false` if killing the tracked process threw.
 */
export function stopWhisperServer(): boolean {
  const tracked = state.process

  if (!tracked) {
    if (state.running || state.enabled) {
      state.running = false
      state.enabled = false
      // Skip the port kill while a start is in progress so a server that is
      // just being launched is not taken down by accident.
      if (!state.starting) {
        // Fire and forget: killProcessOnPort swallows its own errors
        void killProcessOnPort(WHISPER_PORT)
      }
      console.log('[Whisper] Stopped')
    }
    return true
  }

  try {
    tracked.kill()
    state.process = null
    state.running = false
    state.enabled = false
    console.log('[Whisper] Stopped')
    return true
  } catch (err) {
    console.error('[Whisper] Stop error:', err)
    return false
  }
}
/**
 * Flip the Whisper server on or off.
 *
 * - While a start is in progress the call is rejected (`success: false`).
 * - When the server is enabled and running it is stopped synchronously.
 * - Otherwise a start is launched in the background and the call returns at
 *   once with `starting: true`; callers are expected to poll the status to
 *   find out when the server is ready.
 *
 * @returns `enabled` is always `false` in the immediate response; `success`
 *          tells whether the request was accepted (or the stop succeeded), and
 *          `starting` whether a start is in progress.
 */
export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
  // Reject toggles while a start is already under way
  if (state.starting) {
    return { enabled: false, success: false, starting: true }
  }

  const currentlyOn = state.enabled && state.running
  if (currentlyOn) {
    return { enabled: false, success: stopWhisperServer(), starting: false }
  }

  // Launch the start without awaiting it; failures are logged and the
  // in-progress flag is cleared so later toggles are not blocked.
  startWhisperServer().catch(err => {
    console.error('[Whisper] Start error:', err)
    state.starting = false
  })

  // Respond immediately - readiness is discovered by polling
  return { enabled: false, success: true, starting: true }
}
/**
 * Return a snapshot of the Whisper state, reconciled with the real port status.
 *
 * Unless a start is in progress (probing is skipped then to avoid interfering
 * with it), the Whisper port is probed and the `running` / `enabled` flags are
 * brought in line with the result. When the port turns out to be closed while
 * the state said "running", the tracked process handle is dropped as well.
 *
 * @returns The current flags plus the fixed port and the configured model and device.
 */
export async function getWhisperState(): Promise<{
  enabled: boolean
  running: boolean
  starting: boolean
  port: number
  model: string
  device: string
}> {
  if (!state.starting) {
    const portOpen = await checkPort(WHISPER_PORT)

    // Only touch the state when it disagrees with what the port probe found
    if (portOpen !== state.running) {
      state.running = portOpen
      state.enabled = portOpen
      if (!portOpen) {
        state.process = null
      }
    }
  }

  const { enabled, running, starting, model, device } = state
  return { enabled, running, starting, port: WHISPER_PORT, model, device }
}
/**
 * Tell whether Whisper is currently usable.
 *
 * Reads the in-memory flags only; no port probe is performed.
 *
 * @returns `true` when the state is both enabled and running.
 */
export function isWhisperEnabled(): boolean {
  const { enabled, running } = state
  return enabled && running
}
// WebSocket server for Whisper (proxies to Python server or handles directly)
// NOTE(review): this variable is never read or assigned in the visible part of
// this file - confirm whether it is still needed.
let whisperWsServer: any = null
/**
 * Return the fixed TCP port used for the Whisper server.
 *
 * @returns The value of WHISPER_PORT.
 */
export function getWhisperPort(): number {
return WHISPER_PORT
}