fix: Improve Whisper server startup with async polling and reduce logs

- Make server startup async (the toggle call returns immediately and the server starts in the background) to avoid Bun's 10s timeout
- Add frontend polling to detect when server is ready
- Use PowerShell Get-NetTCPConnection for reliable port detection
- Add a `starting` state flag to prevent multiple simultaneous start attempts
- Reduce verbose logging, keep only essential info
- Add frontend/dev-dist/ and nul to .gitignore
This commit is contained in:
2026-02-14 01:02:54 -06:00
parent 9f1e10b8d5
commit 5be0fb91ab
5 changed files with 180 additions and 73 deletions

View File

@@ -50,7 +50,9 @@
"mcp__agent-ui__localhost_4100-notificar", "mcp__agent-ui__localhost_4100-notificar",
"mcp__agent-ui__localhost_4100-enviar_al_panel", "mcp__agent-ui__localhost_4100-enviar_al_panel",
"mcp__agent-ui__localhost_4100-render_html", "mcp__agent-ui__localhost_4100-render_html",
"mcp__agent-ui__localhost_4100-load_vue_component" "mcp__agent-ui__localhost_4100-load_vue_component",
"mcp__agent-ui__localhost_4100-page_refresh",
"WebFetch(domain:docs.anthropic.com)"
] ]
}, },
"enableAllProjectMcpServers": true, "enableAllProjectMcpServers": true,

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@ frontend/node_modules/
.env .env
*.log *.log
dist/ dist/
frontend/dev-dist/
nul

View File

@@ -135,35 +135,59 @@ function initRecognition() {
// ============ WHISPER FUNCTIONS ============ // ============ WHISPER FUNCTIONS ============
async function checkWhisperStatus() { async function checkWhisperStatus(updateLoading = true) {
try { try {
const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`) const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
const data = await res.json() const data = await res.json()
useWhisper.value = data.enabled useWhisper.value = data.enabled
whisperReady.value = data.running whisperReady.value = data.running
if (updateLoading) {
whisperLoading.value = data.starting || false
}
return data return data
} catch { } catch {
useWhisper.value = false useWhisper.value = false
whisperReady.value = false whisperReady.value = false
if (updateLoading) {
whisperLoading.value = false
}
return null return null
} }
} }
async function toggleWhisperMode() { async function toggleWhisperMode() {
// Prevent multiple clicks
if (whisperLoading.value) {
console.log('[Voice] Toggle already in progress, ignoring')
return
}
whisperLoading.value = true whisperLoading.value = true
error.value = '' error.value = ''
// Show immediate feedback
if (!useWhisper.value) {
canvasStore.showNotification('Starting Whisper GPU server...', 'info', 10000)
}
try { try {
const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, { const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
method: 'POST' method: 'POST'
}) })
const data = await res.json() const data = await res.json()
// Server is starting - poll until ready
if (data.starting) {
console.log('[Voice] Server starting, polling for status...')
await pollWhisperStatus()
return
}
useWhisper.value = data.enabled useWhisper.value = data.enabled
whisperReady.value = data.running whisperReady.value = data.running
if (data.enabled) { if (data.enabled) {
canvasStore.showNotification('Whisper GPU enabled', 'success') canvasStore.showNotification('Whisper GPU ready!', 'success')
connectWhisperSocket() connectWhisperSocket()
} else { } else {
canvasStore.showNotification('Using Web Speech API', 'info') canvasStore.showNotification('Using Web Speech API', 'info')
@@ -171,12 +195,61 @@ async function toggleWhisperMode() {
} }
} catch (e: any) { } catch (e: any) {
error.value = 'Failed to toggle Whisper' error.value = 'Failed to toggle Whisper'
canvasStore.showNotification('Error starting Whisper server', 'error')
console.error('[Voice] Whisper toggle error:', e) console.error('[Voice] Whisper toggle error:', e)
} finally { } finally {
whisperLoading.value = false whisperLoading.value = false
} }
} }
// Poll server status until ready or failed
async function pollWhisperStatus() {
const maxAttempts = 60 // 2 minutes max
let attempts = 0
while (attempts < maxAttempts) {
await new Promise(resolve => setTimeout(resolve, 2000))
attempts++
try {
const status = await checkWhisperStatus(false) // Don't update loading state
if (!status) {
console.log('[Voice] Failed to get status')
continue
}
// Still starting
if (status.starting) {
console.log(`[Voice] Still starting... (${attempts * 2}s)`)
continue
}
// Started successfully
if (status.running && status.enabled) {
console.log('[Voice] Server ready!')
canvasStore.showNotification('Whisper GPU ready!', 'success')
connectWhisperSocket()
whisperLoading.value = false
return
}
// Failed to start
console.log('[Voice] Server failed to start')
canvasStore.showNotification('Whisper server failed to start', 'error')
whisperLoading.value = false
return
} catch (e) {
console.error('[Voice] Polling error:', e)
}
}
// Timeout
canvasStore.showNotification('Whisper server timeout', 'error')
whisperLoading.value = false
}
function connectWhisperSocket() { function connectWhisperSocket() {
if (whisperSocket?.readyState === WebSocket.OPEN) return if (whisperSocket?.readyState === WebSocket.OPEN) return
@@ -671,8 +744,13 @@ onMounted(async () => {
document.addEventListener('keyup', handleKeyUp, { capture: true }) document.addEventListener('keyup', handleKeyUp, { capture: true })
// Check Whisper status on mount // Check Whisper status on mount
await checkWhisperStatus() const status = await checkWhisperStatus()
if (useWhisper.value) {
// If server is starting (page was reloaded during startup), continue polling
if (status?.starting) {
console.log('[Voice] Server is starting, resuming polling...')
pollWhisperStatus()
} else if (useWhisper.value) {
connectWhisperSocket() connectWhisperSocket()
} }
}) })
@@ -743,8 +821,9 @@ defineExpose({
<button <button
class="whisper-toggle" class="whisper-toggle"
:class="{ active: useWhisper, loading: whisperLoading }" :class="{ active: useWhisper, loading: whisperLoading }"
:disabled="whisperLoading"
@click.stop="toggleWhisperMode" @click.stop="toggleWhisperMode"
:title="useWhisper ? 'Using Whisper GPU - Click to use Web Speech' : 'Using Web Speech - Click to use Whisper GPU'" :title="whisperLoading ? 'Starting Whisper server...' : (useWhisper ? 'Using Whisper GPU - Click to use Web Speech' : 'Using Web Speech - Click to use Whisper GPU')"
> >
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> <svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<rect x="4" y="4" width="16" height="16" rx="2"/> <rect x="4" y="4" width="16" height="16" rx="2"/>
@@ -917,10 +996,15 @@ defineExpose({
transition: all 0.15s; transition: all 0.15s;
} }
.whisper-toggle:hover { .whisper-toggle:hover:not(:disabled) {
background: rgba(255, 255, 255, 0.5); background: rgba(255, 255, 255, 0.5);
} }
.whisper-toggle:disabled {
cursor: not-allowed;
opacity: 0.6;
}
.whisper-toggle.active { .whisper-toggle.active {
background: linear-gradient(180deg, #10b981 0%, #059669 100%); background: linear-gradient(180deg, #10b981 0%, #059669 100%);
border-color: #047857; border-color: #047857;

View File

@@ -12,6 +12,7 @@ const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
interface WhisperState { interface WhisperState {
enabled: boolean enabled: boolean
running: boolean running: boolean
starting: boolean // Prevents multiple simultaneous start attempts
process: Subprocess | null process: Subprocess | null
model: string model: string
device: string device: string
@@ -20,8 +21,9 @@ interface WhisperState {
const state: WhisperState = { const state: WhisperState = {
enabled: false, enabled: false,
running: false, running: false,
starting: false,
process: null, process: null,
model: 'medium', model: 'large-v3',
device: 'cuda' device: 'cuda'
} }
@@ -46,89 +48,104 @@ async function killProcessOnPort(port: number): Promise<void> {
* Start the Whisper Python server * Start the Whisper Python server
*/ */
export async function startWhisperServer(): Promise<boolean> { export async function startWhisperServer(): Promise<boolean> {
// Prevent multiple simultaneous start attempts
if (state.starting) {
return false
}
if (state.running && state.process) { if (state.running && state.process) {
console.log('[Whisper] Server already running')
return true return true
} }
console.log('[Whisper] ====== STARTING (v3) ======') state.starting = true
console.log('[Whisper] Script:', WHISPER_SCRIPT) console.log(`[Whisper] Starting (${state.model})...`)
// Kill any existing process on the port // Kill any existing process on the port
console.log('[Whisper] Cleaning up port', WHISPER_PORT)
await killProcessOnPort(WHISPER_PORT) await killProcessOnPort(WHISPER_PORT)
try { try {
// Use Bun.spawn with inherit to show logs directly in console // Use Bun.spawn with inherit to show logs directly in console
const proc = Bun.spawn(['python', WHISPER_SCRIPT], { // -u flag disables Python output buffering for real-time logs
const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
cwd: join(import.meta.dir, '..'), cwd: join(import.meta.dir, '..'),
stdout: 'inherit', stdout: 'inherit',
stderr: 'inherit', stderr: 'inherit',
env: { ...process.env } env: { ...process.env, PYTHONUNBUFFERED: '1' }
}) })
state.process = proc state.process = proc
// Wait a bit for the server to start, then check if port is listening // Wait a bit for the server to start
await new Promise(resolve => setTimeout(resolve, 3000)) await new Promise(resolve => setTimeout(resolve, 2000))
// Check if process is still running // Check if process is still running
if (proc.exitCode !== null) { if (proc.exitCode !== null) {
console.error('[Whisper] Process exited with code:', proc.exitCode) console.error('[Whisper] Process exited with code:', proc.exitCode)
state.process = null state.process = null
state.starting = false
return false return false
} }
// Check if port is listening (simple TCP check) // Check if WebSocket is ready
const isListening = await checkPort(WHISPER_PORT) const isListening = await checkPort(WHISPER_PORT)
if (isListening) { if (isListening) {
console.log('[Whisper] Server started successfully on port', WHISPER_PORT) console.log('[Whisper] Ready')
state.running = true state.running = true
state.enabled = true state.enabled = true
state.starting = false
return true return true
} }
// Wait more if model is still loading (up to 90 seconds total) // Wait more if model is still loading (up to 120 seconds total for large models)
console.log('[Whisper] Waiting for model to load...') for (let i = 0; i < 40; i++) {
for (let i = 0; i < 30; i++) {
await new Promise(resolve => setTimeout(resolve, 3000)) await new Promise(resolve => setTimeout(resolve, 3000))
if (proc.exitCode !== null) { if (proc.exitCode !== null) {
console.error('[Whisper] Process died while loading') console.error('[Whisper] Process died')
state.process = null state.process = null
state.starting = false
return false return false
} }
if (await checkPort(WHISPER_PORT)) { const ready = await checkPort(WHISPER_PORT)
console.log('[Whisper] Server ready!') if (ready) {
console.log('[Whisper] Ready')
state.running = true state.running = true
state.enabled = true state.enabled = true
state.starting = false
return true return true
} }
} }
console.log('[Whisper] Timeout waiting for server') console.error('[Whisper] Timeout (120s)')
state.starting = false
return false return false
} catch (err: any) { } catch (err: any) {
console.error('[Whisper] Failed to start:', err.message) console.error('[Whisper] Error:', err.message)
state.process = null state.process = null
state.starting = false
return false return false
} }
} }
/** /**
* Check if a port is listening using PowerShell * Check if Whisper WebSocket is ready using PowerShell
*/ */
async function checkPort(port: number): Promise<boolean> { async function checkPort(port: number): Promise<boolean> {
try { try {
const proc = Bun.spawn(['powershell', '-Command', const proc = Bun.spawn(['powershell', '-NoProfile', '-Command',
`if (Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue) { exit 0 } else { exit 1 }` `$c = Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue; if ($c) { Write-Output 'LISTENING' }`
], { stdout: 'ignore', stderr: 'ignore' }) ], {
stdout: 'pipe',
stderr: 'ignore'
})
const exitCode = await proc.exited const output = await new Response(proc.stdout).text()
return exitCode === 0 await proc.exited
return output.trim() === 'LISTENING'
} catch { } catch {
return false return false
} }
@@ -139,35 +156,43 @@ async function checkPort(port: number): Promise<boolean> {
*/ */
export function stopWhisperServer(): boolean { export function stopWhisperServer(): boolean {
if (!state.process) { if (!state.process) {
console.log('[Whisper] No server running')
return true return true
} }
console.log('[Whisper] Stopping server...')
try { try {
state.process.kill() state.process.kill()
state.process = null state.process = null
state.running = false state.running = false
state.enabled = false state.enabled = false
console.log('[Whisper] Server stopped') console.log('[Whisper] Stopped')
return true return true
} catch (err) { } catch (err) {
console.error('[Whisper] Error stopping server:', err) console.error('[Whisper] Stop error:', err)
return false return false
} }
} }
/** /**
* Toggle Whisper server on/off * Toggle Whisper server on/off (async - returns immediately when starting)
*/ */
export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean }> { export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
// Prevent toggle while starting
if (state.starting) {
return { enabled: false, success: false, starting: true }
}
if (state.enabled && state.running) { if (state.enabled && state.running) {
const success = stopWhisperServer() const success = stopWhisperServer()
return { enabled: false, success } return { enabled: false, success, starting: false }
} else { } else {
const success = await startWhisperServer() // Start server in background - don't await
return { enabled: success, success } startWhisperServer().catch(err => {
console.error('[Whisper] Start error:', err)
state.starting = false
})
// Return immediately - frontend will poll for status
return { enabled: false, success: true, starting: true }
} }
} }
@@ -177,26 +202,30 @@ export async function toggleWhisperServer(): Promise<{ enabled: boolean; success
export async function getWhisperState(): Promise<{ export async function getWhisperState(): Promise<{
enabled: boolean enabled: boolean
running: boolean running: boolean
starting: boolean
port: number port: number
model: string model: string
device: string device: string
}> { }> {
// Check if port is actually listening // Check if port is actually listening (skip if starting to avoid interference)
const isListening = await checkPort(WHISPER_PORT) if (!state.starting) {
const isListening = await checkPort(WHISPER_PORT)
// Sync state with reality // Sync state with reality
if (isListening && !state.running) { if (isListening && !state.running) {
state.running = true state.running = true
state.enabled = true state.enabled = true
} else if (!isListening && state.running) { } else if (!isListening && state.running) {
state.running = false state.running = false
state.enabled = false state.enabled = false
state.process = null state.process = null
}
} }
return { return {
enabled: state.enabled, enabled: state.enabled,
running: state.running, running: state.running,
starting: state.starting,
port: WHISPER_PORT, port: WHISPER_PORT,
model: state.model, model: state.model,
device: state.device device: state.device

View File

@@ -79,10 +79,13 @@ def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes
# Configuration # Configuration
HOST = "localhost" HOST = "localhost"
PORT = 4104 PORT = 4104
MODEL_SIZE = "large-v3" # tiny, base, small, medium, large-v2, large-v3 MODEL_SIZE = "large-v3" # Best standard model for Spanish
DEVICE = "cuda" # cuda or cpu DEVICE = "cuda" # cuda or cpu
COMPUTE_TYPE = "float16" # float16 for GPU, int8 for CPU COMPUTE_TYPE = "float16" # float16 for GPU, int8 for CPU
# Model display name (extract from path if needed)
MODEL_NAME = MODEL_SIZE.split("/")[-1] if "/" in MODEL_SIZE else MODEL_SIZE
# Spanish context prompt to improve accuracy (Honduras Spanish + tech context) # Spanish context prompt to improve accuracy (Honduras Spanish + tech context)
INITIAL_PROMPT = """Transcripción en español hondureño de un desarrollador de software. INITIAL_PROMPT = """Transcripción en español hondureño de un desarrollador de software.
Contexto: programación, TypeScript, Vue, Python, comandos de terminal, código. Contexto: programación, TypeScript, Vue, Python, comandos de terminal, código.
@@ -109,7 +112,7 @@ async def load_model():
return model return model
model_loading = True model_loading = True
print(f"[Whisper] Loading model '{MODEL_SIZE}' on {DEVICE}...") print(f"[Whisper] Loading model '{MODEL_NAME}' on {DEVICE}...")
try: try:
# Load model - this downloads on first run # Load model - this downloads on first run
@@ -140,15 +143,11 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
if model is None: if model is None:
return {"error": "Model not loaded"} return {"error": "Model not loaded"}
print(f"[Whisper] Received {len(audio_data)} bytes of audio data")
# Convert WebM to WAV if needed # Convert WebM to WAV if needed
if is_webm: if is_webm:
print("[Whisper] Converting WebM to WAV...")
wav_data = convert_audio_to_wav(audio_data, "webm") wav_data = convert_audio_to_wav(audio_data, "webm")
if wav_data is None: if wav_data is None:
return {"error": "Failed to convert audio format. Ensure ffmpeg is installed."} return {"error": "Failed to convert audio format. Ensure ffmpeg is installed."}
print(f"[Whisper] Converted to {len(wav_data)} bytes WAV")
else: else:
wav_data = audio_data wav_data = audio_data
@@ -159,7 +158,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
try: try:
# Transcribe with optimized parameters # Transcribe with optimized parameters
print(f"[Whisper] Transcribing {temp_path}...")
segments, info = model.transcribe( segments, info = model.transcribe(
temp_path, temp_path,
language=language, language=language,
@@ -191,7 +189,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
"text": segment.text "text": segment.text
}) })
print(f"[Whisper] Transcription result: '{text.strip()}'")
return { return {
"success": True, "success": True,
@@ -201,7 +198,7 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
"duration": info.duration, "duration": info.duration,
"segments": segments_list, "segments": segments_list,
"engine": "whisper-gpu", "engine": "whisper-gpu",
"model": MODEL_SIZE, "model": MODEL_NAME,
"device": DEVICE "device": DEVICE
} }
@@ -218,7 +215,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
async def handle_client(websocket): async def handle_client(websocket):
"""Handle WebSocket client connection""" """Handle WebSocket client connection"""
print(f"[Whisper] Client connected")
# Ensure model is loaded # Ensure model is loaded
await load_model() await load_model()
@@ -226,7 +222,7 @@ async def handle_client(websocket):
# Send ready message # Send ready message
await websocket.send(json.dumps({ await websocket.send(json.dumps({
"type": "ready", "type": "ready",
"model": MODEL_SIZE, "model": MODEL_NAME,
"device": DEVICE "device": DEVICE
})) }))
@@ -234,8 +230,6 @@ async def handle_client(websocket):
async for message in websocket: async for message in websocket:
if isinstance(message, bytes): if isinstance(message, bytes):
# Binary audio data (likely WebM format from browser) # Binary audio data (likely WebM format from browser)
print(f"[Whisper] Received {len(message)} bytes of binary audio")
# Transcribe in thread pool to not block # Transcribe in thread pool to not block
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
result = await loop.run_in_executor( result = await loop.run_in_executor(
@@ -260,8 +254,6 @@ async def handle_client(websocket):
language = cmd.get("language", "es") language = cmd.get("language", "es")
is_partial = cmd.get("partial", False) is_partial = cmd.get("partial", False)
print(f"[Whisper] Transcribe request: {len(audio_data)} bytes, lang={language}, partial={is_partial}")
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
result = await loop.run_in_executor( result = await loop.run_in_executor(
None, None,
@@ -283,7 +275,7 @@ async def handle_client(websocket):
elif cmd.get("type") == "status": elif cmd.get("type") == "status":
await websocket.send(json.dumps({ await websocket.send(json.dumps({
"type": "status", "type": "status",
"model": MODEL_SIZE, "model": MODEL_NAME,
"device": DEVICE, "device": DEVICE,
"ready": model is not None "ready": model is not None
})) }))
@@ -295,21 +287,19 @@ async def handle_client(websocket):
})) }))
except websockets.exceptions.ConnectionClosed: except websockets.exceptions.ConnectionClosed:
print("[Whisper] Client disconnected") pass
except Exception as e: except Exception as e:
print(f"[Whisper] Error: {e}") print(f"[Whisper] Error: {e}")
async def main(): async def main():
"""Start WebSocket server""" """Start WebSocket server"""
print(f"[Whisper] Starting server on ws://{HOST}:{PORT}") print(f"[Whisper] Model: {MODEL_NAME} | Device: {DEVICE} | Port: {PORT}")
print(f"[Whisper] Model: {MODEL_SIZE}, Device: {DEVICE}")
# Pre-load model # Pre-load model
print("[Whisper] Pre-loading model...")
await load_model() await load_model()
async with websockets.serve(handle_client, HOST, PORT): async with websockets.serve(handle_client, HOST, PORT):
print(f"[Whisper] Server ready! Listening on ws://{HOST}:{PORT}") print(f"[Whisper] Ready")
await asyncio.Future() # Run forever await asyncio.Future() # Run forever
if __name__ == "__main__": if __name__ == "__main__":